From 4c86e6aa6750e28f1e0fbdd9cc3ab1ff110af85b Mon Sep 17 00:00:00 2001 From: Michael Reuscher <michael.reuscher@desy.de> Date: Mon, 24 Jul 2023 12:13:11 +0200 Subject: [PATCH] bullshit in DataFrameCreator.py but i have to save --- simrunner/simrunner.py | 1 - visualizer/visualizer/DataFrameCreator.py | 86 ++++++++++++++++------- visualizer/visualizer/SeabornPlotter.py | 60 ++++++++++------ visualizer/visualizer/main.py | 4 +- 4 files changed, 101 insertions(+), 50 deletions(-) diff --git a/simrunner/simrunner.py b/simrunner/simrunner.py index a6de2c7..851c2cd 100644 --- a/simrunner/simrunner.py +++ b/simrunner/simrunner.py @@ -139,4 +139,3 @@ event_list = create_event_list() # Run detectorSimulation run_sim(event_list, data_path, simulation_path, stat_path) - diff --git a/visualizer/visualizer/DataFrameCreator.py b/visualizer/visualizer/DataFrameCreator.py index 50d3e94..4eaabcc 100644 --- a/visualizer/visualizer/DataFrameCreator.py +++ b/visualizer/visualizer/DataFrameCreator.py @@ -6,39 +6,73 @@ class DataFrameCreator: def __init__(self, f_path): self.folder_path = f_path - def create_dataframes_data(self): - dfs = {} - for root, _, folder in os.walk(self.folder_path): - if os.path.basename(root) == "statistics": - print("this was statistic") - continue - for filename in folder: - if filename.endswith('.dat'): - file_path = os.path.join(root, filename) - dataframe_name = os.path.splitext(filename)[0] - dataframe_type = dataframe_name.split('-')[1] - dataframe = pd.read_csv(file_path, delimiter=" ", header=0) - - if dataframe_type in dfs: - inner_dict = dfs[dataframe_type] - if dataframe_name in inner_dict: - # DataFrame hinzufügen - inner_dict[dataframe_name] = pd.concat([inner_dict[dataframe_name], dataframe], - ignore_index=True) - else: - inner_dict[dataframe_name] = dataframe - else: - dfs[dataframe_type] = { - dataframe_name: dataframe - } - + @staticmethod + def _merge_dataframes(dfs): merged_dfs = {} for outer_key in dfs: merged_dataframe = pd.concat(dfs[outer_key].values(), keys=dfs[outer_key].keys()) merged_dataframe_sorted = merged_dataframe.sort_index() merged_dfs[outer_key] = merged_dataframe_sorted + return merged_dfs + def read_data(self, root, _, folder): + dataframe, dataframe_type, dataframe_name = "", "", "" + for filename in folder: + if filename.endswith('.dat'): + file_path = os.path.join(root, filename) + dataframe_name = os.path.splitext(filename)[0] + dataframe_type = dataframe_name.split('-')[1] + dataframe = pd.read_csv(file_path, delimiter=" ", header=0) + return dataframe, dataframe_type, dataframe_name + + def create_dataframe_long(self, root, _, folder): + dfs = {} + for filename in folder: + if filename.endswith('.dat'): + dataframe, dataframe_type, dataframe_name = self.read_data(root, _, folder) + if dataframe_type in dfs: + inner_dict = dfs[dataframe_type] + if dataframe_name in inner_dict: + inner_dict[dataframe_name] = pd.concat([inner_dict[dataframe_name], dataframe], + ignore_index=True) + else: + inner_dict[dataframe_name] = dataframe + else: + dfs[dataframe_type] = { + dataframe_name: dataframe + } + print(dfs) + return dfs + + def create_dataframe_statistics(self, root, _, folder): + print("statistics" + root + folder) + + def create_dataframes_various(self, root, _, folder): + dfs = {} + + dataframe, dataframe_type, dataframe_name = self.read_data(root, _, folder) + if dataframe_type in dfs: + inner_dict = dfs[dataframe_type] + if dataframe_name in inner_dict: + inner_dict[dataframe_name] = pd.concat([inner_dict[dataframe_name], dataframe], ignore_index=True) + else: + inner_dict[dataframe_name] = dataframe + else: + dfs[dataframe_type] = { + dataframe_name: dataframe + } + merged_dfs = self._merge_dataframes(dfs) return merged_dfs + def create(self): + various_frames = {} + for root, _, folder in os.walk(self.folder_path): + if os.path.basename(root) == "statistics": + statistic_frames = self.create_dataframe_statistics(root, _, folder) + elif os.path.basename(root) == "Data_Set_Long": + long_frames = self.create_dataframe_long(root, _, folder) + else: + self.create_dataframes_various(root, _, folder) + return various_frames diff --git a/visualizer/visualizer/SeabornPlotter.py b/visualizer/visualizer/SeabornPlotter.py index 48a836f..186aae4 100644 --- a/visualizer/visualizer/SeabornPlotter.py +++ b/visualizer/visualizer/SeabornPlotter.py @@ -14,45 +14,63 @@ class SeabornPlotter: self.name = name self.current_plot = None - def plot(self): - df = self.dataframe - x = df['size'] - y = df['duration'] - title = "Procedure: {}".format(self.name) + @staticmethod + def _add_logo(logo_path, logo_size, logo_x, logo_y): + current_directory = os.path.dirname(os.path.abspath(__file__)) + logo_path = os.path.join(current_directory, logo_path) + logo_img = plt.imread(logo_path) + imagebox = OffsetImage(logo_img, zoom=logo_size) + ab = AnnotationBbox(imagebox, (logo_x, logo_y), xycoords='axes fraction', frameon=False) + plt.gca().add_artist(ab) - # Bin Grenzen festlegen + @staticmethod + def _prepare_bins(df): n = df['size'].nunique() bins_x = np.zeros(n + 1, dtype=np.float64) bins_x[0] = 0.75 * (1 << 4) for i in range(1, n + 1): bins_x[i] = bins_x[i - 1] * 2 bins_y = np.logspace(-6, 0, num=101) + return bins_x, bins_y - # Achsen und Plot definieren + @staticmethod + def plot_histogram_various(x, y, title, bins_x, bins_y): f, ax = plt.subplots(figsize=(16, 9)) - sns.histplot(x=x, y=y, color="blue", bins=[bins_x, bins_y],) - - # Setze die Skala der x-Achse auf logarithmisch + sns.histplot(x=x, y=y, color="blue", bins=[bins_x, bins_y]) ax.set_xscale('log') ax.set_yscale('log') + ax.set_xlabel("Data size [Bytes]") + ax.set_ylabel("Time/Event [s]") + plt.title(title) + plt.grid(True) + plt.xticks(rotation=45) - # Setze die Beschriftungen + @staticmethod + def plot_histogram_long(x, y, title, bins_x, bins_y): + f, ax = plt.subplots(figsize=(16, 9)) + sns.histplot(x=x, y=y, color="blue", bins=[bins_x, bins_y]) + ax.set_xscale('log') + ax.set_yscale('log') ax.set_xlabel("Data size [Bytes]") ax.set_ylabel("Time/Event [s]") plt.title(title) plt.grid(True) plt.xticks(rotation=45) - current_directory = os.path.dirname(os.path.abspath(__file__)) - logo_path = os.path.join(current_directory, "desy_logo.png") - logo_img = plt.imread(logo_path) - logo_size = 0.15 - logo_x = 1.065 - logo_y = 1.04 - imagebox = OffsetImage(logo_img, zoom=logo_size) - ab = AnnotationBbox(imagebox, (logo_x, logo_y), xycoords='axes fraction', frameon=False) - plt.gca().add_artist(ab) - # Speichern und Anzeigen + def plot(self): + df = self.dataframe + + # if self.name + x = df['size'] + y = df['duration'] + title = "Procedure: {}".format(self.name) + + bins_x, bins_y = self._prepare_bins(df) + self.plot_histogram_various(x, y, title, bins_x, bins_y) + + self._add_logo("desy_logo.png", logo_size=0.15, logo_x=1.065, logo_y=1.04) + + # Save and View plt.savefig(title + ".svg", format='svg') plt.show() diff --git a/visualizer/visualizer/main.py b/visualizer/visualizer/main.py index 7235961..0742a25 100644 --- a/visualizer/visualizer/main.py +++ b/visualizer/visualizer/main.py @@ -16,12 +16,12 @@ def main(): # Import Data creator = DataFrameCreator(data_path) - dataframes = creator.create_dataframes_data() + dataframes = creator.create() for frame in dataframes.keys(): if not dataframes[frame].empty: # Setup Plotter frame_name = frame - + print(frame_name) plotter = SeabornPlotter(dataframes[frame], frame_name) # Plotter run -- GitLab