Skip to content
Snippets Groups Projects
Commit 4c86e6aa authored by Michael Reuscher's avatar Michael Reuscher
Browse files

bullshit in DataFrameCreator.py but i have to save

parent 73ea73aa
No related branches found
No related tags found
1 merge request: !4 "Python driver and data analysis part"
...@@ -139,4 +139,3 @@ event_list = create_event_list() ...@@ -139,4 +139,3 @@ event_list = create_event_list()
# Run detectorSimulation # Run detectorSimulation
run_sim(event_list, data_path, simulation_path, stat_path) run_sim(event_list, data_path, simulation_path, stat_path)
...@@ -6,39 +6,73 @@ class DataFrameCreator: ...@@ -6,39 +6,73 @@ class DataFrameCreator:
def __init__(self, f_path): def __init__(self, f_path):
self.folder_path = f_path self.folder_path = f_path
def create_dataframes_data(self): @staticmethod
dfs = {} def _merge_dataframes(dfs):
for root, _, folder in os.walk(self.folder_path):
if os.path.basename(root) == "statistics":
print("this was statistic")
continue
for filename in folder:
if filename.endswith('.dat'):
file_path = os.path.join(root, filename)
dataframe_name = os.path.splitext(filename)[0]
dataframe_type = dataframe_name.split('-')[1]
dataframe = pd.read_csv(file_path, delimiter=" ", header=0)
if dataframe_type in dfs:
inner_dict = dfs[dataframe_type]
if dataframe_name in inner_dict:
# DataFrame hinzufügen
inner_dict[dataframe_name] = pd.concat([inner_dict[dataframe_name], dataframe],
ignore_index=True)
else:
inner_dict[dataframe_name] = dataframe
else:
dfs[dataframe_type] = {
dataframe_name: dataframe
}
merged_dfs = {} merged_dfs = {}
for outer_key in dfs: for outer_key in dfs:
merged_dataframe = pd.concat(dfs[outer_key].values(), keys=dfs[outer_key].keys()) merged_dataframe = pd.concat(dfs[outer_key].values(), keys=dfs[outer_key].keys())
merged_dataframe_sorted = merged_dataframe.sort_index() merged_dataframe_sorted = merged_dataframe.sort_index()
merged_dfs[outer_key] = merged_dataframe_sorted merged_dfs[outer_key] = merged_dataframe_sorted
return merged_dfs
def read_data(self, root, _, folder):
    """Load the last ``.dat`` file listed in *folder* as a DataFrame.

    Args:
        root: Directory that contains the files named in *folder*.
        _: Unused; kept so the signature mirrors the ``os.walk`` triple.
        folder: File names (not paths) located in *root*.

    Returns:
        A ``(dataframe, dataframe_type, dataframe_name)`` tuple.
        *dataframe_name* is the file name without its extension and
        *dataframe_type* is the second ``-``-separated field of that name.
        When *folder* holds no ``.dat`` entry all three items are ``""``.

    Raises:
        IndexError: If the selected file name contains no ``-``.
    """
    # Only the last match is returned, so select it up front and parse a
    # single file instead of reading every earlier one just to drop it.
    dat_files = [name for name in folder if name.endswith('.dat')]
    if not dat_files:
        # Empty-string placeholders signal that nothing was read.
        return "", "", ""

    filename = dat_files[-1]
    dataframe_name = os.path.splitext(filename)[0]
    dataframe_type = dataframe_name.split('-')[1]
    dataframe = pd.read_csv(os.path.join(root, filename), delimiter=" ", header=0)
    return dataframe, dataframe_type, dataframe_name
def create_dataframe_long(self, root, _, folder):
    """Read every ``.dat`` file of one directory, grouped by type.

    Args:
        root: Directory that contains the files named in *folder*.
        _: Unused; forwarded unchanged to ``read_data``.
        folder: File names (not paths) located in *root*.

    Returns:
        A nested dict ``{dataframe_type: {dataframe_name: dataframe}}``
        built from the values ``read_data`` returns for each file. Frames
        that share a name are concatenated with a fresh index.
    """
    dfs = {}
    for filename in folder:
        if not filename.endswith('.dat'):
            continue
        # Hand read_data exactly one file name: given the whole listing it
        # returns the same (last) file on every iteration.
        dataframe, dataframe_type, dataframe_name = self.read_data(root, _, [filename])
        inner_dict = dfs.setdefault(dataframe_type, {})
        if dataframe_name in inner_dict:
            inner_dict[dataframe_name] = pd.concat(
                [inner_dict[dataframe_name], dataframe], ignore_index=True
            )
        else:
            inner_dict[dataframe_name] = dataframe
    return dfs
def create_dataframe_statistics(self, root, _, folder):
    """Report the statistics directory that was encountered.

    Placeholder: it only prints its arguments and returns ``None``.

    Args:
        root: Path of the directory being visited.
        _: Unused; kept so the signature mirrors the ``os.walk`` triple.
        folder: File names (not paths) located in *root*.
    """
    # folder is the file-name list produced by os.walk, so it cannot be
    # joined to a str with "+"; let print() format each argument instead.
    print("statistics", root, folder)
def create_dataframes_various(self, root, _, folder):
    """Read one directory through ``read_data`` and merge the result.

    Args:
        root: Directory that contains the files named in *folder*.
        _: Unused; forwarded unchanged to ``read_data``.
        folder: File names (not paths) located in *root*.

    Returns:
        The value of ``self._merge_dataframes`` applied to
        ``{dataframe_type: {dataframe_name: dataframe}}``, or an empty
        dict when ``read_data`` reported that nothing was read.
    """
    dataframe, dataframe_type, dataframe_name = self.read_data(root, _, folder)
    if not dataframe_name:
        # read_data found no .dat file; its "" placeholder is not a
        # DataFrame and must not be handed on to the merge step.
        return {}
    # The dict starts out empty on every call, so a single entry is all
    # that can ever be stored here.
    dfs = {dataframe_type: {dataframe_name: dataframe}}
    return self._merge_dataframes(dfs)
def create(self):
    """Walk ``self.folder_path`` and collect the merged data frames.

    Directories named ``statistics`` and ``Data_Set_Long`` are handed to
    their dedicated methods; every other directory that lists at least
    one ``.dat`` file goes through ``create_dataframes_various``.

    Returns:
        A dict holding the entries returned by ``create_dataframes_various``
        for all visited directories. When two directories yield the same
        key their frames are concatenated and index-sorted.
    """
    various_frames = {}
    for root, _, folder in os.walk(self.folder_path):
        directory = os.path.basename(root)
        if directory == "statistics":
            # NOTE(review): the statistics result is not part of the
            # returned dict yet - confirm how it should be exposed.
            self.create_dataframe_statistics(root, _, folder)
        elif directory == "Data_Set_Long":
            # NOTE(review): the long data set is read but not returned -
            # confirm whether it belongs in the result.
            self.create_dataframe_long(root, _, folder)
        elif any(name.endswith('.dat') for name in folder):
            frames = self.create_dataframes_various(root, _, folder) or {}
            for key, frame in frames.items():
                if key in various_frames:
                    # Same key seen in another directory: keep both.
                    various_frames[key] = pd.concat(
                        [various_frames[key], frame]
                    ).sort_index()
                else:
                    various_frames[key] = frame
    return various_frames
...@@ -14,45 +14,63 @@ class SeabornPlotter: ...@@ -14,45 +14,63 @@ class SeabornPlotter:
self.name = name self.name = name
self.current_plot = None self.current_plot = None
def plot(self): @staticmethod
df = self.dataframe def _add_logo(logo_path, logo_size, logo_x, logo_y):
x = df['size'] current_directory = os.path.dirname(os.path.abspath(__file__))
y = df['duration'] logo_path = os.path.join(current_directory, logo_path)
title = "Procedure: {}".format(self.name) logo_img = plt.imread(logo_path)
imagebox = OffsetImage(logo_img, zoom=logo_size)
ab = AnnotationBbox(imagebox, (logo_x, logo_y), xycoords='axes fraction', frameon=False)
plt.gca().add_artist(ab)
# Bin Grenzen festlegen @staticmethod
def _prepare_bins(df):
n = df['size'].nunique() n = df['size'].nunique()
bins_x = np.zeros(n + 1, dtype=np.float64) bins_x = np.zeros(n + 1, dtype=np.float64)
bins_x[0] = 0.75 * (1 << 4) bins_x[0] = 0.75 * (1 << 4)
for i in range(1, n + 1): for i in range(1, n + 1):
bins_x[i] = bins_x[i - 1] * 2 bins_x[i] = bins_x[i - 1] * 2
bins_y = np.logspace(-6, 0, num=101) bins_y = np.logspace(-6, 0, num=101)
return bins_x, bins_y
# Achsen und Plot definieren @staticmethod
def plot_histogram_various(x, y, title, bins_x, bins_y):
f, ax = plt.subplots(figsize=(16, 9)) f, ax = plt.subplots(figsize=(16, 9))
sns.histplot(x=x, y=y, color="blue", bins=[bins_x, bins_y],) sns.histplot(x=x, y=y, color="blue", bins=[bins_x, bins_y])
# Setze die Skala der x-Achse auf logarithmisch
ax.set_xscale('log') ax.set_xscale('log')
ax.set_yscale('log') ax.set_yscale('log')
ax.set_xlabel("Data size [Bytes]")
ax.set_ylabel("Time/Event [s]")
plt.title(title)
plt.grid(True)
plt.xticks(rotation=45)
# Setze die Beschriftungen @staticmethod
def plot_histogram_long(x, y, title, bins_x, bins_y):
f, ax = plt.subplots(figsize=(16, 9))
sns.histplot(x=x, y=y, color="blue", bins=[bins_x, bins_y])
ax.set_xscale('log')
ax.set_yscale('log')
ax.set_xlabel("Data size [Bytes]") ax.set_xlabel("Data size [Bytes]")
ax.set_ylabel("Time/Event [s]") ax.set_ylabel("Time/Event [s]")
plt.title(title) plt.title(title)
plt.grid(True) plt.grid(True)
plt.xticks(rotation=45) plt.xticks(rotation=45)
current_directory = os.path.dirname(os.path.abspath(__file__))
logo_path = os.path.join(current_directory, "desy_logo.png")
logo_img = plt.imread(logo_path)
logo_size = 0.15
logo_x = 1.065
logo_y = 1.04
imagebox = OffsetImage(logo_img, zoom=logo_size)
ab = AnnotationBbox(imagebox, (logo_x, logo_y), xycoords='axes fraction', frameon=False)
plt.gca().add_artist(ab)
# Speichern und Anzeigen def plot(self):
df = self.dataframe
# if self.name
x = df['size']
y = df['duration']
title = "Procedure: {}".format(self.name)
bins_x, bins_y = self._prepare_bins(df)
self.plot_histogram_various(x, y, title, bins_x, bins_y)
self._add_logo("desy_logo.png", logo_size=0.15, logo_x=1.065, logo_y=1.04)
# Save and View
plt.savefig(title + ".svg", format='svg') plt.savefig(title + ".svg", format='svg')
plt.show() plt.show()
......
...@@ -16,12 +16,12 @@ def main(): ...@@ -16,12 +16,12 @@ def main():
# Import Data # Import Data
creator = DataFrameCreator(data_path) creator = DataFrameCreator(data_path)
dataframes = creator.create_dataframes_data() dataframes = creator.create()
for frame in dataframes.keys(): for frame in dataframes.keys():
if not dataframes[frame].empty: if not dataframes[frame].empty:
# Setup Plotter # Setup Plotter
frame_name = frame frame_name = frame
print(frame_name)
plotter = SeabornPlotter(dataframes[frame], frame_name) plotter = SeabornPlotter(dataframes[frame], frame_name)
# Plotter run # Plotter run
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment