Skip to content
Snippets Groups Projects
Commit 4c86e6aa authored by Michael Reuscher's avatar Michael Reuscher
Browse files

bullshit in DataFrameCreator.py but i have to save

parent 73ea73aa
No related branches found
No related tags found
1 merge request: !4 "Python driver and data analysis part"
......@@ -139,4 +139,3 @@ event_list = create_event_list()
# Run detectorSimulation
run_sim(event_list, data_path, simulation_path, stat_path)
......@@ -6,39 +6,73 @@ class DataFrameCreator:
def __init__(self, f_path):
self.folder_path = f_path
def create_dataframes_data(self):
dfs = {}
for root, _, folder in os.walk(self.folder_path):
if os.path.basename(root) == "statistics":
print("this was statistic")
continue
for filename in folder:
if filename.endswith('.dat'):
file_path = os.path.join(root, filename)
dataframe_name = os.path.splitext(filename)[0]
dataframe_type = dataframe_name.split('-')[1]
dataframe = pd.read_csv(file_path, delimiter=" ", header=0)
if dataframe_type in dfs:
inner_dict = dfs[dataframe_type]
if dataframe_name in inner_dict:
# DataFrame hinzufügen
inner_dict[dataframe_name] = pd.concat([inner_dict[dataframe_name], dataframe],
ignore_index=True)
else:
inner_dict[dataframe_name] = dataframe
else:
dfs[dataframe_type] = {
dataframe_name: dataframe
}
@staticmethod
def _merge_dataframes(dfs):
merged_dfs = {}
for outer_key in dfs:
merged_dataframe = pd.concat(dfs[outer_key].values(), keys=dfs[outer_key].keys())
merged_dataframe_sorted = merged_dataframe.sort_index()
merged_dfs[outer_key] = merged_dataframe_sorted
return merged_dfs
def read_data(self, root, _, folder):
    """Load the last ``.dat`` file listed in ``folder``.

    Only the final ``.dat`` entry is ever returned, so earlier ``.dat``
    entries are not opened at all.

    Args:
        root: Directory that contains the files.
        _: Unused; kept so the call shape stays ``(root, _, folder)``.
        folder: Iterable of file names inside ``root``.

    Returns:
        tuple: ``(dataframe, dataframe_type, dataframe_name)``. The name is
        the file name without its extension and the type is the second
        ``-``-separated part of that name. All three are empty strings
        when ``folder`` contains no ``.dat`` file.

    Raises:
        IndexError: If the selected file name contains no ``-``.
    """
    dat_files = [filename for filename in folder if filename.endswith('.dat')]
    if not dat_files:
        # Keep the empty-string placeholders for the "nothing to read" case.
        return "", "", ""

    filename = dat_files[-1]
    dataframe_name = os.path.splitext(filename)[0]
    dataframe_type = dataframe_name.split('-')[1]
    dataframe = pd.read_csv(os.path.join(root, filename), delimiter=" ", header=0)
    return dataframe, dataframe_type, dataframe_name
def create_dataframe_long(self, root, _, folder):
    """Read every ``.dat`` file in ``folder`` and group the frames by type.

    Each file is parsed on its own. Previously every iteration re-read the
    whole folder and stored the last file again, so the other files never
    made it into the result.

    Args:
        root: Directory that contains the files.
        _: Unused; kept so the call shape stays ``(root, _, folder)``.
        folder: Iterable of file names inside ``root``.

    Returns:
        dict: ``{dataframe_type: {dataframe_name: DataFrame}}`` where the
        name is the file name without extension and the type is the second
        ``-``-separated part of the name.

    Raises:
        IndexError: If a ``.dat`` file name contains no ``-``.
    """
    dfs = {}
    for filename in folder:
        if not filename.endswith('.dat'):
            continue
        dataframe_name = os.path.splitext(filename)[0]
        dataframe_type = dataframe_name.split('-')[1]
        dataframe = pd.read_csv(os.path.join(root, filename), delimiter=" ", header=0)

        inner_dict = dfs.setdefault(dataframe_type, {})
        if dataframe_name in inner_dict:
            # Same name listed twice: append the rows to the existing frame.
            inner_dict[dataframe_name] = pd.concat(
                [inner_dict[dataframe_name], dataframe],
                ignore_index=True,
            )
        else:
            inner_dict[dataframe_name] = dataframe
    return dfs
def create_dataframe_statistics(self, root, _, folder):
    """Print a marker for a visited statistics directory.

    No DataFrame is built; the method returns ``None``.

    Args:
        root: Directory path, expected to be a string.
        _: Unused; kept so the call shape stays ``(root, _, folder)``.
        folder: File names inside ``root``; may be a list.
    """
    # ``folder`` is converted explicitly: concatenating a list onto a string
    # raises TypeError.
    print("statistics" + root + str(folder))
def create_dataframes_various(self, root, _, folder):
    """Build the merged frame mapping for one folder.

    Delegates reading to ``self.read_data`` and merging to
    ``self._merge_dataframes``.

    Args:
        root: Directory that contains the files.
        _: Unused here; forwarded unchanged to ``read_data``.
        folder: File names inside ``root``.

    Returns:
        dict: Whatever ``_merge_dataframes`` returns for the single
        ``{dataframe_type: {dataframe_name: dataframe}}`` entry, or an
        empty dict when ``read_data`` reports nothing to read.
    """
    dataframe, dataframe_type, dataframe_name = self.read_data(root, _, folder)
    if dataframe_name == "":
        # ``read_data`` hands back empty-string placeholders when the folder
        # has no ``.dat`` file; feeding those into the merge step would fail.
        return {}
    return self._merge_dataframes({dataframe_type: {dataframe_name: dataframe}})
def create(self):
    """Walk ``self.folder_path`` and collect the per-type frames.

    Directories named ``statistics`` and ``Data_Set_Long`` are passed to
    their dedicated handlers. Every other directory that lists at least one
    ``.dat`` file goes through ``create_dataframes_various`` and its result
    is folded into the returned mapping (previously that result was thrown
    away and the method always returned an empty dict).

    Returns:
        dict: Frame type -> DataFrame. When several directories yield the
        same type, their frames are concatenated and sorted by index.
    """
    various_frames = {}
    for root, _, folder in os.walk(self.folder_path):
        directory = os.path.basename(root)
        if directory == "statistics":
            # NOTE(review): the handler's result is still not returned, as
            # before - confirm whether callers need it.
            self.create_dataframe_statistics(root, _, folder)
        elif directory == "Data_Set_Long":
            # NOTE(review): same as above, result is not part of the output.
            self.create_dataframe_long(root, _, folder)
        elif any(filename.endswith('.dat') for filename in folder):
            frames = self.create_dataframes_various(root, _, folder)
            for frame_type, frame in frames.items():
                if frame_type in various_frames:
                    various_frames[frame_type] = pd.concat(
                        [various_frames[frame_type], frame]
                    ).sort_index()
                else:
                    various_frames[frame_type] = frame
    return various_frames
......@@ -14,45 +14,63 @@ class SeabornPlotter:
self.name = name
self.current_plot = None
def plot(self):
df = self.dataframe
x = df['size']
y = df['duration']
title = "Procedure: {}".format(self.name)
@staticmethod
def _add_logo(logo_path, logo_size, logo_x, logo_y):
    """Place an image on the current matplotlib axes.

    Args:
        logo_path: Image path; joined onto this module's directory, so a
            relative path is looked up next to this file.
        logo_size: Zoom factor handed to ``OffsetImage``.
        logo_x: Horizontal position in axes-fraction coordinates.
        logo_y: Vertical position in axes-fraction coordinates.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    logo_img = plt.imread(os.path.join(module_dir, logo_path))
    imagebox = OffsetImage(logo_img, zoom=logo_size)
    annotation = AnnotationBbox(
        imagebox,
        (logo_x, logo_y),
        xycoords='axes fraction',
        frameon=False,
    )
    plt.gca().add_artist(annotation)
# Bin Grenzen festlegen
@staticmethod
def _prepare_bins(df):
n = df['size'].nunique()
bins_x = np.zeros(n + 1, dtype=np.float64)
bins_x[0] = 0.75 * (1 << 4)
for i in range(1, n + 1):
bins_x[i] = bins_x[i - 1] * 2
bins_y = np.logspace(-6, 0, num=101)
return bins_x, bins_y
# Achsen und Plot definieren
@staticmethod
def plot_histogram_various(x, y, title, bins_x, bins_y):
    """Draw a 2D histogram of ``y`` against ``x`` on log-log axes.

    A new 16x9 figure is created; nothing is saved or shown here.

    Args:
        x: Values for the x axis (labelled "Data size [Bytes]").
        y: Values for the y axis (labelled "Time/Event [s]").
        title: Title of the plot.
        bins_x: Bin edges along x.
        bins_y: Bin edges along y.
    """
    _, ax = plt.subplots(figsize=(16, 9))
    # The histogram is drawn exactly once; a second identical call would
    # only stack the same artists on top of each other.
    sns.histplot(x=x, y=y, color="blue", bins=[bins_x, bins_y])
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlabel("Data size [Bytes]")
    ax.set_ylabel("Time/Event [s]")
    plt.title(title)
    plt.grid(True)
    plt.xticks(rotation=45)
# Setze die Beschriftungen
@staticmethod
def plot_histogram_long(x, y, title, bins_x, bins_y):
    """Draw a 2D histogram of ``y`` against ``x`` on log-log axes.

    A new 16x9 figure is created; nothing is saved or shown here.

    Args:
        x: Values for the x axis (labelled "Data size [Bytes]").
        y: Values for the y axis (labelled "Time/Event [s]").
        title: Title of the plot.
        bins_x: Bin edges along x.
        bins_y: Bin edges along y.
    """
    # The figure handle is not needed afterwards, only the axes.
    _, ax = plt.subplots(figsize=(16, 9))
    sns.histplot(x=x, y=y, color="blue", bins=[bins_x, bins_y])
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlabel("Data size [Bytes]")
    ax.set_ylabel("Time/Event [s]")
    plt.title(title)
    plt.grid(True)
    plt.xticks(rotation=45)
current_directory = os.path.dirname(os.path.abspath(__file__))
logo_path = os.path.join(current_directory, "desy_logo.png")
logo_img = plt.imread(logo_path)
logo_size = 0.15
logo_x = 1.065
logo_y = 1.04
imagebox = OffsetImage(logo_img, zoom=logo_size)
ab = AnnotationBbox(imagebox, (logo_x, logo_y), xycoords='axes fraction', frameon=False)
plt.gca().add_artist(ab)
# Speichern und Anzeigen
def plot(self):
    """Render the histogram for ``self.dataframe``, save it and show it.

    Reads the ``size`` and ``duration`` columns, draws them with
    ``plot_histogram_various``, adds ``desy_logo.png`` via ``_add_logo``,
    writes ``"<title>.svg"`` and finally opens the plot window.
    """
    df = self.dataframe
    x = df['size']
    y = df['duration']
    title = "Procedure: {}".format(self.name)

    bins_x, bins_y = self._prepare_bins(df)
    self.plot_histogram_various(x, y, title, bins_x, bins_y)
    self._add_logo("desy_logo.png", logo_size=0.15, logo_x=1.065, logo_y=1.04)

    # Save and view.
    # NOTE(review): the file name is built from the title and therefore
    # contains ':' - confirm this is acceptable on all target platforms.
    plt.savefig(title + ".svg", format='svg')
    plt.show()
......
......@@ -16,12 +16,12 @@ def main():
# Import Data
creator = DataFrameCreator(data_path)
dataframes = creator.create_dataframes_data()
dataframes = creator.create()
for frame in dataframes.keys():
if not dataframes[frame].empty:
# Setup Plotter
frame_name = frame
print(frame_name)
plotter = SeabornPlotter(dataframes[frame], frame_name)
# Plotter run
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment