Skip to content
Snippets Groups Projects
Commit c939c90e authored by Michael Reuscher's avatar Michael Reuscher
Browse files

save commit

parent 7d0c68f5
No related branches found
No related tags found
1 merge request: !4 "Python driver and data analysis part"
import os import os
import pandas as pd import pandas as pd
import re
class DataFrameCreator: class DataFrameCreator:
...@@ -7,6 +8,7 @@ class DataFrameCreator: ...@@ -7,6 +8,7 @@ class DataFrameCreator:
self.folder_path = f_path self.folder_path = f_path
self.v_dfs = {} self.v_dfs = {}
self.l_dfs = {} self.l_dfs = {}
self.s_dfs = {}
@staticmethod @staticmethod
def _merge_dataframes(dfs): def _merge_dataframes(dfs):
...@@ -26,12 +28,29 @@ class DataFrameCreator: ...@@ -26,12 +28,29 @@ class DataFrameCreator:
dataframe = pd.read_csv(file_path, delimiter=" ", header=0) dataframe = pd.read_csv(file_path, delimiter=" ", header=0)
return dataframe, dataframe_type, dataframe_name return dataframe, dataframe_type, dataframe_name
@staticmethod
def _extract_statistics(data, filename):
    """Parse one statistics line into a dict of named values.

    The text before the last colon on the line becomes the ``"Group"``
    entry. The first seven decimal numbers (digits, a dot, digits) found
    anywhere in the line are stored, in order, under ``min``, ``mean``,
    ``10%``, ``med``, ``90%``, ``max`` and ``speed``.

    Args:
        data: A single line of text to parse.
        filename: Name of the file the line came from; not used here.

    Returns:
        A dict mapping ``"Group"`` to a string and every measure name to
        a float.

    Raises:
        AttributeError: If the line contains no colon.
        IndexError: If fewer than seven decimal numbers are found.
    """
    measure_names = ("min", "mean", "10%", "med", "90%", "max", "speed")
    # Greedy match: captures everything up to the last colon on the line.
    parsed = {"Group": re.search(r"(.*):", data).group(1)}
    numbers = re.findall(r"\d+\.\d+", data)
    # Index explicitly (rather than zip) so a short line still raises
    # IndexError instead of silently yielding a partial record.
    for position, measure in enumerate(measure_names):
        parsed[measure] = float(numbers[position])
    return parsed
def read_various_data(self, root, filename): def read_various_data(self, root, filename):
dataframe, dataframe_type, dataframe_name = self._set_dataframe(root, filename) dataframe, dataframe_type, dataframe_name = self._set_dataframe(root, filename)
if dataframe_type in self.v_dfs: if dataframe_type in self.v_dfs:
inner_dict = self.v_dfs[dataframe_type] inner_dict = self.v_dfs[dataframe_type]
if dataframe_name in inner_dict: if dataframe_name in inner_dict:
# DataFrame hinzufügen # Add Dataframe
inner_dict[dataframe_name] = pd.concat([inner_dict[dataframe_name], dataframe], inner_dict[dataframe_name] = pd.concat([inner_dict[dataframe_name], dataframe],
ignore_index=True) ignore_index=True)
else: else:
...@@ -41,9 +60,38 @@ class DataFrameCreator: ...@@ -41,9 +60,38 @@ class DataFrameCreator:
dataframe_name: dataframe dataframe_name: dataframe
} }
def read_statistic_data(self, root, filename):
    """Load one statistics ``.dat`` file into ``self.s_dfs``.

    Files whose name does not end in ``.dat`` are ignored. Every line
    that, once stripped, starts with ``worker`` or ``posix`` is parsed by
    ``self._extract_statistics`` and becomes one row of a new DataFrame.
    The DataFrame is stored in ``self.s_dfs`` under the full file name
    (outer key) and the file name up to its first dot (inner key); if an
    entry already exists there, the new rows are appended to it.

    Args:
        root: Directory that contains the file.
        filename: Name of the file inside ``root``.
    """
    if not filename.endswith('.dat'):
        return

    rows = []
    with open(os.path.join(root, filename), "r") as handle:
        for raw_line in handle:
            entry = raw_line.strip()
            if entry.startswith(("worker", "posix")):
                rows.append(self._extract_statistics(entry, filename))

    frame = pd.DataFrame(rows)
    frame_type = filename
    # Use the file name without its extension as the DataFrame name.
    frame_name = filename.split('.')[0]

    frames_by_name = self.s_dfs.setdefault(frame_type, {})
    if frame_name in frames_by_name:
        # Append the new rows to the DataFrame collected so far.
        frames_by_name[frame_name] = pd.concat(
            [frames_by_name[frame_name], frame], ignore_index=True
        )
    else:
        frames_by_name[frame_name] = frame
def read_long_data(self, root, filename): def read_long_data(self, root, filename):
dataframe, dataframe_type, dataframe_name = self._set_dataframe(root, filename) dataframe, dataframe_type, dataframe_name = self._set_dataframe(root, filename)
# Berechnung der Zeitdifferenz in Sekunden ab dem ersten Wert # Calculation of time difference in seconds from the first value
if not dataframe.empty: if not dataframe.empty:
dataframe['start'] = pd.to_datetime(dataframe['start'], unit='s') dataframe['start'] = pd.to_datetime(dataframe['start'], unit='s')
start_time = dataframe['start'].iloc[0] start_time = dataframe['start'].iloc[0]
...@@ -52,7 +100,7 @@ class DataFrameCreator: ...@@ -52,7 +100,7 @@ class DataFrameCreator:
if dataframe_type in self.l_dfs: if dataframe_type in self.l_dfs:
inner_dict = self.l_dfs[dataframe_type] inner_dict = self.l_dfs[dataframe_type]
if dataframe_name in inner_dict: if dataframe_name in inner_dict:
# DataFrame hinzufügen # Add Dataframe
inner_dict[dataframe_name] = pd.concat([inner_dict[dataframe_name], dataframe], inner_dict[dataframe_name] = pd.concat([inner_dict[dataframe_name], dataframe],
ignore_index=True) ignore_index=True)
else: else:
...@@ -66,8 +114,7 @@ class DataFrameCreator: ...@@ -66,8 +114,7 @@ class DataFrameCreator:
for root, _, folder in os.walk(self.folder_path): for root, _, folder in os.walk(self.folder_path):
if os.path.basename(root) == "statistics": if os.path.basename(root) == "statistics":
for filename in folder: for filename in folder:
print("this was statistic" + filename) self.read_statistic_data(root, filename)
continue
elif os.path.basename(root) == "Data_Set_Long": elif os.path.basename(root) == "Data_Set_Long":
for filename in folder: for filename in folder:
self.read_long_data(root, filename) self.read_long_data(root, filename)
...@@ -77,4 +124,5 @@ class DataFrameCreator: ...@@ -77,4 +124,5 @@ class DataFrameCreator:
various_dfs = self._merge_dataframes(self.v_dfs) various_dfs = self._merge_dataframes(self.v_dfs)
long_dfs = self._merge_dataframes(self.l_dfs) long_dfs = self._merge_dataframes(self.l_dfs)
return various_dfs, long_dfs statistics_dfs = self._merge_dataframes(self.s_dfs)
return various_dfs, long_dfs, statistics_dfs
...@@ -66,3 +66,15 @@ class SeabornPlotter: ...@@ -66,3 +66,15 @@ class SeabornPlotter:
plt.xticks(rotation=45) plt.xticks(rotation=45)
plt.savefig(title + ".svg", format='svg') plt.savefig(title + ".svg", format='svg')
plt.show() plt.show()
def plot_statistics(self):
    """Show one boxplot of the statistical measures per stored DataFrame.

    Reads ``self.dataframe`` and, for every non-empty DataFrame found
    there, opens a new figure with a boxplot of the columns ``min``,
    ``mean``, ``10%``, ``med``, ``90%`` and ``max``.

    Raises:
        KeyError: If a non-empty DataFrame lacks one of those columns.
    """
    measure_columns = ['min', 'mean', '10%', 'med', '90%', 'max']

    frames = self.dataframe
    # NOTE(review): main() passes the statistics collection returned by
    # DataFrameCreator.create(), which looks like a dict of name ->
    # DataFrame - confirm against _merge_dataframes. A single DataFrame is
    # accepted as well so either calling style works.
    if isinstance(frames, dict):
        frames = list(frames.values())
    else:
        frames = [frames]

    for frame in frames:
        # An empty DataFrame has no measure columns to select; skip it,
        # as main() already does for the other DataFrame collections.
        if frame.empty:
            continue
        plt.figure(figsize=(10, 6))
        # Plot the current frame (the original ignored the loop variable
        # and indexed the whole collection on every iteration).
        sns.boxplot(data=frame[measure_columns], palette='pastel')
        plt.xlabel('Statistical Measures')
        plt.ylabel('Time (Y-Axis)')
        plt.title('Boxplot of Statistical Measures for Each Group')
        plt.grid(True)
        plt.show()
...@@ -16,9 +16,13 @@ def main(): ...@@ -16,9 +16,13 @@ def main():
# Import Data # Import Data
creator = DataFrameCreator(data_path) creator = DataFrameCreator(data_path)
v_dataframes, l_dataframes = creator.create() v_dataframes, l_dataframes, s_dataframes = creator.create()
# Plot # Plot Statistics
s_plotter = SeabornPlotter(s_dataframes, 'statistics')
s_plotter.plot_statistics()
# Plot Data
for frame in v_dataframes.keys(): for frame in v_dataframes.keys():
if not v_dataframes[frame].empty: if not v_dataframes[frame].empty:
# Setup Plotter # Setup Plotter
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment