From 7d0c68f55134dbcfb98564548e4fff695ac51227 Mon Sep 17 00:00:00 2001
From: Michael Reuscher <michael.reuscher@desy.de>
Date: Tue, 25 Jul 2023 10:32:09 +0200
Subject: [PATCH] create dataframes refactored

---
 visualizer/visualizer/DataFrameCreator.py | 99 +++++++++++------------
 visualizer/visualizer/SeabornPlotter.py   | 14 ----
 visualizer/visualizer/main.py             |  4 +-
 3 files changed, 50 insertions(+), 67 deletions(-)

diff --git a/visualizer/visualizer/DataFrameCreator.py b/visualizer/visualizer/DataFrameCreator.py
index 2975f7c..5815510 100644
--- a/visualizer/visualizer/DataFrameCreator.py
+++ b/visualizer/visualizer/DataFrameCreator.py
@@ -17,67 +17,64 @@ class DataFrameCreator:
             merged_dfs[outer_key] = merged_dataframe_sorted
         return merged_dfs
 
-    def read_various_data(self, root, folder):
-        for filename in folder:
-            if filename.endswith('.dat'):
-                file_path = os.path.join(root, filename)
-                dataframe_name = os.path.splitext(filename)[0]
-                dataframe_type = dataframe_name.split('-')[1]
-                dataframe = pd.read_csv(file_path, delimiter=" ", header=0)
+    @staticmethod
+    def _set_dataframe(root, filename):
+        """Load one '.dat' file as (dataframe, type, name); return None for other files."""
+        if not filename.endswith('.dat'):
+            return None
+        path, name = os.path.join(root, filename), os.path.splitext(filename)[0]
+        kind = name.split('-')[1]  # second '-'-separated token of the file name
+        return pd.read_csv(path, delimiter=" ", header=0), kind, name
 
-                if dataframe_type in self.v_dfs:
-                    inner_dict = self.v_dfs[dataframe_type]
-                    if dataframe_name in inner_dict:
-                        # DataFrame hinzufügen
-                        inner_dict[dataframe_name] = pd.concat([inner_dict[dataframe_name], dataframe],
-                                                               ignore_index=True)
-                    else:
-                        inner_dict[dataframe_name] = dataframe
-                else:
-                    self.v_dfs[dataframe_type] = {
-                        dataframe_name: dataframe
-                    }
+    def read_various_data(self, root, filename):
+        """Load one '.dat' file into self.v_dfs[type][name]; skip any other file."""
+        loaded = self._set_dataframe(root, filename)
+        if loaded is None:
+            # _set_dataframe yields nothing for non-'.dat' files; unpacking
+            # that would raise TypeError, so skip the file instead.
+            return
+        dataframe, dataframe_type, dataframe_name = loaded
+        inner_dict = self.v_dfs.setdefault(dataframe_type, {})
+        if dataframe_name in inner_dict:
+            # Same name seen before: append the new rows to the stored frame.
+            dataframe = pd.concat([inner_dict[dataframe_name], dataframe],
+                                  ignore_index=True)
+        inner_dict[dataframe_name] = dataframe
 
-    def read_long_data(self, root, folder):
-        for filename in folder:
-            if filename.endswith('.dat'):
-                file_path = os.path.join(root, filename)
-                dataframe_name = os.path.splitext(filename)[0]
-                dataframe_type = dataframe_name.split('-')[1]
-                dataframe = pd.read_csv(file_path, delimiter=" ", header=0)
-                # Berechnung der Zeitdifferenz in Sekunden ab dem ersten Wert
-                if not dataframe.empty:
-                    dataframe['start'] = pd.to_datetime(dataframe['start'], unit='s')
-                    start_time = dataframe['start'].iloc[0]
-                    dataframe['seconds_since_start'] = (dataframe['start'] - start_time).dt.total_seconds()
+    def read_long_data(self, root, filename):
+        """Load one '.dat' file into self.l_dfs[type][name]; skip any other file."""
+        loaded = self._set_dataframe(root, filename)
+        if loaded is None:
+            # Non-'.dat' files yield None; unpacking it would raise TypeError.
+            return
+        dataframe, dataframe_type, dataframe_name = loaded
 
-                if dataframe_type in self.l_dfs:
-                    inner_dict = self.l_dfs[dataframe_type]
-                    if dataframe_name in inner_dict:
-                        # DataFrame hinzufügen
-                        inner_dict[dataframe_name] = pd.concat([inner_dict[dataframe_name], dataframe],
-                                                               ignore_index=True)
-                    else:
-                        inner_dict[dataframe_name] = dataframe
-                else:
-                    self.l_dfs[dataframe_type] = {
-                        dataframe_name: dataframe
-                    }
+        if not dataframe.empty:
+            # Parse 'start' as epoch seconds, then store each row's offset
+            # (in seconds) from the first row.
+            dataframe['start'] = pd.to_datetime(dataframe['start'], unit='s')
+            start_time = dataframe['start'].iloc[0]
+            dataframe['seconds_since_start'] = (dataframe['start'] - start_time).dt.total_seconds()
+        inner_dict = self.l_dfs.setdefault(dataframe_type, {})
+        if dataframe_name in inner_dict:
+            # Same name seen before: append the new rows to the stored frame.
+            dataframe = pd.concat([inner_dict[dataframe_name], dataframe],
+                                  ignore_index=True)
+        inner_dict[dataframe_name] = dataframe
 
     def create(self):
-        dfs = {}
+        """Walk self.folder_path, load the data files, and return (various_dfs, long_dfs)."""
         for root, _, folder in os.walk(self.folder_path):
             if os.path.basename(root) == "statistics":
-                print("this was statistic")
-                continue
+                for filename in folder:
+                    print("this was statistic" + filename)
             elif os.path.basename(root) == "Data_Set_Long":
-                self.read_long_data(root, folder)
+                for filename in folder:
+                    self.read_long_data(root, filename)
             else:
-                self.read_various_data(root, folder)
+                for filename in folder:
+                    self.read_various_data(root, filename)
 
         various_dfs = self._merge_dataframes(self.v_dfs)
         long_dfs = self._merge_dataframes(self.l_dfs)
         return various_dfs, long_dfs
-
-
-
diff --git a/visualizer/visualizer/SeabornPlotter.py b/visualizer/visualizer/SeabornPlotter.py
index 740dfc0..940c0a0 100644
--- a/visualizer/visualizer/SeabornPlotter.py
+++ b/visualizer/visualizer/SeabornPlotter.py
@@ -3,8 +3,6 @@ import matplotlib.pyplot as plt
 import seaborn as sns
 from matplotlib.offsetbox import OffsetImage, AnnotationBbox
 import os
-import pandas as pd
-
 sns.set_theme()
 
 
@@ -68,15 +66,3 @@ class SeabornPlotter:
         plt.xticks(rotation=45)
         plt.savefig(title + ".svg", format='svg')
         plt.show()
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/visualizer/visualizer/main.py b/visualizer/visualizer/main.py
index 2647010..489cc2a 100644
--- a/visualizer/visualizer/main.py
+++ b/visualizer/visualizer/main.py
@@ -17,12 +17,13 @@ def main():
     # Import Data
     creator = DataFrameCreator(data_path)
     v_dataframes, l_dataframes = creator.create()
+
+    # Plot
     for frame in v_dataframes.keys():
         if not v_dataframes[frame].empty:
             # Setup Plotter
             frame_name = frame
             v_plotter = SeabornPlotter(v_dataframes[frame], frame_name)
-
             # Plotter run
             v_plotter.plot_histogram_various()
 
@@ -31,7 +32,6 @@ def main():
             # Setup Plotter
             frame_name = frame
             l_plotter = SeabornPlotter(l_dataframes[frame], frame_name)
-
             # Plotter run
             l_plotter.plot_histogram_long()
 
-- 
GitLab