Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
DetectorWriteSim
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Container registry
Model registry
Analyze
Value stream analytics
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
EWMSCP
DetectorWriteSim
Commits
a21584d8
Commit
a21584d8
authored
1 year ago
by
Michael Reuscher
Browse files
Options
Downloads
Patches
Plain Diff
statistics plot 1.2
parent
f7d3245f
No related branches found
No related tags found
1 merge request
!4
Python driver and data analysis part
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
visualizer/visualizer/DataFrameCreator.py
+15
-15
15 additions, 15 deletions
visualizer/visualizer/DataFrameCreator.py
visualizer/visualizer/SeabornPlotter.py
+24
-54
24 additions, 54 deletions
visualizer/visualizer/SeabornPlotter.py
with
39 additions
and
69 deletions
visualizer/visualizer/DataFrameCreator.py
+
15
−
15
View file @
a21584d8
...
...
@@ -37,8 +37,20 @@ class DataFrameCreator:
datasize
=
last_numbers
else
:
datasize
=
0
group
=
re
.
search
(
r
"
(.*):
"
,
data
).
group
(
1
)
result
[
"
Group
"
]
=
group
group_match
=
re
.
search
(
r
"
(.*):
"
,
data
)
if
group_match
:
group
=
group_match
.
group
(
1
)
thread_match
=
re
.
search
(
r
"
thread \d+
"
,
group
)
if
thread_match
:
thread
=
thread_match
.
group
()
result
[
"
thread
"
]
=
thread
group
=
group
.
replace
(
thread
,
""
).
strip
()
else
:
result
[
"
thread
"
]
=
""
result
[
"
Group
"
]
=
group
else
:
result
[
"
Group
"
]
=
""
result
[
"
thread
"
]
=
""
result
[
"
size
"
]
=
int
(
datasize
)
values
=
re
.
findall
(
r
"
\d+\.\d+
"
,
data
)
...
...
@@ -52,16 +64,6 @@ class DataFrameCreator:
return
result
@staticmethod
def
_extract_avg
(
data
):
result
=
{}
match
=
re
.
search
(
r
"
(\d+\.\d+)
"
,
data
)
if
match
:
result
[
"
avg
"
]
=
float
(
match
.
group
())
else
:
result
[
"
avg
"
]
=
0.0
return
result
def
read_various_data
(
self
,
root
,
filename
):
dataframe
,
dataframe_type
,
dataframe_name
=
self
.
_set_dataframe
(
root
,
filename
)
if
dataframe_type
in
self
.
v_dfs
:
...
...
@@ -88,9 +90,6 @@ class DataFrameCreator:
if
data
.
startswith
(
"
worker
"
)
or
data
.
startswith
(
"
posix
"
):
data_dict
=
self
.
_extract_statistics
(
data
,
filename
)
data_list
.
append
(
data_dict
)
elif
data
.
startswith
(
"
average
"
):
data_dict
=
self
.
_extract_avg
(
data
)
data_list
.
append
(
data_dict
)
dataframe
=
pd
.
DataFrame
(
data_list
)
dataframe_type
=
filename
...
...
@@ -147,3 +146,4 @@ class DataFrameCreator:
statistics_dfs
=
self
.
_merge_dataframes
(
self
.
s_dfs
)
return
various_dfs
,
long_dfs
,
statistics_dfs
This diff is collapsed.
Click to expand it.
visualizer/visualizer/SeabornPlotter.py
+
24
−
54
View file @
a21584d8
...
...
@@ -12,15 +12,6 @@ class SeabornPlotter:
self
.
dataframe
=
dataframe
self
.
name
=
name
@staticmethod
def
format_speed
(
speed_gibs
):
# Convert speed from GiB/s to MiB/s
speed_mibs
=
speed_gibs
*
1024
# Round the speed to two decimal places
speed_mibs_rounded
=
round
(
speed_mibs
,
2
)
return
speed_mibs_rounded
@staticmethod
def
_add_logo
(
logo_path
,
logo_size
,
logo_x
,
logo_y
):
current_directory
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
...
...
@@ -77,51 +68,30 @@ class SeabornPlotter:
plt
.
savefig
(
title
+
"
.svg
"
,
format
=
'
svg
'
)
plt
.
show
()
def
plot_avg_speed
(
self
,
speed_dict
):
data_list
=
list
(
speed_dict
.
items
())
df
=
pd
.
DataFrame
(
data_list
,
columns
=
[
"
Data_set
"
,
"
MiB/s
"
])
# Plot erstellen
plt
.
figure
(
figsize
=
(
12
,
6
))
sns
.
lineplot
(
x
=
df
[
"
Data_set
"
],
y
=
df
[
"
MiB/s
"
],
marker
=
'
o
'
)
plt
.
xticks
(
rotation
=
90
)
# Rotiere die x-Achsenbeschriftungen für bessere Lesbarkeit
self
.
_add_logo
(
"
desy_logo.png
"
,
logo_size
=
0.1
,
logo_x
=
1.065
,
logo_y
=
1.07
)
plt
.
xlabel
(
'
Data_Set
'
)
plt
.
yscale
(
'
log
'
)
plt
.
ylabel
(
'
MiB/s
'
)
plt
.
title
(
'
AVG Speed
'
)
plt
.
tight_layout
()
plt
.
show
()
def
plot_statistics
(
self
):
dataframes
=
self
.
dataframe
avg_speed_dict
=
{}
# Sort the dataframes based on the 'size' column in ascending order
sorted_dataframes
=
sorted
(
dataframes
.
items
(),
key
=
lambda
x
:
x
[
1
][
'
size
'
].
min
())
num_plots
=
len
(
sorted_dataframes
)
# Number of DataFrames in the dictionary
rows
=
6
# Number of rows in the grid
cols
=
(
num_plots
+
1
)
//
rows
# Number of columns in the grid
# Subplots creation
fig
,
axes
=
plt
.
subplots
(
rows
,
cols
,
figsize
=
(
180
,
120
),
constrained_layout
=
True
)
fig
.
suptitle
(
'
Statistical Measures for Each Data_Set
'
,
fontsize
=
14
)
# For each DataFrame, create a boxplot and place it into the corresponding subplot
for
idx
,
(
df_key
,
df
)
in
enumerate
(
sorted_dataframes
):
sns
.
boxplot
(
data
=
df
,
order
=
[
'
min
'
,
'
max
'
,
'
mean
'
,
'
med
'
,
'
10%
'
,
'
90%
'
],
ax
=
axes
[
idx
//
cols
,
idx
%
cols
],
palette
=
'
pastel
'
)
axes
[
idx
//
cols
,
idx
%
cols
].
set_yscale
(
'
log
'
)
axes
[
idx
//
cols
,
idx
%
cols
].
set_ylabel
(
'
Time (Y-Axis)
'
)
axes
[
idx
//
cols
,
idx
%
cols
].
set_title
(
f
'
Group
{
df_key
}
'
,
fontsize
=
12
)
axes
[
idx
//
cols
,
idx
%
cols
].
grid
(
True
)
first_valid_index
=
df
[
'
avg
'
].
first_valid_index
()
speed_value
=
df
.
loc
[
first_valid_index
,
'
avg
'
]
avg_speed_dict
[
df_key
]
=
self
.
format_speed
(
speed_value
)
# Hide empty subplots, if any
for
idx
in
range
(
num_plots
,
rows
*
cols
):
fig
.
delaxes
(
axes
.
flatten
()[
idx
])
plt
.
show
()
self
.
plot_avg_speed
(
avg_speed_dict
)
for
df_key
,
df
in
dataframes
.
items
():
operations
=
df
[
'
Group
'
].
unique
()
num_plots
=
len
(
operations
)
rows
=
2
# Number of rows in the grid
cols
=
(
num_plots
+
1
)
//
rows
# Number of columns in the grid
fig
,
axes
=
plt
.
subplots
(
rows
,
cols
,
figsize
=
(
15
,
8
),
constrained_layout
=
True
)
fig
.
suptitle
(
f
'
Measures for
{
df_key
}
'
,
fontsize
=
14
)
global_min_speed
=
min
(
df
[
'
speed
'
].
min
()
for
df
in
dataframes
.
values
())
+
1e-5
global_max_speed
=
max
(
df
[
'
speed
'
].
max
()
for
df
in
dataframes
.
values
())
# Create a grid of boxplots for each operation
for
idx
,
operation
in
enumerate
(
operations
):
operation_df
=
df
[
df
[
'
Group
'
]
==
operation
]
ax
=
axes
[
idx
//
cols
,
idx
%
cols
]
sns
.
barplot
(
data
=
operation_df
,
errorbar
=
None
,
order
=
[
'
min
'
,
'
max
'
,
'
mean
'
,
'
med
'
,
'
10%
'
,
'
90%
'
],
ax
=
ax
,
palette
=
'
pastel
'
)
ax
.
set_yscale
(
'
log
'
)
ax
.
set_ylabel
(
'
Time (Y-Axis)
'
)
ax
.
set_title
(
f
'
Operation:
{
operation
}
'
,
fontsize
=
12
)
ax
.
grid
(
True
)
ax
.
set_ylim
(
bottom
=
global_min_speed
,
top
=
global_max_speed
)
# Set the Y-axis limits
plt
.
savefig
(
df_key
+
"
.svg
"
,
format
=
'
svg
'
)
plt
.
show
()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment