Commit cd8e0457 authored by Lukas's avatar Lukas
Browse files

fixed jupyter notebook

parent 245d64f1
%% Cell type:code id: tags:
``` python
# BGNet (Background prediction with neural nets for Belle II at SuperKEKB)
# Author: The BGNet developers
#
# See git log for contributors and copyright holders.
# This file is licensed under MIT licence, see LICENSE.md.
# Luminosity: Jupyter notebook for Belle II luminosity studies
#
# This notebook trains a model that predicts the luminosity and computes feature attributions
# that give insight into the reasons for changes of the beam life
#
# This notebook is part of bg_net project: https://gitlab.desy.de/benjamin.schwenker/bg_net.git
# Access to data can be provided by author on reasonable request.
#
# This notebook uses the path_explain project (https://github.com/suinleelab/path_explain) to compute feature
# attributions and plot them.
```
%% Cell type:markdown id: tags:
# Imports
%% Cell type:code id: tags:
``` python
import yaml
import os
from path_explain import PathExplainerTF, summary_plot
from sklearn.model_selection import train_test_split
import bg_net.lumi as lumi
import bg_net.bgnet as bgnet
import bg_net.run_training as run_training
import bg_net.report_utils
from bg_net.archiver.indexer_utils import str_to_jst_datetime
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
import shutil
from pathlib import Path
from bg_net.archiver.skb_mr_optic_pvs import SKB2_MR_OPTICS_HER_PVS as SKB2_MR_OPTICS_HER_PVS
from bg_net.archiver.skb_mr_optic_pvs import SKB2_MR_OPTICS_LER_PVS as SKB2_MR_OPTICS_LER_PVS
```
%% Cell type:markdown id: tags:
# Loading the Data
%% Cell type:code id: tags:
``` python
# Observable to optimize and its physical unit (defaults from bg_net.lumi)
obs = lumi.OBS
obs_unit = lumi.OBS_UNIT

# Root of the archived accelerator data (B2_ARCHIVER must be set in the env)
data_path = os.path.expandvars("${B2_ARCHIVER}")

# Analysis time window (JST): the whole of March 2022
start = str_to_jst_datetime("2022-03-01 00:00:00")
end = str_to_jst_datetime("2022-03-31 23:59:59")

# Load the default training configuration and convert str fields to timestamps
with open('configs/training_lumi.yaml') as f:
    config = yaml.load(f, Loader=yaml.FullLoader)
config = lumi.convert_config(config)

# Specialize the config for this training run
config = run_training.create_train_config(
    config,
    obs=obs,
    data_path=data_path,
    times=[start, end],
    local_dir='./lumi_results',
)

# Append the SuperKEKB main-ring optics PVs to the feature list,
# normalizing PV names (':' -> '_') — presumably to match archived column names
optic_pvs = [pv.replace(':', '_')
             for pv in list(SKB2_MR_OPTICS_HER_PVS) + list(SKB2_MR_OPTICS_LER_PVS)]
config['preparation']['lumi_pvs'] = config['preparation']['lumi_pvs'] + optic_pvs
```
%% Cell type:code id: tags:
``` python
# Load the data
# Load features (x), target (y) and time index for the configured window.
# NOTE(review): shapes depend on bg_net — presumably x is (n_samples, n_features)
# and y is (n_samples, 1), given the y[:, 0] indexing used later; verify in bg_net.lumi.
x, y, index = lumi.load_data(config, start_time=start, end_time=end)
```
%% Cell type:code id: tags:
``` python
# NOTE(review): development leftover — reloads bg_net.lumi so local edits are
# picked up without restarting the kernel. Safe to remove for a clean run.
import importlib
importlib.reload(lumi)
```
%% Cell type:code id: tags:
``` python
# Drop features deemed bad for this window — presumably unusable/constant
# columns; exact semantics live in bg_net.lumi.remove_bad_features.
x = lumi.remove_bad_features(x, index, config, optic_pvs)
```
%% Cell type:code id: tags:
``` python
# Random 67/33 train/test split with a fixed seed for reproducibility.
# NOTE(review): a random split on time-series data interleaves train and test
# samples in time — confirm this is intended rather than a chronological split.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
```
%% Cell type:code id: tags:
``` python
# Fit feature/target scalers on the training split only (avoids leaking test statistics)
scaler_x, scaler_y = lumi.train_scalers_on_preprocessed_data(x_train, y_train)
```
%% Cell type:code id: tags:
``` python
# Keep the fitted scalers in the config — presumably consumed by downstream
# bg_net helpers (e.g. checkpointing/reporting); confirm in bg_net.
config['scalers']=(scaler_x, scaler_y)
```
%% Cell type:code id: tags:
``` python
# Apply the already-fitted scalers to every split; transform() is stateless,
# so the evaluation order of these calls does not matter.
x_train_scaled, x_test_scaled, x_scaled = (scaler_x.transform(d) for d in (x_train, x_test, x))
y_train_scaled, y_test_scaled, y_scaled = (scaler_y.transform(d) for d in (y_train, y_test, y))
```
%% Cell type:markdown id: tags:
# Create the Model
%% Cell type:code id: tags:
``` python
# Build the model from the config — presumably a tf.keras model, since it is
# later passed to PathExplainerTF and uses .evaluate/.predict; verify in bg_net.lumi.
model = lumi.get_model(config)
```
%% Cell type:markdown id: tags:
# Training the Model
%% Cell type:code id: tags:
``` python
# Default training callbacks (defined in bg_net.bgnet; driven by the config)
callbacks = bgnet.setup_default_callbacks(config)
# Run training procedure on the scaled training split; verbose=2 = one line per epoch
model = lumi.run_training(
model,
config=config,
callbacks=callbacks,
x=x_train_scaled,
y=y_train_scaled,
verbose=2
)
```
%% Cell type:markdown id: tags:
# Validate the training
%% Cell type:code id: tags:
``` python
# Evaluate on both splits (scaled units). The 3-tuple unpacking assumes the
# model was compiled with exactly [loss, mse, mae] — confirm in lumi.get_model.
train_loss, train_mse, train_mae = model.evaluate(x_train_scaled, y_train_scaled, batch_size=64, verbose=0)
test_loss, test_mse, test_mae = model.evaluate(x_test_scaled, y_test_scaled, batch_size=64, verbose=0)
```
%% Cell type:code id: tags:
``` python
# Report the final metrics (in scaled units, since evaluation used scaled data)
print(f'Train MSE: {train_mse:.4f}\tTrain MAE: {train_mae:.4f}')
print(f'Test MSE: {test_mse:.4f}\tTest MAE: {test_mae:.4f}')
```
%% Cell type:code id: tags:
``` python
# Predict on the test split and map predictions back to physical units of y
y_test_pred_scaled = model.predict(x_test_scaled)
y_test_pred = scaler_y.inverse_transform(y_test_pred_scaled)
```
%% Cell type:code id: tags:
``` python
# Scatter of predicted vs. measured observable on the test split with a
# linear-regression overlay (seaborn regplot).
# NOTE(review): import placed mid-notebook — consider moving it to the imports cell.
import seaborn as sns
ax = sns.regplot(x=y_test_pred, y=y_test)
ax.set_xlabel("Predicted {} on test data ".format(obs), fontsize = 10)
ax.set_ylabel("Measured {} on test data ".format(obs), fontsize = 10)
```
%% Cell type:markdown id: tags:
# Predict and plot on all data
%% Cell type:code id: tags:
``` python
# Predict over the full dataset and convert back to physical units of y
y_pred_scaled = model.predict(x_scaled)
y_pred = scaler_y.inverse_transform(y_pred_scaled)
```
%% Cell type:code id: tags:
``` python
# Overlay measured and predicted observable over the full time range
fig = go.Figure()
for label, series in (('Observed', y[:, 0]), ('Predicted', y_pred[:, 0])):
    fig.add_trace(go.Scatter(x=index, y=series, name=label, mode='markers'))
fig.show()
```
%% Cell type:markdown id: tags:
# Interpreting the Model
%% Cell type:markdown id: tags:
Path explain API requires a test and a reference dataset. It then calculates an attribution value for each
input at each point in the test dataset. These attribution values reflect how important that input is for the
difference between that point and the reference dataset.
%% Cell type:code id: tags:
``` python
# Define reference and test times for path explain. Each dataset requires at least one timeframe, more are possible.
# Timeframes are [start, end] pairs in JST (+0900).
reference_data_times = (
['2022-03-06 22:00:00+0900', '2022-03-07 22:00:00+0900'],
)
test_data_times = (
['2022-03-07 23:00:00+0900', '2022-03-08 23:00:00+0900'],
)
```
%% Cell type:code id: tags:
``` python
# Build reference/test datasets from the time windows above and scale them.
# NOTE(review): the following cell overwrites these with a random split —
# run only one of the two cells; confirm which selection is intended.
reference_dataset, test_dataset = lumi.get_pathexplain_datasets(
x,
index,
reference_data_times,
test_data_times
)
reference_dataset_scaled = scaler_x.transform(reference_dataset)
test_dataset_scaled = scaler_x.transform(test_dataset)
```
%% Cell type:code id: tags:
``` python
# NOTE(review): this cell discards the time-window datasets built in the
# previous cell and replaces them with a random 50/50 split of all data.
# That makes reference_data_times/test_data_times (still passed to the
# reporting cell below) inconsistent with the datasets actually explained —
# confirm which variant is intended.
reference_dataset, test_dataset = train_test_split(x, test_size=0.50, random_state=42)
reference_dataset_scaled = scaler_x.transform(reference_dataset)
test_dataset_scaled = scaler_x.transform(test_dataset)
```
%% Cell type:code id: tags:
``` python
# Human-readable feature names for the attribution plots below
feature_names = lumi.get_feature_names(config, obs=obs)
```
%% Cell type:markdown id: tags:
The next step is to define our explainer object.
%% Cell type:code id: tags:
``` python
# Attribution engine from path_explain wrapping the trained TF model
explainer = PathExplainerTF(model)
```
%% Cell type:markdown id: tags:
Now we actually have to run the attribution function. Here we use expected gradients, because the argument use_expectation is set to true and the baseline is the reference data. Alternatively, we could set use_expectation to false and the baseline to the zeros vector to use integrated gradients instead.
%% Cell type:code id: tags:
``` python
# Expected-gradients attributions (use_expectation=True) of model output 0,
# sampling 200 baselines per test point from the reference dataset.
attributions = explainer.attributions(inputs=test_dataset_scaled.astype('float32'),
baseline=reference_dataset_scaled.astype('float32'),
batch_size=100,
num_samples=200,
use_expectation=True,
output_indices=0,
verbose=True)
# To have attributions in unit of y
# NOTE(review): if scaler_y is a StandardScaler, inverse_transform both
# rescales AND adds the target mean; attributions are differences, so only
# the scale factor should be applied — confirm scaler_y has zero offset, or
# multiply by scaler_y.scale_ instead.
attributions = scaler_y.inverse_transform(attributions)
```
%% Cell type:code id: tags:
``` python
# Plot the top-k features by attribution magnitude; never more than we have.
plot_top_k = min(20, len(feature_names))
summary_plot(
    attributions,
    test_dataset,
    feature_names=feature_names,
    plot_top_k=plot_top_k,
    figsize=[8, 4],
)
#plt.savefig("./lumi_plots/random_attribution_no_emitt.pdf", bbox_inches='tight')
```
%% Cell type:markdown id: tags:
# Reporting
%% Cell type:code id: tags:
``` python
# Recreate the output directory from scratch so stale plots never survive a rerun
shutil.rmtree("./lumi_plots", ignore_errors=True)
Path("./lumi_plots").mkdir(parents=True, exist_ok=True)
# Generate the default report plots (observed vs. predicted, attributions, ...)
bg_net.report_utils.make_default_plots(
    obs=config['obs'],
    index=index,
    x=x,
    y=y,
    attributions=attributions,
    feature_names=config['preparation']['lumi_pvs'],
    reference_data_times=reference_data_times,
    test_data_times=test_data_times,
    save_path="./lumi_plots",
    y_pred=y_pred,
    test_dataset=test_dataset,
    obs_unit=obs_unit,
    show=True,
    # BUG FIX: look_back was passed twice (31 and 7), a duplicate-keyword
    # SyntaxError that made this cell unrunnable. Keeping the later value;
    # TODO(review): confirm whether 31 (the full March window) was intended.
    look_back=7,
    freq=config['preparation']['freq'],
)
```
%% Cell type:code id: tags:
``` python
```
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment