#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# BGNet (Background prediction with neural nets for Belle II at SuperKEKB)
# Author: The BGNet developers
#
# See git log for contributors and copyright holders.
# This file is licensed under MIT licence, see LICENSE.md.


"""
goReporter.py
=============

Script to create reports on background analysis using BGNet.


The script generates one report for each background observable that can be predicted with a
neural net. The report will be compiled as a pdf by creating a number of plots and inserting
them into pre defined report template.

22
23
Currently, the following observables are supported: Injection duration (HER/LER), Beam live (HER/LER),
Hitrates (CDC, VXD_RAD detectors) and specific ECL luminosity.
24
25
26

Usage:
------
27
28
29

For the regular daily report:

30
>>> python3 goReporter.py --year 2022 --month 3 --day 1 --archive path/to/archive
31

32
33
There is also an extended command line interface offering to supply time intervals
for feature attribution for special reports.
34
35
36
37
38
39
40
41
42
43
44

>>> python3 goReporter.py --year 2022 --month 3 --day 23 --look_back 4 --archive path/to/archive   \
                        --train_start "2022-03-01 00:00:00"    \
                        --train_end "2022-03-24 12:00:00"      \
                        --ref_start "2022-03-23 14:50:00"    \
                        --ref_end "2022-03-23 21:35:00"      \
                        --test_start "2022-03-23 23:40:00"   \
                        --test_end  "2022-03-24 09:00:00"    \
                        --tag "LER beam abort report"


45
46
"""

__version__ = "0.4.1"

from bg_net.archiver.indexer_utils import str_to_jst_datetime
import bg_net.bgnet as bgnet
import bg_net.run_training
import bg_net.lumi as lumi
import bg_net.injection_duration as injection_duration
import bg_net.beam_life as beam_life
import bg_net.hitrate as hitrate
import bg_net.report_utils
import bg_net.report_to_elog
from bg_net.report_to_webserver import ReportsToWebServer
import bg_net.archiver.indexer_utils as indexer_utils
from bg_net.archiver.skb_naming import get_unit
import bg_net.archiver.connect_vpn as connect_vpn

import math
import os
import yaml
import sys
import subprocess
import logging
import traceback
from sklearn.model_selection import train_test_split
from path_explain import PathExplainerTF
import shutil
import tempfile
from pathlib import Path
from argparse import ArgumentParser
from datetime import datetime, timedelta
from calendar import monthrange

from bg_net.report_utils import hitrate_prediction_plot, hitrate_two_axis_plot, complete_index
import matplotlib.pyplot as plt
from path_explain import summary_plot
import numpy as np
import pandas as pd

logging.basicConfig(stream=sys.stdout, level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')


def parse_args():
    """Parse command line arguments."""
    parser = ArgumentParser(description='Create reports of background analysis')
    add_arg = parser.add_argument
    add_arg('-y', '--year', type=int, default=2022, help='Calendar year as int')
    add_arg('-m', '--month', type=int, default=3, help='Month in year as int 1-12')
    add_arg('-d', '--day', type=int, default=23, help='Day in month as int')
    add_arg('-l', '--look_back', type=int, default=4, help='Plot history during last N days')
    add_arg('--train_back', type=int, default=5, help='Train model on data from last N days')
    add_arg('-a', '--archive', type=str, help='Path to archive, the data source')
    add_arg('-t', '--tag', type=str, default='Daily report', help='Type of report')
    add_arg('-e', '--exclude_last', type=int, default=0, help='Exclude last N days starting from report day')
    add_arg('--train_start', default=None, type=str, help='Start time for train times')
    add_arg('--train_end', default=None, type=str, help='End time for train times')
    add_arg('--ref_start', default=None, type=str, help='Start time for reference times')
    add_arg('--ref_end', default=None, type=str, help='End time for reference times')
    add_arg('--test_start', default=None, type=str, help='Start time for test times')
    add_arg('--test_end', default=None, type=str, help='End time for test times')
    add_arg('--elog', action='store_true', help='Upload report to elog')
    add_arg('--rocketchat', action='store_true', help='Post rocketchat message')
    add_arg('--elog_cred', default=None, type=str, help='Path to elog credentials file')
    add_arg('--skbsrv_cred', default=None, type=str, help='Path to skbsrv credentials file')
    add_arg('--b2rc_cred', default=None, type=str, help='Path to b2rc credentials file')
    add_arg('--report_path', default='./report_archive', type=str, help='Path for output reports')
    add_arg('--latest_day', action='store_true', help='Make report for latest (most recent) day in archive')
    add_arg('--config_path', type=str, default='./configs', help='Path to directory with model configs')
    add_arg('--shift', type=str, default='24h', help='Mode for daily reporting, defined values: 8h, 16h, 24h')
    add_arg('--vpn_cmd', type=str, default='/opt/cisco/anyconnect/bin/vpn', help="Shell command to create VPN connection to KEK")
    add_arg('--vpn_cred', type=str, default='', help="Path to user credentials for VPN")
    return parser.parse_args()


def get_report_day(year, month, day):
    """Set time window for leading data in the config dictionary"""
    report_day = str_to_jst_datetime("{:04d}-{:02d}-{:02d} 00:00:00".format(year, month, day))
    return report_day


def get_jobname(tag, channel, report_day, shift):
    if "Daily report" in tag:
        jobname = 'daily_report_{}_{}_{}_{}_{}'.format(shift, channel, report_day.year, report_day.month, report_day.day)
    else:
        jobname = '{}_{}_{}_{}_{}'.format(tag.replace(" ", "_"), channel, report_day.year, report_day.month, report_day.day)

    return jobname


def get_data_folder(archive):
    """Return output folder (directory) name for data"""
    return os.path.expandvars(archive)


def get_plot_times(ref_times, test_times, train_times):
    """Returns time interval for loading data for making prediction and attribution plots"""
    import copy
    plot_times = list(copy.deepcopy(train_times))

    for interval in ref_times:
        plot_times[0] = min(interval[0], plot_times[0])
        plot_times[1] = max(interval[1], plot_times[1])

    for interval in test_times:
        plot_times[0] = min(interval[0], plot_times[0])
        plot_times[1] = max(interval[1], plot_times[1])

    return tuple(plot_times)


def get_config(config_path):
    with open(config_path) as f:
        config = yaml.load(f,  Loader=yaml.FullLoader)
    return config


def get_latexed_feature(feature_name):
    """Return latex compatible string representation of feature name"""
    return feature_name.replace("_", "\\_")


def add_table_header(title):
    """Returns latex code string for header of table slide"""
    ret = "\\begin{frame}{MyTitle}\n"
    ret += "    \\begin{tiny}\n"
    ret += "    \\begin{table}[]\n"
    ret += "    \\centering\n"
    ret += "    \\begin{tabular}{|c|c|c|}\n"
    ret = ret.replace('MyTitle', get_latexed_feature(title))
    return ret


def add_table_footer(title):
    """Returns latex code string for footer of table slide"""
    ret = "    \\end{tabular}\n"
    ret += "    \\caption{MyTitle}\n"
    ret += "    \\end{table}\n"
    ret += "    \\end{tiny}\n"
    ret += "\\end{frame}\n\n"
    ret = ret.replace('MyTitle', get_latexed_feature(title))
    return ret


def add_table_content(feature_names, ncols=3):
    """Returns latex code string for content of table slide"""
    latexed_features = [name.replace("_", "\\_") for name in feature_names]
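    # Group the feature names into rows of ncols entries; columns are joined
    # with '&' and each row ends with a LaTeX row break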
    list_of_rows = ["&".join(latexed_features[i:i+ncols])+r"\\" for i in range(0, len(latexed_features), ncols)]
    table_text = "\n".join(list_of_rows)
    return table_text + "\n"


def add_table(feature_names, title):
    """Returns latex code string for table slide"""
    return add_table_header(title) + add_table_content(feature_names) + add_table_footer(title)


def compile_latex_inputs(features_dict):
    """Returns latex code string for compiling tables with input variable names"""
    # Each table has up to 90 feature names
    nsplit = 90

    # Latex code for compiling tables
    latex_code = ""

    for model, feature_names in features_dict.items():
        # Split features in groups for table
        feature_names_split = [feature_names[i:i+nsplit] for i in range(0, len(feature_names), nsplit)]
        ntables = math.ceil(len(feature_names)/nsplit)

        for itable in range(ntables):
            title = "List of input PVs for model {}, part {}".format(model, itable+1)
            latex_code += add_table(feature_names_split[itable], title)

    return latex_code


def compile_latex_report(
        channel,
        obs,
        tag,
        shift,
        template_path,
        look_back,
        report_day,
        train_times,
        attr_ref_times,
        attr_test_times,
        features_dict,
        report_path):
    """Compile report as pdf and copy report pdf into report_path"""

    year = report_day.year
    month = report_day.month
    day = report_day.day

    with tempfile.TemporaryDirectory() as tmpdirname:

        jobname = get_jobname(tag, channel, report_day, shift)
        filename = jobname + '.pdf'

        # Read in the file
        with open(template_path, 'r') as file:
            filedata = file.read()

        # Count number of inputs, over all submodels
        ninputs = 0
        for _, feature_names in features_dict.items():
            ninputs += len(feature_names)

        # Replace placeholders in template latex file
        filedata = filedata.replace('REPORT_DAY_VAR', '{}-{}-{}'.format(year, month, day))
        filedata = filedata.replace('OBS_VAR', '{}'.format(get_latexed_feature(obs)))
        filedata = filedata.replace('TAG_VAR', '{}'.format(tag))
        filedata = filedata.replace('CHANNEL_VAR', '{}'.format(channel))
        filedata = filedata.replace('TRAIN_START_VAR', '{}'.format(train_times[0].strftime('%Y-%m-%d %X')))
        filedata = filedata.replace('TRAIN_STOP_VAR', '{}'.format(train_times[1].strftime('%Y-%m-%d %X')))
        filedata = filedata.replace('REF_START_VAR', '{}'.format(attr_ref_times[0].strftime('%Y-%m-%d %X')))
        filedata = filedata.replace('REF_STOP_VAR', '{}'.format(attr_ref_times[1].strftime('%Y-%m-%d %X')))
        filedata = filedata.replace('TEST_START_VAR', '{}'.format(attr_test_times[0].strftime('%Y-%m-%d %X')))
        filedata = filedata.replace('TEST_STOP_VAR', '{}'.format(attr_test_times[1].strftime('%Y-%m-%d %X')))
        filedata = filedata.replace('LOOK_BACK_VAR', '{}'.format(look_back))
        filedata = filedata.replace('VERSION_VAR', '{}'.format(__version__))
        filedata = filedata.replace('NOW_VAR', '{}'.format(datetime.now().strftime('%Y-%m-%d %X')))
        filedata = filedata.replace('NINPUTS_VAR', '{}'.format(ninputs))
        filedata = filedata.replace('INPUTS_VAR', compile_latex_inputs(features_dict))

        template_base_path = os.path.split(template_path)[0]
        filedata = filedata.replace('GOE_LOGO_VAR', '{}'.format(os.path.join(template_base_path, "goe_Logo_2.jpg")))
        filedata = filedata.replace('BELLE2_LOGO_VAR', '{}'.format(os.path.join(template_base_path, "belle2-logo.png")))

        # Write the file out again
        texfile = os.path.join(tmpdirname, os.path.basename(template_path))
        with open(texfile, 'w') as file:
            file.write(filedata)

        # Run pdflatex twice so that LaTeX can resolve references in the template
        for _ in range(2):
            x = subprocess.call([
                'pdflatex',
                '-jobname',
                jobname,
                '-output-directory',
                tmpdirname,
                texfile])

        if x != 0:
            logging.error('Exit code != 0 for pdflatex {}'.format(template_path))
        else:
            logging.info('Exit code == 0 for pdflatex {}'.format(template_path))

        shutil.copyfile(os.path.join(tmpdirname, filename), os.path.join(report_path, filename))

    return x


def report_on_channel(
        channel,
        tag,
        obs,
        obs_unit,
        config,
        feature_names,
        template_path,
        year,
        month,
        day,
        data_path,
        look_back,
        ref_times,
        test_times,
        train_times,
        top_k,
        freq,
        report_path,
        convert_config,
        load_data_from_path,
        train_scalers_on_preprocessed_data,
        get_model,
        run_training,
        get_pathexplain_datasets,
        make_default_plots,
        shift,
        ):
    """Make report on background channel"""

    logging.info('Make report on {}'.format(channel))

    # Set config for training
    config = convert_config(config)
    config.update(obs=obs)

    # Output path for plots
    save_path = "./{}_plots".format(channel)

    # Load the data, for training ONLY!
    try:
        x, y, index = load_data_from_path(
            config,
            data_path=data_path,
            start_time=train_times[0],
            end_time=train_times[1],
            downsample_fraction=0.33,
            holdout_times=[]
        )
    except Exception as e:
        logging.error(traceback.format_exc())
        return

    if x.size == 0:
        logging.error('Error: No data for training found. Quit reporting on {}'.format(obs))
        return

    # Make train test split
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

    # Train scalers
    scaler_x, scaler_y = train_scalers_on_preprocessed_data(x_train, y_train)

    # Apply scaling to data
    x_train_scaled = scaler_x.transform(x_train)
    y_train_scaled = scaler_y.transform(y_train)
    x_test_scaled = scaler_x.transform(x_test)
    y_test_scaled = scaler_y.transform(y_test)
    x_scaled = scaler_x.transform(x)

    # Create model, random initialized
    model = get_model(config)

    # Run training procedure
    callbacks = bgnet.setup_default_callbacks(config)
    model = run_training(
        model,
        config=config,
        callbacks=callbacks,
        x=x_train_scaled,
        y=y_train_scaled,
        verbose=2
    )

    train_loss, train_mse, train_mae = model.evaluate(x_train_scaled, y_train_scaled, batch_size=64, verbose=0)
    test_loss, test_mse, test_mae = model.evaluate(x_test_scaled, y_test_scaled, batch_size=64, verbose=0)

    logging.info('Train MSE: {:.4f}\tTrain MAE: {:.4f}'.format(train_mse, train_mae))
    logging.info('Test MSE: {:.4f}\tTest MAE: {:.4f}'.format(test_mse, test_mae))

    # Load the data for prediction and attribution plots
    plot_times = get_plot_times(ref_times, test_times, train_times)
    if not plot_times == train_times:
        x, y, index = load_data_from_path(
            config,
            data_path=data_path,
            start_time=plot_times[0],
            end_time=plot_times[1],
            downsample_fraction=0,
            holdout_times=[])

    x_scaled = scaler_x.transform(x)
    y_pred_scaled = model.predict(x_scaled)
    y_pred = scaler_y.inverse_transform(y_pred_scaled)

    reference_dataset, test_dataset = get_pathexplain_datasets(
        x,
        index,
        ref_times,
        test_times,
    )

    try:
        reference_dataset_scaled = scaler_x.transform(reference_dataset)
        test_dataset_scaled = scaler_x.transform(test_dataset)
    except ValueError as e:
        logging.info(e)
        logging.info('Skip reporting on this channel!')
        return

    explainer = PathExplainerTF(model)

    attributions = explainer.attributions(
        inputs=test_dataset_scaled.astype('float32'),
        baseline=reference_dataset_scaled.astype('float32'),
        batch_size=100,
        num_samples=200,
        use_expectation=True,
        output_indices=0,
        verbose=True)

    # Convert attributions to the same unit as y
    attributions = scaler_y.inverse_transform(attributions)

    # Create plots
    shutil.rmtree(save_path, ignore_errors=True)
    Path(save_path).mkdir(parents=True, exist_ok=True)

    make_default_plots(
        obs=obs,
        index=index,
        x=x,
        y=y,
        attributions=attributions,
        feature_names=feature_names,
        reference_data_times=ref_times,
        test_data_times=test_times,
        save_path=save_path,
        y_pred=y_pred,
        test_dataset=test_dataset,
        obs_unit=obs_unit,
        look_back=look_back,
        top_k=top_k,
        channel=channel,
        freq=freq,
    )

    report_day = get_report_day(year, month, day)

    compile_latex_report(
        channel=channel,
        obs=obs,
        tag=tag,
        shift=shift,
        template_path=template_path,
        look_back=look_back,
        report_day=report_day,
        train_times=train_times,
        attr_ref_times=ref_times[0],
        attr_test_times=test_times[0],
        features_dict={channel: feature_names},
        report_path=report_path,
    )

    return


def report_on_hitrate(
        channel,
        tag,
        obs,
        obs_unit,
        config,
        feature_names,
        template_path,
        year,
        month,
        day,
        data_path,
        look_back,
        ref_times,
        test_times,
        train_times,
        top_k,
        freq,
        report_path,
        convert_config,
        load_data_from_path,
        train_scalers_on_preprocessed_data,
        get_model,
        run_training,
        get_pathexplain_datasets,
        make_default_plots,
        resample_frequency='5min',
        shift='24h',
        ):
    """Make report on background channel"""

    logging.info('Make report on {}'.format(channel))

    # Set config for training
    config = convert_config(config)
    config.update(obs=obs)

    # Output path for plots
    save_path = "./{}_plots".format(channel)

    # Load the data, for training ONLY!
    try:
        x, y, index = load_data_from_path(
            config,
            data_path=data_path,
            start_time=train_times[0],
            end_time=train_times[1],
            downsample_fraction=0.33,
            holdout_times=[])
    except Exception as e:
        logging.error(traceback.format_exc())
        return

    if x[0].size == 0:
        logging.error('Error: No data for training found. Quit reporting on {}'.format(obs))
        return

    # Make train test split
    (x_train, y_train), (x_test, y_test) = bgnet.split_tensors(x+(y,), random_state=42)

    # Compute sample weights
    sample_weight = hitrate.get_sample_weights(
        X=x_train[0],
        weights_single_beams=config['preparation']['weights_scale_single_beam'],
        weights_both_beams=config['preparation']['weights_scale_both_beams']
    )

    # Compute mask for data w/o injection bg (used for prefit of hitrate model)
    mask = bgnet.get_no_injection_mask(x_train[6], x_train[8])

    # Train scalers
    scaler_x, scaler_y = train_scalers_on_preprocessed_data(x_train, y_train)

    # Apply scaler to data
    x_train_scaled = bgnet.scale_x(x_train, scaler_x)
    y_train_scaled = scaler_y.transform(y_train)
    x_test_scaled = bgnet.scale_x(x_test, scaler_x)
    y_test_scaled = scaler_y.transform(y_test)
    x_scaled = bgnet.scale_x(x, scaler_x)

    # Create model, random initialized
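    # Prepare constant parameters for the four background sub-models
    # (HER/LER beam gas and Touschek) from the training config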
    const_her_beamgas = bg_net.run_training.process_const_model_params(
        config['training']['const_her_beamgas_params'], model='her_beamgas', scalers=(scaler_x, scaler_y)
    )
    const_her_touschek = bg_net.run_training.process_const_model_params(
        config['training']['const_her_touschek_params'], model='her_touschek', scalers=(scaler_x, scaler_y)
    )
    const_ler_beamgas = bg_net.run_training.process_const_model_params(
        config['training']['const_ler_beamgas_params'], model='ler_beamgas', scalers=(scaler_x, scaler_y)
    )
    const_ler_touschek = bg_net.run_training.process_const_model_params(
        config['training']['const_ler_touschek_params'], model='ler_touschek', scalers=(scaler_x, scaler_y)
    )

    model = get_model(
        injection_pvs_her=config['preparation']['her_inj_pvs'],
        injection_pvs_ler=config['preparation']['ler_inj_pvs'],
        sensitivity_pvs_her_beamgas=config['preparation']['her_beamgas_sens_pvs'],
        sensitivity_pvs_her_touschek=config['preparation']['her_touschek_sens_pvs'],
        sensitivity_pvs_ler_beamgas=config['preparation']['ler_beamgas_sens_pvs'],
        sensitivity_pvs_ler_touschek=config['preparation']['ler_touschek_sens_pvs'],
        obs=config['obs'],
        useDetNoise=config['training']['useDetNoise'],
        sensitivity_net_hidden_units=config['training']['sensitivity_net_hidden_units'],
        sensitivity_net_hidden_layers=config['training']['sensitivity_net_hidden_layers'],
        injection_net_hidden_units=config['training']['injection_net_hidden_units'],
        injection_net_hidden_layers=config['training']['injection_net_hidden_layers'],
        const_her_beamgas=const_her_beamgas,
        const_her_touschek=const_her_touschek,
        const_ler_beamgas=const_ler_beamgas,
        const_ler_touschek=const_ler_touschek,
    )

    # Run training procedure
    callbacks = bgnet.setup_default_callbacks(config)

    model = run_training(
        model,
        config=config,
        callbacks=callbacks,
        x=x_train_scaled,
        y=y_train_scaled,
        sample_weight=sample_weight,
        mask=mask,
        verbose=2
    )

    train_loss, train_mse, train_mae = model.evaluate(x_train_scaled, y_train_scaled, batch_size=64, verbose=0)
    test_loss, test_mse, test_mae = model.evaluate(x_test_scaled, y_test_scaled, batch_size=64, verbose=0)

    logging.info('Train MSE: {:.4f}\tTrain MAE: {:.4f}'.format(train_mse, train_mae))
    logging.info('Test MSE: {:.4f}\tTest MAE: {:.4f}'.format(test_mse, test_mae))

    # Load the data for prediction and attribution plots
    plot_times = get_plot_times(ref_times, test_times, train_times)
    # if not plot_times == train_times:
    x, y, index = load_data_from_path(
        config,
        data_path=data_path,
        start_time=plot_times[0],
        end_time=plot_times[1],
        downsample_fraction=0,
        holdout_times=[])

    x_scaled = bgnet.scale_x(x, scaler_x)

    # Create plots
    shutil.rmtree(save_path, ignore_errors=True)
    Path(save_path).mkdir(parents=True, exist_ok=True)

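    # Collect the per-component predictions and the measured observable in one
    # time-indexed DataFrame, fill gaps on a regular 1 s grid and resample for plotting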
    decomp_bgnet = hitrate.get_predictions(
        model=model, x_scaled=x_scaled, scalers_x=scaler_x, scaler_y=scaler_y
    )
    for comp in decomp_bgnet:
        decomp_bgnet[comp] = decomp_bgnet[comp].squeeze(-1)
    decomp_bgnet["VXD_Rad_BP_BW_325_DoseRate"] = y[:, 0]
    decomp_bgnet = pd.DataFrame(data=decomp_bgnet, index=index)
    decomp_bgnet = decomp_bgnet[~decomp_bgnet.index.duplicated(keep='first')]
    decomp_bgnet = decomp_bgnet.reindex(pd.date_range(start=decomp_bgnet.index[0], end=decomp_bgnet.index[-1], freq="1s"))
    decomp_bgnet = decomp_bgnet.resample(resample_frequency).mean()

    hitrate_prediction_plot(
        obs,
        decomp_bgnet,
        reference_data_times=ref_times,
        test_data_times=test_times,
        look_back=look_back)
    plt.savefig(os.path.join(save_path, "prediction_{}.pdf".format(channel)), bbox_inches='tight')
    plt.close('all')
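    # Feature attribution per sub-model: explain each sub-model output with
    # path_explain and plot the top ranked input PVs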
    for sub_model_name in hitrate.sub_models:

        # Create list of names for all input features
        feature_names = hitrate.get_interpretable_feature_names(config, name=sub_model_name)

        if len(feature_names) <= 1:
            continue

        # Create an interpretable model for sub_model
        interpret_model = hitrate.get_intepretable_model(model, num_inputs=len(feature_names), name=sub_model_name)

        # Get explainer for model
        explainer = PathExplainerTF(interpret_model)

        # path_explain expects a single input tensor, so concatenate the inputs
        # relevant for this sub-model
        interpret_x_scaled = hitrate.get_interpretable_inputs(x_scaled, sub_model_name)
        interpret_x = hitrate.get_interpretable_inputs(x, sub_model_name)

        reference_dataset, test_dataset = get_pathexplain_datasets(
            interpret_x_scaled,
            index,
            ref_times,
            test_times,
        )

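        # Limit the reference and test datasets to at most 500 points each
        # before computing attributions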
        reference_dataset = bgnet.downsample_data_to_npoints(reference_dataset, 500)
        test_dataset = bgnet.downsample_data_to_npoints(test_dataset, 500)

        attributions = explainer.attributions(
            inputs=test_dataset.astype('float32'),
            baseline=reference_dataset.astype('float32'),
            batch_size=100,
            num_samples=200,
            use_expectation=True,
            output_indices=0,
            verbose=False,
        )

        if attributions.shape[0] > 0:
            # Convert attributions to the same unit as y
            attributions = scaler_y.inverse_transform(attributions)

            if len(feature_names) < top_k:
                plot_top_k = len(feature_names)
            else:
                plot_top_k = top_k

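            # Summary plot of attributions against feature values for the top ranked features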
            summary_plot(
                attributions=attributions,
                feature_values=test_dataset,
                feature_names=feature_names,
                plot_top_k=plot_top_k,
                figsize=[8, 4])

            plt.savefig(
                os.path.join(
                    save_path,
                    "feature_attribution_{}_{}.pdf".format(
                        channel,
                        sub_model_name)),
                bbox_inches='tight')
            plt.close('all')

            sub_model_pred_name = hitrate.map_sub_models_to_decomp[sub_model_name]
            sub_model_pred_y = decomp_bgnet[sub_model_pred_name]
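            # Rank features by mean absolute attribution (largest first)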
            mean_abs = np.mean(np.abs(attributions), axis=0)
            ranking = [x for (y, x) in sorted(zip(mean_abs.tolist(), feature_names), key=lambda pair: pair[0], reverse=True)]
            plotting_y, _ = complete_index(y, index, freq="1s", resample_frequency=resample_frequency)
            interpret_x, plotting_index = complete_index(interpret_x, index, freq="1s", resample_frequency=resample_frequency)

            count_two_axis_plots = 0
            for i in range(plot_top_k):
                PV_name = ranking[i]
                PV_pos = feature_names.index(PV_name)

                if "GATE" not in PV_name and count_two_axis_plots <= 3:
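                    # Overlay the ranked PV and the corresponding sub-model
                    # prediction in a two-axis time series plot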
                    plot = hitrate_two_axis_plot(
                        obs,
                        sub_model_pred_name=sub_model_pred_name,
                        index=plotting_index,
                        x=interpret_x,
                        y=plotting_y,
                        sub_model_pred_y=sub_model_pred_y,
                        PV=PV_name,
                        PV_pos=PV_pos,
                        reference_data_times=ref_times,
                        test_data_times=test_times,
                        obs_unit=obs_unit,
                        PV_unit="NA",
                        look_back=look_back,)

                    plot.title(
                        "Attribution rank: {}   Mean attribution value: {:.3f} {}".format(
                            i, mean_abs[PV_pos], obs_unit), y=1.2)
                    plot.tight_layout()
                    plot.savefig(
                        os.path.join(
                            save_path,
                            "rank{}_{}_{}.pdf".format(
                                count_two_axis_plots,
                                channel,
                                sub_model_name)),
                        bbox_inches='tight')

                    plot.close('all')
                    count_two_axis_plots += 1
        else:
            logging.info('Empty attributions, shape: {}'.format(attributions.shape))
            logging.info('Skip attribution plots!')

    report_day = get_report_day(year, month, day)

    compile_latex_report(
        channel=channel,
        obs=obs,
        tag=tag,
        shift=shift,
        template_path=template_path,
        look_back=look_back,
        report_day=report_day,
        train_times=train_times,
        attr_ref_times=ref_times[0],
        attr_test_times=test_times[0],
        features_dict={
            sub_model: hitrate.get_interpretable_feature_names(
                config,
                name=sub_model) for sub_model in hitrate.sub_models},
        report_path=report_path,
    )

    return


def make_reports(year, month, day, data_path, tag, shift, look_back, ref_times, test_times, train_times, report_path, config_path):
    """
    Make reports

    Returns list of channels that got reported.
    """

    channels = []

    # Create report archive if not exists
    Path(report_path).mkdir(parents=True, exist_ok=True)

    # Report on hitrate
    config = get_config(os.path.join(config_path, 'training_default.yaml'))
    channel = "VXD_Rad_BP_BW_325_DoseRate"

    report_on_hitrate(
        channel=channel,
        tag=tag,
        obs=channel,
        obs_unit="mRad/s (uncalibrated)",
        config=config,
        feature_names=[],
        template_path=os.path.join(config_path, 'templates/daily_report_hitrate_template.tex'),
        year=year,
        month=month,
        day=day,
        data_path=data_path,
        look_back=look_back,
        ref_times=ref_times,
        test_times=test_times,
        train_times=train_times,
        top_k=15,
        freq="5min",
        report_path=report_path,
        convert_config=hitrate.convert_config,
        load_data_from_path=hitrate.load_data_from_path,
        train_scalers_on_preprocessed_data=hitrate.train_scalers_on_preprocessed_data,
        get_model=hitrate.get_model,
        run_training=hitrate.run_training,
        get_pathexplain_datasets=hitrate.get_pathexplain_datasets,
        make_default_plots=bg_net.report_utils.make_default_plots,
        shift=shift,
    )
    channels.append(channel)

    # Report on hitrate
    config = get_config(os.path.join(config_path, 'training_default.yaml'))
    channel = "CDC_CUR_AVERAGE"

    report_on_hitrate(
        channel=channel,
        tag=tag,
        obs=channel,
        obs_unit="mA",
        config=config,
        feature_names=[],
        template_path=os.path.join(config_path, 'templates/daily_report_hitrate_template.tex'),
        year=year,
        month=month,
        day=day,
        data_path=data_path,
        look_back=look_back,
        ref_times=ref_times,
        test_times=test_times,
        train_times=train_times,
        top_k=15,
        freq="5min",
        report_path=report_path,
        convert_config=hitrate.convert_config,
        load_data_from_path=hitrate.load_data_from_path,
        train_scalers_on_preprocessed_data=hitrate.train_scalers_on_preprocessed_data,
        get_model=hitrate.get_model,
        run_training=hitrate.run_training,
        get_pathexplain_datasets=hitrate.get_pathexplain_datasets,
        make_default_plots=bg_net.report_utils.make_default_plots,
        shift=shift,
    )
    channels.append(channel)

    # Report on hitrate
    config = get_config(os.path.join(config_path, 'training_default.yaml'))
    channel = "TOP_TYPE2_avg_scalerRate_PMT17-32"

    report_on_hitrate(
        channel=channel,
        tag=tag,
        obs=channel,
        obs_unit="Hz",
        config=config,
        feature_names=[],
        template_path=os.path.join(config_path, 'templates/daily_report_hitrate_template.tex'),
        year=year,
        month=month,
        day=day,
        data_path=data_path,
        look_back=look_back,
        ref_times=ref_times,
        test_times=test_times,
        train_times=train_times,
        top_k=15,
        freq="5min",
        report_path=report_path,
        convert_config=hitrate.convert_config,
        load_data_from_path=hitrate.load_data_from_path,
        train_scalers_on_preprocessed_data=hitrate.train_scalers_on_preprocessed_data,
        get_model=hitrate.get_model,
        run_training=hitrate.run_training,
        get_pathexplain_datasets=hitrate.get_pathexplain_datasets,
        make_default_plots=bg_net.report_utils.make_default_plots,
        shift=shift,
    )
    channels.append(channel)

    # Report on channel lumi
    config = get_config(os.path.join(config_path, 'training_lumi.yaml'))
    channel = 'lumi'

    report_on_channel(
        channel=channel,
        tag=tag,
        obs=lumi.OBS,
        obs_unit=get_unit(lumi.OBS),
        config=config,
        feature_names=config['preparation']['lumi_pvs'],
        template_path=os.path.join(config_path, 'templates/daily_report_lumi_template.tex'),
        year=year,
        month=month,
        day=day,
        data_path=data_path,
        look_back=look_back,
        ref_times=ref_times,
        test_times=test_times,
        train_times=train_times,
        top_k=20,
        freq=config['preparation']['freq'],
        report_path=report_path,
        convert_config=lumi.convert_config,
        load_data_from_path=lumi.load_data_from_path,
        train_scalers_on_preprocessed_data=lumi.train_scalers_on_preprocessed_data,
        get_model=lumi.get_model,
        run_training=lumi.run_training,
        get_pathexplain_datasets=lumi.get_pathexplain_datasets,
        make_default_plots=bg_net.report_utils.make_default_plots,
        shift=shift,
    )
    channels.append(channel)

    # Report on channel injection duration HER
    config = get_config(os.path.join(config_path, 'training_inj_duration.yaml'))
    channel = 'HER_INJ'

    report_on_channel(
        channel=channel,
        tag=tag,
        obs=injection_duration.OBS_HER,
        obs_unit=get_unit(injection_duration.OBS_HER),
        config=config,
        feature_names=config['preparation']['her_inj_pvs'],
        template_path=os.path.join(config_path, 'templates/daily_report_inj_her_template.tex'),
        year=year,
        month=month,
        day=day,
        data_path=data_path,
        look_back=look_back,
        ref_times=ref_times,
        test_times=test_times,
        train_times=train_times,
        top_k=10,
        freq=config['preparation']['freq'],
        report_path=report_path,
        convert_config=injection_duration.convert_config,
        load_data_from_path=injection_duration.load_data_from_path,
        train_scalers_on_preprocessed_data=injection_duration.train_scalers_on_preprocessed_data,
        get_model=injection_duration.get_model,
        run_training=injection_duration.run_training,
        get_pathexplain_datasets=injection_duration.get_pathexplain_datasets,
        make_default_plots=bg_net.report_utils.make_default_plots,
        shift=shift,
    )
    channels.append(channel)

    # Report on channel injection duration LER
    config = get_config(os.path.join(config_path, 'training_inj_duration.yaml'))
    channel = 'LER_INJ'

    report_on_channel(
        channel=channel,
        tag=tag,
        obs=injection_duration.OBS_LER,
        obs_unit=get_unit(injection_duration.OBS_LER),
        config=config,
        feature_names=config['preparation']['ler_inj_pvs'],
        template_path=os.path.join(config_path, 'templates/daily_report_inj_ler_template.tex'),
        year=year,
        month=month,
        day=day,
        data_path=data_path,
        look_back=look_back,
        ref_times=ref_times,
        test_times=test_times,
        train_times=train_times,
        top_k=10,
        freq=config['preparation']['freq'],
        report_path=report_path,
        convert_config=injection_duration.convert_config,
        load_data_from_path=injection_duration.load_data_from_path,
        train_scalers_on_preprocessed_data=injection_duration.train_scalers_on_preprocessed_data,
        get_model=injection_duration.get_model,
        run_training=injection_duration.run_training,
        get_pathexplain_datasets=injection_duration.get_pathexplain_datasets,
        make_default_plots=bg_net.report_utils.make_default_plots,
        shift=shift,
    )
    channels.append(channel)

    # Report on channel beam life HER
    config = get_config(os.path.join(config_path, 'training_beam_life.yaml'))
    channel = 'HER_LIFE'
