Commit 62b8e82c authored by Benjamin Schwenker
Browse files

Merge branch '46-parquet-dumps-from-reporting' into 'master'

add option for writing 1Hz predictions to reporting for closer look

Closes #46

See merge request !48
parents 9487bad2 3cf372b9
......@@ -71,6 +71,7 @@ from sklearn.model_selection import train_test_split
from path_explain import PathExplainerTF
import shutil
import tempfile
import time
from pathlib import Path
from argparse import ArgumentParser
from datetime import datetime, timedelta
......@@ -109,6 +110,7 @@ def parse_args():
add_arg('--skbsrv_cred', default=None, type=str, help='Path to skbsrv credentials file')
add_arg('--b2rc_cred', default=None, type=str, help='Path to b2rc credentials file')
add_arg('--report_path', default='./report_archive', type=str, help='Path for ouput reports')
add_arg('--output_path', default='./output_archive', type=str, help='Path the predicted 1Hz time series as parquet file')
add_arg('--latest_day', action='store_true', help='Make report for latest (most recent) day in archive')
add_arg('--config_path', type=str, default='./configs', help='Path to directory with model configs')
add_arg('--shift', type=str, default='24h', help='Mode for daily reporting, defined values: 8h, 16h, 24h')
......@@ -501,8 +503,10 @@ def report_on_hitrate(
run_training,
get_pathexplain_datasets,
make_default_plots,
output_path,
resample_frequency='5min',
shift='24h',
dump_data=False,
):
"""Make report on background channel"""
......@@ -634,6 +638,15 @@ def report_on_hitrate(
decomp_bgnet = pd.DataFrame(data=decomp_bgnet, index=index)
decomp_bgnet = decomp_bgnet[~decomp_bgnet.index.duplicated(keep='first')]
decomp_bgnet = decomp_bgnet.reindex(pd.date_range(start=decomp_bgnet.index[0], end=decomp_bgnet.index[-1], freq="1s"))
if dump_data:
# Dump 1Hz timeseries here
report_day = "{:04d}-{:02d}-{:02d}".format(year, month, day)
dump_prediction_path = output_path + '/{}'.format(report_day)
Path(dump_prediction_path).mkdir(parents=True, exist_ok=True)
decomp_bgnet.to_parquet(dump_prediction_path + '/{}_{}.parquet'.format(obs, time.strftime("%Y%m%d-%H%M%S")) )
# Resample time series
decomp_bgnet = decomp_bgnet.resample(resample_frequency).mean()
hitrate_prediction_plot(
......@@ -778,7 +791,7 @@ def report_on_hitrate(
return
def make_reports(year, month, day, data_path, tag, shift, look_back, ref_times, test_times, train_times, report_path, config_path):
def make_reports(year, month, day, data_path, tag, shift, look_back, ref_times, test_times, train_times, report_path, config_path, output_path):
"""
Make reports
......@@ -813,6 +826,7 @@ def make_reports(year, month, day, data_path, tag, shift, look_back, ref_times,
top_k=15,
freq="5min",
report_path=report_path,
output_path=output_path,
convert_config=hitrate.convert_config,
load_data_from_path=hitrate.load_data_from_path,
train_scalers_on_preprocessed_data=hitrate.train_scalers_on_preprocessed_data,
......@@ -847,6 +861,7 @@ def make_reports(year, month, day, data_path, tag, shift, look_back, ref_times,
top_k=15,
freq="5min",
report_path=report_path,
output_path=output_path,
convert_config=hitrate.convert_config,
load_data_from_path=hitrate.load_data_from_path,
train_scalers_on_preprocessed_data=hitrate.train_scalers_on_preprocessed_data,
......@@ -881,6 +896,7 @@ def make_reports(year, month, day, data_path, tag, shift, look_back, ref_times,
top_k=15,
freq="5min",
report_path=report_path,
output_path=output_path,
convert_config=hitrate.convert_config,
load_data_from_path=hitrate.load_data_from_path,
train_scalers_on_preprocessed_data=hitrate.train_scalers_on_preprocessed_data,
......@@ -1159,6 +1175,7 @@ def main():
train_times,
args.report_path,
args.config_path,
args.output_path,
)
# Get list of reports that were produced
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment