Skip to content
Snippets Groups Projects
Commit cf8b526e authored by Clotilde Lemettais's avatar Clotilde Lemettais
Browse files

fit S+B working, add test reco script

parent d407bc42
No related branches found
No related tags found
No related merge requests found
Showing
with 481 additions and 61 deletions
......@@ -14,4 +14,5 @@ test/
# Other
__pycache__/
.rucioget/
*/.ipynb_checkpoints/*
\ No newline at end of file
*/.ipynb_checkpoints/*
.vscode/
\ No newline at end of file
......@@ -246,6 +246,8 @@ cd /gpfs/home/belle2/clemetta/topoAna/
In working directory:
tests :
```
/home/belle2/clemetta/topoAna/bin/topoana.exe input_files/topoAna/card_KstTauEll_OSmu_signal_tauDecay.card -i /group/belle2/TMP/clemetta/KstTauL/MC_reco/OSmu_signalMCrd.root -o topoana_out/OSmu_signal_tauDecay
/home/belle2/clemetta/topoAna/bin/topoana.exe input_files/card_KstTauEll_OSe_mixed.card -i /group/belle2/TMP/clemetta/KstTauL/MC_reco/OSe_mixedMCrd.root -o topoana_out/OSe_mixed
/home/belle2/clemetta/topoAna/bin/topoana.exe input_files/card_KstTauEll_OSe_mixed_sepCharge.card -i /group/belle2/TMP/clemetta/KstTauL/MC_reco/OSe_mixedMCrd.root -o topoana_out/OSe_mixed_sepCharge
......
......@@ -10,6 +10,7 @@ import seaborn as sb
import pickle
import yaml
import os
import ROOT
# BDT libriries
import sklearn.model_selection
......@@ -31,8 +32,10 @@ measure_efficiency = KstTauEll["analysis_steps"]["measure_efficiency"]
plot_vars_MC = KstTauEll["analysis_steps"]["plot_vars_MC"]
train_BDT = KstTauEll["analysis_steps"]["train_BDT"]
fit_mTau = KstTauEll["analysis_steps"]["fit_mTau"]
calc_UL = KstTauEll["analysis_steps"]["calc_UL"]
verbose = KstTauEll["verbose"]
option = KstTauEll["option"]
experiments = KstTauEll["experiments"]
channels = KstTauEll["channels"]
......@@ -72,14 +75,16 @@ for chan in channels :
cutToApply = CONFIG[exp][chan]["cuts"]["bkg"]
if verbose : print("samples will be loaded with cut : ", cutToApply)
else :
cutToApply = [cut.join(" and") for cut in CONFIG[exp][chan]["cuts"].values()]
# cutToApply = [cut.join(" and") for cut in CONFIG[exp][chan]["cuts"].values()]
cutToApply = [cut.join(" and") for cut in CONFIG[exp][chan]["cuts"].values() if not (option == "inclTau" and cut == "pi0Veto")]
if verbose : print("samples will be loaded with cut : ", cutToApply)
for smp in CONFIG[exp]['bkgSamples'] :
if verbose : print(f"Load sample {smp}")
file_name = f"{chan}_{smp}MCrd{exp_suffix}.root"
dfDict[smp] = anaFunc.loadNtuple(CONFIG["pathNtuples"]+file_name, CONFIG[exp][chan]["treeName"], vars, cutToApply)
dfDict["signal"] = anaFunc.loadNtuple(CONFIG["pathNtuples"]+f"{chan}_signalMCrd{exp_suffix}.root", CONFIG[exp][chan]["treeName"], vars, cutToApply)
dfDict[smp] = anaFunc.loadNtuple(CONFIG["pathNtuples"]+option+'/'+file_name, CONFIG[exp][chan]["treeName"], vars, cutToApply)
dfDict["signal"] = anaFunc.loadNtuple(CONFIG["pathNtuples"]+option+'/'+f"{chan}_signalMCrd{exp_suffix}.root", CONFIG[exp][chan]["treeName"], vars, cutToApply)
### Create additionnal dataframes for TM signal, qqbar and BBbar ###
dfDict['signalTM'] = dfDict['signal'].query("Bsig_isSignalAcceptMissing == 1 or (Kst_isSignal ==1 and ell_mcErrors == 2048)").copy()
......@@ -111,8 +116,9 @@ for chan in channels :
############################################
if measure_efficiency :
if verbose : print("Efficiency measurement")
NevtsDict = anaFunc.prepareEfficiencyNevts(dfDict, CONFIG[exp][chan]["cuts"], lumiSF)
anaFunc.efficiencyTable(NevtsDict, CONFIG[exp][chan]["NsigGen"], chan, exp, TMsig=False)
cutDict = CONFIG[exp][chan]["cuts"].copy() if option != "inclTau" else {key: value for key, value in CONFIG[exp][chan]["cuts"].items() if key != "pi0Veto"}
NevtsDict = anaFunc.prepareEfficiencyNevts(dfDict, cutDict, lumiSF)
anaFunc.efficiencyTable(NevtsDict, CONFIG[exp][chan]["NsigGen"], chan, exp, option, TMsig=False)
if verbose : print("Efficiency measurement done, dfDict should have all cuts applied")
############################################
......@@ -136,7 +142,7 @@ for chan in channels :
for var, var_info in vars_to_plot.items() :
if verbose : print(f"Plot {var}")
# anaFunc.plotVariable(dfDict, var, var_info, lumiSF, sig_scale, multiple, f"analysisFigures/sigbkgComp/{exp}/{chan}/inclTau/", chan, exp)
anaFunc.plotVariable(dfDict, var, var_info, lumiSF, sig_scale, multiple, f"analysisFigures/sigbkgComp/{exp}/{chan}/", chan, exp)
anaFunc.plotVariable(dfDict, var, var_info, lumiSF, sig_scale, multiple, f"analysisFigures/sigbkgComp/{exp}/{chan}/{option}/", chan, exp)
############################################
########## Train BDT #######################
......@@ -169,24 +175,24 @@ for chan in channels :
# Sample largest dataset to be same size as smaller dataset and merge signal and background datasets
df_final = anaFunc.prepare_data(df_traintest, verbose=verbose)
out_path = f'analysisOutput/{exp}/{chan}/'
out_path = f'analysisOutput/{exp}/{chan}/{option}/'
# out_path = f'analysisOutput/{exp}/{chan}/inclTau/'
if not os.path.exists(out_path) :
os.makedirs(out_path)
# Train BDT, optimise and store best classifier in pickle
if not verbose : optuna.logging.set_verbosity(optuna.logging.WARNING)
anaFunc.optimise(df_final, CONFIG[exp][chan]["BDTVars"].keys(), chan, exp)
anaFunc.optimise(df_final, CONFIG[exp][chan]["BDTVars"].keys(), chan, exp, option)
# Load best classifier from pickle
bestCLF = FastBDT.Classifier()
bestCLF.load(f'{out_path}bdt_classifier_{chan}_{exp}.pkl')
anaFunc.rankFeature(bestCLF, CONFIG[exp][chan]["BDTVars"], chan, exp)
anaFunc.rankFeature(bestCLF, CONFIG[exp][chan]["BDTVars"], chan, exp, option)
# Validate BDT
df_all = pd.concat([df_validation["signal"], df_validation["BBbar"], df_validation["qqbar"]], ignore_index=True)
BDTcut = anaFunc.validate(df_all, CONFIG[exp][chan]["BDTVars"], bestCLF, CONFIG[exp][chan]["NsigGen"], lumiSF, chan, exp)
BDTcut = anaFunc.validate(df_all, CONFIG[exp][chan]["BDTVars"], bestCLF, CONFIG[exp][chan]["NsigGen"], lumiSF, chan, exp, option)
with open(f'{out_path}/BDTbestCut_{chan}_{exp}.pkl', 'wb') as f:
pickle.dump(BDTcut, f)
......@@ -194,7 +200,7 @@ for chan in channels :
else :
if verbose : print("BDT training not requested : load BDT model and optimised BDT cut from last training")
out_path = f'analysisOutput/{exp}/{chan}/'
out_path = f'analysisOutput/{exp}/{chan}/{option}/'
# out_path = f'analysisOutput/{exp}/{chan}/inclTau/'
bestCLF = FastBDT.Classifier()
bestCLF.load(f'{out_path}/bdt_classifier_{chan}_{exp}.pkl')
......@@ -212,7 +218,7 @@ for chan in channels :
if measure_efficiency :
if verbose : print("Efficiency measurement after BDT cut")
anaFunc.addBDTefficiency(dfDict, lumiSF, CONFIG[exp][chan]["NsigGen"], chan, exp, TMsig=False)
anaFunc.addBDTefficiency(dfDict, lumiSF, CONFIG[exp][chan]["NsigGen"], chan, exp, option, TMsig=False)
dfDict_final[exp] = {}
for smp in ['signal','signalTM','bkgAll'] :
......@@ -225,17 +231,58 @@ for chan in channels :
dfDict_final["total"] = {}
for smp in ['signal','signalTM','bkgAll'] :
dfDict_final["total"][smp] = pd.concat([dfDict_final[exp][smp] for exp in experiments], ignore_index=True)
# Few variables that can be useful (combination of Belle + belle II)
NsigGen_tot = sum([CONFIG[exp][chan]["NsigGen"] for exp in experiments])
lumiData_tot = sum(CONFIG[exp]["lumiData"] for exp in experiments)
lumiMC_tot = sum(CONFIG[exp]["lumiMC"] for exp in experiments)
lumiSF_tot = lumiData_tot/lumiMC_tot
##################################
########## Fit mTau ##############
##################################
fit_range = (CONFIG[exp][chan]["fitRange"][0], CONFIG[exp][chan]["fitRange"][1])
if fit_mTau :
if verbose :
print("Fit mTau")
print("Fit Signal shape with TBD")
pdf_sig, result_sig = anaFunc.fitSignal(dfDict_final["total"]["signal"], chan)
pdf_bkg, result_sig = anaFunc.fitBackground(dfDict_final["total"]["bkgAll"], chan)
pdf_sig, result_sig = anaFunc.fitSignal(dfDict_final["total"]["signal"], fit_range, chan)
pdf_bkg, result_sig = anaFunc.fitBackground(dfDict_final["total"]["bkgAll"], fit_range, chan)
if verbose : print("Now fit signal + background together assuming Br(sig) = 1e-5")
eff_sig = dfDict_final["total"]["signal"]["weight_final"].sum()/NsigGen_tot
scale_sig = anaFunc.getNsig(eff_sig, 1e-5).n/dfDict_final["total"]["signal"]["weight_final"].sum()
scale_bkg = lumiSF_tot
pdf_tot, result_tot = anaFunc.fitTotal(dfDict_final["total"]["signal"], dfDict_final["total"]["bkgAll"], scale_sig, scale_bkg, pdf_sig, pdf_bkg, fit_range, chan)
########################################
########## Derive upper limits #########
########################################
if calc_UL : # TO DO : refit total w/ 0 signal or load fit from file / addapt UL function
if verbose : print("Derive upper limits")
# Get signal and background pdfs
f_sig = ROOT.TFile.Open(f'analysisOutput/{chan}_signalWS.root')
ws_sig = f_sig.Get("ws")
sig_pdf = ws_sig.pdf(f"pdf_sig")
f_bkg = ROOT.TFile.Open(f'analysisOutput/{chan}_backgroundWS.root')
ws_bkg = f_bkg.Get("ws")
bkg_pdf = ws_bkg.pdf(f"pdf_bkg")
# # make total workspace for UL derivation
# w = ROOT.RooWorkspace("w_tot")
# # make total model
# total_pdf, bkg_yield, sig_yield = anaFunc.makeTotalPdf(sig_pdf, bkg_pdf)
# getattr(w, 'import')(total_pdf)
# getattr(w, 'import')(bkg_yield)
# getattr(w, 'import')(sig_yield)
# Generate toy dataset based on background
m_tau_var_bkg = ws_sig.var("mTau")
n_bkg_exp = bkg_pdf.expectedEvents(ROOT.RooArgSet(m_tau_var_bkg))*lumiSF
toy_ds = bkg_pdf.generate(ROOT.RooArgSet(m_tau_var_bkg), n_bkg_exp)
getattr(w, 'import')(m_tau_var_bkg, ROOT.RooFit.Rename("mTau_bkg"))
getattr(w, 'import')(toy_ds, ROOT.RooFit.Rename("toy_ds"))
This diff is collapsed.
......@@ -6,8 +6,10 @@ Belle2 :
OSmu :
treeName : "tauPmuM"
NsigGen : 19806988
fitRange : [1.0, 2.5]
cuts :
bkg : "BKG_all_CUT == 1 and (t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0)))"
bkg : "BKG_all_CUT == 1"
pi0Veto : "t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0))"
mTau : "mTau > 1 and mTau < 2.5"
JPsiVeto : "not(invM_EllT > 3.0 and invM_EllT < 3.15)"
KpipiVeto : "not((invM_KstEll > 1.83) and (invM_KstEll < 1.9))"
......@@ -33,8 +35,10 @@ Belle2 :
SSmu :
treeName : "tauMmuP"
NsigGen : 19807020
fitRange : [1.0, 2.5]
cuts :
bkg : "BKG_all_CUT == 1 and (t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0)))"
bkg : "BKG_all_CUT == 1"
pi0Veto : "t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0))"
mTau : "mTau > 1 and mTau < 2.5"
JPsiVeto : "not(invM_EllT > 3.0 and invM_EllT < 3.15)"
KpipiVeto : "not((invM_KstT > 1.83) and (invM_KstT < 1.9))"
......@@ -59,8 +63,10 @@ Belle2 :
OSe :
treeName : "tauPeM"
NsigGen : 19806978
fitRange : [1.0, 2.5]
cuts :
bkg : "BKG_all_CUT == 1 and (t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0)))"
bkg : "BKG_all_CUT == 1"
pi0Veto : "t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0))"
mTau : "mTau > 1 and mTau < 2.5"
gammaVeto : "invM_EllT > 0.15"
JPsiVeto : "not(invM_EllT > 3.0 and invM_EllT < 3.15)"
......@@ -86,8 +92,10 @@ Belle2 :
SSe :
treeName : "tauMeP"
NsigGen : 19806979
fitRange : [1.0, 2.5]
cuts :
bkg : "BKG_all_CUT == 1 and (t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0)))"
bkg : "BKG_all_CUT == 1"
pi0Veto : "t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0))"
mTau : "mTau > 1 and mTau < 2.5"
gammaVeto : "invM_EllT > 0.15"
JPsiVeto : "not(invM_EllT > 3.0 and invM_EllT < 3.15)"
......@@ -120,8 +128,10 @@ Belle :
OSmu :
treeName : "tauPmuM"
NsigGen : 15296662
fitRange : [1.0, 2.5]
cuts :
bkg : "BKG_all_CUT == 1 and (t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0)))"
bkg : "BKG_all_CUT == 1"
pi0Veto : "t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0))"
mTau : "mTau > 1 and mTau < 2.5"
JPsiVeto : "not(invM_EllT > 3.0 and invM_EllT < 3.15)"
KpipiVeto : "not((invM_KstEll > 1.83) and (invM_KstEll < 1.9))"
......@@ -147,8 +157,10 @@ Belle :
SSmu :
treeName : "tauMmuP"
NsigGen : 15248521
fitRange : [1.0, 2.5]
cuts :
bkg : "BKG_all_CUT == 1 and (t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0)))"
bkg : "BKG_all_CUT == 1"
pi0Veto : "t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0))"
mTau : "mTau > 1 and mTau < 2.5"
JPsiVeto : "not(invM_EllT > 3.0 and invM_EllT < 3.15)"
KpipiVeto : "not((invM_KstT > 1.83) and (invM_KstT < 1.9))"
......@@ -173,8 +185,10 @@ Belle :
OSe :
treeName : "tauPeM"
NsigGen : 15210625
fitRange : [1.0, 2.5]
cuts :
bkg : "BKG_all_CUT == 1 and (t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0)))"
bkg : "BKG_all_CUT == 1"
pi0Veto : "t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0))"
mTau : "mTau > 1 and mTau < 2.5"
gammaVeto : "invM_EllT > 0.15"
JPsiVeto : "not(invM_EllT > 3.0 and invM_EllT < 3.15)"
......@@ -200,8 +214,10 @@ Belle :
SSe :
treeName : "tauMeP"
NsigGen : 15297124
fitRange : [1.0, 2.5]
cuts :
bkg : "BKG_all_CUT == 1 and (t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0)))"
bkg : "BKG_all_CUT == 1"
pi0Veto : "t1prong_mode !=1 or (t1prong_mode == 1 and not(n_pi0_inroe > 0))"
mTau : "mTau > 1 and mTau < 2.5"
gammaVeto : "invM_EllT > 0.15"
JPsiVeto : "not(invM_EllT > 3.0 and invM_EllT < 3.15)"
......@@ -231,7 +247,7 @@ varsToLoad :
general : ['__weight__', 'mTau', 't1prong_mode','Bt1prong_trackDR','Bt1prong_trackDZ']
selection : ['BKG_all_CUT','sphericity','Btag_cosTBTO','Btag_deltaE','Btag_fei_logP','Kst_M','Btag_Mbc',
'nROE_Charged_cleanMask','n_pi0_inroe','invM_EllT','invM_KstEll','invM_KstT']
truthmatching : ['Bsig_isSignalAcceptMissing','Btag_isSignal','Bt1prong_isSignal','ell_PDG','ell_mcPDG','Kst_isSignal','ell_mcErrors']
truthmatching : ['Bsig_isSignalAcceptMissing','Btag_isSignal','Bt1prong_isSignal','ell_PDG','ell_mcPDG','Kst_isSignal','ell_mcErrors','Bt1prong_mcPDG','Bt1prong_moth']
lidCorrectionsB2 : ['ell_weight_muonID_noSVD_eff_FixedThresh09','ell_weight_pidChargedBDTScore_e_eff_FixedThresh09']
lidCorrectionsB1 : []
feiCorrections : ['Btag_fei_mode']
......
analysis_steps :
measure_efficiency : True
plot_vars_MC : True
train_BDT : True
fit_mTau : False
plot_vars_MC : False
train_BDT : False
fit_mTau : True
calc_UL : False
experiments : ["Belle2"] #, "Belle"]
channels : ['OSmu'] #,'SSmu','OSe','SSe']
channels : ['OSmu','SSmu','OSe','SSe']
verbose : True
option : '' #'inclTau' #'' for no particular option
vars_to_plot :
### general variables ###
......@@ -28,6 +30,28 @@ vars_to_plot :
var_lab : '$\#\pi^0(ROE)$'
range : [0,6]
nBins : 6
### tests ###
t1prong_mcPDG :
var_name : 'Bt1prong_mcPDG'
var_lab : '$\tau$ mcPDG'
range : [-250,250]
nBins : 250
t1prong_moth :
var_name : 'Bt1prong_moth'
var_lab : '$\tau$ mother'
range : [-500,500]
nBins : 500
t1prong_isSignal :
var_name : 'Bt1prong_isSignal'
var_lab : '$\tau$ isSignal'
range : [0,2]
nBins : 2
Btag_isSignal :
var_name : 'Btag_isSignal'
var_lab : 'Btag isSignal'
range : [0,2]
nBins : 2
### preselection ###
# Btag_cosTBTO : r'cosTBTO'
......
% TTree name
{
tauPmuM
}
% Component analysis --- cascade decay branches of particles
{
B0 B0 - 3
anti-B0 aB0 - 3
}
% Component analysis --- inclusive decay branches
{
}
% Process charge conjugate objects together (Two options: Y and N. Default: N)
{
Y
}
% Ignore ISR photons (Three options: Ys, Yg and N. Default: N)
{
Yg
}
% Ignore FSR photons (Three options: Ys, Yg and N. Default: N)
{
Yg
}
# Here, AOI, VOI, MSI, MSF, MSD, and MSID are short for array of integers, vector of integers, multiple scalar integers, multiple scalar floats, multiple scalar doubles, and multiple scalar integer (nMCGen) and doubles (MCGenPDG_* and MCGenMothIndex_*), respectively.
% Storage type of input raw topology truth information (Six options: AOI, VOI, MSI, MSF, MSD, and MSID. Default: AOI)
{
MSID
}
% Cut to select entries
{
(__candidate__==0)
}
% Signal identification --- particles
{
}
% Component analysis --- production branches of particles
{
}
% Remove input tbranches from output root files (Two options: Y and N. Default: N)
{
Y
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment