Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
c84547a
OK, at least I can write one pt-rap hist
conformist89 Sep 16, 2021
303acfb
Can write few hists to out file, need to test tomorrow
conformist89 Sep 16, 2021
6f06aaa
Can plot variables distributions for signal and background, train and…
conformist89 Sep 17, 2021
cef708d
Plotted roc curve as root object
conformist89 Sep 17, 2021
6e02ce3
Set graph titles
conformist89 Sep 17, 2021
f4d3293
Corrected train roc-curve's title
conformist89 Sep 17, 2021
b4f1162
Correlation matrix using hipe4ml (but need to check carefully order)
conformist89 Sep 17, 2021
aae6b0c
Changed label's font size
conformist89 Oct 4, 2021
6a45ae5
Set label title for output file
conformist89 Oct 4, 2021
896f853
Hipe4ML based bayesian optimization, train, test and model saving
conformist89 Oct 13, 2021
9a13d68
Applied train and test predictions, got distributions and estimated m…
conformist89 Oct 15, 2021
8cdda43
Model is private variable
conformist89 Oct 15, 2021
ea16a12
Save model as treelite library
conformist89 Oct 18, 2021
9cc8952
Added getter to return a model
conformist89 Oct 18, 2021
c854ccb
Deleted a comment
conformist89 Oct 18, 2021
424fa9f
Distributions to root file
conformist89 Oct 18, 2021
41f74f8
Deleted files with an old version of XGBoost code
conformist89 Oct 18, 2021
d30b1ea
Quality assurance added
conformist89 Oct 21, 2021
be89cc7
Added names of roc plots
conformist89 Oct 21, 2021
1695282
Reading information from config files
conformist89 Oct 25, 2021
4ac5999
Changed config reader module's name
conformist89 Oct 25, 2021
48e7123
Put files to installation folder
conformist89 Oct 25, 2021
c5770cf
Setup tools added
conformist89 Oct 25, 2021
1905959
MIT license added
conformist89 Oct 25, 2021
c650800
Added files
conformist89 Oct 25, 2021
6e74feb
Deleted old folder
conformist89 Oct 25, 2021
bd9fab2
Deleted old file
conformist89 Oct 25, 2021
140630d
Added requirements
conformist89 Oct 26, 2021
dcde05f
Added hipe4ml to requirements
conformist89 Oct 26, 2021
dd326cb
New module for saving hists as ROOT objects
conformist89 Nov 10, 2021
debe349
Changed argument of transform_df_to_log. Doesn't read config file to …
conformist89 Nov 10, 2021
399394f
Deleted part with saving hists as ROOT objects(moved to separate hist…
conformist89 Nov 10, 2021
33dde1f
Added treelite (to convert predictions to C++ library) to dependencies
conformist89 Nov 10, 2021
bd1481b
Matplotlib was deleted from requirements
conformist89 Nov 16, 2021
6cd450f
Created one method to compute threshold and apply it to dataset
conformist89 Nov 22, 2021
eae53dd
Corrected non-log variables return
conformist89 Nov 22, 2021
dac89de
Changed font sizes
conformist89 Nov 22, 2021
e7bbc38
corrected signal and background diff
conformist89 Nov 22, 2021
02478e1
hist_variables requires dataframe and labels and computes signal and …
conformist89 Dec 3, 2021
0d5cdd6
Changed the way of log transformation
conformist89 Dec 3, 2021
af8cf1e
Only one dataframe is required
conformist89 Dec 3, 2021
7783a23
Added comments to ApplyXGB members
conformist89 Feb 17, 2022
cfef579
One can adjust threshold manually or use optimized AMS
conformist89 Feb 17, 2022
b2834b6
get_predictions returns XGBoost BDT predictions, apply_prob_cut retur…
conformist89 Feb 23, 2022
f49e08c
Added roc curve plot
conformist89 Feb 23, 2022
51e16a9
Deleted roc curve plot from AMS function, one can only compute test a…
conformist89 Feb 23, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 0 additions & 22 deletions DFconverter.py

This file was deleted.

21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2021 CandidatesClassifier

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
58 changes: 0 additions & 58 deletions MLconfig_XGboostParameters.py

This file was deleted.

86 changes: 43 additions & 43 deletions MLconfig_variables.py → cand_class/MLconfig_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@
from scipy.stats import binned_statistic as b_s
import matplotlib as mpl

from hipe4ml import plot_utils

def correlation_matrix(bgr, sign, vars_to_draw, leg_labels, output_path):
    """
    Draws correlation matrices with hipe4ml's plot_corr and saves the first two
    returned figures as png files in the output directory.

    Parameters
    ------------------------------------------------
    bgr:
        background sample, passed to plot_corr as the first list element
    sign:
        signal sample, passed to plot_corr as the second list element
    vars_to_draw:
        variables forwarded to plot_corr
    leg_labels:
        labels forwarded to plot_corr
    output_path: str
        directory that will contain the output plots
    """
    figures = plot_utils.plot_corr([bgr, sign], vars_to_draw, leg_labels)

    # NOTE(review): the file names assume the figures come back in the same
    # order as the input list ([bgr, sign]) -- TODO confirm against hipe4ml.
    for position, file_name in enumerate(('corr_matrix_bgr.png', 'corr_matrix_sign.png')):
        figures[position].savefig(output_path+'/'+file_name)



def calculate_correlation(df, vars_to_corr, target_var) :
"""
Calculates correlations with target variable variable and standart errors
Expand Down Expand Up @@ -56,18 +65,18 @@ def plot1Dcorrelation(vars_to_draw,var_to_corr, corr_signal, corr_signal_errors,
background covariance standart error of the mean
output_path:
path that contains output plot

"""

fig, ax = plt.subplots(figsize=(20,10))
plt.errorbar(vars_to_draw, corr_signal, yerr=corr_signal_errors, fmt='')
plt.errorbar(vars_to_draw, corr_bg, yerr=corr_bg_errors, fmt='')
fig, ax = plt.subplots(figsize=(10,6))
plt.errorbar(vars_to_draw, corr_signal, yerr=corr_signal_errors, fmt='--o')
plt.errorbar(vars_to_draw, corr_bg, yerr=corr_bg_errors, fmt='--o')
ax.grid(zorder=0)
ax.set_xticklabels(vars_to_draw, fontsize=25, rotation =70)
ax.set_yticklabels([-0.5,-0.4, -0.2,0, -0.2, 0.4], fontsize=25)
plt.legend(('signal','background'), fontsize = 25)
plt.title('Correlation of all variables with '+ var_to_corr+' along with SEM', fontsize = 25)
plt.ylabel('Correlation coefficient', fontsize = 25)
ax.set_xticklabels(vars_to_draw, fontsize=15, rotation =70)
# ax.set_yticklabels([-0.5,-0.4, -0.2,0, -0.2, 0.4], fontsize=25)
ax.yaxis.set_tick_params(labelsize=15)
plt.legend(('signal','background'), fontsize = 15)
plt.title('Correlation of all variables with '+ var_to_corr+' along with SEM', fontsize = 18)
plt.ylabel('Correlation coefficient', fontsize = 15)
fig.tight_layout()
fig.savefig(output_path+'/all_vars_corr-'+ var_to_corr+'.png')

Expand All @@ -78,33 +87,24 @@ def profile_mass(df,variable_xaxis, sign, peak, edge_left, edge_right, pdf_key):
This function takes the entries of the variables and distributes them in 25 bins.
The function then plots the bin centers of the first variable on the x-axis and
the mean values of the bins of the second variable on the y-axis, along with its bin stds.

Parameters
------------------------------------------------
df: pandas.DataFrame
input DataFrame

variable_xaxis: str
variable to be plotted on x axis (invariant mass)

x_unit: str
x axis variable units

variable_yaxis: str
variable to be plotted on y axis

sgn: int(0 or 1)
signal definition(0 background, 1 signal)

pdf_key: matplotlib.backends.backend_pdf.PdfPages
output pdf file

peak: int
invariant mass peak position

edge_left: int
left edge of x axis variable

edge_right: int
left edge of y axis variable
"""
Expand All @@ -119,7 +119,7 @@ def profile_mass(df,variable_xaxis, sign, peak, edge_left, edge_right, pdf_key):
for var in df.columns:
if var != variable_xaxis:

fig, axs = plt.subplots(figsize=(20, 15))
fig, axs = plt.subplots(figsize=(10, 6))

bin_means, bin_edges, binnumber = b_s(df[variable_xaxis],df[var], statistic='mean', bins=25)
bin_std, bin_edges, binnumber = b_s(df[variable_xaxis],df[var], statistic='std', bins=25)
Expand All @@ -134,19 +134,21 @@ def profile_mass(df,variable_xaxis, sign, peak, edge_left, edge_right, pdf_key):
bin_std = np.delete(bin_std , nan_ind)


plt.errorbar(x=bin_centers, y=bin_means, yerr=(bin_std/np.sqrt(bin_count)), linestyle='none', marker='.',mfc='red', ms=10)

plt.errorbar(x=bin_centers, y=bin_means, yerr=(bin_std/np.sqrt(bin_count)), linestyle='none', linewidth = 2, marker='.',mfc='red', ms=15)

plt.locator_params(axis='y', nbins=5)
plt.locator_params(axis='x', nbins=5)

plt.title('Mean of ' +var+ ' plotted versus bin centers of '+variable_xaxis+ \
'('+keyword+')', fontsize=25)
plt.xlabel('Mass', fontsize=25)
plt.ylabel("Mean of each bin with the SEM ($\dfrac{bin\ std}{\sqrt{bin\ count}}$) of bin", fontsize=25)
plt.title('Mean of ' +var+ ' vs bin centers of '+variable_xaxis+ \
'('+keyword+')', fontsize=19)
plt.xlabel('Mass', fontsize=17)
plt.ylabel(" SEM ($\dfrac{bin\ std}{\sqrt{bin\ count}}$) of bin", fontsize=17)


plt.vlines(x=peak,ymin=bin_means.min(),ymax=bin_means.max(), color='r', linestyle='-')

plt.vlines(x=peak,ymin=bin_means.min(),ymax=bin_means.max(), color='r', linestyle='-', linewidth = 3)

axs.xaxis.set_tick_params(labelsize=16)
axs.yaxis.set_tick_params(labelsize=16)
fig.tight_layout()
plt.savefig(pdf_key,format='pdf')

Expand All @@ -160,13 +162,10 @@ def plot2D_all(df, sample, sgn, pdf_key):
------------------------------------------------
df: pandas.DataFrame
input dataframe

sample: str
title of the sample

sgn: int(0 or 1)
signal definition(0 background, 1 signal)

pdf_key: matplotlib.backends.backend_pdf.PdfPages
output pdf file
"""
Expand Down Expand Up @@ -206,48 +205,49 @@ def plot2D_mass(df, sample, mass_var, mass_range, sgn, peak, pdf_key):
------------------------------------------------
df: pandas.DataFrame
input dataframe

sample: str
title of the sample


mass_var: str
name of the invariant mass variable

mass_range: list
mass range to be plotted

sgn: int(0 or 1)
signal definition(0 background, 1 signal)

peak: int
invariant mass value

pdf_key: matplotlib.backends.backend_pdf.PdfPages
output pdf file
"""

for var in df.columns:
if var != mass_var:
fig, axs = plt.subplots(figsize=(15, 10))
fig, axs = plt.subplots(figsize=(6, 4))
cax = plt.hist2d(df[mass_var],df[var],range=[mass_range, [df[var].min(), df[var].max()]], bins=100,
norm=mpl.colors.LogNorm(), cmap=plt.cm.viridis)


if sgn==1:
plt.title('Signal candidates ' + sample, fontsize = 25)
plt.title('Signal candidates ' + sample, fontsize = 15)

if sgn==0:
plt.title('Background candidates ' + sample, fontsize = 25)
plt.title('Background candidates ' + sample, fontsize = 15)


plt.xlabel(mass_var, fontsize=25)
plt.ylabel(var, fontsize=25)
plt.xlabel(mass_var, fontsize=16)
plt.ylabel(var, fontsize=16)

plt.vlines(x=peak,ymin=df[var].min(),ymax=df[var].max(), color='r', linestyle='-')
plt.vlines(x=peak,ymin=df[var].min(),ymax=df[var].max(), color='r', linestyle='-', linewidth = 4)

mpl.pyplot.colorbar()


axs.xaxis.set_tick_params(labelsize=11)
axs.yaxis.set_tick_params(labelsize=11)

plt.locator_params(axis='y', nbins=5)
plt.locator_params(axis='x', nbins=5)


plt.legend(shadow=True,title =str(len(df))+ " samples")

fig.tight_layout()
Expand Down
Loading