70 changes: 35 additions & 35 deletions machine_learning_hep/analysis/analyzer_jets.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions machine_learning_hep/analysis/do_systematics.py
@@ -128,7 +128,7 @@ def __init__(self, path_database_analysis: str, typean: str, var: str):
elif binning := self.cfg(f'observables.{var}.bins_fix'):
bins_tmp = bin_array(*binning)
else:
- self.logger.error('no binning specified for %s, using defaults', var)
+ self.logger.error('No binning specified for %s, using defaults', var)
bins_tmp = bin_array(10, 0., 1.)
binning_obs_rec = bins_tmp
self.n_bins_obs_rec = len(binning_obs_rec) - 1
@@ -151,7 +151,7 @@ def __init__(self, path_database_analysis: str, typean: str, var: str):
elif binning := self.cfg(f'observables.{var}.bins_fix'):
bins_tmp = bin_array(*binning)
else:
- self.logger.error('no binning specified for %s, using defaults', var)
+ self.logger.error('No binning specified for %s, using defaults', var)
bins_tmp = bin_array(10, 0., 1.)
binning_obs_gen = bins_tmp
self.n_bins_obs_gen = len(binning_obs_gen) - 1
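Both hunks capitalize the same fallback message in the observable-binning setup. The surrounding pattern resolves binning in three steps: explicit edges from the configuration, fixed-width bins via bin_array, and a default of 10 bins on [0, 1] when nothing is configured. A minimal standalone sketch of that logic (the flat cfg dict and this bin_array signature are assumptions for illustration, not the module's actual API):

import logging

import numpy as np

logger = logging.getLogger(__name__)

def bin_array(n_bins, low, high):
    """Equidistant bin edges, assumed to mirror the helper used above."""
    return np.linspace(low, high, n_bins + 1, dtype="d")

def resolve_binning(cfg: dict, var: str) -> np.ndarray:
    """Resolve bin edges for `var`: variable bins, then fixed bins, then defaults."""
    if binning := cfg.get(f"observables.{var}.bins_var"):
        return np.asarray(binning, dtype="d")
    if binning := cfg.get(f"observables.{var}.bins_fix"):
        return bin_array(*binning)  # e.g. (10, 0., 1.)
    logger.error("No binning specified for %s, using defaults", var)
    return bin_array(10, 0.0, 1.0)

# The number of bins is one less than the number of edges, as in the hunks above.
edges = resolve_binning({"observables.zg.bins_fix": [10, 0.0, 0.5]}, "zg")
n_bins = len(edges) - 1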
@@ -11,7 +11,7 @@
# You should have received a copy of the GNU General Public License #
# along with this program. if not, see <https://www.gnu.org/licenses/>. #

- D0jet_pp:
+ D0Jet_pp:
doml: true
mass: 1.864
sel_reco_unp: "fPt > 1."
6 changes: 3 additions & 3 deletions machine_learning_hep/globalfitter.py
@@ -230,7 +230,7 @@ def update_check_signal_fit(self):
return ""

def derive_yields(self):
self.logger.info("calculate signal, backgroud, S/B, significance")
self.logger.info("Calculate signal, backgroud, S/B, significance")
self.mean_fit = self.sig_fit_func.GetParameter(1)
self.mean_err_fit = self.sig_fit_func.GetParError(1)
# Could be negative together with the integral pre-factor
@@ -393,7 +393,7 @@ def fit(self):
self.logger.info("Initial parameters for signal fit are")
print(f"mean = {self.mean}\nsigma = {self.sigma}")

self.logger.debug("fit background (just side bands)")
self.logger.debug("Fit background (just side bands)")
self.histo_to_fit.Fit(self.bkg_sideband_fit_func, ("R,%s,+,0" % (self.fit_options)))

# Prepare a function to store the signal parameters which will finally be extracted
@@ -413,7 +413,7 @@ def fit(self):
self.sig_fit_func.SetParameter(1, self.mean)
self.sig_fit_func.SetParameter(2, self.sigma)

self.logger.info("fit all (signal + background)")
self.logger.info("Fit all (signal + background)")
self.tot_fit_func.SetLineColor(4)
parmin = Double()
parmax = Double()
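The three reworded messages in globalfitter.py mark the stages of the invariant-mass fit: a background-only fit restricted to the sidebands, seeding of the signal mean and width, and a combined signal-plus-background fit. A hedged PyROOT sketch of that sequence (the fit-option string and the seeding of mean/sigma follow the hunks above; the functions, ranges, and parameter values are illustrative, and the real code uses a dedicated sideband function that excludes the peak region):

import ROOT

# Toy histogram standing in for real candidates
gen = ROOT.TF1("gen", "gaus(0) + pol1(3)", 1.7, 2.05)
gen.SetParameters(1.0, 1.864, 0.012, 1.0, -0.2)
h = ROOT.TH1F("h_mass", ";m (GeV/#it{c}^{2});entries", 100, 1.7, 2.05)
h.FillRandom("gen", 20000)

# Stage 1: fit background; "R" restricts the fit to the function's range,
# "+" keeps the function attached to the histogram, "0" suppresses drawing
bkg = ROOT.TF1("bkg", "pol2", 1.7, 2.05)
h.Fit(bkg, "R,+,0")

# Stage 2: seed the signal function with the configured initial values
mean, sigma = 1.864, 0.012  # assumed initial D0 parameters
sig = ROOT.TF1("sig", "gaus", 1.7, 2.05)
sig.SetParameter(1, mean)
sig.SetParameter(2, sigma)

# Stage 3: fit all (signal + background), seeded from the two pieces
tot = ROOT.TF1("tot", "gaus(0) + pol2(3)", 1.7, 2.05)
tot.SetLineColor(4)
for i in range(3):
    tot.SetParameter(i, sig.GetParameter(i))
    tot.SetParameter(i + 3, bkg.GetParameter(i))
h.Fit(tot, "R,+,0")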
2 changes: 1 addition & 1 deletion machine_learning_hep/multiprocesser.py
@@ -204,7 +204,7 @@ def multi_histomass(self):
for indexp, _ in enumerate(self.process_listsample):
if self.p_useperiod[indexp] == 1:
self.process_listsample[indexp].process_histomass()
- self.logger.debug('merging all')
+ self.logger.debug('Merging all')
with tempfile.TemporaryDirectory() as tmp_merged_dir:
mergerootfiles(self.lper_filemass, self.filemass_mergedall, tmp_merged_dir)

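multi_histomass merges the per-period mass histograms once all processes have run, staging the merged output in a temporary directory so an interrupted merge cannot leave a half-written file at the final path. A sketch of that pattern using ROOT's TFileMerger (the project's mergerootfiles helper may be implemented differently):

import os
import shutil
import tempfile

import ROOT

def merge_root_files(inputs, merged_path):
    """Merge `inputs` into `merged_path`, staging the result in a scratch dir."""
    with tempfile.TemporaryDirectory() as tmp_merged_dir:
        tmp_out = os.path.join(tmp_merged_dir, os.path.basename(merged_path))
        merger = ROOT.TFileMerger()
        merger.OutputFile(tmp_out)
        for path in inputs:
            merger.AddFile(path)
        if not merger.Merge():
            raise RuntimeError(f"merging into {merged_path} failed")
        # Move into place only after the merge completed successfully.
        shutil.move(tmp_out, merged_path)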
4 changes: 2 additions & 2 deletions machine_learning_hep/optimiser.py
@@ -215,7 +215,7 @@ def __init__(self, data_param, case, typean, model_config, binmin,
self.f_mltest_applied = f"{self.dirmlout}/testsample_{self.s_suffix}_mldecision.pkl"
self.df_mltest_applied = None

- self.logger.info('training variables: %s', training_var)
+ self.logger.info('Training variables: %s', training_var)

def create_suffix(self):
string_selection = createstringselection(self.v_bin, self.p_binmin, self.p_binmax)
@@ -249,7 +249,7 @@ def prepare_data_mc_mcgen(self):


def preparesample(self): # pylint: disable=too-many-branches
self.logger.info("Prepare Sample")
self.logger.info("Prepare sample")

filename_train = \
os.path.join(self.dirmlout, f"df_train_{self.p_binmin}_{self.p_binmax}.pkl")
22 changes: 11 additions & 11 deletions machine_learning_hep/processer.py
@@ -328,7 +328,7 @@ def dfmerge(dfl, dfr, **kwargs):
try:
return pd.merge(dfl, dfr, **kwargs)
except Exception as e:
- self.logger.error('merging failed: %s', str(e))
+ self.logger.error('Merging failed: %s', str(e))
dfl.info()
dfr.info()
raise e
@@ -339,20 +339,20 @@ def dfuse(df_spec):
(level in ('mc', 'gen', 'det') and self.mcordata == 'mc') or
(level in ('data') and self.mcordata == 'data'))

- self.logger.info('unpacking: %s', self.l_root[file_index])
+ self.logger.info('Unpacking: %s', self.l_root[file_index])
dfs = {}
self.logger.debug(' -> reading')
with uproot.open(self.l_root[file_index]) as rfile:
df_processed = set()
keys = rfile.keys(recursive=False, filter_name='DF_*')
- self.logger.info('found %d dataframes, reading %s', len(keys), max_no_keys or "all")
+ self.logger.info('Found %d dataframes, reading %s', len(keys), max_no_keys or "all")
for (idx, key) in enumerate(keys[:max_no_keys]):
if not (df_key := re.match('^DF_(\\d+);', key)):
continue
if (df_no := int(df_key.group(1))) in df_processed:
- self.logger.warning('multiple versions of DF %d', df_no)
+ self.logger.warning('Multiple versions of DF %d', df_no)
continue
- self.logger.debug('processing DF %d - %d / %d', df_no, idx, len(keys))
+ self.logger.debug('Processing DF %d - %d / %d', df_no, idx, len(keys))
df_processed.add(df_no)
rdir = rfile[key]

@@ -410,19 +410,19 @@ def dfuse(df_spec):
out = m_spec.get('out', base)
if all([dfuse(self.df_read[base]), dfuse(self.df_read[ref])]):
if (on := m_spec.get('use', None)) is not None:
- self.logger.info('merging %s with %s on %s into %s', base, ref, on, out)
+ self.logger.info('Merging %s with %s on %s into %s', base, ref, on, out)
if not isinstance(on, list) or 'df' not in on:
on = ['df', on]
dfs[out] = dfmerge(dfs[base], dfs[ref], on=on)
elif (on := m_spec.get('left_on', None)) is not None:
- self.logger.info('merging %s with %s on %s into %s', base, ref, on, out)
+ self.logger.info('Merging %s with %s on %s into %s', base, ref, on, out)
if not is_numeric_dtype(dfs[base][on]):
- self.logger.info('exploding dataframe %s on variable %s', base, on)
+ self.logger.info('Exploding dataframe %s on variable %s', base, on)
dfs[base] = dfs[base].explode(on)
dfs[out] = dfmerge(dfs[base], dfs[ref], left_on=['df', on], right_index=True)
else:
var = self.df_read[ref]['index']
- self.logger.info('merging %s with %s on %s (default) into %s', base, ref, var, out)
+ self.logger.info('Merging %s with %s on %s (default) into %s', base, ref, var, out)
dfs[out] = dfmerge(dfs[base], dfs[ref], left_on=['df', var], right_index=True)
if 'extra' in m_spec:
self.logger.debug(' %s -> extra', out)
@@ -432,7 +432,7 @@ def dfuse(df_spec):
if self.df_write:
for df_name, df_spec in self.df_write.items():
if dfuse(df_spec):
- self.logger.info('writing %s to %s', df_name, df_spec['file'])
+ self.logger.info('Writing %s to %s', df_name, df_spec['file'])
src = df_spec.get('source', df_name)
dfo = dfquery(dfs[src], df_spec.get('filter', None))
path = os.path.join(self.d_pkl, self.l_path[file_index], df_spec['file'])
@@ -530,7 +530,7 @@ def process_applymodel_par(self):
self.parallelizer(self.applymodel, arguments, self.p_chunksizeskim)

def process_mergeforml(self):
self.logger.info("doing merging for ml %s %s", self.mcordata, self.period)
self.logger.info("Doing merging for ml %s %s", self.mcordata, self.period)
indices_for_evt = []
for ipt in range(self.p_nptbins):
nfiles = len(self.mptfiles_recosk[ipt])
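The reworded messages in processer.py trace the three merge branches: an explicit 'use' column list (always extended with the per-file 'df' index), a 'left_on' column that may first need an explode when it holds lists, and a default merge against the reference frame's index. A condensed sketch of the left_on branch (frame layouts and the cast after explode are illustrative; the reference frame is assumed to carry a two-level index of 'df' and the object index):

import pandas as pd
from pandas.api.types import is_numeric_dtype

def merge_left_on(df_base, df_ref, on):
    """Mirror of the left_on branch above: explode list-valued keys, then
    merge ['df', on] against the reference frame's (df, idx) MultiIndex."""
    if not is_numeric_dtype(df_base[on]):
        # One row per list element, so the merge key becomes scalar again;
        # the cast back to a numeric dtype is for this toy example only.
        df_base = df_base.explode(on).astype({on: "int64"})
    return pd.merge(df_base, df_ref, left_on=["df", on], right_index=True)

base = pd.DataFrame({"df": [0, 0], "track_ids": [[0, 1], [2]]})
ref = pd.DataFrame(
    {"pt": [1.2, 3.4, 5.6]},
    index=pd.MultiIndex.from_tuples([(0, 0), (0, 1), (0, 2)], names=["df", "idx"]),
)
merged = merge_left_on(base, ref, "track_ids")  # three rows, one per track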
16 changes: 8 additions & 8 deletions machine_learning_hep/processer_jet.py
@@ -76,7 +76,7 @@ def __init__(self, case, datap, run_param, mcordata, p_maxfiles, # pylint: disab
elif binning := self.cfg(f'observables.{v}.bins_fix'):
self.binarrays_obs[level][v] = bin_array(*binning)
else:
- self.logger.error('no binning specified for %s, using defaults', v)
+ self.logger.error('No binning specified for %s, using defaults', v)
self.binarrays_obs[level][v] = bin_array(10, 0., 1.)

if binning := self.cfg(f'observables.{v}.bins_ptjet'):
@@ -121,7 +121,7 @@ def _verify_variables(self, dfi):


def _calculate_variables(self, df, verify=False): # pylint: disable=invalid-name
- self.logger.info('calculating variables')
+ self.logger.info('Calculating variables')
if len(df) == 0:
return df
df['dr'] = np.sqrt((df.fJetEta - df.fEta)**2 + ((df.fJetPhi - df.fPhi + math.pi) % math.tau - math.pi)**2)
@@ -150,7 +150,7 @@ def _calculate_variables(self, df, verify=False): # pylint: disable=invalid-name
(lambda ar: np.log(ar.fPtSubLeading * np.sin(ar.fTheta))), axis=1)
df['lntheta'] = df['fTheta'].apply(lambda x: -np.log(x))
# df['lntheta'] = np.array(-np.log(df.fTheta))
- self.logger.debug('done')
+ self.logger.debug('Done')
if verify:
self._verify_variables(df)
return df
@@ -175,7 +175,7 @@ def process_histomass_single(self, index):
dfcollcnt = read_df(self.l_collcnt[index])
ser_collcnt = dfcollcnt[self.cfg('cnt_events_read', 'fReadSelectedCounts')]
collcnt = functools.reduce(lambda x,y: float(x)+float(y), (ar[0] for ar in ser_collcnt))
- self.logger.info('sampled %g collisions', collcnt)
+ self.logger.info('Sampled %g collisions', collcnt)
histonorm.SetBinContent(2, collcnt)
get_axis(histonorm, 0).SetBinLabel(1, 'N_{evt}')
get_axis(histonorm, 0).SetBinLabel(2, 'N_{coll}')
@@ -225,10 +225,10 @@ def process_histomass_single(self, index):
self._calculate_variables(df)

for obs, spec in self.cfg('observables', {}).items():
- self.logger.debug('preparing histograms for %s', obs)
+ self.logger.debug('Preparing histograms for %s', obs)
var = obs.split('-')
if not all(v in df for v in var):
- self.logger.error('dataframe does not contain %s', var)
+ self.logger.error('Dataframe does not contain %s', var)
continue
h = create_hist(
f'h_mass-ptjet-pthf-{obs}',
@@ -347,12 +347,12 @@ def process_efficiency_single(self, index):
if '-' in var or self.cfg(f'observables.{var}.arraycols'):
continue
if self.cfg('closure.use_matched'):
- self.logger.info('using matched for truth')
+ self.logger.info('Using matched for truth')
df_mcana, _ = self.split_df(dfmatch[cat], self.cfg('frac_mcana', .2))
else:
df_mcana, _ = self.split_df(dfgen[cat], self.cfg('frac_mcana', .2))
if f := self.cfg('closure.exclude_feeddown_gen'):
- self.logger.debug('excluding feeddown gen')
+ self.logger.debug('Excluding feeddown gen')
dfquery(df_mcana, f, inplace=True)
fill_hist(h_mctruth[(cat, var)], df_mcana[['fJetPt_gen', 'fPt_gen', f'{var}_gen']])

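Among the variables touched in processer_jet.py, 'dr' is the candidate-jet angular distance; the (dphi + pi) % tau - pi term wraps the azimuthal difference into [-pi, pi) before it is squared, so pairs that straddle the 2*pi seam are not counted as far apart. A standalone check of the formula from the hunk (the toy values are illustrative):

import math

import numpy as np
import pandas as pd

df = pd.DataFrame({
    "fEta": [0.1], "fJetEta": [0.3],
    "fPhi": [6.2], "fJetPhi": [0.1],  # nearly equal angles across the 2*pi seam
})

# Wrap delta-phi into [-pi, pi) before squaring.
dphi = (df.fJetPhi - df.fPhi + math.pi) % math.tau - math.pi
df["dr"] = np.sqrt((df.fJetEta - df.fEta) ** 2 + dphi ** 2)
print(df["dr"].iloc[0])  # ~0.27 rather than ~6.1, thanks to the wrap-around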
2 changes: 1 addition & 1 deletion machine_learning_hep/steer_analysis.py
@@ -187,7 +187,7 @@ def do_entire_analysis(data_config: dict, data_param: dict, data_param_overwrite
exdirs.extend(checkdirs(dirresultsdatatot))

if len(exdirs) > 0:
- logger.info('existing directories must be deleted')
+ logger.info('Existing directories must be deleted')
for d in exdirs:
print(f'rm -rf {d}')
delete = False
8 changes: 4 additions & 4 deletions machine_learning_hep/utilities.py
@@ -81,16 +81,16 @@ def openfile(filename, attr):


def write_df(dfo, path):
logger.debug("writing df to <%s>", path)
logger.debug("Writing df to <%s>", path)
if path.endswith(".parquet"):
start = time.time()
dfo.to_parquet(path)
logger.debug("written to parquet in %.2f s", time.time() - start)
logger.debug("Written to parquet in %.2f s", time.time() - start)
else:
start = time.time()
with openfile(path, "wb") as file:
pickle.dump(dfo, file, pickle.HIGHEST_PROTOCOL)
logger.debug("written to pickle in %.2f s", time.time() - start)
logger.debug("Written to pickle in %.2f s", time.time() - start)


def read_df(path, **kwargs):
@@ -100,7 +100,7 @@ def read_df(path, **kwargs):
else:
df = pickle.load(openfile(path, "rb"))
except Exception as e: # pylint: disable=broad-except
logger.critical("failed to open file <%s>: %s", path, str(e))
logger.critical("Failed to open file <%s>: %s", path, str(e))
sys.exit()
return df

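write_df and read_df dispatch on the file extension: Parquet for .parquet paths, pickle for everything else (with the openfile helper assumed to transparently handle compressed pickles). A minimal sketch of the round-trip, without the compression handling:

import logging
import pickle
import time

import pandas as pd

logger = logging.getLogger(__name__)

def write_df(dfo: pd.DataFrame, path: str):
    """Write `dfo` to Parquet or pickle depending on the extension."""
    start = time.time()
    if path.endswith(".parquet"):
        dfo.to_parquet(path)
        logger.debug("Written to parquet in %.2f s", time.time() - start)
    else:
        with open(path, "wb") as file:
            pickle.dump(dfo, file, pickle.HIGHEST_PROTOCOL)
        logger.debug("Written to pickle in %.2f s", time.time() - start)

def read_df(path: str) -> pd.DataFrame:
    """Inverse of write_df, again dispatching on the extension."""
    if path.endswith(".parquet"):
        return pd.read_parquet(path)
    with open(path, "rb") as file:
        return pickle.load(file)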
6 changes: 3 additions & 3 deletions machine_learning_hep/utilities_files.py
@@ -29,7 +29,7 @@ def list_folders(main_dir: str, filenameinput: str, maxfiles: int, select=None):
:param select: iterable of substrings that must be contained in folders
"""
if not os.path.isdir(main_dir):
logger.error("input directory <%s> does not exist", main_dir)
logger.error("Input directory <%s> does not exist", main_dir)

files = glob.glob(f"{main_dir}/**/{filenameinput}", recursive=True)
listfolders = [os.path.relpath(os.path.dirname(file), main_dir) for file in files]
@@ -78,7 +78,7 @@ def checkmakedir(mydir: str):
if os.path.exists(mydir):
logger.warning("Using existing folder %s", mydir)
return
logger.debug("creating folder %s", mydir)
logger.debug("Creating folder %s", mydir)
os.makedirs(mydir)


@@ -101,7 +101,7 @@ def delete_dir(path: str):
try:
shutil.rmtree(path)
except OSError:
logger.error("Error: Failed to delete directory %s", path)
logger.error("Failed to delete directory %s", path)
return False
return True

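For reference, list_folders finds every directory under main_dir containing the given input file and returns paths relative to main_dir; the select argument filters by substring. A sketch consistent with the hunk above (the maxfiles truncation and the all-substrings filter are assumptions):

import glob
import logging
import os

logger = logging.getLogger(__name__)

def list_folders(main_dir: str, filenameinput: str, maxfiles: int, select=None):
    """List folders under main_dir that contain filenameinput."""
    if not os.path.isdir(main_dir):
        logger.error("Input directory <%s> does not exist", main_dir)
    files = glob.glob(f"{main_dir}/**/{filenameinput}", recursive=True)
    folders = [os.path.relpath(os.path.dirname(file), main_dir) for file in files]
    if select:
        # Keep only folders that contain all requested substrings.
        folders = [folder for folder in folders if all(s in folder for s in select)]
    return folders[:maxfiles] if maxfiles > 0 else folders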