8 from ambiguity_solver_network
import prepareDataSet
12 """Read the dataset from the different file, remove the pure duplicate tracks and combine the datasets"""
14 @param[in] CKS_files: DataFrame contain the data from each track files (1 file per events usually)
15 @return: combined DataFrame containing all the track, ordered by events and then by truth particle ID in each event
20 datafile = pd.read_csv(f)
# Collect the per-event CSV files written by the reconstruction, sorted so
# the two lists line up event by event when zipped together later.
# Raw CKF track candidates (before ambiguity resolution):
CKF_files_track = sorted(
    glob.glob("odd_output" + "/event0000000[0-9][0-9]-tracks_ckf.csv")
)
# Tracks kept by the ML ambiguity resolution:
CKF_files_resolved = sorted(
    glob.glob("odd_output" + "/event0000000[0-9][0-9]-tracks_ambiML.csv")
)
# Event-by-event comparison of the raw CKF tracks (trackEvent) with the
# tracks kept by the ML ambiguity resolution (resolvedEvent), accumulating
# truth-label counters ("good/duplicate/fake" column) for the efficiency
# summary printed after the loop.
# NOTE(review): this chunk is a garbled extract — several lines of the
# pd.merge(...) call between the counter updates are missing from this view.
53 for trackEvent, resolvedEvent
in zip(data_track, data_resolved):
# Tracks labelled "good" are used as the particle count — presumably one
# "good" track per truth particle; verify against the labelling code.
54 nb_part += trackEvent.loc[trackEvent[
"good/duplicate/fake"] ==
"good"].shape[0]
55 nb_track += trackEvent.shape[0]
56 nb_fake += trackEvent.loc[trackEvent[
"good/duplicate/fake"] ==
"fake"].shape[0]
57 nb_duplicate += trackEvent.loc[
58 trackEvent[
"good/duplicate/fake"] ==
"duplicate"
# The fragments below are arguments of a pd.merge call whose header and
# remaining keyword arguments fall outside this extract — apparently the
# "good" CKF tracks are merged against resolvedEvent with an indicator
# column named "exists" (see the np.where on merged.exists further down).
63 trackEvent.loc[trackEvent[
"good/duplicate/fake"] ==
"good"],
73 "good/duplicate/fake",
# Indicator column: True only when the row was found in both frames
# ("both" is the pandas merge-indicator value for a two-sided match).
79 merged[
"exists"] = np.where(merged.exists ==
"both",
True,
False)
# NOTE(review): merged.csv is rewritten on every loop iteration, so only
# the last event's merge survives on disk — confirm this is intended.
80 merged.to_csv(path_or_buf=
"merged.csv")
# "Good" CKF tracks that the ambiguity resolver also kept.
82 nb_good_match += merged.loc[merged[
"exists"] ==
True].shape[0]
# Same truth-label counters, now on the resolved sample.
83 nb_reco_fake += resolvedEvent.loc[
84 resolvedEvent[
"good/duplicate/fake"] ==
"fake"
86 nb_reco_duplicate += resolvedEvent.loc[
87 resolvedEvent[
"good/duplicate/fake"] ==
"duplicate"
# Everything not labelled "fake" (good + duplicate) matches a true particle.
89 nb_reco_part += resolvedEvent.loc[
90 resolvedEvent[
"good/duplicate/fake"] !=
"fake"
92 nb_reco_track += resolvedEvent.shape[0]
# Efficiency summary of the raw CKF output (before ambiguity resolution).
# NOTE(review): the rate divisions raise ZeroDivisionError when no input
# events were found (nb_track / nb_part / nb_reco_track == 0) — consider
# guarding upstream where the files are globbed.
print("===Initial efficiencies===")
print("nb particles : ", nb_part)
print("nb track : ", nb_track)
print("duplicate rate: ", 100 * nb_duplicate / nb_track, " %")
print("Fake rate: ", 100 * nb_fake / nb_track, " %")

# Efficiency summary after the ML ambiguity resolution, compared against
# the particle count obtained from the raw CKF sample above.
print("===computed efficiencies===")
print("nb particles : ", nb_part)
print("nb good match : ", nb_good_match)
print("nb particle reco : ", nb_reco_part)
print("nb track reco : ", nb_reco_track)
print("Efficiency (good track) : ", 100 * nb_good_match / nb_part, " %")
print("Efficiency (particle reco) : ", 100 * nb_reco_part / nb_part, " %")
print("duplicate rate: ", 100 * nb_reco_duplicate / nb_reco_track, " %")
print("Fake rate: ", 100 * nb_reco_fake / nb_reco_track, " %")