Analysis Software
Documentation for sPHENIX simulation software
ambiguity_solver_perf.py
Go to the documentation of this file, or view the newest version of ambiguity_solver_perf.py in the sPHENIX GitHub repository.
import glob
import os
import math

import pandas as pd
import numpy as np

from ambiguity_solver_network import prepareDataSet


def readDataSet(CKS_files: list[str]) -> list[pd.DataFrame]:
    """Read the dataset from the different files, remove the pure duplicate tracks, and combine the per-event data.

    @param[in] CKS_files: list of CSV track files (usually one file per event)
    @return: list of DataFrames, one per event, each with its tracks ordered by truth particle ID
    """
    data = []
    for f in CKS_files:
        datafile = pd.read_csv(f)
        datafile = prepareDataSet(datafile)
        # Combine dataset
        data.append(datafile)
    return data


# ==================================================================

# CSV files to be compared, do not forget to sort them
CKF_files_track = sorted(
    glob.glob("odd_output" + "/event0000000[0-9][0-9]-tracks_ckf.csv")
)
CKF_files_resolved = sorted(
    glob.glob("odd_output" + "/event0000000[0-9][0-9]-tracks_ambiML.csv")
)

data_track = readDataSet(CKF_files_track)
data_resolved = readDataSet(CKF_files_resolved)
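# Note: both file lists are sorted so that data_track[i] and data_resolved[i]
# describe the same event when the two lists are zipped together below.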

# Compute the algorithm performances
nb_part = 0
nb_track = 0
nb_fake = 0
nb_duplicate = 0

nb_good_match = 0
nb_reco_part = 0
nb_reco_fake = 0
nb_reco_duplicate = 0
nb_reco_track = 0

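# nb_part, nb_track, nb_fake and nb_duplicate are filled from the CKF tracks
# before ambiguity resolution; nb_good_match and the nb_reco_* counters are
# filled from the ML-resolved tracks.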
# Compute the different efficiencies
for trackEvent, resolvedEvent in zip(data_track, data_resolved):
    nb_part += trackEvent.loc[trackEvent["good/duplicate/fake"] == "good"].shape[0]
    nb_track += trackEvent.shape[0]
    nb_fake += trackEvent.loc[trackEvent["good/duplicate/fake"] == "fake"].shape[0]
    nb_duplicate += trackEvent.loc[
        trackEvent["good/duplicate/fake"] == "duplicate"
    ].shape[0]

    # Merge two dataFrames and add indicator column
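    # Matching on every track property identifies good CKF tracks that also
    # appear, unchanged, in the resolved sample: the left merge keeps all good
    # CKF tracks, and the indicator records whether a match was found.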
    merged = pd.merge(
        trackEvent.loc[trackEvent["good/duplicate/fake"] == "good"],
        resolvedEvent,
        on=[
            "particleId",
            "nStates",
            "nMeasurements",
            "nOutliers",
            "nHoles",
            "ndf",
            "chi2/ndf",
            "good/duplicate/fake",
        ],
        how="left",
        indicator="exists",
    )
    # Add column to show if each row in first DataFrame exists in second
    merged["exists"] = np.where(merged.exists == "both", True, False)
    merged.to_csv(path_or_buf="merged.csv")
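    # Note: "merged.csv" is overwritten on every iteration, so only the merge
    # of the last event remains on disk after the loop.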

    nb_good_match += merged.loc[merged["exists"] == True].shape[0]
    nb_reco_fake += resolvedEvent.loc[
        resolvedEvent["good/duplicate/fake"] == "fake"
    ].shape[0]
    nb_reco_duplicate += resolvedEvent.loc[
        resolvedEvent["good/duplicate/fake"] == "duplicate"
    ].shape[0]
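    # Count each truth particle at most once among the non-fake resolved tracks
    # (the DataFrame index is assumed to be the truth particle ID, as prepared
    # by prepareDataSet, so nunique() on the index counts distinct particles).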
    nb_reco_part += resolvedEvent.loc[
        resolvedEvent["good/duplicate/fake"] != "fake"
    ].index.nunique()
    nb_reco_track += resolvedEvent.shape[0]

print("===Initial efficiencies===")
print("nb particles : ", nb_part)
print("nb track : ", nb_track)
print("duplicate rate: ", 100 * nb_duplicate / nb_track, " %")
print("Fake rate: ", 100 * nb_fake / nb_track, " %")

print("===computed efficiencies===")
print("nb particles : ", nb_part)
print("nb good match : ", nb_good_match)
print("nb particle reco : ", nb_reco_part)
print("nb track reco : ", nb_reco_track)
print("Efficiency (good track) : ", 100 * nb_good_match / nb_part, " %")
print("Efficiency (particle reco) : ", 100 * nb_reco_part / nb_part, " %")
print("duplicate rate: ", 100 * nb_reco_duplicate / nb_reco_track, " %")
print("Fake rate: ", 100 * nb_reco_fake / nb_reco_track, " %")
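
The per-event matching above hinges on pandas' merge indicator: a left merge keeps every good CKF track and flags whether an identical row exists in the resolved sample. Below is a minimal, self-contained sketch of that pattern; the toy values and the reduced column list are invented for illustration and are not part of the script.

import numpy as np
import pandas as pd

# Toy "good" CKF tracks and toy ambiguity-resolved tracks (invented values).
good_ckf = pd.DataFrame({"particleId": [1, 2, 3], "nMeasurements": [12, 10, 9]})
resolved = pd.DataFrame({"particleId": [1, 3], "nMeasurements": [12, 9]})

# Left merge keeps all good CKF tracks; the indicator column says whether an
# identical row was found in the resolved set ("both") or not ("left_only").
merged = pd.merge(
    good_ckf,
    resolved,
    on=["particleId", "nMeasurements"],
    how="left",
    indicator="exists",
)
merged["exists"] = np.where(merged["exists"] == "both", True, False)
print(merged)
# Rows with exists == True correspond to good tracks that survived resolution.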