Analysis Software
Documentation for sPHENIX simulation software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SVM_v1.py
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file SVM_v1.py
1 #import libraries
2 import numpy as np
3 
4 import matplotlib.pyplot as plt
5 from matplotlib import style
6 style.use("ggplot")
7 
8 from sklearn import svm
9 
10 import itertools
11 
12 # get data - format: event PID momentum E_Ecal E_HcalInner
13 csv_e = np.genfromtxt ('testdata/set001/CalorimeterResponseHijingCentralRapidity_e_8GeV.csv')
14 csv_pi = np.genfromtxt ('testdata/set001/CalorimeterResponseHijingCentralRapidity_pi_8GeV.csv')
15 
16 # set pid column (NaN) to 0
17 csv_e[:,1] = 0
18 csv_pi[:,1] = 0
19 
20 # separate Momentum, Ecal and Hcal energy columns from rest
21 data_e_sub = csv_e[:,2:5]
22 data_pi_sub = csv_pi[:,2:5]
23 
24 # split data in train and test data
25 data_e_train = data_e_sub[0:500,:]
26 data_e_test = data_e_sub[500:1000,:]
27 
28 data_pi_train = data_pi_sub[0:500,:]
29 data_pi_test = data_pi_sub[500:1000,:]
30 
31 # merge electrons and pions
32 data_train = np.vstack((data_e_train,data_pi_train))
33 data_test = np.vstack((data_pi_test,data_e_test))
34 
35 # pid integer: 0 = pion, 1 = electroon
36 id_pi = 0
37 id_e = 1
38 pid_train = np.vstack(( np.ones((500,1)) , np.zeros((500,1)) ))
39 pid_test = np.vstack(( np.zeros((500,1)) , np.ones((500,1)) ))
40 
41 # reshape data to work as input for SVM
42 X = data_train[:,1:3]
43 y = np.ravel(pid_train)
44 h = .001 # step size in the mesh
45 
46 # prepare and train SVM
47 C = 1.0 # SVM regularization parameter
48 svc = svm.SVC(kernel='linear', C=C).fit(X, y)
49 rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C).fit(X, y)
50 poly_svc = svm.SVC(kernel='poly', degree=3, C=C).fit(X, y)
51 lin_svc = svm.LinearSVC(C=C).fit(X, y)
52 
53 # create a mesh to plot in
54 x_min, x_max = X[:, 0].min() - 0.01, X[:, 0].max() + 0.01
55 y_min, y_max = X[:, 1].min() - 0.01, X[:, 1].max() + 0.01
56 xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
57  np.arange(y_min, y_max, h))
58 
59 # title for the plots
60 titles = ['SVC with linear kernel',
61  'LinearSVC (linear kernel)',
62  'SVC with RBF kernel',
63  'SVC with polynomial (degree 3) kernel']
64 
65 # plot decision boundaries and test data
66 for i, clf in enumerate((svc, lin_svc, rbf_svc, poly_svc)):
67  # Plot the decision boundary. For that, we will assign a color to each
68  # point in the mesh [x_min, m_max]x[y_min, y_max].
69  plt.subplot(2, 2, i + 1)
70  plt.subplots_adjust(wspace=0.4, hspace=0.4)
71 
72  Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
73 
74  # Put the result into a color plot
75  Z = Z.reshape(xx.shape)
76  plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
77 
78  # Plot also the training points
79  plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)
80  plt.xlabel('energy EMCAL')
81  plt.ylabel('energy HCAL (in)')
82  plt.xlim(xx.min(), xx.max())
83  plt.ylim(yy.min(), yy.max())
84  plt.xticks(())
85  plt.yticks(())
86  plt.title(titles[i])
87 
88 plt.show()
89 
90 for i, clf in enumerate((svc, lin_svc, rbf_svc, poly_svc)):
91 
92  pid_predict = clf.predict( data_test[:,1:3] )
93 
94  # check prediction against truth
95  count_all = 0
96  count_electron_as_electron = 0
97  count_pion_as_pion = 0
98  count_electron_as_pion = 0
99  count_pion_as_electron = 0
100  count_true_electron = 0
101  count_true_pion = 0
102 
103  for pid_pred, pid_true in itertools.izip(pid_predict, pid_test):
104 
105  count_all += 1
106 
107  if (pid_pred == id_e) & (pid_true == id_e):
108  count_electron_as_electron += 1
109  count_true_electron += 1
110  elif (pid_pred == id_pi) & (pid_true == id_pi):
111  count_pion_as_pion += 1
112  count_true_pion += 1
113  elif (pid_pred == id_pi) & (pid_true == id_e):
114  count_electron_as_pion += 1
115  count_true_electron += 1
116  elif (pid_pred == id_e) & (pid_true == id_pi):
117  count_pion_as_electron += 1
118  count_true_pion += 1
119  else:
120  pass
121 
122  print "-----------------------------------"
123  print clf
124  print "-----------------------------------"
125  print "Electrons identified as electrons: %d out of %d" % (count_electron_as_electron, count_true_electron)
126  print "Pions identified as electrons: %d out of %d" % (count_pion_as_electron, count_true_pion)
127  print "-----------------------------------"
128  print "Pions identified as pions: %d out of %d" % (count_pion_as_pion, count_true_pion)
129  print "Electrons identified as pions: %d out of %d" % (count_electron_as_pion, count_true_electron)
130  print "-----------------------------------"
131