3 from pandas.tools.plotting
import scatter_matrix
4 import matplotlib.pyplot
as plt
5 from sklearn
import model_selection
6 from sklearn.metrics
import classification_report
7 from sklearn.metrics
import confusion_matrix
8 from sklearn.metrics
import accuracy_score
9 from sklearn.linear_model
import LogisticRegression, RidgeClassifier
10 from sklearn.tree
import DecisionTreeClassifier
11 from sklearn.neighbors
import KNeighborsClassifier, RadiusNeighborsClassifier
12 from sklearn.discriminant_analysis
import LinearDiscriminantAnalysis
13 from sklearn.discriminant_analysis
import QuadraticDiscriminantAnalysis
14 from sklearn.neural_network
import MLPClassifier
15 from sklearn.naive_bayes
import GaussianNB, MultinomialNB, BernoulliNB
16 from sklearn.gaussian_process
import GaussianProcessClassifier
17 from sklearn.gaussian_process.kernels
import RBF
18 from sklearn.svm
import SVC
19 from sklearn.ensemble
import RandomForestClassifier, AdaBoostClassifier, ExtraTreesClassifier
20 from sklearn.cluster
import SpectralClustering
21 from sklearn.mixture
import GaussianMixture
22 from sklearn.cluster
import KMeans
23 from sklearn.tree
import DecisionTreeClassifier, ExtraTreeClassifier
24 from sklearn.calibration
import CalibratedClassifierCV
25 from pandas.tools.plotting
import andrews_curves
26 from pandas.tools.plotting
import parallel_coordinates
27 from pandas.tools.plotting
import radviz
# Input CSV file read by pandas below.
path = "./data/JetSummary_p250_e20_1000events_r05.csv"

# Column labels passed to read_csv through its `names=` argument.
names = [
    'n_track',
    'charge_tot',
    'eta',
    'vertex',
    'class',
]

dataset = pandas.read_csv(path, names=names)
# Underlying values of the loaded frame.
array = dataset.values

# Passed to train_test_split as test_size.
validation_size = 0.60

# NOTE(review): X, Y and seed are not assigned in the lines visible here;
# presumably they are set elsewhere in the script -- confirm.
X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    X,
    Y,
    test_size=validation_size,
    random_state=seed,
)

# Each split's inputs with the matching targets stacked on as trailing column(s).
ding = np.column_stack((X_train, Y_train))
dong = np.column_stack((X_validation, Y_validation))
# Register the candidate classifiers as (label, estimator) pairs, in the same
# order as before.
# NOTE(review): `models` is created outside the lines visible here; it is
# assumed to be a list -- confirm.
models.extend([
    ('LR', LogisticRegression()),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('GNB', GaussianNB()),
    ('SVMlin', SVC(kernel="linear", C=0.025)),
    ('RFC', RandomForestClassifier()),
    # The former base_estimator=None, algorithm='SAMME.R' and
    # random_state=None arguments only restated the library defaults.
    # Newer scikit-learn releases removed `base_estimator` and 'SAMME.R',
    # so spelling them out makes construction/fitting fail there.
    ('ADA', AdaBoostClassifier(n_estimators=50, learning_rate=1.0)),
    ('ADA3', AdaBoostClassifier(n_estimators=100, learning_rate=0.5)),
])
# Cross-validate every registered model on the training split with 10 folds.
#
# The splitter is built once because it does not depend on the model.
# `random_state` is intentionally not passed: without shuffle=True it has no
# effect on the folds, and recent scikit-learn versions raise ValueError when
# it is set while shuffle is False. The folds are therefore the same
# consecutive, unshuffled folds as before.
# NOTE(review): `results` and `scoring` are defined outside the visible lines.
kfold = model_selection.KFold(n_splits=10)

for name, model in models:
    cv_results = model_selection.cross_val_score(
        model, X_train, Y_train, cv=kfold, scoring=scoring
    )
    results.append(cv_results)
    # Summary line: model label, mean CV score, and its standard deviation.
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
# Fit a default AdaBoost classifier on the training split and predict the
# held-out validation inputs.
ada = AdaBoostClassifier()
ada.fit(X_train, Y_train)
predictions = ada.predict(X_validation)

# Print accuracy, confusion matrix and per-class report, in that order.
for _report_fn in (accuracy_score, confusion_matrix, classification_report):
    print(_report_fn(Y_validation, predictions))