Analysis Software
Documentation for sPHENIX simulation software
TMVA_D0_D0bar.C
Go to the documentation of this file, or view the newest version of TMVA_D0_D0bar.C in the sPHENIX GitHub repository.
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>
#include <vector>

#include "TChain.h"
#include "TFile.h"
#include "TTree.h"
#include "TString.h"
#include "TObjString.h"
#include "TSystem.h"
#include "TROOT.h"

#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Tools.h"
#include "TMVA/TMVAGui.h"
int TMVA_D0_D0bar( TString myMethodList = "" )
{
   TMVA::Tools::Instance();

   // Default MVA methods to be trained + tested
   std::map<std::string,int> Use;

   // Cut optimisation
   Use["Cuts"]    = 0;
   Use["CutsD"]   = 0;
   Use["CutsPCA"] = 0;
   Use["CutsGA"]  = 0;
   Use["CutsSA"]  = 0;
   //
   // 1-dimensional likelihood ("naive Bayes estimator")
   Use["Likelihood"]    = 0;
   Use["LikelihoodD"]   = 0; // the "D" extension indicates decorrelated input variables (see option strings)
   Use["LikelihoodPCA"] = 0; // the "PCA" extension indicates PCA-transformed input variables (see option strings)
   Use["LikelihoodKDE"] = 0;
   Use["LikelihoodMIX"] = 0;
   //
   // Multidimensional likelihood and nearest-neighbour methods
   Use["PDERS"]        = 0;
   Use["PDERSD"]       = 0;
   Use["PDERSPCA"]     = 0;
   Use["PDEFoam"]      = 0;
   Use["PDEFoamBoost"] = 0; // uses generalised MVA method boosting
   Use["KNN"]          = 0; // k-nearest-neighbour method
   //
   // Linear Discriminant Analysis
   Use["LD"]            = 0; // Linear Discriminant, identical to Fisher
   Use["Fisher"]        = 0;
   Use["FisherG"]       = 0;
   Use["BoostedFisher"] = 0; // uses generalised MVA method boosting
   Use["HMatrix"]       = 0;
   //
   // Function Discriminant analysis
   Use["FDA_GA"]   = 0; // minimisation of a user-defined function using a genetic algorithm
   Use["FDA_SA"]   = 0;
   Use["FDA_MC"]   = 0;
   Use["FDA_MT"]   = 0;
   Use["FDA_GAMT"] = 0;
   Use["FDA_MCMT"] = 0;
   //
   // Neural Networks (all are feed-forward Multilayer Perceptrons)
   Use["MLP"]      = 1; // Recommended ANN
   Use["MLPBFGS"]  = 0; // Recommended ANN with optional training method
   Use["MLPBNN"]   = 0; // Recommended ANN with BFGS training method and Bayesian regulator
   Use["CFMlpANN"] = 0; // Deprecated ANN from ALEPH
   Use["TMlpANN"]  = 0; // ROOT's own ANN
#ifdef R__HAS_TMVAGPU
   Use["DNN_GPU"] = 0; // CUDA-accelerated DNN training
#else
   Use["DNN_GPU"] = 0;
#endif

#ifdef R__HAS_TMVACPU
   Use["DNN_CPU"] = 0; // Multi-core accelerated DNN
#else
   Use["DNN_CPU"] = 0;
#endif
   //
   // Support Vector Machine
   Use["SVM"] = 0;
   //
   // Boosted Decision Trees
   Use["BDT"]  = 1; // uses Adaptive Boost
   Use["BDTG"] = 0; // uses Gradient Boost
   Use["BDTB"] = 0; // uses Bagging
   Use["BDTD"] = 0; // decorrelation + Adaptive Boost
   Use["BDTF"] = 0; // allows use of the Fisher discriminant for node splitting
   //
   // Friedman's RuleFit method, i.e., an optimised series of cuts ("rules")
   Use["RuleFit"] = 0;
   // ---------------------------------------------------------------

   std::cout << std::endl;
   std::cout << "==> Start TMVAClassification" << std::endl;

   // Select methods
   if (myMethodList != "") {
      for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) it->second = 0;

      std::vector<TString> mlist = TMVA::gTools().SplitString( myMethodList, ',' );
      for (UInt_t i=0; i<mlist.size(); i++) {
         std::string regMethod(mlist[i]);

         if (Use.find(regMethod) == Use.end()) {
            std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
            for (std::map<std::string,int>::iterator it = Use.begin(); it != Use.end(); it++) std::cout << it->first << " ";
            std::cout << std::endl;
            return 1;
         }
         Use[regMethod] = 1;
      }
   }
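
   // For example, to train only the BDT and MLP methods, this macro can be
   // invoked from the shell (illustrative command line):
   //
   //    root -l -b -q 'TMVA_D0_D0bar.C("BDT,MLP")'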

   // Read training and test data
   // (it is also possible to use ASCII format as input -> see the TMVA Users Guide)
   TFile *input(nullptr);
   TString fname = "/sphenix/user/rosstom/analysis/HF-Particle/KFParticle_sPHENIX/Run40Acceptance082922/Run40_D0_Separated_091922.root";
   if (!gSystem->AccessPathName( fname )) { // the file exists in the local directory
      input = TFile::Open( fname );
   }
   else { // otherwise fall back to the standard TMVA example file
      TFile::SetCacheFileDir(".");
      input = TFile::Open("http://root.cern.ch/files/tmva_class_example.root", "CACHEREAD");
   }
   if (!input) {
      std::cout << "ERROR: could not open data file" << std::endl;
      exit(1);
   }
   std::cout << "--- TMVAClassification : Using input file: " << input->GetName() << std::endl;

   // Register the training and test trees

   TTree *D0_Tree         = (TTree*)input->Get("D0_tree");
   TTree *D0bar_Tree      = (TTree*)input->Get("D0bar_tree");
   TTree *Background_Tree = (TTree*)input->Get("Background_tree");

   // Create a ROOT output file where TMVA will store ntuples, histograms, etc.
   TString outfileName( "TMVA_D0Sep_092122.root" );
   // TString outfileName( "TMVA_D0bar.root" );
   TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

   // Create the factory object. Later you can choose the methods
   // whose performance you'd like to investigate. The factory is
   // the only TMVA object you have to interact with.
   //
   // The first argument is the base of the name of all the
   // weight files, written to the weights/ subdirectory of the dataset directory.
   //
   // The second argument is the output file for the training results.
   // All TMVA output can be suppressed by removing the "!" (not) in
   // front of the "Silent" argument in the option string.
   TMVA::Factory *factory = new TMVA::Factory( "TMVAClassification", outputFile,
      "!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification" );

   TMVA::DataLoader *dataloader = new TMVA::DataLoader("dataset");
   // If you wish to modify default settings
   // (please check "src/Config.h" to see all available global options):
   //
   // (TMVA::gConfig().GetVariablePlotting()).fTimesRMS = 8.0;
   // (TMVA::gConfig().GetIONames()).fWeightFileDir = "myWeightDirectory";

   // Define the input variables that shall be used for the MVA training.
   // Note that you may also use variable expressions, such as "3*var1/var2*abs(var3)"
   // [all types of expressions that can also be parsed by TTree::Draw( "expression" )]
   dataloader->AddVariable( "outKFP_positive_p", "P_p", "GeV/c", 'F' );
   dataloader->AddVariable( "outKFP_negative_p", "N_p", "GeV/c", 'F' );
   dataloader->AddVariable( "outKFP_KpPm_invm", "KpPm_invm", "GeV/c^{2}", 'F' );
   dataloader->AddVariable( "outKFP_KmPp_invm", "KmPp_invm", "GeV/c^{2}", 'F' );
   //dataloader->AddVariable( "outKFP_D0_DIRA", "DIRA", "DIRA", 'F' );
   //dataloader->AddVariable( "outKFP_D0_IPchi2", "IPchi2", "IPchi2", 'F' );
   //dataloader->AddVariable( "outKFP_D0_pseudorapidity", "pseudorapidity", "#eta", 'F' );

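   // A derived expression can also be registered directly as a training
   // variable; a hypothetical example using the branches above:
   //
   // dataloader->AddVariable( "outKFP_positive_p/outKFP_negative_p", "p_ratio", "", 'F' );
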
   // You can add so-called "spectator variables", which are not used in the MVA training,
   // but will appear in the final "TestTree" produced by TMVA. This TestTree will contain the
   // input variables, the response values of all trained MVAs, and the spectator variables.

   //dataloader->AddSpectator( "", "", "GeV", 'F' );


   // Global event weights per tree (see below for setting event-wise weights)
   Double_t signalWeight     = 50.0; // a larger signal weight helps compensate for the small number of signal events relative to background
   Double_t backgroundWeight = 1.0;

   // You can add an arbitrary number of signal or background trees
   dataloader->AddSignalTree    ( D0_Tree,         signalWeight );
   dataloader->AddBackgroundTree( D0bar_Tree,      signalWeight );
   dataloader->AddBackgroundTree( Background_Tree, backgroundWeight );

   // To give different trees for training and testing, do as follows:
   //
   // dataloader->AddSignalTree( signalTrainingTree, signalTrainWeight, "Training" );
   // dataloader->AddSignalTree( signalTestTree,     signalTestWeight,  "Test" );

   // Use the following code instead of the above two or four lines to add signal and background
   // training and test events "by hand".
   // NOTE that in this case one should not give expressions (such as "var1+var2") in the input
   // variable definition, but simply compute the expression before adding the event.
   // ```cpp
   // // --- begin ----------------------------------------------------------
   // std::vector<Double_t> vars( 4 ); // vector has size of number of input variables
   // Float_t treevars[4], weight;
   //
   // // Signal
   // for (UInt_t ivar=0; ivar<4; ivar++) signalTree->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   // for (UInt_t i=0; i<signalTree->GetEntries(); i++) {
   //    signalTree->GetEntry(i);
   //    for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //    // add training and test events; here: first half is training, second is testing
   //    // note that the weight can also be event-wise
   //    if (i < signalTree->GetEntries()/2.0) dataloader->AddSignalTrainingEvent( vars, signalWeight );
   //    else                                  dataloader->AddSignalTestEvent    ( vars, signalWeight );
   // }
   //
   // // Background (has event weights)
   // background->SetBranchAddress( "weight", &weight );
   // for (UInt_t ivar=0; ivar<4; ivar++) background->SetBranchAddress( Form( "var%i", ivar+1 ), &(treevars[ivar]) );
   // for (UInt_t i=0; i<background->GetEntries(); i++) {
   //    background->GetEntry(i);
   //    for (UInt_t ivar=0; ivar<4; ivar++) vars[ivar] = treevars[ivar];
   //    // add training and test events; here: first half is training, second is testing
   //    // note that the weight can also be event-wise
   //    if (i < background->GetEntries()/2) dataloader->AddBackgroundTrainingEvent( vars, backgroundWeight*weight );
   //    else                                dataloader->AddBackgroundTestEvent    ( vars, backgroundWeight*weight );
   // }
   // // --- end ------------------------------------------------------------
   // ```
   // End of tree registration

   // Set individual event weights (the variables must exist in the original TTree):
   // - for signal    : `dataloader->SetSignalWeightExpression    ("weight1*weight2");`
   // - for background: `dataloader->SetBackgroundWeightExpression("weight1*weight2");`
   //dataloader->SetBackgroundWeightExpression( "weight" );

   // Apply additional cuts on the signal and background samples (can be different)
   TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
   TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";

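   // For this analysis one could, for instance, restrict both samples to a
   // window around the nominal D0 mass (illustrative values only):
   //
   // TCut mycuts = "abs(outKFP_KpPm_invm - 1.865) < 0.2";
   // TCut mycutb = "abs(outKFP_KpPm_invm - 1.865) < 0.2";
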
   // Tell the dataloader how to use the training and testing events.
   //
   // If no numbers of events are given, half of the events in the tree are used
   // for training, and the other half for testing:
   //
   // dataloader->PrepareTrainingAndTestTree( mycut, "SplitMode=Random:!V" );
   //
   // To also specify the number of testing events, use:
   //
   // dataloader->PrepareTrainingAndTestTree( mycut,
   //    "NSigTrain=3000:NBkgTrain=3000:NSigTest=3000:NBkgTest=3000:SplitMode=Random:!V" );
   dataloader->PrepareTrainingAndTestTree( mycuts, mycutb,
      "nTrain_Signal=1500:nTrain_Background=75000:SplitMode=Random:NormMode=NumEvents:!V" );

   // ### Book MVA methods
   //
   // Please look up the various method configuration options in the corresponding cxx files, e.g.
   // src/MethodCuts.cxx, etc., or here: http://tmva.sourceforge.net/optionRef.html
   // It is possible to preset ranges in the option string in which the cut optimisation should be done:
   // "...:CutRangeMin[2]=-1:CutRangeMax[2]=1:...", where [2] is the third input variable

   // Cut optimisation
   if (Use["Cuts"])
      factory->BookMethod( dataloader, TMVA::Types::kCuts, "Cuts",
         "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart" );

   if (Use["CutsD"])
      factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsD",
         "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=Decorrelate" );

   if (Use["CutsPCA"])
      factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsPCA",
         "!H:!V:FitMethod=MC:EffSel:SampleSize=200000:VarProp=FSmart:VarTransform=PCA" );

   if (Use["CutsGA"])
      factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsGA",
         "H:!V:FitMethod=GA:CutRangeMin[0]=-10:CutRangeMax[0]=10:VarProp[1]=FMax:EffSel:Steps=30:Cycles=3:PopSize=400:SC_steps=10:SC_rate=5:SC_factor=0.95" );

   if (Use["CutsSA"])
      factory->BookMethod( dataloader, TMVA::Types::kCuts, "CutsSA",
         "!H:!V:FitMethod=SA:EffSel:MaxCalls=150000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   // Likelihood ("naive Bayes estimator")
   if (Use["Likelihood"])
      factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "Likelihood",
         "H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmoothBkg[1]=10:NSmooth=1:NAvEvtPerBin=50" );

   // Decorrelated likelihood
   if (Use["LikelihoodD"])
      factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodD",
         "!H:!V:TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=Decorrelate" );

   // PCA-transformed likelihood
   if (Use["LikelihoodPCA"])
      factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodPCA",
         "!H:!V:!TransformOutput:PDFInterpol=Spline2:NSmoothSig[0]=20:NSmoothBkg[0]=20:NSmooth=5:NAvEvtPerBin=50:VarTransform=PCA" );

   // Use a kernel density estimator to approximate the PDFs
   if (Use["LikelihoodKDE"])
      factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodKDE",
         "!H:!V:!TransformOutput:PDFInterpol=KDE:KDEtype=Gauss:KDEiter=Adaptive:KDEFineFactor=0.3:KDEborder=None:NAvEvtPerBin=50" );

   // Use a variable-dependent mix of splines and kernel density estimator
   if (Use["LikelihoodMIX"])
      factory->BookMethod( dataloader, TMVA::Types::kLikelihood, "LikelihoodMIX",
         "!H:!V:!TransformOutput:PDFInterpolSig[0]=KDE:PDFInterpolBkg[0]=KDE:PDFInterpolSig[1]=KDE:PDFInterpolBkg[1]=KDE:PDFInterpolSig[2]=Spline2:PDFInterpolBkg[2]=Spline2:PDFInterpolSig[3]=Spline2:PDFInterpolBkg[3]=Spline2:KDEtype=Gauss:KDEiter=Nonadaptive:KDEborder=None:NAvEvtPerBin=50" );

   // Test the multi-dimensional probability density estimator.
   // Here are the option strings for the MinMax and RMS methods, respectively:
   //
   //    "!H:!V:VolumeRangeMode=MinMax:DeltaFrac=0.2:KernelEstimator=Gauss:GaussSigma=0.3"
   //    "!H:!V:VolumeRangeMode=RMS:DeltaFrac=3:KernelEstimator=Gauss:GaussSigma=0.3"
   if (Use["PDERS"])
      factory->BookMethod( dataloader, TMVA::Types::kPDERS, "PDERS",
         "!H:!V:NormTree=T:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600" );

   if (Use["PDERSD"])
      factory->BookMethod( dataloader, TMVA::Types::kPDERS, "PDERSD",
         "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=Decorrelate" );

   if (Use["PDERSPCA"])
      factory->BookMethod( dataloader, TMVA::Types::kPDERS, "PDERSPCA",
         "!H:!V:VolumeRangeMode=Adaptive:KernelEstimator=Gauss:GaussSigma=0.3:NEventsMin=400:NEventsMax=600:VarTransform=PCA" );

   // Multi-dimensional likelihood estimator using self-adapting phase-space binning
   if (Use["PDEFoam"])
      factory->BookMethod( dataloader, TMVA::Types::kPDEFoam, "PDEFoam",
         "!H:!V:SigBgSeparate=F:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );

   if (Use["PDEFoamBoost"])
      factory->BookMethod( dataloader, TMVA::Types::kPDEFoam, "PDEFoamBoost",
         "!H:!V:Boost_Num=30:Boost_Transform=linear:SigBgSeparate=F:MaxDepth=4:UseYesNoCell=T:DTLogic=MisClassificationError:FillFoamWithOrigWeights=F:TailCut=0:nActiveCells=500:nBin=20:Nmin=400:Kernel=None:Compress=T" );

   // k-Nearest-Neighbour classifier (KNN)
   if (Use["KNN"])
      factory->BookMethod( dataloader, TMVA::Types::kKNN, "KNN",
         "H:nkNN=20:ScaleFrac=0.8:SigmaFact=1.0:Kernel=Gaus:UseKernel=F:UseWeight=T:!Trim" );

   // H-Matrix (chi-squared) method
   if (Use["HMatrix"])
      factory->BookMethod( dataloader, TMVA::Types::kHMatrix, "HMatrix", "!H:!V:VarTransform=None" );

   // Linear discriminant (same as Fisher discriminant)
   if (Use["LD"])
      factory->BookMethod( dataloader, TMVA::Types::kLD, "LD",
         "H:!V:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher discriminant (same as LD)
   if (Use["Fisher"])
      factory->BookMethod( dataloader, TMVA::Types::kFisher, "Fisher",
         "H:!V:Fisher:VarTransform=None:CreateMVAPdfs:PDFInterpolMVAPdf=Spline2:NbinsMVAPdf=50:NsmoothMVAPdf=10" );

   // Fisher with Gauss-transformed input variables
   if (Use["FisherG"])
      factory->BookMethod( dataloader, TMVA::Types::kFisher, "FisherG", "H:!V:VarTransform=Gauss" );

   // Composite classifier: ensemble (tree) of boosted Fisher classifiers
   if (Use["BoostedFisher"])
      factory->BookMethod( dataloader, TMVA::Types::kFisher, "BoostedFisher",
         "H:!V:Boost_Num=20:Boost_Transform=log:Boost_Type=AdaBoost:Boost_AdaBoostBeta=0.2:!Boost_DetailedMonitoring" );

   // Function discriminant analysis (FDA): test of various fitters; the recommended ones are Minuit, GA, and SA
   if (Use["FDA_MC"])
      factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MC",
         "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:SampleSize=100000:Sigma=0.1" );

   if (Use["FDA_GA"]) // can also use the Simulated Annealing (SA) algorithm (see CutsSA options)
      factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GA",
         "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=100:Cycles=2:Steps=5:Trim=True:SaveBestGen=1" );

   if (Use["FDA_SA"]) // uses the Simulated Annealing (SA) algorithm (see CutsSA options)
      factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_SA",
         "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=SA:MaxCalls=15000:KernelTemp=IncAdaptive:InitialTemp=1e+6:MinTemp=1e-6:Eps=1e-10:UseDefaultScale" );

   if (Use["FDA_MT"])
      factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MT",
         "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=2:UseImprove:UseMinos:SetBatch" );

   if (Use["FDA_GAMT"])
      factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GAMT",
         "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:Cycles=1:PopSize=5:Steps=5:Trim" );

   if (Use["FDA_MCMT"])
      factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_MCMT",
         "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=MC:Converger=MINUIT:ErrorLevel=1:PrintLevel=-1:FitStrategy=0:!UseImprove:!UseMinos:SetBatch:SampleSize=20" );

   // TMVA ANN: MLP (recommended ANN) -- all ANNs in TMVA are Multilayer Perceptrons
   if (Use["MLP"])
      factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLP",
         "H:!V:EstimatorType=MSE:NeuronType=sigmoid:VarTransform=N:NCycles=21:HiddenLayers=N-1:TestRate=1:UseRegulator" );

   if (Use["MLPBFGS"])
      factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLPBFGS",
         "H:!V:NeuronType=tanh:VarTransform=N:NCycles=600:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:!UseRegulator" );

   if (Use["MLPBNN"]) // BFGS training with Bayesian regulators
      factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLPBNN",
         "H:!V:NeuronType=tanh:VarTransform=N:NCycles=60:HiddenLayers=N+5:TestRate=5:TrainingMethod=BFGS:UseRegulator" );

   // Multi-architecture DNN implementation
   if (Use["DNN_CPU"] || Use["DNN_GPU"]) {
      // General layout
      TString layoutString ("Layout=TANH|128,TANH|128,TANH|128,LINEAR");

      // Define the training strategy. Multiple strategies can be given, separated by the "|" delimiter.
      TString trainingStrategyString = ("TrainingStrategy=LearningRate=1e-2,Momentum=0.9,"
                                        "ConvergenceSteps=20,BatchSize=100,TestRepetitions=1,"
                                        "WeightDecay=1e-4,Regularization=None,"
                                        "DropConfig=0.0+0.5+0.5+0.5");

      // General options
      TString dnnOptions ("!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:"
                          "WeightInitialization=XAVIERUNIFORM");
      dnnOptions.Append (":"); dnnOptions.Append (layoutString);
      dnnOptions.Append (":"); dnnOptions.Append (trainingStrategyString);

      // CUDA implementation
      if (Use["DNN_GPU"]) {
         TString gpuOptions = dnnOptions + ":Architecture=GPU";
         factory->BookMethod(dataloader, TMVA::Types::kDL, "DNN_GPU", gpuOptions);
      }
      // Multi-core CPU implementation
      if (Use["DNN_CPU"]) {
         TString cpuOptions = dnnOptions + ":Architecture=CPU";
         factory->BookMethod(dataloader, TMVA::Types::kDL, "DNN_CPU", cpuOptions);
      }
   }

   // CF (Clermont-Ferrand) ANN
   if (Use["CFMlpANN"])
      factory->BookMethod( dataloader, TMVA::Types::kCFMlpANN, "CFMlpANN",
         "!H:!V:NCycles=200:HiddenLayers=N+1,N" ); // n_cycles:#nodes:#nodes:...

   // TMlp (ROOT) ANN
   if (Use["TMlpANN"])
      factory->BookMethod( dataloader, TMVA::Types::kTMlpANN, "TMlpANN",
         "!H:!V:NCycles=200:HiddenLayers=N+1,N:LearningMethod=BFGS:ValidationFraction=0.3" ); // n_cycles:#nodes:#nodes:...

   // Support Vector Machine
   if (Use["SVM"])
      factory->BookMethod( dataloader, TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm" );

   // Boosted Decision Trees
   if (Use["BDTG"]) // Gradient Boost
      factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTG",
         "!H:!V:NTrees=1000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2" );

   if (Use["BDT"]) // Adaptive Boost
      factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDT",
         "!H:!V:NTrees=80:MinNodeSize=2.5%:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20:DoBoostMonitor" );

   if (Use["BDTB"]) // Bagging
      factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTB",
         "!H:!V:NTrees=400:BoostType=Bagging:SeparationType=GiniIndex:nCuts=20" );

   if (Use["BDTD"]) // Decorrelation + Adaptive Boost
      factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTD",
         "!H:!V:NTrees=400:MinNodeSize=5%:MaxDepth=3:BoostType=AdaBoost:SeparationType=GiniIndex:nCuts=20:VarTransform=Decorrelate" );

   if (Use["BDTF"]) // allow using the Fisher discriminant for node splitting of (strongly) linearly correlated variables
      factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTF",
         "!H:!V:NTrees=50:MinNodeSize=2.5%:UseFisherCuts:MaxDepth=3:BoostType=AdaBoost:AdaBoostBeta=0.5:SeparationType=GiniIndex:nCuts=20" );

   // RuleFit -- TMVA implementation of Friedman's method
   if (Use["RuleFit"])
      factory->BookMethod( dataloader, TMVA::Types::kRuleFit, "RuleFit",
         "H:!V:RuleFitModule=RFTMVA:Model=ModRuleLinear:MinImp=0.001:RuleMinDist=0.001:NTrees=20:fEventsMin=0.01:fEventsMax=0.5:GDTau=-1.0:GDTauPrec=0.01:GDStep=0.01:GDNSteps=10000:GDErrScale=1.02" );

   // For an example of the category classifier usage, see TMVAClassificationCategory.
   //
   // --------------------------------------------------------------------------------------------------
   // You can optimise the settings (configuration) of the MVAs using the set of training events.
   // STILL EXPERIMENTAL and only implemented for BDTs!
   //
   // factory->OptimizeAllMethods("SigEffAtBkg0.01","Scan");
   // factory->OptimizeAllMethods("ROCIntegral","FitGA");
   //
   // --------------------------------------------------------------------------------------------------

   // Now tell the factory to train, test, and evaluate the MVAs.
   //
   // Train MVAs using the set of training events
   factory->TrainAllMethods();

   // Evaluate all MVAs using the set of test events
   factory->TestAllMethods();

   // Evaluate and compare performance of all configured MVAs
   factory->EvaluateAllMethods();

   // --------------------------------------------------------------

   // Save the output
   outputFile->Close();

   std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
   std::cout << "==> TMVAClassification is done!" << std::endl;

   delete factory;
   delete dataloader;
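
   // After training, the weight files written under dataset/weights/ can be
   // applied to new candidates with TMVA::Reader (requires #include "TMVA/Reader.h").
   // A minimal sketch, assuming the default DataLoader name "dataset" and the
   // BDT method booked above; the variable names must match the AddVariable calls:
   //
   // Float_t p_p, n_p, kppm_invm, kmpp_invm;
   // TMVA::Reader reader( "!Color:!Silent" );
   // reader.AddVariable( "outKFP_positive_p", &p_p );
   // reader.AddVariable( "outKFP_negative_p", &n_p );
   // reader.AddVariable( "outKFP_KpPm_invm", &kppm_invm );
   // reader.AddVariable( "outKFP_KmPp_invm", &kmpp_invm );
   // reader.BookMVA( "BDT", "dataset/weights/TMVAClassification_BDT.weights.xml" );
   // // ... set the four variables for each candidate, then:
   // Double_t response = reader.EvaluateMVA( "BDT" );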

   // Launch the GUI for the ROOT macros
   if (!gROOT->IsBatch()) TMVA::TMVAGui( outfileName );

   return 0;
}

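// When compiled standalone via the main() below, a build command of roughly
// this form should work (illustrative; exact libraries may vary by ROOT version):
//
//    g++ TMVA_D0_D0bar.C -o TMVA_D0_D0bar $(root-config --cflags --libs) -lTMVA -lTMVAGui
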
int main( int argc, char** argv )
{
   // Parse the command-line method list (ignoring ROOT batch flags)
   TString methodList;
   for (int i=1; i<argc; i++) {
      TString regMethod(argv[i]);
      if (regMethod=="-b" || regMethod=="--batch") continue;
      if (!methodList.IsNull()) methodList += TString(",");
      methodList += regMethod;
   }
   return TMVA_D0_D0bar(methodList);
}