Analysis Software
Documentation for sPHENIX simulation software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
KFParticleBaseSIMD.cxx
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file KFParticleBaseSIMD.cxx
1 /*
2  * This file is part of KFParticle package
3  * Copyright (C) 2007-2019 FIAS Frankfurt Institute for Advanced Studies
4  * 2007-2019 Goethe University of Frankfurt
5  * 2007-2019 Ivan Kisel <I.Kisel@compeng.uni-frankfurt.de>
6  * 2007-2019 Maksym Zyzak
7  *
8  * KFParticle is free software: you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation, either version 3 of the License, or
11  * (at your option) any later version.
12  *
13  * KFParticle is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  * GNU General Public License for more details.
17  *
18  * You should have received a copy of the GNU General Public License
19  * along with this program. If not, see <https://www.gnu.org/licenses/>.
20  */
21 
22 
23 #include "KFParticleBaseSIMD.h"
24 #include <iostream>
25 
26 static const float_v small = 1.e-20f;
27 
30 {
38  Initialize();
39 }
40 
41 void KFParticleBaseSIMD::Initialize( const float_v Param[], const float_v Cov[], int_v Charge, float_v Mass )
42 {
58  for( Int_t i=0; i<6 ; i++ ) fP[i] = Param[i];
59  for( Int_t i=0; i<21; i++ ) fC[i] = Cov[i];
60 
61  float_v energy = sqrt( Mass*Mass + fP[3]*fP[3] + fP[4]*fP[4] + fP[5]*fP[5]);
62  fP[6] = energy;
63  fP[7] = 0;
64  fQ = Charge;
65  fNDF = 0;
66  fChi2 = 0;
68  fSFromDecay = 0;
69 
70  float_v energyInv = 1.f/energy;
71  float_v
72  h0 = fP[3]*energyInv,
73  h1 = fP[4]*energyInv,
74  h2 = fP[5]*energyInv;
75 
76  fC[21] = h0*fC[ 6] + h1*fC[10] + h2*fC[15];
77  fC[22] = h0*fC[ 7] + h1*fC[11] + h2*fC[16];
78  fC[23] = h0*fC[ 8] + h1*fC[12] + h2*fC[17];
79  fC[24] = h0*fC[ 9] + h1*fC[13] + h2*fC[18];
80  fC[25] = h0*fC[13] + h1*fC[14] + h2*fC[19];
81  fC[26] = h0*fC[18] + h1*fC[19] + h2*fC[20];
82  fC[27] = ( h0*h0*fC[ 9] + h1*h1*fC[14] + h2*h2*fC[20]
83  + 2*(h0*h1*fC[13] + h0*h2*fC[18] + h1*h2*fC[19] ) );
84  for( Int_t i=28; i<36; i++ ) fC[i] = 0.f;
85  fC[35] = 1.f;
86 
87  SumDaughterMass = Mass;
88  fMassHypo = Mass;
89 }
90 
92 {
101  for( Int_t i=0; i<8; i++) fP[i] = 0.f;
102  for(Int_t i=0;i<36;++i) fC[i]=0.f;
103  fC[0] = fC[2] = fC[5] = 100.f;
104  fC[35] = 1.f;
105  fNDF = -3;
106  fChi2 = 0.f;
107  fQ = 0;
108  fSFromDecay = 0.f;
110  SumDaughterMass = 0.f;
111  fMassHypo = -1;
112 }
113 
114 float_m KFParticleBaseSIMD::GetMomentum( float_v &p, float_v &error ) const
115 {
122  float_v x = fP[3];
123  float_v y = fP[4];
124  float_v z = fP[5];
125  float_v x2 = x*x;
126  float_v y2 = y*y;
127  float_v z2 = z*z;
128  float_v p2 = x2+y2+z2;
129  p = sqrt(p2);
130  error = (x2*fC[9]+y2*fC[14]+z2*fC[20] + 2*(x*y*fC[13]+x*z*fC[18]+y*z*fC[19]) );
131  const float_v LocalSmall = 1.e-4f;
132  float_m mask = (0.f < error) && (LocalSmall < abs(p));
133  error(!mask) = 1.e20f;
134  error = sqrt(error);
135  return (!mask);
136 }
137 
138 float_m KFParticleBaseSIMD::GetPt( float_v &pt, float_v &error ) const
139 {
146  float_v px = fP[3];
147  float_v py = fP[4];
148  float_v px2 = px*px;
149  float_v py2 = py*py;
150  float_v pt2 = px2+py2;
151  pt = sqrt(pt2);
152  error = (px2*fC[9] + py2*fC[14] + 2*px*py*fC[13] );
153  const float_v LocalSmall = 1.e-4f;
154  float_m mask = ( (0.f < error) && (LocalSmall < abs(pt)));
155  error(!mask) = 1.e20f;
156  error = sqrt(error);
157  return (!mask);
158 }
159 
160 float_m KFParticleBaseSIMD::GetEta( float_v &eta, float_v &error ) const
161 {
168  const float_v BIG = 1.e8f;
169  const float_v LocalSmall = 1.e-8f;
170 
171  float_v px = fP[3];
172  float_v py = fP[4];
173  float_v pz = fP[5];
174  float_v pt2 = px*px + py*py;
175  float_v p2 = pt2 + pz*pz;
176  float_v p = sqrt(p2);
177  float_v a = p + pz;
178  float_v b = p - pz;
179  eta = BIG;
180  float_v c = 0.f;
181  c(b > LocalSmall) = (a/b);
182  float_v logc = 0.5f*KFPMath::Log(c);
183  eta(LocalSmall<abs(c)) = logc;
184 
185  float_v h3 = -px*pz;
186  float_v h4 = -py*pz;
187  float_v pt4 = pt2*pt2;
188  float_v p2pt4 = p2*pt4;
189  error = (h3*h3*fC[9] + h4*h4*fC[14] + pt4*fC[20] + 2*( h3*(h4*fC[13] + fC[18]*pt2) + pt2*h4*fC[19] ) );
190 
191  float_m mask = ((LocalSmall < abs(p2pt4)) && (0.f < error));
192  error(mask) = sqrt(error/p2pt4);
193  error(!mask) = BIG;
194 
195  return (!mask);
196 }
197 
198 float_m KFParticleBaseSIMD::GetPhi( float_v &phi, float_v &error ) const
199 {
206  float_v px = fP[3];
207  float_v py = fP[4];
208  float_v px2 = px*px;
209  float_v py2 = py*py;
210  float_v pt2 = px2 + py2;
211  phi = KFPMath::ATan2(py,px);
212  error = (py2*fC[9] + px2*fC[14] - float_v(2.f)*px*py*fC[13] );
213 
214  float_m mask = (0.f < error) && (1.e-4f < pt2);
215  error(mask) = sqrt(error)/pt2;
216  error(!mask) = 1.e10f;
217  return !mask;
218 }
219 
220 float_m KFParticleBaseSIMD::GetR( float_v &r, float_v &error ) const
221 {
228  float_v x = fP[0];
229  float_v y = fP[1];
230  float_v x2 = x*x;
231  float_v y2 = y*y;
232  r = sqrt(x2 + y2);
233  error = (x2*fC[0] + y2*fC[2] - float_v(2.f)*x*y*fC[1] );
234 
235  float_m mask = (0.f < error) && (1.e-4f < r);
236  error(mask) = sqrt(error)/r;
237  error(!mask ) = 1.e10f;
238  return !mask;
239 }
240 
241 float_m KFParticleBaseSIMD::GetMass( float_v &m, float_v &error ) const
242 {
249  const float_v BIG = 1.e8f;
250  const float_v LocalSmall = 1.e-8f;
251 
252  float_v s = ( fP[3]*fP[3]*fC[9] + fP[4]*fP[4]*fC[14] + fP[5]*fP[5]*fC[20]
253  + fP[6]*fP[6]*fC[27]
254  + float_v(2.f)*( + fP[3]*fP[4]*fC[13] + fP[5]*(fP[3]*fC[18] + fP[4]*fC[19])
255  - fP[6]*( fP[3]*fC[24] + fP[4]*fC[25] + fP[5]*fC[26] ) )
256  );
257 
258  float_v m2 = (fP[6]*fP[6] - fP[3]*fP[3] - fP[4]*fP[4] - fP[5]*fP[5]);
259 
260  float_m mask = 0.f <= m2;
261  m(mask) = sqrt(m2);
262  m(!mask) = -sqrt(-m2);
263 
264  mask = (mask && (0.f <= s) && (LocalSmall < m));
265  error(mask) = sqrt(s)/m;
266  error(!mask) = BIG;
267 
268  return !mask;
269 }
270 
271 
272 float_m KFParticleBaseSIMD::GetDecayLength( float_v &l, float_v &error ) const
273 {
281  const float_v BIG = 1.e20f;
282 
283  float_v x = fP[3];
284  float_v y = fP[4];
285  float_v z = fP[5];
286  float_v t = fP[7];
287  float_v x2 = x*x;
288  float_v y2 = y*y;
289  float_v z2 = z*z;
290  float_v p2 = x2+y2+z2;
291  l = t*sqrt(p2);
292 
293  error = p2*fC[35] + t*t/p2*(x2*fC[9]+y2*fC[14]+z2*fC[20]
294  + float_v(2.f)*(x*y*fC[13]+x*z*fC[18]+y*z*fC[19]) )
295  + float_v(2.f)*t*(x*fC[31]+y*fC[32]+z*fC[33]);
296 
297  float_m mask = ((1.e-4f) < p2);
298  error(mask) = sqrt(abs(error));
299  error(!mask) = BIG;
300  return !mask;
301 }
302 
303 float_m KFParticleBaseSIMD::GetDecayLengthXY( float_v &l, float_v &error ) const
304 {
313  const float_v BIG = 1.e8f;
314  float_v x = fP[3];
315  float_v y = fP[4];
316  float_v t = fP[7];
317  float_v x2 = x*x;
318  float_v y2 = y*y;
319  float_v pt2 = x2+y2;
320  l = t*sqrt(pt2);
321 
322  error = pt2*fC[35] + t*t/pt2*(x2*fC[9]+y2*fC[14] + 2*x*y*fC[13] )
323  + float_v(2.f)*t*(x*fC[31]+y*fC[32]);
324  float_m mask = ((1.e-4f) < pt2);
325  error(mask) = sqrt(abs(error));
326  error(!mask) = BIG;
327  return !mask;
328 }
329 
330 
331 float_m KFParticleBaseSIMD::GetLifeTime( float_v &tauC, float_v &error ) const
332 {
341  const float_v BIG = 1.e20f;
342 
343  float_v m, dm;
344  GetMass( m, dm );
345  float_v cTM = (-fP[3]*fC[31] - fP[4]*fC[32] - fP[5]*fC[33] + fP[6]*fC[34]);
346  tauC = fP[7]*m;
347  error = m*m*fC[35] + 2*fP[7]*cTM + fP[7]*fP[7]*dm*dm;
348  float_m mask = (0.f < error);
349  error(mask) = sqrt(error);
350  error(!mask) = BIG;
351  return !mask;
352 }
353 
354 
356 {
361  AddDaughter( Daughter );
362 }
363 
364 void KFParticleBaseSIMD::GetMeasurement( const KFParticleBaseSIMD& daughter, float_v m[], float_v V[], float_v D[3][3] )
365 {
379  if(fNDF[0] == -1)
380  {
381  float_v ds[2] = {0.f,0.f};
382  float_v dsdr[4][6];
383  float_v F1[36], F2[36], F3[36], F4[36];
384  for(int i1=0; i1<36; i1++)
385  {
386  F1[i1] = 0;
387  F2[i1] = 0;
388  F3[i1] = 0;
389  F4[i1] = 0;
390  }
391  GetDStoParticle( daughter, ds, dsdr );
392 
393  float_v V0Tmp[36] ;
394  float_v V1Tmp[36] ;
395 
396  float_v C[36];
397  for(int iC=0; iC<36; iC++)
398  C[iC] = fC[iC];
399 
400  Transport(ds[0], dsdr[0], fP, fC, dsdr[1], F1, F2);
401  daughter.Transport(ds[1], dsdr[3], m, V, dsdr[2], F4, F3);
402 
403  MultQSQt(F2, daughter.fC, V0Tmp, 6);
404  MultQSQt(F3, C, V1Tmp, 6);
405 
406  for(int iC=0; iC<21; iC++)
407  {
408  fC[iC] += V0Tmp[iC];
409  V[iC] += V1Tmp[iC];
410  }
411 
412  float_v C1F1T[6][6];
413  for(int i=0; i<6; i++)
414  for(int j=0; j<6; j++)
415  {
416  C1F1T[i][j] = 0;
417  for(int k=0; k<6; k++)
418  {
419  C1F1T[i][j] += C[IJ(i,k)] * F1[j*6+k];
420  }
421  }
422  float_v F3C1F1T[6][6];
423  for(int i=0; i<6; i++)
424  for(int j=0; j<6; j++)
425  {
426  F3C1F1T[i][j] = 0;
427  for(int k=0; k<6; k++)
428  {
429  F3C1F1T[i][j] += F3[i*6+k] * C1F1T[k][j];
430  }
431  }
432  float_v C2F2T[6][6];
433  for(int i=0; i<6; i++)
434  for(int j=0; j<6; j++)
435  {
436  C2F2T[i][j] = 0;
437  for(int k=0; k<6; k++)
438  {
439  C2F2T[i][j] += daughter.fC[IJ(i,k)] * F2[j*6+k];
440  }
441  }
442  for(int i=0; i<3; i++)
443  for(int j=0; j<3; j++)
444  {
445  D[i][j] = F3C1F1T[i][j];
446  for(int k=0; k<6; k++)
447  {
448  D[i][j] += F4[i*6+k] * C2F2T[k][j];
449  }
450  }
451  }
452  else
453  {
454  float_v dsdr[6];
455  float_v dS = daughter.GetDStoPoint(fP, dsdr);
456 
457  float_v dsdp[6] = {-dsdr[0], -dsdr[1], -dsdr[2], 0.f, 0.f, 0.f};
458 
459  float_v F[36], F1[36];
460  for(int i2=0; i2<36; i2++)
461  {
462  F[i2] = 0;
463  F1[i2] = 0;
464  }
465  daughter.Transport(dS, dsdr, m, V, dsdp, F, F1);
466 
467 // float_v V1Tmp[36] = {0.};
468 // MultQSQt(F1, fC, V1Tmp, 6);
469 
470 // for(int iC=0; iC<21; iC++)
471 // V[iC] += V1Tmp[iC];
472 
473  float_v VFT[3][6];
474  for(int i=0; i<3; i++)
475  for(int j=0; j<6; j++)
476  {
477  VFT[i][j] = 0;
478  for(int k=0; k<3; k++)
479  {
480  VFT[i][j] += fC[IJ(i,k)] * F1[j*6+k];
481  }
482  }
483 
484  float_v FVFT[6][6];
485  for(int i=0; i<6; i++)
486  for(int j=0; j<6; j++)
487  {
488  FVFT[i][j] = 0;
489  for(int k=0; k<3; k++)
490  {
491  FVFT[i][j] += F1[i*6+k] * VFT[k][j];
492  }
493  }
494 
495  for(int i=0; i<3; i++)
496  for(int j=0; j<3; j++)
497  {
498  D[i][j] = 0;
499  for(int k=0; k<3; k++)
500  {
501  D[i][j] += fC[IJ(j,k)] * F1[i*6+k];
502  }
503  }
504 
505  V[0] += FVFT[0][0];
506  V[1] += FVFT[1][0];
507  V[2] += FVFT[1][1];
508  V[3] += FVFT[2][0];
509  V[4] += FVFT[2][1];
510  V[5] += FVFT[2][2];
511  }
512 }
513 
515 {
529  AddDaughterId( Daughter.Id() );
530 
531  if( int(fNDF[0])<-1 ){ // first daughter -> just copy
532  fNDF = -1;
533  fQ = Daughter.GetQ();
534  for( Int_t i=0; i<7; i++ ) fP[i] = Daughter.fP[i];
535  for( Int_t i=0; i<28; i++ ) fC[i] = Daughter.fC[i];
536  fSFromDecay = 0;
537  fMassHypo = Daughter.fMassHypo;
538  SumDaughterMass = Daughter.SumDaughterMass;
539  return;
540  }
541 
542  if(fConstructMethod == 0)
543  AddDaughterWithEnergyFit(Daughter);
544  else if(fConstructMethod == 2)
545  AddDaughterWithEnergyFitMC(Daughter);
546 
547  SumDaughterMass += Daughter.SumDaughterMass;
548  fMassHypo = -1.f;
549 }
550 
552 {
561  Int_t maxIter = 1;
562 
563  for( Int_t iter=0; iter<maxIter; iter++ ){
564 
565  float_v m[8], mV[36];
566 
567  float_v D[3][3];
568  GetMeasurement(Daughter, m, mV, D);
569 
570  float_v mS[6]= { fC[0]+mV[0],
571  fC[1]+mV[1], fC[2]+mV[2],
572  fC[3]+mV[3], fC[4]+mV[4], fC[5]+mV[5] };
573  InvertCholetsky3(mS);
574  //* Residual (measured - estimated)
575 
576  float_v zeta[3] = { m[0]-fP[0], m[1]-fP[1], m[2]-fP[2] };
577 
578  float_v K[3][3];
579  for(int i=0; i<3; i++)
580  for(int j=0; j<3; j++)
581  {
582  K[i][j] = 0;
583  for(int k=0; k<3; k++)
584  K[i][j] += fC[IJ(i,k)] * mS[IJ(k,j)];
585  }
586 
587  //* CHt = CH' - D'
588  float_v mCHt0[7], mCHt1[7], mCHt2[7];
589 
590  mCHt0[0]=fC[ 0] ; mCHt1[0]=fC[ 1] ; mCHt2[0]=fC[ 3] ;
591  mCHt0[1]=fC[ 1] ; mCHt1[1]=fC[ 2] ; mCHt2[1]=fC[ 4] ;
592  mCHt0[2]=fC[ 3] ; mCHt1[2]=fC[ 4] ; mCHt2[2]=fC[ 5] ;
593  mCHt0[3]=fC[ 6]-mV[ 6]; mCHt1[3]=fC[ 7]-mV[ 7]; mCHt2[3]=fC[ 8]-mV[ 8];
594  mCHt0[4]=fC[10]-mV[10]; mCHt1[4]=fC[11]-mV[11]; mCHt2[4]=fC[12]-mV[12];
595  mCHt0[5]=fC[15]-mV[15]; mCHt1[5]=fC[16]-mV[16]; mCHt2[5]=fC[17]-mV[17];
596  mCHt0[6]=fC[21]-mV[21]; mCHt1[6]=fC[22]-mV[22]; mCHt2[6]=fC[23]-mV[23];
597 
598  //* Kalman gain K = mCH'*S
599 
600  float_v k0[7], k1[7], k2[7];
601 
602  for(Int_t i=0;i<7;++i){
603  k0[i] = mCHt0[i]*mS[0] + mCHt1[i]*mS[1] + mCHt2[i]*mS[3];
604  k1[i] = mCHt0[i]*mS[1] + mCHt1[i]*mS[2] + mCHt2[i]*mS[4];
605  k2[i] = mCHt0[i]*mS[3] + mCHt1[i]*mS[4] + mCHt2[i]*mS[5];
606  }
607 
608  //* Add the daughter momentum to the particle momentum
609 
610  fP[ 3] += m[ 3];
611  fP[ 4] += m[ 4];
612  fP[ 5] += m[ 5];
613  fP[ 6] += m[ 6];
614 
615  fC[ 9] += mV[ 9];
616  fC[13] += mV[13];
617  fC[14] += mV[14];
618  fC[18] += mV[18];
619  fC[19] += mV[19];
620  fC[20] += mV[20];
621  fC[24] += mV[24];
622  fC[25] += mV[25];
623  fC[26] += mV[26];
624  fC[27] += mV[27];
625 
626 
627  //* New estimation of the vertex position r += K*zeta
628 
629  for(Int_t i=0;i<7;++i)
630  fP[i] = fP[i] + k0[i]*zeta[0] + k1[i]*zeta[1] + k2[i]*zeta[2];
631 
632  //* New covariance matrix C -= K*(mCH')'
633 
634  for(Int_t i=0, k=0;i<7;++i){
635  for(Int_t j=0;j<=i;++j,++k){
636  fC[k] = fC[k] - (k0[i]*mCHt0[j] + k1[i]*mCHt1[j] + k2[i]*mCHt2[j] );
637  }
638  }
639 
640  float_v K2[3][3];
641  for(int i=0; i<3; i++)
642  {
643  for(int j=0; j<3; j++)
644  K2[i][j] = -K[j][i];
645  K2[i][i] += 1;
646  }
647 
648  float_v A[3][3];
649  for(int i=0; i<3; i++)
650  for(int j=0; j<3; j++)
651  {
652  A[i][j] = 0;
653  for(int k=0; k<3; k++)
654  {
655  A[i][j] += D[i][k] * K2[k][j];
656  }
657  }
658 
659  float_v M[3][3];
660  for(int i=0; i<3; i++)
661  for(int j=0; j<3; j++)
662  {
663  M[i][j] = 0;
664  for(int k=0; k<3; k++)
665  {
666  M[i][j] += K[i][k] * A[k][j];
667  }
668  }
669 
670  fC[0] += 2.f*M[0][0];
671  fC[1] += M[0][1] + M[1][0];
672  fC[2] += 2.f*M[1][1];
673  fC[3] += M[0][2] + M[2][0];
674  fC[4] += M[1][2] + M[2][1];
675  fC[5] += 2.f*M[2][2];
676 
677  //* Calculate Chi^2
678 
679  fNDF += 2;
680  fQ += Daughter.GetQ();
681  fSFromDecay = 0;
682  fChi2 += (mS[0]*zeta[0] + mS[1]*zeta[1] + mS[3]*zeta[2])*zeta[0]
683  + (mS[1]*zeta[0] + mS[2]*zeta[1] + mS[4]*zeta[2])*zeta[1]
684  + (mS[3]*zeta[0] + mS[4]*zeta[1] + mS[5]*zeta[2])*zeta[2];
685  }
686 }
687 
689 {
696  Int_t maxIter = 1;
697 
698  for( Int_t iter=0; iter<maxIter; iter++ ){
699 
700  float_v m[8], mV[36];
701 
702  float_v D[3][3];
703  GetMeasurement(Daughter, m, mV, D);
704 
705  float_v mS[6]= { fC[0]+mV[0],
706  fC[1]+mV[1], fC[2]+mV[2],
707  fC[3]+mV[3], fC[4]+mV[4], fC[5]+mV[5] };
708  InvertCholetsky3(mS);
709  //* Residual (measured - estimated)
710 
711  float_v zeta[3] = { m[0]-fP[0], m[1]-fP[1], m[2]-fP[2] };
712 
713  float_v K[3][6];
714  for(int i=0; i<3; i++)
715  for(int j=0; j<3; j++)
716  {
717  K[i][j] = 0;
718  for(int k=0; k<3; k++)
719  K[i][j] += fC[IJ(i,k)] * mS[IJ(k,j)];
720  }
721 
722 
723  //* CHt = CH'
724 
725  float_v mCHt0[7], mCHt1[7], mCHt2[7];
726 
727  mCHt0[0]=fC[ 0] ; mCHt1[0]=fC[ 1] ; mCHt2[0]=fC[ 3] ;
728  mCHt0[1]=fC[ 1] ; mCHt1[1]=fC[ 2] ; mCHt2[1]=fC[ 4] ;
729  mCHt0[2]=fC[ 3] ; mCHt1[2]=fC[ 4] ; mCHt2[2]=fC[ 5] ;
730  mCHt0[3]=fC[ 6] ; mCHt1[3]=fC[ 7] ; mCHt2[3]=fC[ 8] ;
731  mCHt0[4]=fC[10] ; mCHt1[4]=fC[11] ; mCHt2[4]=fC[12] ;
732  mCHt0[5]=fC[15] ; mCHt1[5]=fC[16] ; mCHt2[5]=fC[17] ;
733  mCHt0[6]=fC[21] ; mCHt1[6]=fC[22] ; mCHt2[6]=fC[23] ;
734 
735  //* Kalman gain K = mCH'*S
736 
737  float_v k0[7], k1[7], k2[7];
738 
739  for(Int_t i=0;i<7;++i){
740  k0[i] = mCHt0[i]*mS[0] + mCHt1[i]*mS[1] + mCHt2[i]*mS[3];
741  k1[i] = mCHt0[i]*mS[1] + mCHt1[i]*mS[2] + mCHt2[i]*mS[4];
742  k2[i] = mCHt0[i]*mS[3] + mCHt1[i]*mS[4] + mCHt2[i]*mS[5];
743  }
744 
745  // last itearation -> update the particle
746 
747  //* VHt = VH'
748 
749  float_v mVHt0[7], mVHt1[7], mVHt2[7];
750 
751  mVHt0[0]=mV[ 0] ; mVHt1[0]=mV[ 1] ; mVHt2[0]=mV[ 3] ;
752  mVHt0[1]=mV[ 1] ; mVHt1[1]=mV[ 2] ; mVHt2[1]=mV[ 4] ;
753  mVHt0[2]=mV[ 3] ; mVHt1[2]=mV[ 4] ; mVHt2[2]=mV[ 5] ;
754  mVHt0[3]=mV[ 6] ; mVHt1[3]=mV[ 7] ; mVHt2[3]=mV[ 8] ;
755  mVHt0[4]=mV[10] ; mVHt1[4]=mV[11] ; mVHt2[4]=mV[12] ;
756  mVHt0[5]=mV[15] ; mVHt1[5]=mV[16] ; mVHt2[5]=mV[17] ;
757  mVHt0[6]=mV[21] ; mVHt1[6]=mV[22] ; mVHt2[6]=mV[23] ;
758 
759  //* Kalman gain Km = mCH'*S
760 
761  float_v km0[7], km1[7], km2[7];
762 
763  for(Int_t i=0;i<7;++i){
764  km0[i] = mVHt0[i]*mS[0] + mVHt1[i]*mS[1] + mVHt2[i]*mS[3];
765  km1[i] = mVHt0[i]*mS[1] + mVHt1[i]*mS[2] + mVHt2[i]*mS[4];
766  km2[i] = mVHt0[i]*mS[3] + mVHt1[i]*mS[4] + mVHt2[i]*mS[5];
767  }
768 
769  for(Int_t i=0;i<7;++i)
770  fP[i] = fP[i] + k0[i]*zeta[0] + k1[i]*zeta[1] + k2[i]*zeta[2];
771 
772  for(Int_t i=0;i<7;++i)
773  m[i] = m[i] - km0[i]*zeta[0] - km1[i]*zeta[1] - km2[i]*zeta[2];
774 
775  for(Int_t i=0, k=0;i<7;++i){
776  for(Int_t j=0;j<=i;++j,++k){
777  fC[k] = fC[k] - (k0[i]*mCHt0[j] + k1[i]*mCHt1[j] + k2[i]*mCHt2[j] );
778  }
779  }
780 
781  for(Int_t i=0, k=0;i<7;++i){
782  for(Int_t j=0;j<=i;++j,++k){
783  mV[k] = mV[k] - (km0[i]*mVHt0[j] + km1[i]*mVHt1[j] + km2[i]*mVHt2[j] );
784  }
785  }
786 
787  float_v mDf[7][7];
788 
789  for(Int_t i=0;i<7;++i){
790  for(Int_t j=0;j<7;++j){
791  mDf[i][j] = (km0[i]*mCHt0[j] + km1[i]*mCHt1[j] + km2[i]*mCHt2[j] );
792  }
793  }
794 
795  float_v mJ1[7][7], mJ2[7][7];
796  for(Int_t iPar1=0; iPar1<7; iPar1++)
797  {
798  for(Int_t iPar2=0; iPar2<7; iPar2++)
799  {
800  mJ1[iPar1][iPar2] = 0;
801  mJ2[iPar1][iPar2] = 0;
802  }
803  }
804 
805  float_v mMassParticle = fP[6]*fP[6] - (fP[3]*fP[3] + fP[4]*fP[4] + fP[5]*fP[5]);
806  float_v mMassDaughter = m[6]*m[6] - (m[3]*m[3] + m[4]*m[4] + m[5]*m[5]);
807  mMassParticle(mMassParticle > 0.f) = sqrt(mMassParticle);
808  mMassParticle(mMassParticle <= 0.f) = 0.f;
809  mMassDaughter(mMassDaughter > 0.f) = sqrt(mMassDaughter);
810  mMassDaughter(mMassDaughter <= 0.f) = 0.f;
811 
812  float_m mask1 = fMassHypo > -0.5f;
813  float_m mask2 = (!mask1) && ( (mMassParticle < SumDaughterMass) || (fP[6]<0.f)) ;
814  SetMassConstraint(fP,fC,mJ1,fMassHypo, mask1);
816 
817  float_m mask3 = Daughter.fMassHypo > -0.5f;
818  float_m mask4 = ( (!mask3) && ( (mMassDaughter<Daughter.SumDaughterMass) || (m[6]<0.f)) );
819  SetMassConstraint(m,mV,mJ2,Daughter.fMassHypo, mask3);
820  SetMassConstraint(m,mV,mJ2,Daughter.SumDaughterMass, mask4);
821 
822  float_v mDJ[7][7];
823 
824  for(Int_t i=0; i<7; i++) {
825  for(Int_t j=0; j<7; j++) {
826  mDJ[i][j] = 0;
827  for(Int_t k=0; k<7; k++) {
828  mDJ[i][j] += mDf[i][k]*mJ1[j][k];
829  }
830  }
831  }
832 
833  for(Int_t i=0; i<7; ++i){
834  for(Int_t j=0; j<7; ++j){
835  mDf[i][j]=0;
836  for(Int_t l=0; l<7; l++){
837  mDf[i][j] += mJ2[i][l]*mDJ[l][j];
838  }
839  }
840  }
841 
842  //* Add the daughter momentum to the particle momentum
843 
844  fP[ 3] += m[ 3];
845  fP[ 4] += m[ 4];
846  fP[ 5] += m[ 5];
847  fP[ 6] += m[ 6];
848 
849  fC[ 9] += mV[ 9];
850  fC[13] += mV[13];
851  fC[14] += mV[14];
852  fC[18] += mV[18];
853  fC[19] += mV[19];
854  fC[20] += mV[20];
855  fC[24] += mV[24];
856  fC[25] += mV[25];
857  fC[26] += mV[26];
858  fC[27] += mV[27];
859 
860  fC[6 ] += mDf[3][0]; fC[7 ] += mDf[3][1]; fC[8 ] += mDf[3][2];
861  fC[10] += mDf[4][0]; fC[11] += mDf[4][1]; fC[12] += mDf[4][2];
862  fC[15] += mDf[5][0]; fC[16] += mDf[5][1]; fC[17] += mDf[5][2];
863  fC[21] += mDf[6][0]; fC[22] += mDf[6][1]; fC[23] += mDf[6][2];
864 
865  fC[9 ] += mDf[3][3] + mDf[3][3];
866  fC[13] += mDf[4][3] + mDf[3][4]; fC[14] += mDf[4][4] + mDf[4][4];
867  fC[18] += mDf[5][3] + mDf[3][5]; fC[19] += mDf[5][4] + mDf[4][5]; fC[20] += mDf[5][5] + mDf[5][5];
868  fC[24] += mDf[6][3] + mDf[3][6]; fC[25] += mDf[6][4] + mDf[4][6]; fC[26] += mDf[6][5] + mDf[5][6]; fC[27] += mDf[6][6] + mDf[6][6];
869 
870 
871  float_v K2[3][3];
872  for(int i=0; i<3; i++)
873  {
874  for(int j=0; j<3; j++)
875  K2[i][j] = -K[j][i];
876  K2[i][i] += 1;
877  }
878 
879  float_v A[3][3];
880  for(int i=0; i<3; i++)
881  for(int j=0; j<3; j++)
882  {
883  A[i][j] = 0.f;
884  for(int k=0; k<3; k++)
885  {
886  A[i][j] += D[i][k] * K2[k][j];
887  }
888  }
889 
890  float_v M[3][3];
891  for(int i=0; i<3; i++)
892  for(int j=0; j<3; j++)
893  {
894  M[i][j] = 0.f;
895  for(int k=0; k<3; k++)
896  {
897  M[i][j] += K[i][k] * A[k][j];
898  }
899  }
900 
901  fC[0] += 2*M[0][0];
902  fC[1] += M[0][1] + M[1][0];
903  fC[2] += 2*M[1][1];
904  fC[3] += M[0][2] + M[2][0];
905  fC[4] += M[1][2] + M[2][1];
906  fC[5] += 2*M[2][2];
907 
908  //* Calculate Chi^2
909 
910  fNDF += 2;
911  fQ += Daughter.GetQ();
912  fSFromDecay = 0;
913  fChi2 += (mS[0]*zeta[0] + mS[1]*zeta[1] + mS[3]*zeta[2])*zeta[0]
914  + (mS[1]*zeta[0] + mS[2]*zeta[1] + mS[4]*zeta[2])*zeta[1]
915  + (mS[3]*zeta[0] + mS[4]*zeta[1] + mS[5]*zeta[2])*zeta[2];
916  }
917 }
918 
920 {
926  AddDaughterId( Daughter.Id() );
927 
928  float_v m[8], mV[36];
929 
930  float_v D[3][3];
931  GetMeasurement(Daughter, m, mV, D);
932 
933  float_v mS[6]= { fC[0]+mV[0],
934  fC[1]+mV[1], fC[2]+mV[2],
935  fC[3]+mV[3], fC[4]+mV[4], fC[5]+mV[5] };
936  InvertCholetsky3(mS);
937  //* Residual (measured - estimated)
938 
939  float_v zeta[3] = { m[0]-fP[0], m[1]-fP[1], m[2]-fP[2] };
940 
941  float_v K[3][3];
942  for(int i=0; i<3; i++)
943  for(int j=0; j<3; j++)
944  {
945  K[i][j] = 0;
946  for(int k=0; k<3; k++)
947  K[i][j] += fC[IJ(i,k)] * mS[IJ(k,j)];
948  }
949 
950  //* CHt = CH' - D'
951  float_v mCHt0[7], mCHt1[7], mCHt2[7];
952 
953  mCHt0[0]=fC[ 0] ; mCHt1[0]=fC[ 1] ; mCHt2[0]=fC[ 3] ;
954  mCHt0[1]=fC[ 1] ; mCHt1[1]=fC[ 2] ; mCHt2[1]=fC[ 4] ;
955  mCHt0[2]=fC[ 3] ; mCHt1[2]=fC[ 4] ; mCHt2[2]=fC[ 5] ;
956  mCHt0[3]=fC[ 6]+mV[ 6]; mCHt1[3]=fC[ 7]+mV[ 7]; mCHt2[3]=fC[ 8]+mV[ 8];
957  mCHt0[4]=fC[10]+mV[10]; mCHt1[4]=fC[11]+mV[11]; mCHt2[4]=fC[12]+mV[12];
958  mCHt0[5]=fC[15]+mV[15]; mCHt1[5]=fC[16]+mV[16]; mCHt2[5]=fC[17]+mV[17];
959  mCHt0[6]=fC[21]+mV[21]; mCHt1[6]=fC[22]+mV[22]; mCHt2[6]=fC[23]+mV[23];
960 
961  //* Kalman gain K = mCH'*S
962 
963  float_v k0[7], k1[7], k2[7];
964 
965  for(Int_t i=0;i<7;++i){
966  k0[i] = mCHt0[i]*mS[0] + mCHt1[i]*mS[1] + mCHt2[i]*mS[3];
967  k1[i] = mCHt0[i]*mS[1] + mCHt1[i]*mS[2] + mCHt2[i]*mS[4];
968  k2[i] = mCHt0[i]*mS[3] + mCHt1[i]*mS[4] + mCHt2[i]*mS[5];
969  }
970 
971  //* Add the daughter momentum to the particle momentum
972 
973  fP[ 3] -= m[ 3];
974  fP[ 4] -= m[ 4];
975  fP[ 5] -= m[ 5];
976  fP[ 6] -= m[ 6];
977 
978  fC[ 9] += mV[ 9];
979  fC[13] += mV[13];
980  fC[14] += mV[14];
981  fC[18] += mV[18];
982  fC[19] += mV[19];
983  fC[20] += mV[20];
984  fC[24] += mV[24];
985  fC[25] += mV[25];
986  fC[26] += mV[26];
987  fC[27] += mV[27];
988 
989 
990  //* New estimation of the vertex position r += K*zeta
991 
992  for(Int_t i=0;i<7;++i)
993  fP[i] = fP[i] + k0[i]*zeta[0] + k1[i]*zeta[1] + k2[i]*zeta[2];
994 
995  //* New covariance matrix C -= K*(mCH')'
996 
997  for(Int_t i=0, k=0;i<7;++i){
998  for(Int_t j=0;j<=i;++j,++k){
999  fC[k] = fC[k] - (k0[i]*mCHt0[j] + k1[i]*mCHt1[j] + k2[i]*mCHt2[j] );
1000  }
1001  }
1002 
1003  float_v K2[3][3];
1004  for(int i=0; i<3; i++)
1005  {
1006  for(int j=0; j<3; j++)
1007  K2[i][j] = -K[j][i];
1008  K2[i][i] += 1;
1009  }
1010 
1011  float_v A[3][3];
1012  for(int i=0; i<3; i++)
1013  for(int j=0; j<3; j++)
1014  {
1015  A[i][j] = 0;
1016  for(int k=0; k<3; k++)
1017  {
1018  A[i][j] += D[i][k] * K2[k][j];
1019  }
1020  }
1021 
1022  float_v M[3][3];
1023  for(int i=0; i<3; i++)
1024  for(int j=0; j<3; j++)
1025  {
1026  M[i][j] = 0;
1027  for(int k=0; k<3; k++)
1028  {
1029  M[i][j] += K[i][k] * A[k][j];
1030  }
1031  }
1032 
1033  fC[0] += 2.f*M[0][0];
1034  fC[1] += M[0][1] + M[1][0];
1035  fC[2] += 2.f*M[1][1];
1036  fC[3] += M[0][2] + M[2][0];
1037  fC[4] += M[1][2] + M[2][1];
1038  fC[5] += 2.f*M[2][2];
1039 
1040  //* Calculate Chi^2
1041 
1042  fNDF += 2;
1043  fQ -= Daughter.GetQ();
1044  fSFromDecay = 0;
1045  fChi2 += (mS[0]*zeta[0] + mS[1]*zeta[1] + mS[3]*zeta[2])*zeta[0]
1046  + (mS[1]*zeta[0] + mS[2]*zeta[1] + mS[4]*zeta[2])*zeta[1]
1047  + (mS[3]*zeta[0] + mS[4]*zeta[1] + mS[5]*zeta[2])*zeta[2];
1048 }
1049 
1051 {
1060  const float_v *m = Vtx.fP, *mV = Vtx.fC;
1061 
1062  float_v decayPoint[3] = {fP[0], fP[1], fP[2]};
1063  float_v decayPointCov[6] = { fC[0], fC[1], fC[2], fC[3], fC[4], fC[5] };
1064 
1065  float_v D[6][6];
1066  for(int iD1=0; iD1<6; iD1++)
1067  for(int iD2=0; iD2<6; iD2++)
1068  D[iD1][iD2] = 0.f;
1069 
1070  Bool_t noS = ( fC[35][0]<=0.f ); // no decay length allowed
1071 
1072  if( noS ){
1074  fP[7] = 0.f;
1075  fC[28] = fC[29] = fC[30] = fC[31] = fC[32] = fC[33] = fC[34] = fC[35] = 0.f;
1076  }
1077  else
1078  {
1079  float_v dsdr[6] = {0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
1080  float_v dS = GetDStoPoint(Vtx.fP, dsdr);
1081 
1082  float_v dsdp[6] = {-dsdr[0], -dsdr[1], -dsdr[2], 0.f, 0.f, 0.f };
1083 
1084  float_v F[36], F1[36];
1085  for(int i2=0; i2<36; i2++)
1086  {
1087  F[i2] = 0.f;
1088  F1[i2] = 0.f;
1089  }
1090  Transport( dS, dsdr, fP, fC, dsdp, F, F1 );
1091 
1092  float_v CTmp[36];
1093  MultQSQt(F1, mV, CTmp, 6);
1094 
1095  for(int iC=0; iC<21; iC++)
1096  fC[iC] += CTmp[iC];
1097 
1098  for(int i=0; i<6; i++)
1099  for(int j=0; j<3; j++)
1100  {
1101  D[i][j] = 0;
1102  for(int k=0; k<3; k++)
1103  {
1104  D[i][j] += mV[IJ(j,k)] * F1[i*6+k];
1105  }
1106  }
1107  }
1108 
1109  float_v mS[6] = { fC[0] + mV[0],
1110  fC[1] + mV[1], fC[2] + mV[2],
1111  fC[3] + mV[3], fC[4] + mV[4], fC[5] + mV[5] };
1112  InvertCholetsky3(mS);
1113 
1114  float_v res[3] = { m[0] - X(), m[1] - Y(), m[2] - Z() };
1115 
1116  float_v K[3][6];
1117  for(int i=0; i<3; i++)
1118  for(int j=0; j<3; j++)
1119  {
1120  K[i][j] = 0;
1121  for(int k=0; k<3; k++)
1122  K[i][j] += fC[IJ(i,k)] * mS[IJ(k,j)];
1123  }
1124 
1125  float_v mCHt0[7], mCHt1[7], mCHt2[7];
1126  mCHt0[0]=fC[ 0]; mCHt1[0]=fC[ 1]; mCHt2[0]=fC[ 3];
1127  mCHt0[1]=fC[ 1]; mCHt1[1]=fC[ 2]; mCHt2[1]=fC[ 4];
1128  mCHt0[2]=fC[ 3]; mCHt1[2]=fC[ 4]; mCHt2[2]=fC[ 5];
1129  mCHt0[3]=fC[ 6]; mCHt1[3]=fC[ 7]; mCHt2[3]=fC[ 8];
1130  mCHt0[4]=fC[10]; mCHt1[4]=fC[11]; mCHt2[4]=fC[12];
1131  mCHt0[5]=fC[15]; mCHt1[5]=fC[16]; mCHt2[5]=fC[17];
1132  mCHt0[6]=fC[21]; mCHt1[6]=fC[22]; mCHt2[6]=fC[23];
1133 
1134  float_v k0[7], k1[7], k2[7];
1135  for(Int_t i=0;i<7;++i){
1136  k0[i] = mCHt0[i]*mS[0] + mCHt1[i]*mS[1] + mCHt2[i]*mS[3];
1137  k1[i] = mCHt0[i]*mS[1] + mCHt1[i]*mS[2] + mCHt2[i]*mS[4];
1138  k2[i] = mCHt0[i]*mS[3] + mCHt1[i]*mS[4] + mCHt2[i]*mS[5];
1139  }
1140 
1141  for(Int_t i=0;i<7;++i)
1142  fP[i] = fP[i] + k0[i]*res[0] + k1[i]*res[1] + k2[i]*res[2];
1143 
1144  for(Int_t i=0, k=0;i<7;++i){
1145  for(Int_t j=0;j<=i;++j,++k){
1146  fC[k] = fC[k] - (k0[i]*mCHt0[j] + k1[i]*mCHt1[j] + k2[i]*mCHt2[j] );
1147  }
1148  }
1149 
1150  float_v K2[3][3];
1151  for(int i=0; i<3; i++)
1152  {
1153  for(int j=0; j<3; j++)
1154  K2[i][j] = -K[j][i];
1155  K2[i][i] += 1;
1156  }
1157 
1158  float_v A[3][3];
1159  for(int i=0; i<3; i++)
1160  for(int j=0; j<3; j++)
1161  {
1162  A[i][j] = 0;
1163  for(int k=0; k<3; k++)
1164  {
1165  A[i][j] += D[k][i] * K2[k][j];
1166  }
1167  }
1168 
1169  float_v M[3][3];
1170  for(int i=0; i<3; i++)
1171  for(int j=0; j<3; j++)
1172  {
1173  M[i][j] = 0;
1174  for(int k=0; k<3; k++)
1175  {
1176  M[i][j] += K[i][k] * A[k][j];
1177  }
1178  }
1179 
1180  fC[0] += 2*M[0][0];
1181  fC[1] += M[0][1] + M[1][0];
1182  fC[2] += 2*M[1][1];
1183  fC[3] += M[0][2] + M[2][0];
1184  fC[4] += M[1][2] + M[2][1];
1185  fC[5] += 2*M[2][2];
1186 
1187  fChi2 += (mS[0]*res[0] + mS[1]*res[1] + mS[3]*res[2])*res[0]
1188  + (mS[1]*res[0] + mS[2]*res[1] + mS[4]*res[2])*res[1]
1189  + (mS[3]*res[0] + mS[4]*res[1] + mS[5]*res[2])*res[2];
1190  fNDF += 2;
1191 
1192  if( noS ){
1193  fP[7] = 0;
1194  fC[28] = fC[29] = fC[30] = fC[31] = fC[32] = fC[33] = fC[34] = fC[35] = 0.f;
1195  fSFromDecay = 0;
1196  }
1197  else
1198  {
1199  float_v dsdr[6] = {0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
1200  fP[7] = GetDStoPoint(decayPoint, dsdr);
1201 
1202  float_v dsdp[6] = {-dsdr[0], -dsdr[1], -dsdr[2], 0.f, 0.f, 0.f};
1203 
1204  float_v F[36], F1[36];
1205  for(int i2=0; i2<36; i2++)
1206  {
1207  F[i2] = 0.f;
1208  F1[i2] = 0.f;
1209  }
1210  float_v tmpP[8], tmpC[36];
1211  Transport( fP[7], dsdr, tmpP, tmpC, dsdp, F, F1 );
1212 
1213  fC[35] = 0.f;
1214  for(int iDsDr=0; iDsDr<6; iDsDr++)
1215  {
1216  float_v dsdrC = 0.f, dsdpV = 0.f;
1217 
1218  for(int k=0; k<6; k++)
1219  dsdrC += dsdr[k] * fC[IJ(k,iDsDr)]; // (-dsdr[k])*fC[k,j]
1220 
1221  fC[iDsDr+28] = dsdrC;
1222  fC[35] += dsdrC*dsdr[iDsDr] ;
1223  if(iDsDr < 3)
1224  {
1225  for(int k=0; k<3; k++)
1226  dsdpV -= dsdr[k] * decayPointCov[IJ(k,iDsDr)]; //
1227  fC[35] -= dsdpV*dsdr[iDsDr];
1228  }
1229  }
1230  fSFromDecay = -fP[7];
1231  }
1232 
1233  fAtProductionVertex = 1;
1234 }
1235 
1236 void KFParticleBaseSIMD::SetMassConstraint( float_v *mP, float_v *mC, float_v mJ[7][7], float_v mass, float_m mask )
1237 {
1246  const float_v energy2 = mP[6]*mP[6], p2 = mP[3]*mP[3]+mP[4]*mP[4]+mP[5]*mP[5], mass2 = mass*mass;
1247 
1248  const float_v a = energy2 - p2 + 2.f*mass2;
1249  const float_v b = -2.f*(energy2 + p2);
1250  const float_v c = energy2 - p2 - mass2;
1251 
1252  float_v lambda(Vc::Zero);
1253  lambda(abs(b) > float_v(1.e-10f)) = -c/b ;
1254 
1255  float_v d = 4.f*energy2*p2 - mass2*(energy2-p2-2.f*mass2);
1256  float_m qMask = (d >= 0.f) && (abs(a) > (1.e-10f)) ;
1257  lambda(qMask) = (energy2 + p2 - sqrt(d))/a;
1258 
1259  lambda(mP[6]<0.f) = -1000000.f;
1260 
1261  Int_t iIter=0;
1262  for(iIter=0; iIter<100; iIter++)
1263  {
1264  float_v lambda2 = lambda*lambda;
1265  float_v lambda4 = lambda2*lambda2;
1266 
1267 // float_v lambda0 = lambda;
1268 
1269  float_v f = -mass2 * lambda4 + a*lambda2 + b*lambda + c;
1270  float_v df = -4.f*mass2 * lambda2*lambda + 2.f*a*lambda + b;
1271  lambda(abs(df) > float_v(1.e-10f)) -= f/df;
1272 // if(TMath::Abs(lambda0 - lambda) < 1.e-8) break;
1273  }
1274 
1275  const float_v lpi = 1.f/(1.f + lambda);
1276  const float_v lmi = 1.f/(1.f - lambda);
1277  const float_v lp2i = lpi*lpi;
1278  const float_v lm2i = lmi*lmi;
1279 
1280  float_v lambda2 = lambda*lambda;
1281 
1282  float_v dfl = -4.f*mass2 * lambda2*lambda + 2.f*a*lambda + b;
1283  float_v dfx[4] = {0.f,0.f,0.f,0.f};
1284  dfx[0] = -2.f*(1.f + lambda)*(1.f + lambda)*mP[3];
1285  dfx[1] = -2.f*(1.f + lambda)*(1.f + lambda)*mP[4];
1286  dfx[2] = -2.f*(1.f + lambda)*(1.f + lambda)*mP[5];
1287  dfx[3] = 2.f*(1.f - lambda)*(1.f - lambda)*mP[6];
1288  float_v dlx[4] = {1.f,1.f,1.f,1.f};
1289 
1290  for(int i=0; i<4; i++)
1291  dlx[i](abs(dfl) > float_v(1.e-10f)) = -dfx[i] / dfl;
1292 
1293  float_v dxx[4] = {mP[3]*lm2i, mP[4]*lm2i, mP[5]*lm2i, -mP[6]*lp2i};
1294 
1295  for(Int_t i=0; i<7; i++)
1296  for(Int_t j=0; j<7; j++)
1297  mJ[i][j]=0;
1298  mJ[0][0] = 1.;
1299  mJ[1][1] = 1.;
1300  mJ[2][2] = 1.;
1301 
1302  for(Int_t i=3; i<7; i++)
1303  for(Int_t j=3; j<7; j++)
1304  mJ[i][j] = dlx[j-3]*dxx[i-3];
1305 
1306  for(Int_t i=3; i<6; i++)
1307  mJ[i][i] += lmi;
1308  mJ[6][6] += lpi;
1309 
1310  float_v mCJ[7][7];
1311 
1312  for(Int_t i=0; i<7; i++) {
1313  for(Int_t j=0; j<7; j++) {
1314  mCJ[i][j] = 0;
1315  for(Int_t k=0; k<7; k++) {
1316  mCJ[i][j] += mC[IJ(i,k)]*mJ[j][k];
1317  }
1318  }
1319  }
1320 
1321  for(Int_t i=0; i<7; ++i){
1322  for(Int_t j=0; j<=i; ++j){
1323  mC[IJ(i,j)](mask) = 0.f;
1324  for(Int_t l=0; l<7; l++){
1325  mC[IJ(i,j)](mask) += mJ[i][l]*mCJ[l][j];
1326  }
1327  }
1328  }
1329 
1330  mP[3](mask) *= lmi;
1331  mP[4](mask) *= lmi;
1332  mP[5](mask) *= lmi;
1333  mP[6](mask) *= lpi;
1334 }
1335 
1337 {
1342  const float_v& px = fP[3];
1343  const float_v& py = fP[4];
1344  const float_v& pz = fP[5];
1345  const float_v& energy = fP[6];
1346 
1347  const float_v residual = (energy*energy - px*px - py*py - pz*pz) - mass*mass;
1348  const float_v dm2 = float_v(4.f) * ( fC[9]*px*px + fC[14]*py*py + fC[20]*pz*pz + fC[27]*energy*energy +
1349  float_v(2.f) * ( (fC[13]*py + fC[18]*pz - fC[24]*energy)*px + (fC[19]*pz - fC[25]*energy)*py - fC[26]*pz*energy) );
1350  const float_v dChi2 = residual*residual / dm2;
1351  fChi2 += dChi2;
1352  fNDF += 1;
1353 
1354  float_v mJ[7][7];
1355  SetMassConstraint( fP, fC, mJ, mass, float_m(true) );
1356  fMassHypo = mass;
1358 }
1359 
1360 void KFParticleBaseSIMD::SetMassConstraint( float_v Mass, float_v SigmaMass )
1361 {
1368  fMassHypo = Mass;
1369  SumDaughterMass = Mass;
1370 
1371  float_v m2 = Mass*Mass; // measurement, weighted by Mass
1372  float_v s2 = m2*SigmaMass*SigmaMass; // sigma^2
1373 
1374  float_v p2 = fP[3]*fP[3] + fP[4]*fP[4] + fP[5]*fP[5];
1375  float_v e0 = sqrt(m2+p2);
1376 
1377  float_v mH[8];
1378  mH[0] = mH[1] = mH[2] = 0.f;
1379  mH[3] = -2.f*fP[3];
1380  mH[4] = -2.f*fP[4];
1381  mH[5] = -2.f*fP[5];
1382  mH[6] = 2.f*fP[6];//e0;
1383  mH[7] = 0.f;
1384 
1385  float_v zeta = e0*e0 - e0*fP[6];
1386  zeta = m2 - (fP[6]*fP[6]-p2);
1387 
1388  float_v mCHt[8], s2_est=0.f;
1389  for( Int_t i=0; i<8; ++i ){
1390  mCHt[i] = 0.0f;
1391  for (Int_t j=0;j<8;++j) mCHt[i] += Cij(i,j)*mH[j];
1392  s2_est += mH[i]*mCHt[i];
1393  }
1394 
1395 //TODO add protection
1396 // if( s2_est<1.e-20 ) return; // calculated mass error is already 0,
1397  // the particle can not be constrained on mass
1398 
1399  float_v w2 = 1.f/( s2 + s2_est );
1400  fChi2 += zeta*zeta*w2;
1401  fNDF += 1;
1402  for( Int_t i=0, ii=0; i<8; ++i ){
1403  float_v ki = mCHt[i]*w2;
1404  fP[i]+= ki*zeta;
1405  for(Int_t j=0;j<=i;++j) fC[ii++] -= ki*mCHt[j];
1406  }
1407 }
1408 
1409 
1411 {
1417 
1418  float_v h[8];
1419  h[0] = h[1] = h[2] = h[3] = h[4] = h[5] = h[6] = 0.f;
1420  h[7] = 1.f;
1421 
1422  float_v zeta = 0.f - fP[7];
1423  for(Int_t i=0;i<8;++i) zeta -= h[i]*(fP[i]-fP[i]);
1424 
1425  float_v s = fC[35];
1426 // if( s>1.e-20 ) //TODO add protection
1427  {
1428  s = 1.f/s;
1429  fChi2 += zeta*zeta*s;
1430  fNDF += 1;
1431  for( Int_t i=0, ii=0; i<7; ++i ){
1432  float_v ki = fC[28+i]*s;
1433  fP[i]+= ki*zeta;
1434  for(Int_t j=0;j<=i;++j) fC[ii++] -= ki*fC[28+j];
1435  }
1436  }
1437  fP[7] = 0.f;
1438  fC[28] = fC[29] = fC[30] = fC[31] = fC[32] = fC[33] = fC[34] = fC[35] = 0.f;
1439 }
1440 
1441 void KFParticleBaseSIMD::Construct( const KFParticleBaseSIMD* vDaughters[], Int_t nDaughters,
1442  const KFParticleBaseSIMD *Parent, Float_t Mass )
1443 {
1455  const int maxIter = 1;
1456  for( Int_t iter=0; iter<maxIter; iter++ ){
1457 
1458  CleanDaughtersId();
1459  SetNDaughters(nDaughters);
1460 
1461  fAtProductionVertex = 0;
1462  fSFromDecay = float_v(Vc::Zero);
1463  SumDaughterMass = float_v(Vc::Zero);
1464 
1465  for(Int_t i=0;i<36;++i) fC[i]=0.;
1466  fC[35] = 1.;
1467 
1468  fNDF = -3;
1469  fChi2 = 0.;
1470  fQ = 0;
1471 
1472  for( Int_t itr =0; itr<nDaughters; itr++ ){
1473  AddDaughter( *vDaughters[itr] );
1474  }
1475  }
1476 
1477  if( Mass>=0 ) SetMassConstraint( Mass );
1478  if( Parent ) SetProductionVertex( *Parent );
1479 }
1480 
1482 {
1484  float_v dsdr[6] = {float_v(Vc::Zero),float_v(Vc::Zero),float_v(Vc::Zero),float_v(Vc::Zero),float_v(Vc::Zero),float_v(Vc::Zero)};
1485  if( !( (abs(fSFromDecay) < float_v(1.e-6f)).isFull() ) ) TransportToDS( -fSFromDecay, dsdr );
1486  fAtProductionVertex = 0;
1487 }
1488 
1490 {
1492  float_v dsdr[6] = {float_v(Vc::Zero),float_v(Vc::Zero),float_v(Vc::Zero),float_v(Vc::Zero),float_v(Vc::Zero),float_v(Vc::Zero)};
1493  if( !( (abs(fSFromDecay + fP[7]) < float_v(1.e-6f)).isFull() ) ) TransportToDS( -fSFromDecay-fP[7], dsdr );
1494  fAtProductionVertex = 1;
1495 }
1496 
1497 
1498 void KFParticleBaseSIMD::TransportToDS( float_v dS, const float_v* dsdr )
1499 {
1507  Transport( dS, dsdr, fP, fC );
1508  fSFromDecay+= dS;
1509 }
1510 
1511 void KFParticleBaseSIMD::TransportToDSLine( float_v dS, const float_v* dsdr )
1512 {
1521  TransportLine( dS, dsdr, fP, fC );
1522  fSFromDecay+= dS;
1523 }
1524 
1525 void KFParticleBaseSIMD::GetDistanceToVertexLine( const KFParticleBaseSIMD &Vertex, float_v &l, float_v &dl, float_m *isParticleFromVertex ) const
1526 {
1537  float_v c[6] = {Vertex.fC[0]+fC[0], Vertex.fC[1]+fC[1], Vertex.fC[2]+fC[2],
1538  Vertex.fC[3]+fC[3], Vertex.fC[4]+fC[4], Vertex.fC[5]+fC[5]};
1539 
1540  float_v dx = (Vertex.fP[0]-fP[0]);
1541  float_v dy = (Vertex.fP[1]-fP[1]);
1542  float_v dz = (Vertex.fP[2]-fP[2]);
1543 
1544  l = sqrt( dx*dx + dy*dy + dz*dz );
1545  dl = c[0]*dx*dx + c[2]*dy*dy + c[5]*dz*dz + 2*(c[1]*dx*dy + c[3]*dx*dz + c[4]*dy*dz);
1546 
1547  l(abs(l) < 1.e-8f) = 1.e-8f;
1548  float_m ok = float_v(Vc::Zero)<=dl;
1549  dl(!ok) = 1.e8f;
1550  dl(ok) = sqrt( dl )/l;
1551 
1552  if(isParticleFromVertex)
1553  {
1554  *isParticleFromVertex = ok && ( l<float_v(3.f*dl) );
1555  float_v cosV = dx*fP[3] + dy*fP[4] + dz*fP[5];
1556 // float_v dCos = dy*dy*fC[14] + dz*dz*fC[20] + dx*dx*fC[9] + 2*dz*fC[15]*fP[3] + c[0]* fP[3]*fP[3] +
1557 // 2*dz*fC[16]* fP[4] + 2 *c[1] *fP[3] *fP[4] + c[2] *fP[4]*fP[4] + 2 *dz *fC[17]* fP[5] +
1558 // 2*c[3] *fP[3]* fP[5] + 2 *c[4] *fP[4] *fP[5] + c[5]*fP[5] *fP[5] +
1559 // 2*dy *(dz *fC[19] + fC[10] *fP[3] + fC[11]* fP[4] + fC[12]* fP[5]) +
1560 // 2*dx *(dy *fC[13] + dz *fC[18] + fC[6]* fP[3] + fC[7]* fP[4] + fC[8]* fP[5]);
1561 // ok = float_v(float_v(0)<dCos);
1562 // dCos = float_v(ok & ( dCos ));
1563 // dCos = sqrt(dCos);
1564  *isParticleFromVertex = (*isParticleFromVertex) || (!(*isParticleFromVertex) && (cosV<0.f) ) ;
1565  }
1566 }
1567 
1568 float_v KFParticleBaseSIMD::GetDStoPointLine( const float_v xyz[3], float_v dsdr[6] ) const
1569 {
1579  float_v p2 = fP[3]*fP[3] + fP[4]*fP[4] + fP[5]*fP[5];
1580  p2( p2 < float_v(1.e-4f) ) = float_v(1.f);
1581 
1582  const float_v& a = fP[3]*(xyz[0]-fP[0]) + fP[4]*(xyz[1]-fP[1]) + fP[5]*(xyz[2]-fP[2]);
1583  dsdr[0] = -fP[3]/p2;
1584  dsdr[1] = -fP[4]/p2;
1585  dsdr[2] = -fP[5]/p2;
1586  dsdr[3] = ((xyz[0]-fP[0])*p2 - 2.f* fP[3]*a)/(p2*p2);
1587  dsdr[4] = ((xyz[1]-fP[1])*p2 - 2.f* fP[4]*a)/(p2*p2);
1588  dsdr[5] = ((xyz[2]-fP[2])*p2 - 2.f* fP[5]*a)/(p2*p2);
1589 
1590  return a/p2;
1591 }
1592 
1593 float_v KFParticleBaseSIMD::GetDStoPointBz( float_v B, const float_v xyz[3], float_v dsdr[6], const float_v* param) const
1594 {
1607  if(!param)
1608  param = fP;
1609  //* Get dS to a certain space point for Bz field
1610 
1611  const float_v& x = param[0];
1612  const float_v& y = param[1];
1613  const float_v& z = param[2];
1614  const float_v& px = param[3];
1615  const float_v& py = param[4];
1616  const float_v& pz = param[5];
1617 
1618  const float_v kCLight = 0.000299792458f;
1619  float_v bq = B*simd_cast<float_v>(fQ)*kCLight;
1620  float_v pt2 = param[3]*param[3] + param[4]*param[4];
1621  float_v p2 = pt2 + param[5]*param[5];
1622 
1623  float_v dx = xyz[0] - param[0];
1624  float_v dy = xyz[1] - param[1];
1625  float_v dz = xyz[2] - param[2];
1626  float_v a = dx*param[3]+dy*param[4];
1627  float_v dS(Vc::Zero);
1628 
1629  float_v abq = bq*a;
1630 
1631  const float_v LocalSmall = 1.e-8f;
1632  float_m mask = ( abs(bq)<LocalSmall );
1633  if( !( (!mask).isFull() ) )
1634  {
1635  dS(mask && float_m(p2>1.e-4f)) = (a + dz*pz)/p2;
1636 
1637  dsdr[0](mask && float_m(p2>1.e-4f)) = -px/p2;
1638  dsdr[1](mask && float_m(p2>1.e-4f)) = -py/p2;
1639  dsdr[2](mask && float_m(p2>1.e-4f)) = -pz/p2;
1640  dsdr[3](mask && float_m(p2>1.e-4f)) = (dx*p2 - 2.f* px *(a + dz *pz))/(p2*p2);
1641  dsdr[4](mask && float_m(p2>1.e-4f)) = (dy*p2 - 2.f* py *(a + dz *pz))/(p2*p2);
1642  dsdr[5](mask && float_m(p2>1.e-4f)) = (dz*p2 - 2.f* pz *(a + dz *pz))/(p2*p2);
1643 
1644  if(mask.isFull())
1645  return dS;
1646  }
1647 
1648  dS(!mask) = KFPMath::ATan2( abq, pt2 + bq*(dy*px -dx*py) )/bq;
1649 
1650  float_v bs= bq*dS;
1651 
1652  float_v s = sin(bs), c = cos(bs);
1653 
1654  bq(abs(bq) < LocalSmall) = LocalSmall;
1655  float_v bbq = bq*(dx*py - dy*px) - pt2;
1656 
1657  dsdr[0](!mask) = (px*bbq - py*abq)/(abq*abq + bbq*bbq);
1658  dsdr[1](!mask) = (px*abq + py*bbq)/(abq*abq + bbq*bbq);
1659  dsdr[2](!mask) = 0;
1660  dsdr[3](!mask) = -(dx*bbq + dy*abq + 2.f*px*a)/(abq*abq + bbq*bbq);
1661  dsdr[4](!mask) = (dx*abq - dy*bbq - 2.f*py*a)/(abq*abq + bbq*bbq);
1662  dsdr[5](!mask) = 0;
1663 
1664  float_v sz(Vc::Zero);
1665  float_v cCoeff = (bbq*c - abq*s) - pz*pz ;
1666  sz(abs(cCoeff) > 1.e-8f) = (dS*pz - dz)*pz / cCoeff;
1667 
1668  float_v dcdr[6] = {0.f,0.f,0.f,0.f,0.f,0.f};
1669  dcdr[0] = -bq*py*c - bbq*s*bq*dsdr[0] + px*bq*s - abq*c*bq*dsdr[0];
1670  dcdr[1] = bq*px*c - bbq*s*bq*dsdr[1] + py*bq*s - abq*c*bq*dsdr[1];
1671  dcdr[3] = (-bq*dy-2*px)*c - bbq*s*bq*dsdr[3] - dx*bq*s - abq*c*bq*dsdr[3];
1672  dcdr[4] = ( bq*dx-2*py)*c - bbq*s*bq*dsdr[4] - dy*bq*s - abq*c*bq*dsdr[4];
1673  dcdr[5] = -2*pz;
1674 
1675  for(int iP=0; iP<6; iP++)
1676  dsdr[iP](!mask) += pz*pz/cCoeff*dsdr[iP] - sz/cCoeff*dcdr[iP];
1677 
1678  dsdr[2](!mask) += pz/cCoeff;
1679  dsdr[5](!mask) += (2.f*pz*dS - dz)/cCoeff;
1680 
1681  dS(!mask) += sz;
1682 
1683  bs= bq*dS;
1684  s = sin(bs);
1685  c = cos(bs);
1686 
1687  float_v sB, cB;
1688  const float_v kOvSqr6 = 1.f/sqrt(float_v(6.f));
1689 
1690  sB(LocalSmall < abs(bs)) = s/bq;
1691  sB(LocalSmall >= abs(bs)) = (1.f-bs*kOvSqr6)*(1.f+bs*kOvSqr6)*dS;
1692  cB(LocalSmall < abs(bs)) = (1.f-c)/bq;
1693  cB(LocalSmall >= abs(bs)) = .5f*sB*bs;
1694 
1695  float_v p[5];
1696  p[0] = x + sB*px + cB*py;
1697  p[1] = y - cB*px + sB*py;
1698  p[2] = z + dS*pz;
1699  p[3] = c*px + s*py;
1700  p[4] = -s*px + c*py;
1701 
1702  dx = xyz[0] - p[0];
1703  dy = xyz[1] - p[1];
1704  dz = xyz[2] - p[2];
1705  a = dx*p[3]+dy*p[4] + dz*pz;
1706 
1707  abq = bq*a;
1708 
1709  dS(!mask) += KFPMath::ATan2( abq, p2 + bq*(dy*p[3] -dx*p[4]) )/bq;
1710 
1711  return dS;
1712 }
1713 
1714 float_v KFParticleBaseSIMD::GetDStoPointBy( float_v By, const float_v xyz[3], float_v dsdr[6] )
1715  const
1716 {
1730  const float_v param[6] = { fP[0], -fP[2], fP[1], fP[3], -fP[5], fP[4] };
1731  const float_v point[3] = { xyz[0], -xyz[2], xyz[1] };
1732 
1733  float_v dsdrBz[6] = {0.f,0.f,0.f,0.f,0.f,0.f};
1734 
1735  const float_v dS = GetDStoPointBz(By, point, dsdrBz, param);
1736  dsdr[0] = dsdrBz[0];
1737  dsdr[1] = dsdrBz[2];
1738  dsdr[2] = -dsdrBz[1];
1739  dsdr[3] = dsdrBz[3];
1740  dsdr[4] = dsdrBz[5];
1741  dsdr[5] = -dsdrBz[4];
1742 
1743  return dS;
1744 }
1745 
1746 float_v KFParticleBaseSIMD::GetDStoPointCBM( const float_v xyz[3], float_v dsdr[6] ) const
1747 {
1759  float_v dS(Vc::Zero);
1760 
1761  float_v fld[3];
1762  GetFieldValue( fP, fld );
1763  dS = GetDStoPointBy( fld[1],xyz, dsdr );
1764 
1765  dS(abs(dS)>1.E3f) = 0.f;
1766 
1767  return dS;
1768 }
1769 
1771  float_v dS[2], float_v dsdr[4][6], const float_v* param1, const float_v* param2 ) const
1772 {
1796  if(!param1)
1797  {
1798  param1 = fP;
1799  param2 = p.fP;
1800  }
1801 
1802  //* Get dS to another particle for Bz field
1803  const float_v kOvSqr6 = 1.f/sqrt(float_v(6.f));
1804  const float_v kCLight = 0.000299792458f;
1805 
1806  //in XY plane
1807  //first root
1808  const float_v& bq1 = B*simd_cast<float_v>(fQ)*kCLight;
1809  const float_v& bq2 = B*simd_cast<float_v>(p.fQ)*kCLight;
1810 
1811  const float_m& isStraight1 = abs(bq1) < float_v(1.e-8f);
1812  const float_m& isStraight2 = abs(bq2) < float_v(1.e-8f);
1813 
1814  if( isStraight1.isFull() && isStraight2.isFull() )
1815  {
1816  GetDStoParticleLine(p, dS, dsdr);
1817  return;
1818  }
1819 
1820  const float_v& px1 = param1[3];
1821  const float_v& py1 = param1[4];
1822  const float_v& pz1 = param1[5];
1823 
1824  const float_v& px2 = param2[3];
1825  const float_v& py2 = param2[4];
1826  const float_v& pz2 = param2[5];
1827 
1828  const float_v& pt12 = px1*px1 + py1*py1;
1829  const float_v& pt22 = px2*px2 + py2*py2;
1830 
1831  const float_v& x01 = param1[0];
1832  const float_v& y01 = param1[1];
1833  const float_v& z01 = param1[2];
1834 
1835  const float_v& x02 = param2[0];
1836  const float_v& y02 = param2[1];
1837  const float_v& z02 = param2[2];
1838 
1839  float_v dS1[2] = {0.f, 0.f}, dS2[2]={0.f, 0.f};
1840 
1841  const float_v& dx0 = (x01 - x02);
1842  const float_v& dy0 = (y01 - y02);
1843  const float_v& dr02 = dx0*dx0 + dy0*dy0;
1844  const float_v& drp1 = dx0*px1 + dy0*py1;
1845  const float_v& dxyp1 = dx0*py1 - dy0*px1;
1846  const float_v& drp2 = dx0*px2 + dy0*py2;
1847  const float_v& dxyp2 = dx0*py2 - dy0*px2;
1848  const float_v& p1p2 = px1*px2 + py1*py2;
1849  const float_v& dp1p2 = px1*py2 - px2*py1;
1850 
1851  const float_v& k11 = (bq2*drp1 - dp1p2);
1852  const float_v& k21 = (bq1*(bq2*dxyp1 - p1p2) + bq2*pt12);
1853  const float_v& k12 = ((bq1*drp2 - dp1p2));
1854  const float_v& k22 = (bq2*(bq1*dxyp2 + p1p2) - bq1*pt22);
1855 
1856  const float_v& kp = (dxyp1*bq2 - dxyp2*bq1 - p1p2);
1857  const float_v& kd = dr02/2.f*bq1*bq2 + kp;
1858  const float_v& c1 = -(bq1*kd + pt12*bq2);
1859  const float_v& c2 = bq2*kd + pt22*bq1;
1860 
1861  float_v d1 = pt12*pt22 - kd*kd;
1862  d1(d1 < float_v(Vc::Zero)) = float_v(Vc::Zero);
1863  d1 = sqrt( d1 );
1864  float_v d2 = pt12*pt22 - kd*kd;
1865  d2(d2 < float_v(Vc::Zero)) = float_v(Vc::Zero);
1866  d2 = sqrt( d2 );
1867 
1868  float_v dS1dR1[2][6];
1869  float_v dS2dR2[2][6];
1870 
1871  float_v dS1dR2[2][6];
1872  float_v dS2dR1[2][6];
1873 
1874  float_v dk11dr1[6] = {bq2*px1, bq2*py1, 0.f, bq2*dx0 - py2, bq2*dy0 + px2, 0.f};
1875  float_v dk11dr2[6] = {-bq2*px1, -bq2*py1, 0.f, py1, -px1, 0.f};
1876  float_v dk12dr1[6] = {bq1*px2, bq1*py2, 0.f, -py2, px2, 0.f};
1877  float_v dk12dr2[6] = {-bq1*px2, -bq1*py2, 0.f, bq1*dx0 + py1, bq1*dy0 - px1, 0.f};
1878  float_v dk21dr1[6] = {bq1*bq2*py1, -bq1*bq2*px1, 0.f, 2.f*bq2*px1 + bq1*(-(bq2*dy0) - px2), 2.f*bq2*py1 + bq1*(bq2*dx0 - py2), 0.f};
1879  float_v dk21dr2[6] = {-(bq1*bq2*py1), bq1*bq2*px1, 0.f, -(bq1*px1), -(bq1*py1), 0.f};
1880  float_v dk22dr1[6] = {bq1*bq2*py2, -(bq1*bq2*px2), 0.f, bq2*px2, bq2*py2, 0.f};
1881  float_v dk22dr2[6] = {-(bq1*bq2*py2), bq1*bq2*px2, 0.f, bq2*(-(bq1*dy0) + px1) - 2.f*bq1*px2, bq2*(bq1*dx0 + py1) - 2.f*bq1*py2, 0.f};
1882 
1883  float_v dkddr1[6] = {bq1*bq2*dx0 + bq2*py1 - bq1*py2, bq1*bq2*dy0 - bq2*px1 + bq1*px2, 0.f, -bq2*dy0 - px2, bq2*dx0 - py2, 0.f};
1884  float_v dkddr2[6] = {-bq1*bq2*dx0 - bq2*py1 + bq1*py2, -bq1*bq2*dy0 + bq2*px1 - bq1*px2, 0.f, bq1*dy0 - px1, -bq1*dx0 - py1, 0.f};
1885 
1886  float_v dc1dr1[6] = {-(bq1*(bq1*bq2*dx0 + bq2*py1 - bq1*py2)), -(bq1*(bq1*bq2*dy0 - bq2*px1 + bq1*px2)), 0.f, -2.f*bq2*px1 - bq1*(-(bq2*dy0) - px2), -2.f*bq2*py1 - bq1*(bq2*dx0 - py2), 0.f};
1887  float_v dc1dr2[6] = {-(bq1*(-(bq1*bq2*dx0) - bq2*py1 + bq1*py2)), -(bq1*(-(bq1*bq2*dy0) + bq2*px1 - bq1*px2)), 0.f, -(bq1*(bq1*dy0 - px1)), -(bq1*(-(bq1*dx0) - py1)), 0.f};
1888 
1889  float_v dc2dr1[6] = {bq2*(bq1*bq2*dx0 + bq2*py1 - bq1*py2), bq2*(bq1*bq2*dy0 - bq2*px1 + bq1*px2), 0.f, bq2*(-(bq2*dy0) - px2), bq2*(bq2*dx0 - py2), 0.f};
1890  float_v dc2dr2[6] = {bq2*(-(bq1*bq2*dx0) - bq2*py1 + bq1*py2), bq2*(-(bq1*bq2*dy0) + bq2*px1 - bq1*px2), 0.f, bq2*(bq1*dy0 - px1) + 2.f*bq1*px2, bq2*(-(bq1*dx0) - py1) + 2.f*bq1*py2, 0.f};
1891 
1892  float_v dd1dr1[6] = {0.f,0.f,0.f,0.f,0.f,0.f};
1893  float_v dd1dr2[6] = {0.f,0.f,0.f,0.f,0.f,0.f};
1894  {
1895  for(int i=0; i<6; i++)
1896  {
1897  dd1dr1[i](d1>0.f) = -kd/d1*dkddr1[i];
1898  dd1dr2[i](d1>0.f) = -kd/d1*dkddr2[i];
1899  }
1900  dd1dr1[3](d1>0.f) += px1/d1*pt22; dd1dr1[4](d1>0.f) += py1/d1*pt22;
1901  dd1dr2[3](d1>0.f) += px2/d1*pt12; dd1dr2[4](d1>0.f) += py2/d1*pt12;
1902  }
1903 
1904  // find two points of closest approach in XY plane
1905  if( ! ( (!isStraight1).isEmpty() ) )
1906  {
1907  dS1[0](!isStraight1) = KFPMath::ATan2( (bq1*k11*c1 + k21*d1*bq1), (bq1*k11*d1*bq1 - k21*c1) )/bq1;
1908  dS1[1](!isStraight1) = KFPMath::ATan2( (bq1*k11*c1 - k21*d1*bq1), (-bq1*k11*d1*bq1 - k21*c1) )/bq1;
1909 
1910  float_v a = bq1*(k11*c1 + k21*d1);
1911  float_v b = bq1*k11*d1*bq1 - k21*c1;
1912  for(int iP=0; iP<6; iP++)
1913  {
1914  const float_v dadr1 = bq1*( dk11dr1[iP]*c1 + k11*dc1dr1[iP] + dk21dr1[iP]*d1 + k21*dd1dr1[iP] );
1915  const float_v dadr2 = bq1*( dk11dr2[iP]*c1 + k11*dc1dr2[iP] + dk21dr2[iP]*d1 + k21*dd1dr2[iP] );
1916  const float_v dbdr1 = bq1*bq1*( dk11dr1[iP]*d1 + k11*dd1dr1[iP] ) - ( dk21dr1[iP]*c1 + k21*dc1dr1[iP] );
1917  const float_v dbdr2 = bq1*bq1*( dk11dr2[iP]*d1 + k11*dd1dr2[iP] ) - ( dk21dr2[iP]*c1 + k21*dc1dr2[iP] );
1918 
1919  dS1dR1[0][iP](!isStraight1) = 1/bq1 * 1/( b*b + a*a ) * ( dadr1*b - dbdr1*a );
1920  dS1dR2[0][iP](!isStraight1) = 1/bq1 * 1/( b*b + a*a ) * ( dadr2*b - dbdr2*a );
1921  }
1922 
1923  a = bq1*(k11*c1 - k21*d1);
1924  b = -bq1*k11*d1*bq1 - k21*c1;
1925  for(int iP=0; iP<6; iP++)
1926  {
1927  const float_v dadr1 = bq1*( dk11dr1[iP]*c1 + k11*dc1dr1[iP] - (dk21dr1[iP]*d1 + k21*dd1dr1[iP]) );
1928  const float_v dadr2 = bq1*( dk11dr2[iP]*c1 + k11*dc1dr2[iP] - (dk21dr2[iP]*d1 + k21*dd1dr2[iP]) );
1929  const float_v dbdr1 = -bq1*bq1*( dk11dr1[iP]*d1 + k11*dd1dr1[iP] ) - ( dk21dr1[iP]*c1 + k21*dc1dr1[iP] );
1930  const float_v dbdr2 = -bq1*bq1*( dk11dr2[iP]*d1 + k11*dd1dr2[iP] ) - ( dk21dr2[iP]*c1 + k21*dc1dr2[iP] );
1931 
1932  dS1dR1[1][iP](!isStraight1) = 1/bq1 * 1/( b*b + a*a ) * ( dadr1*b - dbdr1*a );
1933  dS1dR2[1][iP](!isStraight1) = 1/bq1 * 1/( b*b + a*a ) * ( dadr2*b - dbdr2*a );
1934  }
1935  }
1936  if( ! ( (!isStraight2).isEmpty() ) )
1937  {
1938  dS2[0](!isStraight2) = KFPMath::ATan2( (bq2*k12*c2 + k22*d2*bq2), (bq2*k12*d2*bq2 - k22*c2) )/bq2;
1939  dS2[1](!isStraight2) = KFPMath::ATan2( (bq2*k12*c2 - k22*d2*bq2), (-bq2*k12*d2*bq2 - k22*c2) )/bq2;
1940 
1941  float_v a = bq2*(k12*c2 + k22*d2);
1942  float_v b = bq2*k12*d2*bq2 - k22*c2;
1943  for(int iP=0; iP<6; iP++)
1944  {
1945  const float_v dadr1 = bq2*( dk12dr1[iP]*c2 + k12*dc2dr1[iP] + dk22dr1[iP]*d1 + k22*dd1dr1[iP] );
1946  const float_v dadr2 = bq2*( dk12dr2[iP]*c2 + k12*dc2dr2[iP] + dk22dr2[iP]*d1 + k22*dd1dr2[iP] );
1947  const float_v dbdr1 = bq2*bq2*( dk12dr1[iP]*d1 + k12*dd1dr1[iP] ) - (dk22dr1[iP]*c2 + k22*dc2dr1[iP]);
1948  const float_v dbdr2 = bq2*bq2*( dk12dr2[iP]*d1 + k12*dd1dr2[iP] ) - (dk22dr2[iP]*c2 + k22*dc2dr2[iP]);
1949 
1950  dS2dR1[0][iP](!isStraight2) = 1/bq2 * 1/( b*b + a*a ) * ( dadr1*b - dbdr1*a );
1951  dS2dR2[0][iP](!isStraight2) = 1/bq2 * 1/( b*b + a*a ) * ( dadr2*b - dbdr2*a );
1952  }
1953 
1954  a = bq2*(k12*c2 - k22*d2);
1955  b = -bq2*k12*d2*bq2 - k22*c2;
1956  for(int iP=0; iP<6; iP++)
1957  {
1958  const float_v dadr1 = bq2*( dk12dr1[iP]*c2 + k12*dc2dr1[iP] - (dk22dr1[iP]*d1 + k22*dd1dr1[iP]) );
1959  const float_v dadr2 = bq2*( dk12dr2[iP]*c2 + k12*dc2dr2[iP] - (dk22dr2[iP]*d1 + k22*dd1dr2[iP]) );
1960  const float_v dbdr1 = -bq2*bq2*( dk12dr1[iP]*d1 + k12*dd1dr1[iP] ) - (dk22dr1[iP]*c2 + k22*dc2dr1[iP]);
1961  const float_v dbdr2 = -bq2*bq2*( dk12dr2[iP]*d1 + k12*dd1dr2[iP] ) - (dk22dr2[iP]*c2 + k22*dc2dr2[iP]);
1962 
1963  dS2dR1[1][iP](!isStraight2) = 1/bq2 * 1/( b*b + a*a ) * ( dadr1*b - dbdr1*a );
1964  dS2dR2[1][iP](!isStraight2) = 1/bq2 * 1/( b*b + a*a ) * ( dadr2*b - dbdr2*a );
1965  }
1966  }
1967  if( ! ( isStraight1.isEmpty() ) )
1968  {
1969  dS1[0](isStraight1 && (pt12>float_v(Vc::Zero)) ) = (k11*c1 + k21*d1)/(- k21*c1);
1970  dS1[1](isStraight1 && (pt12>float_v(Vc::Zero)) ) = (k11*c1 - k21*d1)/(- k21*c1);
1971 
1972  float_v a = k11*c1 + k21*d1;
1973  float_v b = -k21*c1;
1974 
1975  for(int iP=0; iP<6; iP++)
1976  {
1977  const float_v dadr1 = ( dk11dr1[iP]*c1 + k11*dc1dr1[iP] + dk21dr1[iP]*d1 + k21*dd1dr1[iP] );
1978  const float_v dadr2 = ( dk11dr2[iP]*c1 + k11*dc1dr2[iP] + dk21dr2[iP]*d1 + k21*dd1dr2[iP] );
1979  const float_v dbdr1 = -( dk21dr1[iP]*c1 + k21*dc1dr1[iP] );
1980  const float_v dbdr2 = -( dk21dr2[iP]*c1 + k21*dc1dr2[iP] );
1981 
1982  dS1dR1[0][iP](isStraight1 && (pt12>float_v(Vc::Zero)) ) = dadr1/b - dbdr1*a/(b*b) ;
1983  dS1dR2[0][iP](isStraight1 && (pt12>float_v(Vc::Zero)) ) = dadr2/b - dbdr2*a/(b*b) ;
1984  }
1985 
1986  a = k11*c1 - k21*d1;
1987  for(int iP=0; iP<6; iP++)
1988  {
1989  const float_v dadr1 = ( dk11dr1[iP]*c1 + k11*dc1dr1[iP] - dk21dr1[iP]*d1 - k21*dd1dr1[iP] );
1990  const float_v dadr2 = ( dk11dr2[iP]*c1 + k11*dc1dr2[iP] - dk21dr2[iP]*d1 - k21*dd1dr2[iP] );
1991  const float_v dbdr1 = -( dk21dr1[iP]*c1 + k21*dc1dr1[iP] );
1992  const float_v dbdr2 = -( dk21dr2[iP]*c1 + k21*dc1dr2[iP] );
1993 
1994  dS1dR1[1][iP](isStraight1 && (pt12>float_v(Vc::Zero)) ) = dadr1/b - dbdr1*a/(b*b) ;
1995  dS1dR2[1][iP](isStraight1 && (pt12>float_v(Vc::Zero)) ) = dadr2/b - dbdr2*a/(b*b) ;
1996  }
1997  }
1998  if( ! ( isStraight2.isEmpty() ) )
1999  {
2000  dS2[0](isStraight2 && (pt22>float_v(Vc::Zero)) ) = (k12*c2 + k22*d2)/(- k22*c2);
2001  dS2[1](isStraight2 && (pt22>float_v(Vc::Zero)) ) = (k12*c2 - k22*d2)/(- k22*c2);
2002 
2003  float_v a = k12*c2 + k22*d1;
2004  float_v b = -k22*c2;
2005 
2006  for(int iP=0; iP<6; iP++)
2007  {
2008  const float_v dadr1 = ( dk12dr1[iP]*c2 + k12*dc2dr1[iP] + dk22dr1[iP]*d1 + k22*dd1dr1[iP] );
2009  const float_v dadr2 = ( dk12dr2[iP]*c2 + k12*dc2dr2[iP] + dk22dr2[iP]*d1 + k22*dd1dr2[iP] );
2010  const float_v dbdr1 = -( dk22dr1[iP]*c2 + k22*dc2dr1[iP] );
2011  const float_v dbdr2 = -( dk22dr2[iP]*c2 + k22*dc2dr2[iP] );
2012 
2013  dS2dR1[0][iP](isStraight2 && (pt22>float_v(Vc::Zero)) ) = dadr1/b - dbdr1*a/(b*b) ;
2014  dS2dR2[0][iP](isStraight2 && (pt22>float_v(Vc::Zero)) ) = dadr2/b - dbdr2*a/(b*b) ;
2015  }
2016 
2017  a = k12*c2 - k22*d1;
2018  for(int iP=0; iP<6; iP++)
2019  {
2020  const float_v dadr1 = ( dk12dr1[iP]*c2 + k12*dc2dr1[iP] - dk22dr1[iP]*d1 - k22*dd1dr1[iP] );
2021  const float_v dadr2 = ( dk12dr2[iP]*c2 + k12*dc2dr2[iP] - dk22dr2[iP]*d1 - k22*dd1dr2[iP] );
2022  const float_v dbdr1 = -( dk22dr1[iP]*c2 + k22*dc2dr1[iP] );
2023  const float_v dbdr2 = -( dk22dr2[iP]*c2 + k22*dc2dr2[iP] );
2024 
2025  dS2dR1[1][iP](isStraight2 && (pt22>float_v(Vc::Zero)) ) = dadr1/b - dbdr1*a/(b*b) ;
2026  dS2dR2[1][iP](isStraight2 && (pt22>float_v(Vc::Zero)) ) = dadr2/b - dbdr2*a/(b*b) ;
2027  }
2028  }
2029 
2030  //select a point which is close to the primary vertex (with the smallest r)
2031 
2032  float_v dr2[2];
2033  for(int iP = 0; iP<2; iP++)
2034  {
2035  const float_v& bs1 = bq1*dS1[iP];
2036  const float_v& bs2 = bq2*dS2[iP];
2037  float_v sss = sin(bs1), ccc = cos(bs1);
2038 
2039  const float_m& bs1Big = abs(bs1) > 1.e-8f;
2040  const float_m& bs2Big = abs(bs2) > 1.e-8f;
2041 
2042  float_v sB(Vc::Zero), cB(Vc::Zero);
2043  sB(bs1Big) = sss/bq1;
2044  sB(!bs1Big) = ((1.f-bs1*kOvSqr6)*(1.f+bs1*kOvSqr6)*dS1[iP]);
2045  cB(bs1Big) = (1.f-ccc)/bq1;
2046  cB(!bs1Big) = .5f*sB*bs1;
2047 
2048  const float_v& x1 = param1[0] + sB*px1 + cB*py1;
2049  const float_v& y1 = param1[1] - cB*px1 + sB*py1;
2050  const float_v& z1 = param1[2] + dS1[iP]*param1[5];
2051 
2052  sss = sin(bs2); ccc = cos(bs2);
2053 
2054  sB(bs2Big) = sss/bq2;
2055  sB(!bs2Big) = ((1.f-bs2*kOvSqr6)*(1.f+bs2*kOvSqr6)*dS2[iP]);
2056  cB(bs2Big) = (1.f-ccc)/bq2;
2057  cB(!bs2Big) = .5f*sB*bs2;
2058 
2059  const float_v& x2 = param2[0] + sB*px2 + cB*py2;
2060  const float_v& y2 = param2[1] - cB*px2 + sB*py2;
2061  const float_v& z2 = param2[2] + dS2[iP]*param2[5];
2062 
2063  float_v dx = (x1-x2);
2064  float_v dy = (y1-y2);
2065  float_v dz = (z1-z2);
2066 
2067  dr2[iP] = dx*dx + dy*dy + dz*dz;
2068  }
2069 
2070  float_v pointParam[2][8];
2071  float_v pointCov[2][36];
2072 
2073  float_v dsdrM1[6] = {0.f,0.f,0.f,0.f,0.f,0.f};
2074  for(int iP=0; iP<6; iP++)
2075  dsdrM1[iP] = (dS1dR1[0][iP] + dS1dR1[1][iP])/2.f;
2076  Transport((dS1[0] + dS1[1]) / 2.f, dsdrM1, pointParam[0], pointCov[0]);
2077  float_v dsdrM2[6] = {0.f,0.f,0.f,0.f,0.f,0.f};
2078  for(int iP=0; iP<6; iP++)
2079  dsdrM2[iP] = (dS2dR2[0][iP] + dS2dR2[1][iP])/2.f;
2080  p.Transport((dS2[0] + dS2[1]) / 2.f, dsdrM2, pointParam[1], pointCov[1]);
2081 
2082  const float_v drPoint[3] = { pointParam[0][0] - pointParam[1][0], pointParam[0][1] - pointParam[1][1], pointParam[0][2] - pointParam[1][2] } ;
2083  const float_v covPoint[6] = { pointCov[0][0] + pointCov[1][0],
2084  pointCov[0][1] + pointCov[1][1],
2085  pointCov[0][2] + pointCov[1][2],
2086  pointCov[0][3] + pointCov[1][3],
2087  pointCov[0][4] + pointCov[1][4],
2088  pointCov[0][5] + pointCov[1][5] };
2089  float_v dr2Points = drPoint[0]*drPoint[0] + drPoint[1]*drPoint[1] + drPoint[2]*drPoint[2];
2090  float_v dr2PointCov = drPoint[0]*drPoint[0]*covPoint[0] + drPoint[1]*drPoint[1]*covPoint[2] + drPoint[2]*drPoint[2]*covPoint[5] +
2091  2.f*( drPoint[0]*drPoint[1]*covPoint[1] + drPoint[0]*drPoint[2]*covPoint[3] + drPoint[1]*drPoint[2]*covPoint[4] );
2092  const float_m isMiddlePoint = (dr2Points*dr2Points < 25.f*dr2PointCov);// && (abs(fPDG)==int_v(11)) && (abs(p.fPDG)==int_v(11));
2093  const float_m isFirstRoot = (dr2[0] < dr2[1]) & (!isMiddlePoint);
2094 
2095  // if(!(isFirstRoot.isEmpty()))
2096  {
2097  dS[0](isFirstRoot) = dS1[0];
2098  dS[1](isFirstRoot) = dS2[0];
2099 
2100  for(int iP=0; iP<6; iP++)
2101  {
2102  dsdr[0][iP](isFirstRoot) = dS1dR1[0][iP];
2103  dsdr[1][iP](isFirstRoot) = dS1dR2[0][iP];
2104  dsdr[2][iP](isFirstRoot) = dS2dR1[0][iP];
2105  dsdr[3][iP](isFirstRoot) = dS2dR2[0][iP];
2106  }
2107  }
2108  // if( !( (!isFirstRoot).isEmpty() ) )
2109  {
2110  dS[0](!isFirstRoot) = dS1[1];
2111  dS[1](!isFirstRoot) = dS2[1];
2112 
2113  for(int iP=0; iP<6; iP++)
2114  {
2115  dsdr[0][iP](!isFirstRoot) = dS1dR1[1][iP];
2116  dsdr[1][iP](!isFirstRoot) = dS1dR2[1][iP];
2117  dsdr[2][iP](!isFirstRoot) = dS2dR1[1][iP];
2118  dsdr[3][iP](!isFirstRoot) = dS2dR2[1][iP];
2119  }
2120  }
2121  // if(!(isMiddlePoint.isEmpty()))
2122  {
2123  dS[0](isMiddlePoint) = (dS1[0] + dS1[1]) / 2.f;
2124  dS[1](isMiddlePoint) = (dS2[0] + dS2[1]) / 2.f;
2125 
2126  for(int iP=0; iP<6; iP++)
2127  {
2128  dsdr[0][iP](isMiddlePoint) = (dS1dR1[1][iP] + dS1dR1[0][iP])/2.f;
2129  dsdr[1][iP](isMiddlePoint) = (dS1dR2[1][iP] + dS1dR2[0][iP])/2.f;
2130  dsdr[2][iP](isMiddlePoint) = (dS2dR1[1][iP] + dS2dR1[0][iP])/2.f;
2131  dsdr[3][iP](isMiddlePoint) = (dS2dR2[1][iP] + dS2dR2[0][iP])/2.f;
2132  }
2133  }
2134 
2135  //find correct parts of helices
2136  int_v n1(Vc::Zero);
2137  int_v n2(Vc::Zero);
2138 // float_v dzMin = abs( (z01-z02) + dS[0]*pz1 - dS[1]*pz2 );
2139  const float_v pi2(6.283185307f);
2140 
2141 // //TODO optimise for loops for neutral particles
2142 // const float_v& i1Float = -bq1/pi2*(z01/pz1+dS[0]);
2143 // for(int di1=-1; di1<=1; di1++)
2144 // {
2145 // int_v i1(Vc::Zero);
2146 // i1(int_m(!isStraight1)) = int_v(i1Float) + di1;
2147 //
2148 // const float_v& i2Float = ( ((z01-z02) + (dS[0]+pi2*i1/bq1)*pz1)/pz2 - dS[1]) * bq2/pi2;
2149 // for(int di2 = -1; di2<=1; di2++)
2150 // {
2151 // int_v i2(Vc::Zero);
2152 // i2(int_m(!isStraight2)) = int_v(i2Float) + di2;
2153 //
2154 // const float_v& z1 = z01 + (dS[0]+pi2*i1/bq1)*pz1;
2155 // const float_v& z2 = z02 + (dS[1]+pi2*i2/bq2)*pz2;
2156 // const float_v& dz = abs( z1-z2 );
2157 //
2158 // n1(int_m(dz < dzMin)) = i1;
2159 // n2(int_m(dz < dzMin)) = i2;
2160 // dzMin(dz < dzMin) = dz;
2161 // }
2162 // }
2163 //
2164 // dS[0](!isStraight1) += float_v(n1)*pi2/bq1;
2165 // dS[1](!isStraight2) += float_v(n2)*pi2/bq2;
2166 
2167  //add a correction on z-coordinate
2168 #if 0
2169  {
2170  const float_v& bs1 = bq1*dS[0];
2171  const float_v& bs2 = bq2*dS[1];
2172 
2173  float_v sss = KFPMath::Sin(bs1), ccc = KFPMath::Cos(bs1);
2174  const float_v& xr1 = sss*px1 - ccc*py1;
2175  const float_v& yr1 = ccc*px1 + sss*py1;
2176 
2177  float_v sss1 = KFPMath::Sin(bs2), ccc1 = KFPMath::Cos(bs2);
2178  const float_v& xr2 = sss1*px2 - ccc1*py2;
2179  const float_v& yr2 = ccc1*px2 + sss1*py2;
2180 
2181  const float_v& br = xr1*xr2 + yr1*yr2;
2182  const float_v& dx0mod = dx0*bq1*bq2 + py1*bq2 - py2*bq1;
2183  const float_v& dy0mod = dy0*bq1*bq2 - px1*bq2 + px2*bq1;
2184  const float_v& ar1 = dx0mod*xr1 + dy0mod*yr1;
2185  const float_v& ar2 = dx0mod*xr2 + dy0mod*yr2;
2186  const float_v& cz = (z01 - z02) + dS[0]*pz1 - dS[1]*pz2;
2187 
2188  float_v kz11 = - ar1 + bq1*br + bq2*pz1*pz1;
2189  float_v kz12 = -bq2*(br+pz1*pz2);
2190  float_v kz21 = bq1*(br+pz1*pz2);
2191  float_v kz22 = - ar2 - bq2*br - bq1*pz2*pz2;
2192 
2193  kz11(isStraight2) = pz1*pz1 + (px1*px1+py1*py1)*ccc + bq1*( (px2*dS[1] - dx0)*xr1 + (py2*dS[1] - dy0)*yr1 );
2194  kz12(isStraight2) = -(br + pz1*pz2);
2195  kz21(isStraight1) = (br + pz1*pz2);
2196  kz22(isStraight1) = -pz2*pz2 - (px2*px2+py2*py2)*ccc1 - bq2*( (px1*dS[0] + dx0)*xr2 + (py1*dS[0] + dy0)*yr2 );
2197 
2198  const float_v& delta = kz11*kz22 - kz12*kz21;
2199  float_v sz1(Vc::Zero);
2200  float_v sz2(Vc::Zero);
2201 
2202  {
2203  float_v aaa1 = -cz*(pz1*bq2*kz22 - pz2*bq1*kz12);
2204  float_v aaa2 = -cz*(pz2*bq1*kz11 - pz1*bq2*kz21);
2205 
2206  aaa1(isStraight2) = -cz*(pz1*kz22 - pz2*bq1*kz12);
2207  aaa2(isStraight2) = -cz*(pz2*bq1*kz11 - pz1*kz21);
2208 
2209  aaa1(isStraight1) = -cz*(pz1*bq2*kz22 - pz2*kz12);
2210  aaa2(isStraight1) = -cz*(pz2*kz11 - pz1*bq2*kz21);
2211 
2212  sz1( abs(delta) > 1.e-16f ) = aaa1 / delta;
2213  sz2( abs(delta) > 1.e-16f ) = aaa2 / delta;
2214 
2215  float_v dkz11dr1[6] = {-(bq1*bq2*xr1), -(bq1*bq2*yr1), 0.f,
2216  -ccc*dy0mod - dx0mod*sss + bq2*yr1 + bq1*(sss*xr2 + ccc*yr2),
2217  ccc*dx0mod - dy0mod*sss - bq2*xr1 + bq1*(-ccc*xr2 + sss*yr2), 2.f*bq2*pz1};
2218  dkz11dr1[0](isStraight2) = -bq1*xr1;
2219  dkz11dr1[1](isStraight2) = -bq1*yr1;
2220  dkz11dr1[3](isStraight2) = 2.f*ccc*px1 + bq1*(sss*(dS[1]*px2 - x01 + x02) + ccc*(dS[1]*py2 - y01 + y02));
2221  dkz11dr1[4](isStraight2) = 2.f*ccc*py1 + bq1*(-(ccc*(dS[1]*px2 - x01 + x02)) + sss*(dS[1]*py2 - y01 + y02));
2222  dkz11dr1[5](isStraight2) = 2.f*pz1;
2223  float_v dkz11dr2[6] = {bq1*bq2*xr1, bq1*bq2*yr1, 0.f, -bq1*yr1 + bq1*(sss1*xr1 + ccc1*yr1), bq1*xr1 + bq1*(-ccc1*xr1 + sss1*yr1), 0.f};
2224  dkz11dr2[0](isStraight2) = bq1*xr1;
2225  dkz11dr2[1](isStraight2) = bq1*yr1;
2226  dkz11dr2[3](isStraight2) = bq1*dS[1]*xr1;
2227  dkz11dr2[4](isStraight2) = bq1*dS[1]*yr1;
2228 
2229  float_v dkz12dr1[6] = {0.f, 0.f, 0.f, -bq2*(sss*xr2 + ccc*yr2), -bq2*(-ccc*xr2 + sss*yr2), -bq2*pz2};
2230  dkz12dr1[3](isStraight2) = -(sss*xr2 + ccc*yr2);
2231  dkz12dr1[4](isStraight2) = -(-ccc*xr2 + sss*yr2);
2232  dkz12dr1[5](isStraight2) = -pz2;
2233  float_v dkz12dr2[6] = {0.f, 0.f, 0.f, -bq2*(sss1*xr1 + ccc1*yr1), -bq2*(-ccc1*xr1 + sss1*yr1), -bq2*pz1};
2234  dkz12dr2[3](isStraight2) = -(sss1*xr1 + ccc1*yr1);
2235  dkz12dr2[4](isStraight2) = -(-ccc1*xr1 + sss1*yr1);
2236  dkz12dr2[5](isStraight2) = -pz1;
2237  float_v dkz21dr1[6] = {0.f, 0.f, 0.f, bq1*(sss*xr2 + ccc*yr2), bq1*(-ccc*xr2 + sss*yr2), bq1*pz2};
2238  dkz21dr1[3](isStraight1) = yr2;
2239  dkz21dr1[4](isStraight1) = -xr2;
2240  dkz21dr1[5](isStraight1) = pz2;
2241  float_v dkz21dr2[6] = {0.f, 0.f, 0.f, bq1*(sss1*xr1 + ccc1*yr1), bq1*(-ccc1*xr1 + sss1*yr1), bq1*pz1};
2242  dkz21dr2[3](isStraight1) = (sss1*xr1 + ccc1*yr1);
2243  dkz21dr2[4](isStraight1) = (-ccc1*xr1 + sss1*yr1);
2244  dkz21dr2[5](isStraight1) = pz1;
2245  float_v dkz22dr1[6] = {-bq1*bq2*xr2, -bq1*bq2*yr2, 0.f, bq2*yr2 - bq2*(sss*xr2 + ccc*yr2), -bq2*xr2 - bq2*(-ccc*xr2 + sss*yr2), 0.f};
2246  dkz22dr1[0](isStraight1) = -(bq2*xr2);
2247  dkz22dr1[1](isStraight1) = -(bq2*yr2);
2248  dkz22dr1[3](isStraight1) = -(bq2*dS[0]*xr2);
2249  dkz22dr1[4](isStraight1) = -(bq2*dS[0]*yr2);
2250  float_v dkz22dr2[6] = {bq1*bq2*xr2, bq1*bq2*yr2, 0.f,
2251  -ccc1*dy0mod - dx0mod*sss1 - bq2*(sss1*xr1 + ccc1*yr1) - bq1*yr2,
2252  ccc1*dx0mod - dy0mod*sss1 + bq1*xr2 - bq2*(-ccc1*xr1 + sss1*yr1), -2.f*bq1*pz2};
2253  dkz22dr2[0](isStraight1) = bq2*xr2;
2254  dkz22dr2[1](isStraight1) = bq2*yr2;
2255  dkz22dr2[3](isStraight1) = -2.f*ccc1*px2 - bq2*(ccc1*(dy0 + dS[0]*py1) + (dx0 + dS[0]*px1)*sss1);
2256  dkz22dr2[4](isStraight1) = -2.f*ccc1*py2 - bq2*(-(ccc1*(dx0 + dS[0]*px1)) + (dy0 + dS[0]*py1)*sss1);
2257  dkz22dr2[5](isStraight1) = -2.f*pz2;
2258 
2259  float_v dczdr1[6] = {0.f, 0.f, 1.f, 0.f, 0.f, dS[0]};
2260  float_v dczdr2[6] = {0.f, 0.f, -1.f, 0.f, 0.f, -dS[1]};
2261 
2262  float_v daaa1dr1[6];
2263  float_v daaa1dr2[6];
2264  float_v daaa2dr2[6];
2265  float_v daaa2dr1[6];
2266  float_v dDeltadr1[6];
2267  float_v dDeltadr2[6];
2268  for(int iP=0; iP<6; iP++)
2269  {
2270  daaa1dr1[iP] = -( dczdr1[iP]*(pz1*bq2*kz22 - pz2*bq1*kz12) + cz*( bq2*pz1*dkz22dr1[iP] - bq1*pz2*dkz12dr1[iP] ) );
2271  daaa1dr2[iP] = -( dczdr2[iP]*(pz1*bq2*kz22 - pz2*bq1*kz12) + cz*( bq2*pz1*dkz22dr2[iP] - bq1*pz2*dkz12dr2[iP] ) );
2272 
2273  daaa2dr2[iP] = -( dczdr2[iP]*(pz2*bq1*kz11 - pz1*bq2*kz21) + cz*( bq1*pz2*dkz11dr2[iP] - bq2*pz1*dkz21dr2[iP] ) );
2274  daaa2dr1[iP] = -( dczdr1[iP]*(pz2*bq1*kz11 - pz1*bq2*kz21) + cz*( bq1*pz2*dkz11dr1[iP] - bq2*pz1*dkz21dr1[iP] ) );
2275 
2276  dDeltadr1[iP] = kz11*dkz22dr1[iP] + dkz11dr1[iP]*kz11 - kz12*dkz21dr1[iP] - dkz12dr1[iP]*kz21;
2277  dDeltadr2[iP] = kz11*dkz22dr2[iP] + dkz11dr2[iP]*kz11 - kz12*dkz21dr2[iP] - dkz12dr2[iP]*kz21;
2278  }
2279  daaa1dr1[5] -= cz*bq2*kz22;
2280  daaa1dr2[5] += cz*bq1*kz12;
2281  daaa2dr2[5] -= cz*bq1*kz11;
2282  daaa2dr1[5] += cz*bq2*kz21;
2283 
2284  //derivatives by s0 and s1
2285  float_v dkz11ds0 = bq1*(dy0mod*xr1 - dx0mod*yr1 + bq1*(xr2*yr1 - xr1*yr2));
2286  dkz11ds0(isStraight2) = -(bq1*(px1*px1 + py1*py1)*sss) + bq1*(bq1*yr1*(dS[1]*px2 - x01 + x02) -bq1*xr1*(dS[1]*py2 - y01 + y02));
2287  float_v dkz11ds1 = bq1*bq2*( xr1*yr2 - xr2*yr1 );
2288  dkz11ds1(isStraight2) = bq1*(px2*xr1 + py2*yr1);
2289  float_v dkz12ds0 = bq2*bq1*( xr1*yr2 - xr2*yr1 );
2290  dkz12ds0(isStraight2) = bq1*( xr1*yr2 - xr2*yr1 );
2291  float_v dkz12ds1 = bq2*bq2*( xr2*yr1 - xr1*yr2 );
2292  dkz12ds1(isStraight2) = 0;
2293  float_v dkz21ds0 = bq1*bq1*( xr2*yr1 - xr1*yr2 );
2294  dkz21ds0(isStraight1) = 0.f;
2295  float_v dkz21ds1 = bq1*bq2*( xr1*yr2 - xr2*yr1 );
2296  dkz21ds1(isStraight1) = px1*(bq2*ccc1*py2 - bq2*px2*sss1) - py1*(bq2*ccc1*px2 + bq2*py2*sss1);
2297  float_v dkz22ds0 = bq1*bq2*( xr1*yr2 - xr2*yr1 );
2298  dkz22ds0(isStraight1) = -bq2*(px1*xr2 + py1*yr2);
2299  float_v dkz22ds1 = -bq2*( dy0mod*xr2 - dx0mod*yr2 - bq2*(xr2*yr1 - xr1*yr2) );
2300  dkz22ds1(isStraight1) = bq2*(px2*px2 + py2*py2)*sss1 - bq2*((dy0 + dS[0]*py1)*(bq2*ccc1*py2 - bq2*px2*sss1) + (dx0 + dS[0]*px1)*(bq2*ccc1*px2 + bq2*py2*sss1));
2301  const float_v dczds0 = pz1;
2302  const float_v dczds1 = -pz2;
2303  const float_v da1ds0 = -( dczds0*(pz1*bq2*kz22 - pz2*bq1*kz12) + cz*(pz1*bq2*dkz22ds0 - pz2*bq1*dkz12ds0));
2304  const float_v da1ds1 = -( dczds1*(pz1*bq2*kz22 - pz2*bq1*kz12) + cz*(pz1*bq2*dkz22ds1 - pz2*bq1*dkz12ds1));
2305  const float_v da2ds0 = -( dczds0*(pz2*bq1*kz11 - pz1*bq2*kz21) + cz*(pz2*bq1*dkz11ds0 - pz1*bq2*dkz21ds0));
2306  const float_v da2ds1 = -( dczds1*(pz2*bq1*kz11 - pz1*bq2*kz21) + cz*(pz2*bq1*dkz11ds1 - pz1*bq2*dkz21ds1));
2307  const float_v dDeltads0 = kz11*dkz22ds0 + dkz11ds0*kz11 - kz12*dkz21ds0 - dkz12ds0*kz21;
2308  const float_v dDeltads1 = kz11*dkz22ds1 + dkz11ds1*kz11 - kz12*dkz21ds1 - dkz12ds1*kz21;
2309 
2310  const float_v dsz1ds0 = da1ds0/delta - aaa1*dDeltads0/(delta*delta);
2311  const float_v dsz1ds1 = da1ds1/delta - aaa1*dDeltads1/(delta*delta);
2312  const float_v dsz2ds0 = da2ds0/delta - aaa2*dDeltads0/(delta*delta);
2313  const float_v dsz2ds1 = da2ds1/delta - aaa2*dDeltads1/(delta*delta);
2314 
2315  float_v dszdr[4][6];
2316  for(int iP=0; iP<6; iP++)
2317  {
2318  dszdr[0][iP] = dsz1ds0*dsdr[0][iP] + dsz1ds1*dsdr[2][iP];
2319  dszdr[1][iP] = dsz1ds0*dsdr[1][iP] + dsz1ds1*dsdr[3][iP];
2320  dszdr[2][iP] = dsz2ds0*dsdr[0][iP] + dsz2ds1*dsdr[2][iP];
2321  dszdr[3][iP] = dsz2ds0*dsdr[1][iP] + dsz2ds1*dsdr[3][iP];
2322  }
2323 
2324  for(int iP=0; iP<6; iP++)
2325  {
2326  dsdr[0][iP]( abs(delta) > 1.e-16f ) += daaa1dr1[iP]/delta - aaa1*dDeltadr1[iP]/(delta*delta) + dszdr[0][iP];
2327  dsdr[1][iP]( abs(delta) > 1.e-16f ) += daaa1dr2[iP]/delta - aaa1*dDeltadr2[iP]/(delta*delta) + dszdr[1][iP];
2328  dsdr[2][iP]( abs(delta) > 1.e-16f ) += daaa2dr1[iP]/delta - aaa2*dDeltadr1[iP]/(delta*delta) + dszdr[2][iP];
2329  dsdr[3][iP]( abs(delta) > 1.e-16f ) += daaa2dr2[iP]/delta - aaa2*dDeltadr2[iP]/(delta*delta) + dszdr[3][iP];
2330  }
2331  }
2332 
2333  dS[0] += sz1;
2334  dS[1] += sz2;
2335  }
2336 #endif
2337 
2338  //Line correction
2339  {
2340  const float_v& bs1 = bq1*dS[0];
2341  const float_v& bs2 = bq2*dS[1];
2342  float_v sss = sin(bs1), ccc = cos(bs1);
2343 
2344  const float_m& bs1Big = abs(bs1) > 1.e-8f;
2345  const float_m& bs2Big = abs(bs2) > 1.e-8f;
2346 
2347  float_v sB(0.f), cB(0.f);
2348  sB(bs1Big) = sss/bq1;
2349  cB(bs1Big) = (1.f-ccc)/bq1;
2350  sB(!bs1Big) = ((1.f-bs1*kOvSqr6)*(1.f+bs1*kOvSqr6)*dS[0]);
2351  cB(!bs1Big) = .5f*sB*bs1;
2352 
2353  const float_v& x1 = x01 + sB*px1 + cB*py1;
2354  const float_v& y1 = y01 - cB*px1 + sB*py1;
2355  const float_v& z1 = z01 + dS[0]*pz1;
2356  const float_v& ppx1 = ccc*px1 + sss*py1;
2357  const float_v& ppy1 = -sss*px1 + ccc*py1;
2358  const float_v& ppz1 = pz1;
2359 
2360  float_v sss1 = sin(bs2), ccc1 = cos(bs2);
2361 
2362  float_v sB1(0.f), cB1(0.f);
2363  sB1(bs2Big) = sss1/bq2;
2364  cB1(bs2Big) = (1.f-ccc1)/bq2;
2365  sB1(!bs2Big) = ((1.f-bs2*kOvSqr6)*(1.f+bs2*kOvSqr6)*dS[1]);
2366  cB1(!bs2Big) = .5f*sB1*bs2;
2367 
2368  const float_v& x2 = x02 + sB1*px2 + cB1*py2;
2369  const float_v& y2 = y02 - cB1*px2 + sB1*py2;
2370  const float_v& z2 = z02 + dS[1]*pz2;
2371  const float_v& ppx2 = ccc1*px2 + sss1*py2;
2372  const float_v& ppy2 = -sss1*px2 + ccc1*py2;
2373  const float_v& ppz2 = pz2;
2374 
2375  const float_v& p12 = ppx1*ppx1 + ppy1*ppy1 + ppz1*ppz1;
2376  const float_v& p22 = ppx2*ppx2 + ppy2*ppy2 + ppz2*ppz2;
2377  const float_v& lp1p2 = ppx1*ppx2 + ppy1*ppy2 + ppz1*ppz2;
2378 
2379  const float_v& dx = (x2 - x1);
2380  const float_v& dy = (y2 - y1);
2381  const float_v& dz = (z2 - z1);
2382 
2383  const float_v& ldrp1 = ppx1*dx + ppy1*dy + ppz1*dz;
2384  const float_v& ldrp2 = ppx2*dx + ppy2*dy + ppz2*dz;
2385 
2386  float_v detp = lp1p2*lp1p2 - p12*p22;
2387  detp(abs(detp)<1.e-4f) = 1; //TODO correct!!!
2388 
2389  //dsdr calculation
2390  const float_v a1 = ldrp2*lp1p2 - ldrp1*p22;
2391  const float_v a2 = ldrp2*p12 - ldrp1*lp1p2;
2392  const float_v lp1p2_ds0 = bq1*( ppx2*ppy1 - ppy2*ppx1);
2393  const float_v lp1p2_ds1 = bq2*( ppx1*ppy2 - ppy1*ppx2);
2394  const float_v ldrp1_ds0 = -p12 + bq1*(ppy1*dx - ppx1*dy);
2395  const float_v ldrp1_ds1 = lp1p2;
2396  const float_v ldrp2_ds0 = -lp1p2;
2397  const float_v ldrp2_ds1 = p22 + bq2*(ppy2*dx - ppx2*dy);
2398  const float_v detp_ds0 = 2.f*lp1p2*lp1p2_ds0;
2399  const float_v detp_ds1 = 2.f*lp1p2*lp1p2_ds1;
2400  const float_v a1_ds0 = ldrp2_ds0*lp1p2 + ldrp2*lp1p2_ds0 - ldrp1_ds0*p22;
2401  const float_v a1_ds1 = ldrp2_ds1*lp1p2 + ldrp2*lp1p2_ds1 - ldrp1_ds1*p22;
2402  const float_v a2_ds0 = ldrp2_ds0*p12 - ldrp1_ds0*lp1p2 - ldrp1*lp1p2_ds0;
2403  const float_v a2_ds1 = ldrp2_ds1*p12 - ldrp1_ds1*lp1p2 - ldrp1*lp1p2_ds1;
2404 
2405  const float_v dsl1ds0 = a1_ds0/detp - a1*detp_ds0/(detp*detp);
2406  const float_v dsl1ds1 = a1_ds1/detp - a1*detp_ds1/(detp*detp);
2407  const float_v dsl2ds0 = a2_ds0/detp - a2*detp_ds0/(detp*detp);
2408  const float_v dsl2ds1 = a2_ds1/detp - a2*detp_ds1/(detp*detp);
2409 
2410  float_v dsldr[4][6];
2411  for(int iP=0; iP<6; iP++)
2412  {
2413  dsldr[0][iP] = dsl1ds0*dsdr[0][iP] + dsl1ds1*dsdr[2][iP];
2414  dsldr[1][iP] = dsl1ds0*dsdr[1][iP] + dsl1ds1*dsdr[3][iP];
2415  dsldr[2][iP] = dsl2ds0*dsdr[0][iP] + dsl2ds1*dsdr[2][iP];
2416  dsldr[3][iP] = dsl2ds0*dsdr[1][iP] + dsl2ds1*dsdr[3][iP];
2417  }
2418 
2419  for(int iDS=0; iDS<4; iDS++)
2420  for(int iP=0; iP<6; iP++)
2421  dsdr[iDS][iP] += dsldr[iDS][iP];
2422 
2423  const float_v lp1p2_dr0[6] = {0.f, 0.f, 0.f, ccc*ppx2 - ppy2*sss, ccc*ppy2 + ppx2*sss, pz2};
2424  const float_v lp1p2_dr1[6] = {0.f, 0.f, 0.f, ccc1*ppx1 - ppy1*sss1, ccc1*ppy1 + ppx1*sss1, pz1};
2425  const float_v ldrp1_dr0[6] = {-ppx1, -ppy1, -pz1, cB*ppy1 - ppx1*sB + ccc*dx - sss*dy, -cB*ppx1-ppy1*sB + sss*dx + ccc*dy, -dS[0]*pz1 + dz};
2426  const float_v ldrp1_dr1[6] = { ppx1, ppy1, pz1, -cB1*ppy1 + ppx1*sB1, cB1*ppx1 + ppy1*sB1, dS[1]*pz1};
2427  const float_v ldrp2_dr0[6] = {-ppx2, -ppy2, -pz2, cB*ppy2 - ppx2*sB, -cB*ppx2-ppy2*sB, -dS[0]*pz2};
2428  const float_v ldrp2_dr1[6] = { ppx2, ppy2, pz2, -cB1*ppy2 + ppx2*sB1 + ccc1*dx- sss1*dy, cB1*ppx2 + ppy2*sB1 + sss1*dx + ccc1*dy, dz + dS[1]*pz2};
2429  const float_v p12_dr0[6] = {0.f, 0.f, 0.f, 2.f*px1, 2.f*py1, 2.f*pz1};
2430  const float_v p22_dr1[6] = {0.f, 0.f, 0.f, 2.f*px2, 2.f*py2, 2.f*pz2};
2431  float_v a1_dr0[6], a1_dr1[6], a2_dr0[6], a2_dr1[6], detp_dr0[6], detp_dr1[6];
2432  for(int iP=0; iP<6; iP++)
2433  {
2434  a1_dr0[iP] = ldrp2_dr0[iP]*lp1p2 + ldrp2*lp1p2_dr0[iP] - ldrp1_dr0[iP]*p22;
2435  a1_dr1[iP] = ldrp2_dr1[iP]*lp1p2 + ldrp2*lp1p2_dr1[iP] - ldrp1_dr1[iP]*p22 - ldrp1*p22_dr1[iP];
2436  a2_dr0[iP] = ldrp2_dr0[iP]*p12 + ldrp2*p12_dr0[iP] - ldrp1_dr0[iP]*lp1p2 - ldrp1*lp1p2_dr0[iP];
2437  a2_dr1[iP] = ldrp2_dr1[iP]*p12 - ldrp1_dr1[iP]*lp1p2 - ldrp1*lp1p2_dr1[iP];
2438  detp_dr0[iP] = 2.f*lp1p2*lp1p2_dr0[iP] - p12_dr0[iP]*p22;
2439  detp_dr1[iP] = 2.f*lp1p2*lp1p2_dr1[iP] - p12*p22_dr1[iP];
2440 
2441  dsdr[0][iP] += a1_dr0[iP]/detp - a1*detp_dr0[iP]/(detp*detp);
2442  dsdr[1][iP] += a1_dr1[iP]/detp - a1*detp_dr1[iP]/(detp*detp);
2443  dsdr[2][iP] += a2_dr0[iP]/detp - a2*detp_dr0[iP]/(detp*detp);
2444  dsdr[3][iP] += a2_dr1[iP]/detp - a2*detp_dr1[iP]/(detp*detp);
2445  }
2446 
2447  dS[0] += (ldrp2*lp1p2 - ldrp1*p22) /detp;
2448  dS[1] += (ldrp2*p12 - ldrp1*lp1p2)/detp;
2449  }
2450 }
2451 
2453  float_v dS[2], const float_v* param1, const float_v* param2 ) const
2454 {
2470  if(!param1)
2471  {
2472  param1 = fP;
2473  param2 = p.fP;
2474  }
2475 
2476  //* Get dS to another particle for Bz field
2477  const float_v kOvSqr6 = 1.f/sqrt(float_v(6.f));
2478  const float_v kCLight = 0.000299792458f;
2479 
2480  //in XY plane
2481  //first root
2482  const float_v& bq1 = B*simd_cast<float_v>(fQ)*kCLight;
2483  const float_v& bq2 = B*simd_cast<float_v>(p.fQ)*kCLight;
2484 
2485  const float_m& isStraight1 = abs(bq1) < float_v(1.e-8f);
2486  const float_m& isStraight2 = abs(bq2) < float_v(1.e-8f);
2487 
2488  if( isStraight1.isFull() && isStraight2.isFull() )
2489  {
2490  GetDStoParticleLine(p, dS);
2491  return;
2492  }
2493 
2494  const float_v& px1 = param1[3];
2495  const float_v& py1 = param1[4];
2496  const float_v& pz1 = param1[5];
2497 
2498  const float_v& px2 = param2[3];
2499  const float_v& py2 = param2[4];
2500  const float_v& pz2 = param2[5];
2501 
2502  const float_v& pt12 = px1*px1 + py1*py1;
2503  const float_v& pt22 = px2*px2 + py2*py2;
2504 
2505  const float_v& x01 = param1[0];
2506  const float_v& y01 = param1[1];
2507  const float_v& z01 = param1[2];
2508 
2509  const float_v& x02 = param2[0];
2510  const float_v& y02 = param2[1];
2511  const float_v& z02 = param2[2];
2512 
2513  float_v dS1[2] = {0.f, 0.f}, dS2[2]={0.f, 0.f};
2514 
2515  const float_v& dx0 = (x01 - x02);
2516  const float_v& dy0 = (y01 - y02);
2517  const float_v& dr02 = dx0*dx0 + dy0*dy0;
2518  const float_v& drp1 = dx0*px1 + dy0*py1;
2519  const float_v& dxyp1 = dx0*py1 - dy0*px1;
2520  const float_v& drp2 = dx0*px2 + dy0*py2;
2521  const float_v& dxyp2 = dx0*py2 - dy0*px2;
2522  const float_v& p1p2 = px1*px2 + py1*py2;
2523  const float_v& dp1p2 = px1*py2 - px2*py1;
2524 
2525  const float_v& k11 = (bq2*drp1 - dp1p2);
2526  const float_v& k21 = (bq1*(bq2*dxyp1 - p1p2) + bq2*pt12);
2527  const float_v& k12 = ((bq1*drp2 - dp1p2));
2528  const float_v& k22 = (bq2*(bq1*dxyp2 + p1p2) - bq1*pt22);
2529 
2530  const float_v& kp = (dxyp1*bq2 - dxyp2*bq1 - p1p2);
2531  const float_v& kd = dr02/2.f*bq1*bq2 + kp;
2532  const float_v& c1 = -(bq1*kd + pt12*bq2);
2533  const float_v& c2 = bq2*kd + pt22*bq1;
2534 
2535  float_v d1 = pt12*pt22 - kd*kd;
2536  d1(d1 < float_v(Vc::Zero)) = float_v(Vc::Zero);
2537  d1 = sqrt( d1 );
2538  float_v d2 = pt12*pt22 - kd*kd;
2539  d2(d2 < float_v(Vc::Zero)) = float_v(Vc::Zero);
2540  d2 = sqrt( d2 );
2541 
2542  // find two points of closest approach in XY plane
2543  if( ! ( (!isStraight1).isEmpty() ) )
2544  {
2545  dS1[0](!isStraight1) = KFPMath::ATan2( (bq1*k11*c1 + k21*d1*bq1), (bq1*k11*d1*bq1 - k21*c1) )/bq1;
2546  dS1[1](!isStraight1) = KFPMath::ATan2( (bq1*k11*c1 - k21*d1*bq1), (-bq1*k11*d1*bq1 - k21*c1) )/bq1;
2547  }
2548  if( ! ( (!isStraight2).isEmpty() ) )
2549  {
2550  dS2[0](!isStraight2) = KFPMath::ATan2( (bq2*k12*c2 + k22*d2*bq2), (bq2*k12*d2*bq2 - k22*c2) )/bq2;
2551  dS2[1](!isStraight2) = KFPMath::ATan2( (bq2*k12*c2 - k22*d2*bq2), (-bq2*k12*d2*bq2 - k22*c2) )/bq2;
2552  }
2553  if( ! ( isStraight1.isEmpty() ) )
2554  {
2555  dS1[0](isStraight1 && (pt12>float_v(Vc::Zero)) ) = (k11*c1 + k21*d1)/(- k21*c1);
2556  dS1[1](isStraight1 && (pt12>float_v(Vc::Zero)) ) = (k11*c1 - k21*d1)/(- k21*c1);
2557  }
2558  if( ! ( isStraight2.isEmpty() ) )
2559  {
2560  dS2[0](isStraight2 && (pt22>float_v(Vc::Zero)) ) = (k12*c2 + k22*d2)/(- k22*c2);
2561  dS2[1](isStraight2 && (pt22>float_v(Vc::Zero)) ) = (k12*c2 - k22*d2)/(- k22*c2);
2562  }
2563 
2564  //select a point which is close to the primary vertex (with the smallest r)
2565 
2566  float_v dr2[2];
2567  for(int iP = 0; iP<2; iP++)
2568  {
2569  const float_v& bs1 = bq1*dS1[iP];
2570  const float_v& bs2 = bq2*dS2[iP];
2571  float_v sss = KFPMath::Sin(bs1), ccc = KFPMath::Cos(bs1);
2572 
2573  const float_m& bs1Big = abs(bs1) > 1.e-8f;
2574  const float_m& bs2Big = abs(bs2) > 1.e-8f;
2575 
2576  float_v sB(Vc::Zero), cB(Vc::Zero);
2577  sB(bs1Big) = sss/bq1;
2578  sB(!bs1Big) = ((1.f-bs1*kOvSqr6)*(1.f+bs1*kOvSqr6)*dS1[iP]);
2579  cB(bs1Big) = (1.f-ccc)/bq1;
2580  cB(!bs1Big) = .5f*sB*bs1;
2581 
2582  const float_v& x1 = param1[0] + sB*px1 + cB*py1;
2583  const float_v& y1 = param1[1] - cB*px1 + sB*py1;
2584  const float_v& z1 = param1[2] + dS1[iP]*param1[5];
2585 
2586  sss = KFPMath::Sin(bs2); ccc = KFPMath::Cos(bs2);
2587 
2588  sB(bs2Big) = sss/bq2;
2589  sB(!bs2Big) = ((1.f-bs2*kOvSqr6)*(1.f+bs2*kOvSqr6)*dS2[iP]);
2590  cB(bs2Big) = (1.f-ccc)/bq2;
2591  cB(!bs2Big) = .5f*sB*bs2;
2592 
2593  const float_v& x2 = param2[0] + sB*px2 + cB*py2;
2594  const float_v& y2 = param2[1] - cB*px2 + sB*py2;
2595  const float_v& z2 = param2[2] + dS2[iP]*param2[5];
2596 
2597  float_v dx = (x1-x2);
2598  float_v dy = (y1-y2);
2599  float_v dz = (z1-z2);
2600 
2601  dr2[iP] = dx*dx + dy*dy + dz*dz;
2602  }
2603 
2604 
2605  const float_m isFirstRoot = (dr2[0] < dr2[1]);
2606 
2607  // if(!(isFirstRoot.isEmpty()))
2608  {
2609  dS[0](isFirstRoot) = dS1[0];
2610  dS[1](isFirstRoot) = dS2[0];
2611  }
2612  // if( !( (!isFirstRoot).isEmpty() ) )
2613  {
2614  dS[0](!isFirstRoot) = dS1[1];
2615  dS[1](!isFirstRoot) = dS2[1];
2616  }
2617 
2618  //find correct parts of helices
2619  int_v n1(Vc::Zero);
2620  int_v n2(Vc::Zero);
2621 // float_v dzMin = abs( (z01-z02) + dS[0]*pz1 - dS[1]*pz2 );
2622  const float_v pi2(6.283185307f);
2623 
2624 // //TODO optimise for loops for neutral particles
2625 // const float_v& i1Float = -bq1/pi2*(z01/pz1+dS[0]);
2626 // for(int di1=-1; di1<=1; di1++)
2627 // {
2628 // int_v i1(Vc::Zero);
2629 // i1(int_m(!isStraight1)) = int_v(i1Float) + di1;
2630 //
2631 // const float_v& i2Float = ( ((z01-z02) + (dS[0]+pi2*i1/bq1)*pz1)/pz2 - dS[1]) * bq2/pi2;
2632 // for(int di2 = -1; di2<=1; di2++)
2633 // {
2634 // int_v i2(Vc::Zero);
2635 // i2(int_m(!isStraight2)) = int_v(i2Float) + di2;
2636 //
2637 // const float_v& z1 = z01 + (dS[0]+pi2*i1/bq1)*pz1;
2638 // const float_v& z2 = z02 + (dS[1]+pi2*i2/bq2)*pz2;
2639 // const float_v& dz = abs( z1-z2 );
2640 //
2641 // n1(int_m(dz < dzMin)) = i1;
2642 // n2(int_m(dz < dzMin)) = i2;
2643 // dzMin(dz < dzMin) = dz;
2644 // }
2645 // }
2646 //
2647 // dS[0](!isStraight1) += float_v(n1)*pi2/bq1;
2648 // dS[1](!isStraight2) += float_v(n2)*pi2/bq2;
2649 
2650  //Line correction
2651  {
2652  const float_v& bs1 = bq1*dS[0];
2653  const float_v& bs2 = bq2*dS[1];
2654  float_v sss = KFPMath::Sin(bs1), ccc = KFPMath::Cos(bs1);
2655 
2656  const float_m& bs1Big = abs(bs1) > 1.e-8f;
2657  const float_m& bs2Big = abs(bs2) > 1.e-8f;
2658 
2659  float_v sB(0.f), cB(0.f);
2660  sB(bs1Big) = sss/bq1;
2661  cB(bs1Big) = (1.f-ccc)/bq1;
2662  sB(!bs1Big) = ((1.f-bs1*kOvSqr6)*(1.f+bs1*kOvSqr6)*dS[0]);
2663  cB(!bs1Big) = .5f*sB*bs1;
2664 
2665  const float_v& x1 = x01 + sB*px1 + cB*py1;
2666  const float_v& y1 = y01 - cB*px1 + sB*py1;
2667  const float_v& z1 = z01 + dS[0]*pz1;
2668  const float_v& ppx1 = ccc*px1 + sss*py1;
2669  const float_v& ppy1 = -sss*px1 + ccc*py1;
2670  const float_v& ppz1 = pz1;
2671 
2672  float_v sss1 = KFPMath::Sin(bs2), ccc1 = KFPMath::Cos(bs2);
2673 
2674  float_v sB1(0.f), cB1(0.f);
2675  sB1(bs2Big) = sss1/bq2;
2676  cB1(bs2Big) = (1.f-ccc1)/bq2;
2677  sB1(!bs2Big) = ((1.f-bs2*kOvSqr6)*(1.f+bs2*kOvSqr6)*dS[1]);
2678  cB1(!bs2Big) = .5f*sB1*bs2;
2679 
2680  const float_v& x2 = x02 + sB1*px2 + cB1*py2;
2681  const float_v& y2 = y02 - cB1*px2 + sB1*py2;
2682  const float_v& z2 = z02 + dS[1]*pz2;
2683  const float_v& ppx2 = ccc1*px2 + sss1*py2;
2684  const float_v& ppy2 = -sss1*px2 + ccc1*py2;
2685  const float_v& ppz2 = pz2;
2686 
2687  const float_v& p12 = ppx1*ppx1 + ppy1*ppy1 + ppz1*ppz1;
2688  const float_v& p22 = ppx2*ppx2 + ppy2*ppy2 + ppz2*ppz2;
2689  const float_v& lp1p2 = ppx1*ppx2 + ppy1*ppy2 + ppz1*ppz2;
2690 
2691  const float_v& dx = (x2 - x1);
2692  const float_v& dy = (y2 - y1);
2693  const float_v& dz = (z2 - z1);
2694 
2695  const float_v& ldrp1 = ppx1*dx + ppy1*dy + ppz1*dz;
2696  const float_v& ldrp2 = ppx2*dx + ppy2*dy + ppz2*dz;
2697 
2698  float_v detp = lp1p2*lp1p2 - p12*p22;
2699  detp(abs(detp)<1.e-4f) = 1; //TODO correct!!!
2700 
2701  dS[0] += (ldrp2*lp1p2 - ldrp1*p22) /detp;
2702  dS[1] += (ldrp2*p12 - ldrp1*lp1p2)/detp;
2703  }
2704 }
2705 
2706 void KFParticleBaseSIMD::GetDStoParticleBy( float_v B, const KFParticleBaseSIMD &p, float_v dS[2], float_v dsdr[4][6] ) const
2707 {
2729  const float_v param1[6] = { fP[0], -fP[2], fP[1], fP[3], -fP[5], fP[4] };
2730  const float_v param2[6] = { p.fP[0], -p.fP[2], p.fP[1], p.fP[3], -p.fP[5], p.fP[4] };
2731 
2732  float_v dsdrBz[4][6];
2733  for(int i1=0; i1<4; i1++)
2734  for(int i2=0; i2<6; i2++)
2735  dsdrBz[i1][i2] = 0.f;
2736 
2737  GetDStoParticleBz(B, p, dS, dsdrBz, param1, param2);
2738 
2739  for(int iDs=0; iDs<4; iDs++)
2740  {
2741  dsdr[iDs][0] = dsdrBz[iDs][0];
2742  dsdr[iDs][1] = dsdrBz[iDs][2];
2743  dsdr[iDs][2] = -dsdrBz[iDs][1];
2744  dsdr[iDs][3] = dsdrBz[iDs][3];
2745  dsdr[iDs][4] = dsdrBz[iDs][5];
2746  dsdr[iDs][5] = -dsdrBz[iDs][4];
2747  }
2748 }
2749 
2750 void KFParticleBaseSIMD::GetDStoParticleBy( float_v B, const KFParticleBaseSIMD &p, float_v dS[2] ) const
2751 {
2764  const float_v param1[6] = { fP[0], -fP[2], fP[1], fP[3], -fP[5], fP[4] };
2765  const float_v param2[6] = { p.fP[0], -p.fP[2], p.fP[1], p.fP[3], -p.fP[5], p.fP[4] };
2766 
2767  GetDStoParticleBz(B, p, dS, param1, param2);
2768 }
2769 
2770 void KFParticleBaseSIMD::GetDStoParticleB( float_v B[3], const KFParticleBaseSIMD &p, float_v dS[2], float_v dsdr[4][6] ) const
2771 {
2793  const float_v& Bx = B[0];
2794  const float_v& By = B[1];
2795  const float_v& Bz = B[2];
2796 
2797  const float_v& Bxz = sqrt(Bx*Bx + Bz*Bz);
2798  const float_v& Br = sqrt(Bx*Bx + By*By + Bz*Bz);
2799 
2800  float_v cosA = 1.f;
2801  float_v sinA = 0.f;
2802 
2803  cosA( abs(Bxz) > 1.e-8f ) = Bz/Bxz;
2804  sinA( abs(Bxz) > 1.e-8f ) = Bx/Bxz;
2805 
2806  const float_v& sinP = By/Br;
2807  const float_v& cosP = Bxz/Br;
2808 
2809 
2810  const float_v param1[6] = { cosA*fP[0] - sinA*fP[2],
2811  -sinA*sinP*fP[0] + cosP*fP[1] - cosA*sinP*fP[2],
2812  cosP*sinA*fP[0] + sinP*fP[1] + cosA*cosP*fP[2],
2813  cosA*fP[3] - sinA*fP[5],
2814  -sinA*sinP*fP[3] + cosP*fP[4] - cosA*sinP*fP[5],
2815  cosP*sinA*fP[3] + sinP*fP[4] + cosA*cosP*fP[5]};
2816  const float_v param2[6] = { cosA*p.fP[0] - sinA*p.fP[2],
2817  -sinA*sinP*p.fP[0] + cosP*p.fP[1] - cosA*sinP*p.fP[2],
2818  cosP*sinA*p.fP[0] + sinP*p.fP[1] + cosA*cosP*p.fP[2],
2819  cosA*p.fP[3] - sinA*p.fP[5],
2820  -sinA*sinP*p.fP[3] + cosP*p.fP[4] - cosA*sinP*p.fP[5],
2821  cosP*sinA*p.fP[3] + sinP*p.fP[4] + cosA*cosP*p.fP[5]};
2822 
2823  float_v dsdrBz[4][6];
2824  for(int i1=0; i1<4; i1++)
2825  for(int i2=0; i2<6; i2++)
2826  dsdrBz[i1][i2] = 0.f;
2827 
2828  GetDStoParticleBz(Br, p, dS, dsdrBz, param1, param2);
2829 
2830  for(int iDs=0; iDs<4; iDs++)
2831  {
2832  dsdr[iDs][0] = dsdrBz[iDs][0]*cosA - dsdrBz[iDs][1]*sinA*sinP + dsdrBz[iDs][2]*sinA*cosP;
2833  dsdr[iDs][1] = dsdrBz[iDs][1]*cosP + dsdrBz[iDs][2]*sinP;
2834  dsdr[iDs][2] = -dsdrBz[iDs][0]*sinA - dsdrBz[iDs][1]*cosA*sinP + dsdrBz[iDs][2]*cosA*cosP;
2835  dsdr[iDs][3] = dsdrBz[iDs][3]*cosA - dsdrBz[iDs][4]*sinA*sinP + dsdrBz[iDs][5]*sinA*cosP;
2836  dsdr[iDs][4] = dsdrBz[iDs][4]*cosP + dsdrBz[iDs][5]*sinP;
2837  dsdr[iDs][5] = -dsdrBz[iDs][3]*sinA - dsdrBz[iDs][4]*cosA*sinP + dsdrBz[iDs][5]*cosA*cosP;
2838  }
2839 }
2840 
2841 void KFParticleBaseSIMD::GetDStoParticleB( float_v B[3], const KFParticleBaseSIMD &p, float_v dS[2] ) const
2842 {
2855  const float_v& Bx = B[0];
2856  const float_v& By = B[1];
2857  const float_v& Bz = B[2];
2858 
2859  const float_v& Bxz = sqrt(Bx*Bx + Bz*Bz);
2860  const float_v& Br = sqrt(Bx*Bx + By*By + Bz*Bz);
2861 
2862  float_v cosA = 1.f;
2863  float_v sinA = 0.f;
2864 
2865  cosA( abs(Bxz) > 1.e-8f ) = Bz/Bxz;
2866  sinA( abs(Bxz) > 1.e-8f ) = Bx/Bxz;
2867 
2868  const float_v& sinP = By/Br;
2869  const float_v& cosP = Bxz/Br;
2870 
2871 
2872  const float_v param1[6] = { cosA*fP[0] - sinA*fP[2],
2873  -sinA*sinP*fP[0] + cosP*fP[1] - cosA*sinP*fP[2],
2874  cosP*sinA*fP[0] + sinP*fP[1] + cosA*cosP*fP[2],
2875  cosA*fP[3] - sinA*fP[5],
2876  -sinA*sinP*fP[3] + cosP*fP[4] - cosA*sinP*fP[5],
2877  cosP*sinA*fP[3] + sinP*fP[4] + cosA*cosP*fP[5]};
2878  const float_v param2[6] = { cosA*p.fP[0] - sinA*p.fP[2],
2879  -sinA*sinP*p.fP[0] + cosP*p.fP[1] - cosA*sinP*p.fP[2],
2880  cosP*sinA*p.fP[0] + sinP*p.fP[1] + cosA*cosP*p.fP[2],
2881  cosA*p.fP[3] - sinA*p.fP[5],
2882  -sinA*sinP*p.fP[3] + cosP*p.fP[4] - cosA*sinP*p.fP[5],
2883  cosP*sinA*p.fP[3] + sinP*p.fP[4] + cosA*cosP*p.fP[5]};
2884 
2885  GetDStoParticleBz(Br, p, dS, param1, param2);
2886 }
2887 
2888 void KFParticleBaseSIMD::GetDStoParticleLine( const KFParticleBaseSIMD &p, float_v dS[2], float_v dsdr[4][6] ) const
2889 {
2907  float_v p12 = fP[3]*fP[3] + fP[4]*fP[4] + fP[5]*fP[5];
2908  float_v p22 = p.fP[3]*p.fP[3] + p.fP[4]*p.fP[4] + p.fP[5]*p.fP[5];
2909  float_v p1p2 = fP[3]*p.fP[3] + fP[4]*p.fP[4] + fP[5]*p.fP[5];
2910 
2911  float_v drp1 = fP[3]*(p.fP[0]-fP[0]) + fP[4]*(p.fP[1]-fP[1]) + fP[5]*(p.fP[2]-fP[2]);
2912  float_v drp2 = p.fP[3]*(p.fP[0]-fP[0]) + p.fP[4]*(p.fP[1]-fP[1]) + p.fP[5]*(p.fP[2]-fP[2]);
2913 
2914  float_v detp = p1p2*p1p2 - p12*p22;
2915  detp( abs(detp)<float_v(1.e-4f) ) = float_v(1.f); //TODO correct!!!
2916 
2917  dS[0] = (drp2*p1p2 - drp1*p22) /detp;
2918  dS[1] = (drp2*p12 - drp1*p1p2)/detp;
2919 
2920  const float_v x01 = fP[0];
2921  const float_v y01 = fP[1];
2922  const float_v z01 = fP[2];
2923  const float_v px1 = fP[3];
2924  const float_v py1 = fP[4];
2925  const float_v pz1 = fP[5];
2926 
2927  const float_v x02 = p.fP[0];
2928  const float_v y02 = p.fP[1];
2929  const float_v z02 = p.fP[2];
2930  const float_v px2 = p.fP[3];
2931  const float_v py2 = p.fP[4];
2932  const float_v pz2 = p.fP[5];
2933 
2934  const float_v drp1_dr1[6] = {-px1, -py1, -pz1, -x01 + x02, -y01 + y02, -z01 + z02};
2935  const float_v drp1_dr2[6] = {px1, py1, pz1, 0.f, 0.f, 0.f};
2936  const float_v drp2_dr1[6] = {-px2, -py2, -pz2, 0.f, 0.f, 0.f};
2937  const float_v drp2_dr2[6] = {px2, py2, pz2, -x01 + x02, -y01 + y02, -z01 + z02};
2938  const float_v dp1p2_dr1[6] = {0.f, 0.f, 0.f, px2, py2, pz2};
2939  const float_v dp1p2_dr2[6] = {0.f, 0.f, 0.f, px1, py1, pz1};
2940  const float_v dp12_dr1[6] = {0.f, 0.f, 0.f, 2.f*px1, 2.f*py1, 2.f*pz1};
2941  const float_v dp12_dr2[6] = {0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
2942  const float_v dp22_dr1[6] = {0.f, 0.f, 0.f, 0.f, 0.f, 0.f};
2943  const float_v dp22_dr2[6] = {0.f, 0.f, 0.f, 2.f*px2, 2.f*py2, 2.f*pz2};
2944  const float_v ddetp_dr1[6] = {0.f, 0.f, 0.f, -2.f*p22*px1 + 2.f*p1p2*px2, -2.f*p22*py1 + 2.f*p1p2*py2, -2.f*p22*pz1 + 2.f*p1p2*pz2};
2945  const float_v ddetp_dr2[6] = {0.f, 0.f, 0.f, 2.f*p1p2*px1 - 2.f*p12*px2, 2.f*p1p2*py1 - 2.f*p12*py2, 2.f*p1p2*pz1 - 2.f*p12*pz2};
2946 
2947 
2948  float_v da1_dr1[6], da1_dr2[6], da2_dr1[6], da2_dr2[6];
2949 
2950  const float_v a1 = drp2*p1p2 - drp1*p22;
2951  const float_v a2 = drp2*p12 - drp1*p1p2;
2952  for(int i=0; i<6; i++)
2953  {
2954  da1_dr1[i] = drp2_dr1[i]*p1p2 + drp2*dp1p2_dr1[i] - drp1_dr1[i]*p22 - drp1*dp22_dr1[i];
2955  da1_dr2[i] = drp2_dr2[i]*p1p2 + drp2*dp1p2_dr2[i] - drp1_dr2[i]*p22 - drp1*dp22_dr2[i];
2956 
2957  da2_dr1[i] = drp2_dr1[i]*p12 + drp2*dp12_dr1[i] - drp1_dr1[i]*p1p2 - drp1*dp1p2_dr1[i];
2958  da2_dr2[i] = drp2_dr2[i]*p12 + drp2*dp12_dr2[i] - drp1_dr2[i]*p1p2 - drp1*dp1p2_dr2[i];
2959 
2960  dsdr[0][i] = da1_dr1[i]/detp - a1/(detp*detp)*ddetp_dr1[i];
2961  dsdr[1][i] = da1_dr2[i]/detp - a1/(detp*detp)*ddetp_dr2[i];
2962 
2963  dsdr[2][i] = da2_dr1[i]/detp - a2/(detp*detp)*ddetp_dr1[i];
2964  dsdr[3][i] = da2_dr2[i]/detp - a2/(detp*detp)*ddetp_dr2[i];
2965  }
2966 }
2967 
2969 {
2979  float_v p12 = fP[3]*fP[3] + fP[4]*fP[4] + fP[5]*fP[5];
2980  float_v p22 = p.fP[3]*p.fP[3] + p.fP[4]*p.fP[4] + p.fP[5]*p.fP[5];
2981  float_v p1p2 = fP[3]*p.fP[3] + fP[4]*p.fP[4] + fP[5]*p.fP[5];
2982 
2983  float_v drp1 = fP[3]*(p.fP[0]-fP[0]) + fP[4]*(p.fP[1]-fP[1]) + fP[5]*(p.fP[2]-fP[2]);
2984  float_v drp2 = p.fP[3]*(p.fP[0]-fP[0]) + p.fP[4]*(p.fP[1]-fP[1]) + p.fP[5]*(p.fP[2]-fP[2]);
2985 
2986  float_v detp = p1p2*p1p2 - p12*p22;
2987  detp( abs(detp)<float_v(1.e-4f) ) = float_v(1.f); //TODO correct!!!
2988 
2989  dS[0] = (drp2*p1p2 - drp1*p22) /detp;
2990  dS[1] = (drp2*p12 - drp1*p1p2)/detp;
2991 }
2992 
2993 void KFParticleBaseSIMD::GetDStoParticleCBM( const KFParticleBaseSIMD &p, float_v dS[2], float_v dsdr[4][6] ) const
2994 {
3016  float_v fld[3];
3017  GetFieldValue( fP, fld );
3018 
3019  const float_v& bq1 = fld[1]*simd_cast<float_v>(fQ);
3020  const float_v& bq2 = fld[1]*simd_cast<float_v>(p.fQ);
3021  const float_m& isStraight1 = abs(bq1) < float_v(1.e-8f);
3022  const float_m& isStraight2 = abs(bq2) < float_v(1.e-8f);
3023 
3024  if( isStraight1.isFull() && isStraight2.isFull() )
3025  GetDStoParticleLine(p, dS, dsdr);
3026  else
3027  GetDStoParticleBy(fld[1], p, dS, dsdr);
3028 }
3029 
3030 void KFParticleBaseSIMD::GetDStoParticleCBM( const KFParticleBaseSIMD &p, float_v dS[2] ) const
3031 {
3045  float_v fld[3];
3046  GetFieldValue( fP, fld );
3047 
3048  const float_v& bq1 = fld[1]*simd_cast<float_v>(fQ);
3049  const float_v& bq2 = fld[1]*simd_cast<float_v>(p.fQ);
3050  const float_m& isStraight1 = abs(bq1) < float_v(1.e-8f);
3051  const float_m& isStraight2 = abs(bq2) < float_v(1.e-8f);
3052 
3053  if( isStraight1.isFull() && isStraight2.isFull() )
3054  GetDStoParticleLine(p, dS);
3055  else
3056  GetDStoParticleBy(fld[1], p, dS);
3057 }
3058 
3059 void KFParticleBaseSIMD::TransportCBM( float_v dS, const float_v* dsdr, float_v P[], float_v C[], float_v* dsdr1, float_v* F, float_v* F1 ) const
3060 {
3082  if( (fQ == int_v(Vc::Zero)).isFull() ){
3083  TransportLine( dS, dsdr, P, C, dsdr1, F, F1 );
3084  return;
3085  }
3086 
3087  const float_v kCLight = 0.000299792458f;
3088 
3089  float_v c = simd_cast<float_v>(fQ)*kCLight;
3090 
3091  // construct coefficients
3092 
3093  float_v
3094  px = fP[3],
3095  py = fP[4],
3096  pz = fP[5];
3097 
3098  float_v sx=0.f, sy=0.f, sz=0.f, syy=0.f, syz=0.f, syyy=0.f, ssx=0.f, ssy=0.f, ssz=0.f, ssyy=0.f, ssyz=0.f, ssyyy=0.f;
3099 
3100  { // get field integrals
3101 
3102  float_v fld[3][3];
3103  float_v p0[3], p1[3], p2[3];
3104 
3105  // line track approximation
3106 
3107  p0[0] = fP[0];
3108  p0[1] = fP[1];
3109  p0[2] = fP[2];
3110 
3111  p2[0] = fP[0] + px*dS;
3112  p2[1] = fP[1] + py*dS;
3113  p2[2] = fP[2] + pz*dS;
3114 
3115  p1[0] = 0.5f*(p0[0]+p2[0]);
3116  p1[1] = 0.5f*(p0[1]+p2[1]);
3117  p1[2] = 0.5f*(p0[2]+p2[2]);
3118 
3119  // first order track approximation
3120  {
3121  GetFieldValue( p0, fld[0] );
3122  GetFieldValue( p1, fld[1] );
3123  GetFieldValue( p2, fld[2] );
3124 
3125  float_v ssy1 = ( 7.f*fld[0][1] + 6.f*fld[1][1]-fld[2][1] )*c*dS*dS/96.f;
3126  float_v ssy2 = ( fld[0][1] + 2.f*fld[1][1] )*c*dS*dS/6.f;
3127 
3128  p1[0] -= ssy1*pz;
3129  p1[2] += ssy1*px;
3130  p2[0] -= ssy2*pz;
3131  p2[2] += ssy2*px;
3132  }
3133 
3134  GetFieldValue( p0, fld[0] );
3135  GetFieldValue( p1, fld[1] );
3136  GetFieldValue( p2, fld[2] );
3137 
3138  for(int iF1=0; iF1<3; iF1++)
3139  for(int iF2=0; iF2<3; iF2++)
3140  fld[iF1][iF2](abs(fld[iF1][iF2]) > float_v(100.f)) = 0.f;
3141 
3142  sx = c*( fld[0][0] + 4*fld[1][0] + fld[2][0] )*dS/6.f;
3143  sy = c*( fld[0][1] + 4*fld[1][1] + fld[2][1] )*dS/6.f;
3144  sz = c*( fld[0][2] + 4*fld[1][2] + fld[2][2] )*dS/6.f;
3145 
3146  ssx = c*( fld[0][0] + 2*fld[1][0])*dS*dS/6.f;
3147  ssy = c*( fld[0][1] + 2*fld[1][1])*dS*dS/6.f;
3148  ssz = c*( fld[0][2] + 2*fld[1][2])*dS*dS/6.f;
3149 
3150  float_v c2[3][3] = { { 5.f, -4.f, -1.f},{ 44.f, 80.f, -4.f},{ 11.f, 44.f, 5.f} }; // /=360.
3151  float_v cc2[3][3] = { { 38.f, 8.f, -4.f},{ 148.f, 208.f, -20.f},{ 3.f, 36.f, 3.f} }; // /=2520.
3152  for(Int_t n=0; n<3; n++)
3153  for(Int_t m=0; m<3; m++)
3154  {
3155  syz += c2[n][m]*fld[n][1]*fld[m][2];
3156  ssyz += cc2[n][m]*fld[n][1]*fld[m][2];
3157  }
3158 
3159  syz *= c*c*dS*dS/360.f;
3160  ssyz *= c*c*dS*dS*dS/2520.f;
3161 
3162  syy = c*( fld[0][1] + 4.f*fld[1][1] + fld[2][1] )*dS;
3163  syyy = syy*syy*syy / 1296.f;
3164  syy = syy*syy/72.f;
3165 
3166  ssyy = ( fld[0][1]*( 38.f*fld[0][1] + 156.f*fld[1][1] - fld[2][1] )+
3167  fld[1][1]*( 208.f*fld[1][1] +16.f*fld[2][1] )+
3168  fld[2][1]*( 3.f*fld[2][1] )
3169  )*dS*dS*dS*c*c/2520.f;
3170  ssyyy =
3171  (
3172  fld[0][1]*( fld[0][1]*( 85.f*fld[0][1] + 526.f*fld[1][1] - 7.f*fld[2][1] )+
3173  fld[1][1]*( 1376.f*fld[1][1] +84.f*fld[2][1] )+
3174  fld[2][1]*( 19.f*fld[2][1] ) )+
3175  fld[1][1]*( fld[1][1]*( 1376.f*fld[1][1] +256.f*fld[2][1] )+
3176  fld[2][1]*( 62.f*fld[2][1] ) )+
3177  fld[2][1]*fld[2][1] *( 3.f*fld[2][1] )
3178  )*dS*dS*dS*dS*c*c*c/90720.f;
3179 
3180  }
3181 
3182 // float_v mJ[11];
3183 //
3184 // mJ[0]=dS-ssyy; mJ[1]=ssx; mJ[2]=ssyyy-ssy;
3185 // mJ[3]=-ssz; mJ[4]=dS; mJ[5]=ssx+ssyz;
3186 //
3187 // mJ[6]=1-syy; mJ[7]=sx; mJ[8]=syyy-sy;
3188 // mJ[9]=-sz; mJ[10]=sx+syz;
3189 //
3190 //
3191 //
3192 // P[0] = fP[0] + mJ[0]*px + mJ[1]*py + mJ[2]*pz;
3193 // P[1] = fP[1] + mJ[3]*px + mJ[4]*py + mJ[5]*pz;
3194 // P[2] = fP[2] - mJ[2]*px - mJ[1]*py + mJ[0]*pz;
3195 // P[3] = mJ[6]*px + mJ[7]*py + mJ[8]*pz;
3196 // P[4] = mJ[9]*px + py + mJ[10]*pz;
3197 // P[5] = -mJ[8]*px - mJ[7]*py + mJ[6]*pz;
3198 // P[6] = fP[6];
3199 // P[7] = fP[7];
3200 
3201 // if(C!=fC)
3202 // {
3203 // for(int iC=0; iC<36; iC++)
3204 // C[iC] = fC[iC];
3205 // }
3206 //
3207 // multQSQt1( mJ, C);
3208 
3209  float_v mJ[8][8];
3210  for( Int_t i=0; i<8; i++ ) for( Int_t j=0; j<8; j++) mJ[i][j]=0;
3211 
3212  mJ[0][0]=1; mJ[0][1]=0; mJ[0][2]=0; mJ[0][3]=dS-ssyy; mJ[0][4]=ssx; mJ[0][5]=ssyyy-ssy;
3213  mJ[1][0]=0; mJ[1][1]=1; mJ[1][2]=0; mJ[1][3]=-ssz; mJ[1][4]=dS; mJ[1][5]=ssx+ssyz;
3214  mJ[2][0]=0; mJ[2][1]=0; mJ[2][2]=1; mJ[2][3]=ssy-ssyyy; mJ[2][4]=-ssx; mJ[2][5]=dS-ssyy;
3215 
3216  mJ[3][0]=0; mJ[3][1]=0; mJ[3][2]=0; mJ[3][3]=1-syy; mJ[3][4]=sx; mJ[3][5]=syyy-sy;
3217  mJ[4][0]=0; mJ[4][1]=0; mJ[4][2]=0; mJ[4][3]=-sz; mJ[4][4]=1; mJ[4][5]=sx+syz;
3218  mJ[5][0]=0; mJ[5][1]=0; mJ[5][2]=0; mJ[5][3]=sy-syyy; mJ[5][4]=-sx; mJ[5][5]=1-syy;
3219  mJ[6][6] = mJ[7][7] = 1;
3220 
3221  P[0] = fP[0] + mJ[0][3]*px + mJ[0][4]*py + mJ[0][5]*pz;
3222  P[1] = fP[1] + mJ[1][3]*px + mJ[1][4]*py + mJ[1][5]*pz;
3223  P[2] = fP[2] + mJ[2][3]*px + mJ[2][4]*py + mJ[2][5]*pz;
3224  P[3] = mJ[3][3]*px + mJ[3][4]*py + mJ[3][5]*pz;
3225  P[4] = mJ[4][3]*px + mJ[4][4]*py + mJ[4][5]*pz;
3226  P[5] = mJ[5][3]*px + mJ[5][4]*py + mJ[5][5]*pz;
3227  P[6] = fP[6];
3228  P[7] = fP[7];
3229 
3230  float_v mJds[6][6];
3231  for( Int_t i=0; i<6; i++ ) for( Int_t j=0; j<6; j++) mJds[i][j]=0;
3232 
3233  mJds[0][3]= 1.f;
3234  mJds[1][4]= 1.f;
3235  mJds[2][5]= 1.f;
3236 
3237  mJds[0][3](abs(dS)>0.f)= 1.f - 3.f*ssyy/dS; mJds[0][4](abs(dS)>0.f)= 2.f*ssx/dS; mJds[0][5](abs(dS)>0.f)= (4.f*ssyyy-2.f*ssy)/dS;
3238  mJds[1][3](abs(dS)>0.f)= -2.f*ssz/dS; mJds[1][4](abs(dS)>0.f)= 1.f; mJds[1][5](abs(dS)>0.f)= (2.f*ssx + 3.f*ssyz)/dS;
3239  mJds[2][3](abs(dS)>0.f)= (2.f*ssy-4.f*ssyyy)/dS; mJds[2][4](abs(dS)>0.f)=-2.f*ssx/dS; mJds[2][5](abs(dS)>0.f)= 1.f - 3.f*ssyy/dS;
3240 
3241  mJds[3][3](abs(dS)>0.f)= -2.f*syy/dS; mJds[3][4](abs(dS)>0.f)= sx/dS; mJds[3][5](abs(dS)>0.f)= 3.f*syyy/dS - sy/dS;
3242  mJds[4][3](abs(dS)>0.f)= -sz/dS; mJds[4][4](abs(dS)>0.f)=0.f; mJds[4][5](abs(dS)>0.f) = sx/dS + 2.f*syz/dS;
3243  mJds[5][3](abs(dS)>0.f)= sy/dS - 3.f*syyy/dS; mJds[5][4](abs(dS)>0.f)=-sx/dS; mJds[5][5](abs(dS)>0.f)= -2.f*syy/dS;
3244 
3245  for(int i1=0; i1<6; i1++)
3246  for(int i2=0; i2<6; i2++)
3247  mJ[i1][i2] += mJds[i1][3]*px*dsdr[i2] + mJds[i1][4]*py*dsdr[i2] + mJds[i1][5]*pz*dsdr[i2];
3248 
3249  MultQSQt( mJ[0], fC, C, 8);
3250 
3251  if(F)
3252  {
3253  for(int i=0; i<6; i++)
3254  for(int j=0; j<6; j++)
3255  F[i*6+j] = mJ[i][j];
3256 
3257  for(int i1=0; i1<6; i1++)
3258  for(int i2=0; i2<6; i2++)
3259  F1[i1*6 + i2] = mJds[i1][3]*px*dsdr1[i2] + mJds[i1][4]*py*dsdr1[i2] + mJds[i1][5]*pz*dsdr1[i2];
3260  }
3261 }
3262 
3263 void KFParticleBaseSIMD::TransportCBM( float_v dS, float_v P[] ) const
3264 {
3273  if( (fQ == int_v(Vc::Zero)).isFull() ){
3274  TransportLine( dS, P );
3275  return;
3276  }
3277 
3278  const float_v kCLight = 0.000299792458f;
3279 
3280  float_v c = simd_cast<float_v>(fQ)*kCLight;
3281 
3282  // construct coefficients
3283 
3284  float_v
3285  px = fP[3],
3286  py = fP[4],
3287  pz = fP[5];
3288 
3289  float_v sx=0.f, sy=0.f, sz=0.f, syy=0.f, syz=0.f, syyy=0.f, ssx=0.f, ssy=0.f, ssz=0.f, ssyy=0.f, ssyz=0.f, ssyyy=0.f;
3290 
3291  { // get field integrals
3292 
3293  float_v fld[3][3];
3294  float_v p0[3], p1[3], p2[3];
3295 
3296  // line track approximation
3297 
3298  p0[0] = fP[0];
3299  p0[1] = fP[1];
3300  p0[2] = fP[2];
3301 
3302  p2[0] = fP[0] + px*dS;
3303  p2[1] = fP[1] + py*dS;
3304  p2[2] = fP[2] + pz*dS;
3305 
3306  p1[0] = 0.5f*(p0[0]+p2[0]);
3307  p1[1] = 0.5f*(p0[1]+p2[1]);
3308  p1[2] = 0.5f*(p0[2]+p2[2]);
3309 
3310  // first order track approximation
3311  {
3312  GetFieldValue( p0, fld[0] );
3313  GetFieldValue( p1, fld[1] );
3314  GetFieldValue( p2, fld[2] );
3315 
3316  float_v ssy1 = ( 7.f*fld[0][1] + 6.f*fld[1][1]-fld[2][1] )*c*dS*dS/96.f;
3317  float_v ssy2 = ( fld[0][1] + 2.f*fld[1][1] )*c*dS*dS/6.f;
3318 
3319  p1[0] -= ssy1*pz;
3320  p1[2] += ssy1*px;
3321  p2[0] -= ssy2*pz;
3322  p2[2] += ssy2*px;
3323  }
3324 
3325  GetFieldValue( p0, fld[0] );
3326  GetFieldValue( p1, fld[1] );
3327  GetFieldValue( p2, fld[2] );
3328 
3329  for(int iF1=0; iF1<3; iF1++)
3330  for(int iF2=0; iF2<3; iF2++)
3331  fld[iF1][iF2](abs(fld[iF1][iF2]) > float_v(100.f)) = 0.f;
3332 
3333  sx = c*( fld[0][0] + 4*fld[1][0] + fld[2][0] )*dS/6.f;
3334  sy = c*( fld[0][1] + 4*fld[1][1] + fld[2][1] )*dS/6.f;
3335  sz = c*( fld[0][2] + 4*fld[1][2] + fld[2][2] )*dS/6.f;
3336 
3337  ssx = c*( fld[0][0] + 2*fld[1][0])*dS*dS/6.f;
3338  ssy = c*( fld[0][1] + 2*fld[1][1])*dS*dS/6.f;
3339  ssz = c*( fld[0][2] + 2*fld[1][2])*dS*dS/6.f;
3340 
3341  float_v c2[3][3] = { { 5.f, -4.f, -1.f},{ 44.f, 80.f, -4.f},{ 11.f, 44.f, 5.f} }; // /=360.
3342  float_v cc2[3][3] = { { 38.f, 8.f, -4.f},{ 148.f, 208.f, -20.f},{ 3.f, 36.f, 3.f} }; // /=2520.
3343  for(Int_t n=0; n<3; n++)
3344  for(Int_t m=0; m<3; m++)
3345  {
3346  syz += c2[n][m]*fld[n][1]*fld[m][2];
3347  ssyz += cc2[n][m]*fld[n][1]*fld[m][2];
3348  }
3349 
3350  syz *= c*c*dS*dS/360.f;
3351  ssyz *= c*c*dS*dS*dS/2520.f;
3352 
3353  syy = c*( fld[0][1] + 4.f*fld[1][1] + fld[2][1] )*dS;
3354  syyy = syy*syy*syy / 1296.f;
3355  syy = syy*syy/72.f;
3356 
3357  ssyy = ( fld[0][1]*( 38.f*fld[0][1] + 156.f*fld[1][1] - fld[2][1] )+
3358  fld[1][1]*( 208.f*fld[1][1] +16.f*fld[2][1] )+
3359  fld[2][1]*( 3.f*fld[2][1] )
3360  )*dS*dS*dS*c*c/2520.f;
3361  ssyyy =
3362  (
3363  fld[0][1]*( fld[0][1]*( 85.f*fld[0][1] + 526.f*fld[1][1] - 7.f*fld[2][1] )+
3364  fld[1][1]*( 1376.f*fld[1][1] +84.f*fld[2][1] )+
3365  fld[2][1]*( 19.f*fld[2][1] ) )+
3366  fld[1][1]*( fld[1][1]*( 1376.f*fld[1][1] +256.f*fld[2][1] )+
3367  fld[2][1]*( 62.f*fld[2][1] ) )+
3368  fld[2][1]*fld[2][1] *( 3.f*fld[2][1] )
3369  )*dS*dS*dS*dS*c*c*c/90720.f;
3370 
3371  }
3372 
3373  float_v mJ[8][8];
3374  for( Int_t i=0; i<8; i++ ) for( Int_t j=0; j<8; j++) mJ[i][j]=0;
3375 
3376  mJ[0][0]=1; mJ[0][1]=0; mJ[0][2]=0; mJ[0][3]=dS-ssyy; mJ[0][4]=ssx; mJ[0][5]=ssyyy-ssy;
3377  mJ[1][0]=0; mJ[1][1]=1; mJ[1][2]=0; mJ[1][3]=-ssz; mJ[1][4]=dS; mJ[1][5]=ssx+ssyz;
3378  mJ[2][0]=0; mJ[2][1]=0; mJ[2][2]=1; mJ[2][3]=ssy-ssyyy; mJ[2][4]=-ssx; mJ[2][5]=dS-ssyy;
3379 
3380  mJ[3][0]=0; mJ[3][1]=0; mJ[3][2]=0; mJ[3][3]=1-syy; mJ[3][4]=sx; mJ[3][5]=syyy-sy;
3381  mJ[4][0]=0; mJ[4][1]=0; mJ[4][2]=0; mJ[4][3]=-sz; mJ[4][4]=1; mJ[4][5]=sx+syz;
3382  mJ[5][0]=0; mJ[5][1]=0; mJ[5][2]=0; mJ[5][3]=sy-syyy; mJ[5][4]=-sx; mJ[5][5]=1-syy;
3383  mJ[6][6] = mJ[7][7] = 1;
3384 
3385  P[0] = fP[0] + mJ[0][3]*px + mJ[0][4]*py + mJ[0][5]*pz;
3386  P[1] = fP[1] + mJ[1][3]*px + mJ[1][4]*py + mJ[1][5]*pz;
3387  P[2] = fP[2] + mJ[2][3]*px + mJ[2][4]*py + mJ[2][5]*pz;
3388  P[3] = mJ[3][3]*px + mJ[3][4]*py + mJ[3][5]*pz;
3389  P[4] = mJ[4][3]*px + mJ[4][4]*py + mJ[4][5]*pz;
3390  P[5] = mJ[5][3]*px + mJ[5][4]*py + mJ[5][5]*pz;
3391  P[6] = fP[6];
3392  P[7] = fP[7];
3393 }
3394 
3395 void KFParticleBaseSIMD::TransportBz( float_v Bz, float_v dS, const float_v* dsdr, float_v P[], float_v C[], float_v* dsdr1, float_v* F, float_v* F1 ) const
3396 {
3419  const float_v kCLight = 0.000299792458f;
3420  Bz = Bz*simd_cast<float_v>(fQ)*kCLight;
3421  float_v bs= Bz*dS;
3422  float_v s = sin(bs), c = cos(bs);
3423 
3424  float_v sB(Vc::Zero), cB(Vc::Zero);
3425 
3426  const float_v kOvSqr6 = 1.f/sqrt(float_v(6.f));
3427  const float_v LocalSmall = 1.e-10f;
3428 
3429  Bz(abs(bs) <= LocalSmall) = LocalSmall;
3430  sB(LocalSmall < abs(bs)) = s/Bz;
3431  sB(LocalSmall >= abs(bs)) = (1.f-bs*kOvSqr6)*(1.f+bs*kOvSqr6)*dS;
3432  cB(LocalSmall < abs(bs)) = (1.f-c)/Bz;
3433  cB(LocalSmall >= abs(bs)) = .5f*sB*bs;
3434 
3435  float_v px = fP[3];
3436  float_v py = fP[4];
3437  float_v pz = fP[5];
3438 
3439  P[0] = fP[0] + sB*px + cB*py;
3440  P[1] = fP[1] - cB*px + sB*py;
3441  P[2] = fP[2] + dS*pz;
3442  P[3] = c*px + s*py;
3443  P[4] = -s*px + c*py;
3444  P[5] = fP[5];
3445  P[6] = fP[6];
3446  P[7] = fP[7];
3447 
3448  float_v mJ[8][8];
3449  for( Int_t i=0; i<8; i++ ) for( Int_t j=0; j<8; j++) mJ[i][j]=0;
3450 
3451  for(int i=0; i<8; i++) mJ[i][i]=1;
3452  mJ[0][3] = sB; mJ[0][4] = cB;
3453  mJ[1][3] = -cB; mJ[1][4] = sB;
3454  mJ[2][5] = dS;
3455  mJ[3][3] = c; mJ[3][4] = s;
3456  mJ[4][3] = -s; mJ[4][4] = c;
3457 
3458 
3459  float_v mJds[6][6];
3460  for( Int_t i=0; i<6; i++ ) for( Int_t j=0; j<6; j++) mJds[i][j]=0;
3461  mJds[0][3] = c; mJds[0][4] = s;
3462  mJds[1][3] = -s; mJds[1][4] = c;
3463  mJds[2][5] = 1;
3464  mJds[3][3] = -Bz*s; mJds[3][4] = Bz*c;
3465  mJds[4][3] = -Bz*c; mJds[4][4] = -Bz*s;
3466 
3467  for(int i1=0; i1<6; i1++)
3468  for(int i2=0; i2<6; i2++)
3469  mJ[i1][i2] += mJds[i1][3]*px*dsdr[i2] + mJds[i1][4]*py*dsdr[i2] + mJds[i1][5]*pz*dsdr[i2];
3470 
3471  MultQSQt( mJ[0], fC, C, 8);
3472 
3473  if(F)
3474  {
3475  for(int i=0; i<6; i++)
3476  for(int j=0; j<6; j++)
3477  F[i*6+j] = mJ[i][j];
3478 
3479  for(int i1=0; i1<6; i1++)
3480  for(int i2=0; i2<6; i2++)
3481  F1[i1*6 + i2] = mJds[i1][3]*px*dsdr1[i2] + mJds[i1][4]*py*dsdr1[i2] + mJds[i1][5]*pz*dsdr1[i2];
3482  }
3483 }
3484 
3485 void KFParticleBaseSIMD::TransportBz( float_v Bz, float_v dS, float_v P[] ) const
3486 {
3497  const float_v kCLight = 0.000299792458f;
3498  Bz = Bz*simd_cast<float_v>(fQ)*kCLight;
3499  float_v bs= Bz*dS;
3500  float_v s = KFPMath::Sin(bs), c = KFPMath::Cos(bs);
3501 
3502  float_v sB(Vc::Zero), cB(Vc::Zero);
3503 
3504  const float_v kOvSqr6 = 1.f/sqrt(float_v(6.f));
3505  const float_v LocalSmall = 1.e-10f;
3506 
3507  Bz(abs(bs) <= LocalSmall) = LocalSmall;
3508  sB(LocalSmall < abs(bs)) = s/Bz;
3509  sB(LocalSmall >= abs(bs)) = (1.f-bs*kOvSqr6)*(1.f+bs*kOvSqr6)*dS;
3510  cB(LocalSmall < abs(bs)) = (1.f-c)/Bz;
3511  cB(LocalSmall >= abs(bs)) = .5f*sB*bs;
3512 
3513  float_v px = fP[3];
3514  float_v py = fP[4];
3515  float_v pz = fP[5];
3516 
3517  P[0] = fP[0] + sB*px + cB*py;
3518  P[1] = fP[1] - cB*px + sB*py;
3519  P[2] = fP[2] + dS*pz;
3520  P[3] = c*px + s*py;
3521  P[4] = -s*px + c*py;
3522  P[5] = fP[5];
3523  P[6] = fP[6];
3524  P[7] = fP[7];
3525 }
3526 
3527 
3528 
3530 {
3535  return GetDistanceFromVertex( Vtx.fP );
3536 }
3537 
3538 float_v KFParticleBaseSIMD::GetDistanceFromVertex( const float_v vtx[] ) const
3539 {
3544  float_v mP[8], mC[36];
3545  float_v dsdr[6] = {0.f,0.f,0.f,0.f,0.f,0.f};
3546  const float_v dS = GetDStoPoint(vtx, dsdr);
3547  Transport( dS, dsdr, mP, mC );
3548  float_v d[3]={ vtx[0]-mP[0], vtx[1]-mP[1], vtx[2]-mP[2]};
3549  return sqrt( d[0]*d[0]+d[1]*d[1]+d[2]*d[2] );
3550 }
3551 
3553 {
3558  float_v dS[2];
3559  GetDStoParticleFast( p, dS );
3560  float_v mP[8], mP1[8];
3561  TransportFast( dS[0], mP );
3562  p.TransportFast( dS[1], mP1 );
3563  float_v dx = mP[0]-mP1[0];
3564  float_v dy = mP[1]-mP1[1];
3565  float_v dz = mP[2]-mP1[2];
3566  return sqrt(dx*dx+dy*dy+dz*dz);
3567 }
3568 
3570 {
3575  return GetDeviationFromVertex( Vtx.fP, Vtx.fC );
3576 }
3577 
3578 
3579 float_v KFParticleBaseSIMD::GetDeviationFromVertex( const float_v v[], const float_v Cv[] ) const
3580 {
3586  float_v mP[8];
3587  float_v mC[36];
3588  float_v dsdr[6] = {0.f,0.f,0.f,0.f,0.f,0.f};
3589  const float_v dS = GetDStoPoint(v, dsdr);
3590  float_v dsdp[6] = {-dsdr[0], -dsdr[1], -dsdr[2], 0.f, 0.f, 0.f};
3591  float_v F[36], F1[36];
3592  for(int i2=0; i2<36; i2++)
3593  {
3594  F[i2] = 0.f;
3595  F1[i2] = 0.f;
3596  }
3597  Transport( dS, dsdr, mP, mC, dsdp, F, F1 );
3598 
3599  if(Cv)
3600  {
3601  float_v VFT[3][6];
3602  for(int i=0; i<3; i++)
3603  for(int j=0; j<6; j++)
3604  {
3605  VFT[i][j] = 0;
3606  for(int k=0; k<3; k++)
3607  {
3608  VFT[i][j] += Cv[IJ(i,k)] * F1[j*6+k];
3609  }
3610  }
3611 
3612  float_v FVFT[6][6];
3613  for(int i=0; i<6; i++)
3614  for(int j=0; j<6; j++)
3615  {
3616  FVFT[i][j] = 0;
3617  for(int k=0; k<3; k++)
3618  {
3619  FVFT[i][j] += F1[i*6+k] * VFT[k][j];
3620  }
3621  }
3622  mC[0] += FVFT[0][0] + Cv[0];
3623  mC[1] += FVFT[1][0] + Cv[1];
3624  mC[2] += FVFT[1][1] + Cv[2];
3625  mC[3] += FVFT[2][0] + Cv[3];
3626  mC[4] += FVFT[2][1] + Cv[4];
3627  mC[5] += FVFT[2][2] + Cv[5];
3628  }
3629 
3630  InvertCholetsky3(mC);
3631 
3632  float_v d[3]={ v[0]-mP[0], v[1]-mP[1], v[2]-mP[2]};
3633 
3634  return ( ( mC[0]*d[0] + mC[1]*d[1] + mC[3]*d[2])*d[0]
3635  +(mC[1]*d[0] + mC[2]*d[1] + mC[4]*d[2])*d[1]
3636  +(mC[3]*d[0] + mC[4]*d[1] + mC[5]*d[2])*d[2] );
3637 }
3638 
3640 {
3645  float_v ds[2] = {0.f,0.f};
3646  float_v dsdr[4][6];
3647  float_v F1[36], F2[36], F3[36], F4[36];
3648  for(int i1=0; i1<36; i1++)
3649  {
3650  F1[i1] = 0;
3651  F2[i1] = 0;
3652  F3[i1] = 0;
3653  F4[i1] = 0;
3654  }
3655  GetDStoParticle( p, ds, dsdr );
3656 
3657  float_v V0Tmp[36] ;
3658  float_v V1Tmp[36] ;
3659 
3660 
3661  float_v mP1[8], mC1[36];
3662  float_v mP2[8], mC2[36];
3663 
3664  Transport(ds[0], dsdr[0], mP1, mC1, dsdr[1], F1, F2);
3665  p.Transport(ds[1], dsdr[3], mP2, mC2, dsdr[2], F4, F3);
3666 
3667  MultQSQt(F2, p.fC, V0Tmp, 6);
3668  MultQSQt(F3, fC, V1Tmp, 6);
3669 
3670  for(int iC=0; iC<6; iC++)
3671  mC1[iC] += V0Tmp[iC] + mC2[iC] + V1Tmp[iC];
3672 
3673  float_v d[3]={ mP2[0]-mP1[0], mP2[1]-mP1[1], mP2[2]-mP1[2]};
3674 
3675  return ( ( mC1[0]*d[0] + mC1[1]*d[1] + mC1[3]*d[2])*d[0]
3676  +(mC1[1]*d[0] + mC1[2]*d[1] + mC1[4]*d[2])*d[1]
3677  +(mC1[3]*d[0] + mC1[4]*d[1] + mC1[5]*d[2])*d[2] );
3678 }
3679 
3681 {
3686  float_v m[8];
3687  float_v mCm[36];
3688  float_v D[3][3];
3689  Vtx.GetMeasurement( *this, m, mCm, D );
3690  //*
3691 
3692  float_v mS[6] = { mCm[0] - Vtx.fC[0] + (D[0][0] + D[0][0]),
3693  mCm[1] - Vtx.fC[1] + (D[1][0] + D[0][1]), mCm[2] - Vtx.fC[2] + (D[1][1] + D[1][1]),
3694  mCm[3] - Vtx.fC[3] + (D[2][0] + D[0][2]), mCm[4] - Vtx.fC[4] + (D[1][2] + D[2][1]), mCm[5] - Vtx.fC[5] + (D[2][2] + D[2][2]) };
3695  InvertCholetsky3(mS);
3696 
3697  //* Residual (measured - estimated)
3698 
3699  float_v zeta[3] = { m[0]-Vtx.fP[0], m[1]-Vtx.fP[1], m[2]-Vtx.fP[2] };
3700 
3701  //* mCHt = mCH' - D'
3702 
3703  float_v mCHt0[3], mCHt1[3], mCHt2[3];
3704 
3705  mCHt0[0]=Vtx.fC[ 0] ; mCHt1[0]=Vtx.fC[ 1] ; mCHt2[0]=Vtx.fC[ 3] ;
3706  mCHt0[1]=Vtx.fC[ 1] ; mCHt1[1]=Vtx.fC[ 2] ; mCHt2[1]=Vtx.fC[ 4] ;
3707  mCHt0[2]=Vtx.fC[ 3] ; mCHt1[2]=Vtx.fC[ 4] ; mCHt2[2]=Vtx.fC[ 5] ;
3708 
3709  //* Kalman gain K = mCH'*S
3710 
3711  float_v k0[3], k1[3], k2[3];
3712 
3713  for(Int_t i=0;i<3;++i){
3714  k0[i] = mCHt0[i]*mS[0] + mCHt1[i]*mS[1] + mCHt2[i]*mS[3];
3715  k1[i] = mCHt0[i]*mS[1] + mCHt1[i]*mS[2] + mCHt2[i]*mS[4];
3716  k2[i] = mCHt0[i]*mS[3] + mCHt1[i]*mS[4] + mCHt2[i]*mS[5];
3717  }
3718 
3719  //* New estimation of the vertex position r += K*zeta
3720 
3721  float_v dChi2 = ((mS[0]*zeta[0] + mS[1]*zeta[1] + mS[3]*zeta[2])*zeta[0]
3722  + (mS[1]*zeta[0] + mS[2]*zeta[1] + mS[4]*zeta[2])*zeta[1]
3723  + (mS[3]*zeta[0] + mS[4]*zeta[1] + mS[5]*zeta[2])*zeta[2]);
3724 
3725  for(Int_t i=0;i<3;++i)
3726  Vtx.fP[i] -= k0[i]*zeta[0] + k1[i]*zeta[1] + k2[i]*zeta[2];
3727 
3728  //* New covariance matrix C -= K*(mCH')'
3729 
3730  for(Int_t i=0, k=0;i<3;++i){
3731  for(Int_t j=0;j<=i;++j,++k)
3732  Vtx.fC[k] += k0[i]*mCHt0[j] + k1[i]*mCHt1[j] + k2[i]*mCHt2[j];
3733  }
3734 
3735  //* Calculate Chi^2
3736 
3737  Vtx.fNDF -= 2;
3738  Vtx.fChi2 -= dChi2;
3739 }
3740 
3742 {
3748  float_v m[8];
3749  float_v mV[36];
3750 
3751  float_v D[3][3];
3752  Vtx.GetMeasurement( *this, m, mV, D );
3753 
3754  float_v mS[6] = { mV[0] - Vtx.fC[0] + (D[0][0] + D[0][0]),
3755  mV[1] - Vtx.fC[1] + (D[1][0] + D[0][1]), mV[2] - Vtx.fC[2] + (D[1][1] + D[1][1]),
3756  mV[3] - Vtx.fC[3] + (D[2][0] + D[0][2]), mV[4] - Vtx.fC[4] + (D[1][2] + D[2][1]), mV[5] - Vtx.fC[5] + (D[2][2] + D[2][2]) };
3757  InvertCholetsky3(mS);
3758 
3759  //* Residual (measured - estimated)
3760 
3761  float_v zeta[3] = { m[0]-Vtx.fP[0], m[1]-Vtx.fP[1], m[2]-Vtx.fP[2] };
3762 
3763  //* CHt = CH' - D'
3764 
3765  float_v mCHt0[7], mCHt1[7], mCHt2[7];
3766 
3767  mCHt0[0]=mV[ 0] ; mCHt1[0]=mV[ 1] ; mCHt2[0]=mV[ 3] ;
3768  mCHt0[1]=mV[ 1] ; mCHt1[1]=mV[ 2] ; mCHt2[1]=mV[ 4] ;
3769  mCHt0[2]=mV[ 3] ; mCHt1[2]=mV[ 4] ; mCHt2[2]=mV[ 5] ;
3770  mCHt0[3]=Vtx.fC[ 6]-mV[ 6]; mCHt1[3]=Vtx.fC[ 7]-mV[ 7]; mCHt2[3]=Vtx.fC[ 8]-mV[ 8];
3771  mCHt0[4]=Vtx.fC[10]-mV[10]; mCHt1[4]=Vtx.fC[11]-mV[11]; mCHt2[4]=Vtx.fC[12]-mV[12];
3772  mCHt0[5]=Vtx.fC[15]-mV[15]; mCHt1[5]=Vtx.fC[16]-mV[16]; mCHt2[5]=Vtx.fC[17]-mV[17];
3773  mCHt0[6]=Vtx.fC[21]-mV[21]; mCHt1[6]=Vtx.fC[22]-mV[22]; mCHt2[6]=Vtx.fC[23]-mV[23];
3774 
3775  //* Kalman gain K = mCH'*S
3776 
3777  float_v k0[7], k1[7], k2[7];
3778 
3779  for(Int_t i=0;i<7;++i){
3780  k0[i] = mCHt0[i]*mS[0] + mCHt1[i]*mS[1] + mCHt2[i]*mS[3];
3781  k1[i] = mCHt0[i]*mS[1] + mCHt1[i]*mS[2] + mCHt2[i]*mS[4];
3782  k2[i] = mCHt0[i]*mS[3] + mCHt1[i]*mS[4] + mCHt2[i]*mS[5];
3783  }
3784 
3785  //* Add the daughter momentum to the particle momentum
3786 
3787  Vtx.fP[ 3] -= m[ 3];
3788  Vtx.fP[ 4] -= m[ 4];
3789  Vtx.fP[ 5] -= m[ 5];
3790  Vtx.fP[ 6] -= m[ 6];
3791 
3792  Vtx.fC[ 9] -= mV[ 9];
3793  Vtx.fC[13] -= mV[13];
3794  Vtx.fC[14] -= mV[14];
3795  Vtx.fC[18] -= mV[18];
3796  Vtx.fC[19] -= mV[19];
3797  Vtx.fC[20] -= mV[20];
3798  Vtx.fC[24] -= mV[24];
3799  Vtx.fC[25] -= mV[25];
3800  Vtx.fC[26] -= mV[26];
3801  Vtx.fC[27] -= mV[27];
3802 
3803  //* New estimation of the vertex position r += K*zeta
3804 
3805  for(Int_t i=0;i<3;++i)
3806  Vtx.fP[i] = m[i] - (k0[i]*zeta[0] + k1[i]*zeta[1] + k2[i]*zeta[2]);
3807  for(Int_t i=3;i<7;++i)
3808  Vtx.fP[i] = Vtx.fP[i] - (k0[i]*zeta[0] + k1[i]*zeta[1] + k2[i]*zeta[2]);
3809 
3810  //* New covariance matrix C -= K*(mCH')'
3811 
3812  float_v ffC[28] = {-mV[ 0],
3813  -mV[ 1], -mV[ 2],
3814  -mV[ 3], -mV[ 4], -mV[ 5],
3815  mV[ 6], mV[ 7], mV[ 8], Vtx.fC[ 9],
3816  mV[10], mV[11], mV[12], Vtx.fC[13], Vtx.fC[14],
3817  mV[15], mV[16], mV[17], Vtx.fC[18], Vtx.fC[19], Vtx.fC[20],
3818  mV[21], mV[22], mV[23], Vtx.fC[24], Vtx.fC[25], Vtx.fC[26], Vtx.fC[27] };
3819 
3820  for(Int_t i=0, k=0;i<7;++i){
3821  for(Int_t j=0;j<=i;++j,++k){
3822  Vtx.fC[k] = ffC[k] + (k0[i]*mCHt0[j] + k1[i]*mCHt1[j] + k2[i]*mCHt2[j] );
3823  }
3824  }
3825 
3826  //* Calculate Chi^2
3827  Vtx.fNDF -= 2;
3828  Vtx.fQ -= GetQ();
3829  Vtx.fSFromDecay = 0;
3830  Vtx.fChi2 -= ((mS[0]*zeta[0] + mS[1]*zeta[1] + mS[3]*zeta[2])*zeta[0]
3831  + (mS[1]*zeta[0] + mS[2]*zeta[1] + mS[4]*zeta[2])*zeta[1]
3832  + (mS[3]*zeta[0] + mS[4]*zeta[1] + mS[5]*zeta[2])*zeta[2]);
3833 }
3834 
3835 void KFParticleBaseSIMD::TransportLine( float_v dS, const float_v* dsdr, float_v P[], float_v C[], float_v* dsdr1, float_v* F, float_v* F1 ) const
3836 {
3858  float_v mJ[8][8];
3859  for( Int_t i=0; i<8; i++ ) for( Int_t j=0; j<8; j++) mJ[i][j]=0;
3860 
3861  mJ[0][0]=1; mJ[0][1]=0; mJ[0][2]=0; mJ[0][3]=dS; mJ[0][4]=0; mJ[0][5]=0;
3862  mJ[1][0]=0; mJ[1][1]=1; mJ[1][2]=0; mJ[1][3]=0; mJ[1][4]=dS; mJ[1][5]=0;
3863  mJ[2][0]=0; mJ[2][1]=0; mJ[2][2]=1; mJ[2][3]=0; mJ[2][4]=0; mJ[2][5]=dS;
3864 
3865  mJ[3][0]=0; mJ[3][1]=0; mJ[3][2]=0; mJ[3][3]=1; mJ[3][4]=0; mJ[3][5]=0;
3866  mJ[4][0]=0; mJ[4][1]=0; mJ[4][2]=0; mJ[4][3]=0; mJ[4][4]=1; mJ[4][5]=0;
3867  mJ[5][0]=0; mJ[5][1]=0; mJ[5][2]=0; mJ[5][3]=0; mJ[5][4]=0; mJ[5][5]=1;
3868  mJ[6][6] = mJ[7][7] = 1;
3869 
3870  float_v px = fP[3], py = fP[4], pz = fP[5];
3871 
3872  P[0] = fP[0] + dS*fP[3];
3873  P[1] = fP[1] + dS*fP[4];
3874  P[2] = fP[2] + dS*fP[5];
3875  P[3] = fP[3];
3876  P[4] = fP[4];
3877  P[5] = fP[5];
3878  P[6] = fP[6];
3879  P[7] = fP[7];
3880 
3881  float_v mJds[6][6];
3882  for( Int_t i=0; i<6; i++ ) for( Int_t j=0; j<6; j++) mJds[i][j]=0;
3883 
3884  mJds[0][3]= 1;
3885  mJds[1][4]= 1;
3886  mJds[2][5]= 1;
3887 
3888  for(int i1=0; i1<6; i1++)
3889  for(int i2=0; i2<6; i2++)
3890  mJ[i1][i2] += mJds[i1][3]*px*dsdr[i2] + mJds[i1][4]*py*dsdr[i2] + mJds[i1][5]*pz*dsdr[i2];
3891  MultQSQt( mJ[0], fC, C, 8);
3892 
3893  if(F)
3894  {
3895  for(int i=0; i<6; i++)
3896  for(int j=0; j<6; j++)
3897  F[i*6+j] = mJ[i][j];
3898 
3899  for(int i1=0; i1<6; i1++)
3900  for(int i2=0; i2<6; i2++)
3901  F1[i1*6 + i2] = mJds[i1][3]*px*dsdr1[i2] + mJds[i1][4]*py*dsdr1[i2] + mJds[i1][5]*pz*dsdr1[i2];
3902  }
3903 }
3904 
3905 void KFParticleBaseSIMD::TransportLine( float_v dS, float_v P[] ) const
3906 {
3916  P[0] = fP[0] + dS*fP[3];
3917  P[1] = fP[1] + dS*fP[4];
3918  P[2] = fP[2] + dS*fP[5];
3919  P[3] = fP[3];
3920  P[4] = fP[4];
3921  P[5] = fP[5];
3922  P[6] = fP[6];
3923  P[7] = fP[7];
3924 }
3925 
3927 {
3945  float_v alpha = 0.f, qt = 0.f;
3946  float_v spx = positive.GetPx() + negative.GetPx();
3947  float_v spy = positive.GetPy() + negative.GetPy();
3948  float_v spz = positive.GetPz() + negative.GetPz();
3949  float_v sp = sqrt(spx*spx + spy*spy + spz*spz);
3950  float_m mask = float_m( abs(sp) < float_v(1.e-10f));
3951  float_v pn, pp, pln, plp;
3952 
3953  pn = sqrt(negative.GetPx()*negative.GetPx() + negative.GetPy()*negative.GetPy() + negative.GetPz()*negative.GetPz());
3954  pp = sqrt(positive.GetPx()*positive.GetPx() + positive.GetPy()*positive.GetPy() + positive.GetPz()*positive.GetPz());
3955  pln = (negative.GetPx()*spx+negative.GetPy()*spy+negative.GetPz()*spz)/sp;
3956  plp = (positive.GetPx()*spx+positive.GetPy()*spy+positive.GetPz()*spz)/sp;
3957 
3958  mask = float_m(mask & float_m( abs(pn) < float_v(1.E-10f)));
3959  float_v ptm = (1.f-((pln/pn)*(pln/pn)));
3960  qt(ptm >= 0.f) = pn*sqrt(ptm);
3961  alpha = (plp-pln)/(plp+pln);
3962 
3963  QtAlfa[0](mask) = qt;
3964  QtAlfa[1](mask) = alpha;
3965 }
3966 
3967 void KFParticleBaseSIMD::RotateXY(float_v angle, float_v Vtx[3])
3968 {
3974  // Before rotation the center of the coordinat system should be moved to the vertex position; move back after rotation
3975  X() = X() - Vtx[0];
3976  Y() = Y() - Vtx[1];
3977  Z() = Z() - Vtx[2];
3978 
3979  // Rotate the kf particle
3980  float_v s = sin(angle);
3981  float_v c = cos(angle);
3982 
3983  float_v mA[8][ 8];
3984  for( Int_t i=0; i<8; i++ ){
3985  for( Int_t j=0; j<8; j++){
3986  mA[i][j] = 0;
3987  }
3988  }
3989  for( int i=0; i<8; i++ ){
3990  mA[i][i] = 1;
3991  }
3992  mA[0][0] = c; mA[0][1] = s;
3993  mA[1][0] = -s; mA[1][1] = c;
3994  mA[3][3] = c; mA[3][4] = s;
3995  mA[4][3] = -s; mA[4][4] = c;
3996 
3997  float_v mAC[8][8];
3998  float_v mAp[8];
3999 
4000  for( Int_t i=0; i<8; i++ ){
4001  mAp[i] = 0;
4002  for( Int_t k=0; k<8; k++){
4003  mAp[i]+=mA[i][k] * fP[k];
4004  }
4005  }
4006 
4007  for( Int_t i=0; i<8; i++){
4008  fP[i] = mAp[i];
4009  }
4010 
4011  for( Int_t i=0; i<8; i++ ){
4012  for( Int_t j=0; j<8; j++ ){
4013  mAC[i][j] = 0;
4014  for( Int_t k=0; k<8; k++ ){
4015  mAC[i][j]+= mA[i][k] * GetCovariance(k,j);
4016  }
4017  }
4018  }
4019 
4020  for( Int_t i=0; i<8; i++ ){
4021  for( Int_t j=0; j<=i; j++ ){
4022  float_v xx = 0.f;
4023  for( Int_t k=0; k<8; k++){
4024  xx+= mAC[i][k]*mA[j][k];
4025  }
4026  Covariance(i,j) = xx;
4027  }
4028  }
4029 
4030  X() = GetX() + Vtx[0];
4031  Y() = GetY() + Vtx[1];
4032  Z() = GetZ() + Vtx[2];
4033 }
4034 
4036 {
4041  float_v d[3], uud, u[3][3];
4042  for(int i=0; i<3; i++)
4043  {
4044  d[i]=0.f;
4045  for(int j=0; j<3; j++)
4046  u[i][j]=0.f;
4047  }
4048 
4049  for(int i=0; i<3 ; i++)
4050  {
4051  uud=0.f;
4052  for(int j=0; j<i; j++)
4053  uud += u[j][i]*u[j][i]*d[j];
4054  uud = a[i*(i+3)/2] - uud;
4055 
4056  uud(abs(uud)<1.e-8f) = 1.e-8f;
4057  d[i] = uud/abs(uud);
4058  u[i][i] = sqrt(abs(uud));
4059 
4060  for(int j=i+1; j<3; j++)
4061  {
4062  uud = 0.f;
4063  for(int k=0; k<i; k++)
4064  uud += u[k][i]*u[k][j]*d[k];
4065  uud = a[j*(j+1)/2+i] - uud;
4066  u[i][j] = d[i]/u[i][i]*uud;
4067  }
4068  }
4069 
4070  float_v u1[3];
4071 
4072  for(int i=0; i<3; i++)
4073  {
4074  u1[i] = u[i][i];
4075  u[i][i] = 1.f/u[i][i];
4076  }
4077  for(int i=0; i<2; i++)
4078  {
4079  u[i][i+1] = - u[i][i+1]*u[i][i]*u[i+1][i+1];
4080  }
4081  for(int i=0; i<1; i++)
4082  {
4083  u[i][i+2] = u[i][i+1]*u1[i+1]*u[i+1][i+2]-u[i][i+2]*u[i][i]*u[i+2][i+2];
4084  }
4085 
4086  for(int i=0; i<3; i++)
4087  a[i+3] = u[i][2]*u[2][2]*d[2];
4088  for(int i=0; i<2; i++)
4089  a[i+1] = u[i][1]*u[1][1]*d[1] + u[i][2]*u[1][2]*d[2];
4090  a[0] = u[0][0]*u[0][0]*d[0] + u[0][1]*u[0][1]*d[1] + u[0][2]*u[0][2]*d[2];
4091 }
4092 
4093 void KFParticleBaseSIMD::MultQSQt( const float_v Q[], const float_v S[], float_v SOut[], const int kN )
4094 {
4102  float_v* mA = new float_v[kN*kN];
4103 
4104  for( Int_t i=0, ij=0; i<kN; i++ ){
4105  for( Int_t j=0; j<kN; j++, ++ij ){
4106  mA[ij] = 0 ;
4107  for( Int_t k=0; k<kN; ++k ) mA[ij]+= S[( k<=i ) ? i*(i+1)/2+k :k*(k+1)/2+i] * Q[ j*kN+k];
4108  }
4109  }
4110 
4111  for( Int_t i=0; i<kN; i++ ){
4112  for( Int_t j=0; j<=i; j++ ){
4113  Int_t ij = ( j<=i ) ? i*(i+1)/2+j :j*(j+1)/2+i;
4114  SOut[ij] = 0 ;
4115  for( Int_t k=0; k<kN; k++ ) SOut[ij] += Q[ i*kN+k ] * mA[ k*kN+j ];
4116  }
4117  }
4118 
4119  if(mA) delete[] mA;
4120 }
4121 
4122 
4123 // 72-charachters line to define the printer border
4124 //3456789012345678901234567890123456789012345678901234567890123456789012
4125