Analysis Software
Documentation for sPHENIX simulation software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
compareRootFiles.hpp
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file compareRootFiles.hpp
1 // This file is part of the Acts project.
2 //
3 // Copyright (C) 2017 CERN for the benefit of the Acts project
4 //
5 // This Source Code Form is subject to the terms of the Mozilla Public
6 // License, v. 2.0. If a copy of the MPL was not distributed with this
7 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 
9 #pragma once
10 
11 #include <cmath>
12 #include <exception>
13 #include <functional>
14 #include <sstream>
15 #include <vector>
16 
17 #include "TDictionary.h"
18 #include "TTreeReaderValue.h"
19 
20 // Pairs of elements of the same type
21 template <typename T>
22 using HomogeneousPair = std::pair<T, T>;
23 
24 // === TYPE ERASURE FOR CONCRETE DATA ===
25 
26 // Minimal type-erasure wrapper for std::vector<T>. This will be used as a
27 // workaround to compensate for the absence of C++17's std::any in Cling.
28 class AnyVector {
29  public:
30  // Create a type-erased vector<T>, using proposed constructor arguments.
31  // Returns a pair containing the type-erased vector and a pointer to the
32  // underlying concrete vector.
33  template <typename T, typename... Args>
34  static std::pair<AnyVector, std::vector<T>*> create(Args&&... args) {
35  std::vector<T>* vector = new std::vector<T>(std::forward<Args>(args)...);
36  std::function<void()> deleter = [vector] { delete vector; };
37  return std::make_pair(
38  AnyVector{static_cast<void*>(vector), std::move(deleter)}, vector);
39  }
40 
41  // Default-construct a null type-erased vector
42  AnyVector() = default;
43 
44  // Move-construct a type-erased vector
46  : m_vector{other.m_vector}, m_deleter{std::move(other.m_deleter)} {
47  other.m_vector = nullptr;
48  }
49 
50  // Move-assign a type-erased vector
51  AnyVector& operator=(AnyVector&& other) {
52  if (&other != this) {
53  m_vector = other.m_vector;
54  m_deleter = std::move(other.m_deleter);
55  other.m_vector = nullptr;
56  }
57  return *this;
58  }
59 
60  // Forbid copies of type-erased vectors
61  AnyVector(const AnyVector&) = delete;
62  AnyVector& operator=(const AnyVector&) = delete;
63 
64  // Delete a type-erased vector
66  if (m_vector != nullptr) {
67  m_deleter();
68  }
69  }
70 
71  private:
72  // Construct a type-erased vector from a concrete vector
73  AnyVector(void* vector, std::function<void()>&& deleter)
74  : m_vector{vector}, m_deleter{std::move(deleter)} {}
75 
76  void* m_vector{nullptr}; // Casted std::vector<T>*
77  std::function<void()> m_deleter; // Deletes the underlying vector
78 };
79 
80 // === GENERIC DATA ORDERING ===
81 
82 // We want to check, in a single operation, how two pieces of data are ordered
83 enum class Ordering { SMALLER, EQUAL, GREATER };
84 
85 // In general, any type which implements comparison operators that behave as a
86 // mathematical total order can use this comparison function...
87 template <typename T>
88 Ordering compare(const T& x, const T& y) {
89  if (x < y) {
90  return Ordering::SMALLER;
91  } else if (x == y) {
92  return Ordering::EQUAL;
93  } else {
94  return Ordering::GREATER;
95  }
96 }
97 
98 // ...but we'll want to tweak that a little for floats, to handle NaNs better...
99 template <typename T>
100 Ordering compareFloat(const T& x, const T& y) {
101  if (std::isless(x, y)) {
102  return Ordering::SMALLER;
103  } else if (std::isgreater(x, y)) {
104  return Ordering::GREATER;
105  } else {
106  return Ordering::EQUAL;
107  }
108 }
109 
110 template <>
111 Ordering compare(const float& x, const float& y) {
112  return compareFloat(x, y);
113 }
114 
115 template <>
116 Ordering compare(const double& x, const double& y) {
117  return compareFloat(x, y);
118 }
119 
120 // ...and for vectors, where the default lexicographic comparison cannot
121 // efficiently tell all of what we want in a single vector iteration pass.
122 template <typename U>
123 Ordering compare(const std::vector<U>& v1, const std::vector<U>& v2) {
124  // First try to order by size...
125  if (v1.size() < v2.size()) {
126  return Ordering::SMALLER;
127  } else if (v1.size() > v2.size()) {
128  return Ordering::GREATER;
129  }
130  // ...if the size is identical...
131  else {
132  // ...then try to order by contents of increasing index...
133  for (std::size_t i = 0; i < v1.size(); ++i) {
134  if (v1[i] < v2[i]) {
135  return Ordering::SMALLER;
136  } else if (v1[i] > v2[i]) {
137  return Ordering::GREATER;
138  }
139  }
140 
141  // ...and declare the vectors equal if the contents are equal
142  return Ordering::EQUAL;
143  }
144 }
145 
146 // === GENERIC SORTING MECHANISM ===
147 
148 // The following functions are generic implementations of sorting algorithms,
149 // which require only a comparison operator, a swapping operator, and an
150 // inclusive range of indices to be sorted in order to operate
151 using IndexComparator = std::function<Ordering(std::size_t, std::size_t)>;
152 using IndexSwapper = std::function<void(std::size_t, std::size_t)>;
153 
154 // Selection sort has pertty bad asymptotic scaling, but it is non-recursive
155 // and in-place, which makes it a good choice for smaller inputs
156 void selectionSort(const std::size_t firstIndex, const std::size_t lastIndex,
157  const IndexComparator& compare, const IndexSwapper& swap) {
158  using namespace std;
159  for (size_t targetIndex = firstIndex; targetIndex < lastIndex;
160  ++targetIndex) {
161  size_t minIndex = targetIndex;
162  for (std::size_t readIndex = targetIndex + 1; readIndex <= lastIndex;
163  ++readIndex) {
164  if (compare(readIndex, minIndex) == Ordering::SMALLER) {
165  minIndex = readIndex;
166  }
167  }
168  if (minIndex != targetIndex) {
169  swap(minIndex, targetIndex);
170  }
171  }
172 }
173 
174 // Quick sort is used as the top-level sorting algorithm for our datasets
175 void quickSort(const std::size_t firstIndex, const std::size_t lastIndex,
176  const IndexComparator& compare, const IndexSwapper& swap) {
177  // We switch to non-recursive selection sort when the range becomes too small.
178  // This optimization voids the need for detection of 0- and 1-element input.
179  static const std::size_t NON_RECURSIVE_THRESHOLD = 25;
180  if (lastIndex - firstIndex < NON_RECURSIVE_THRESHOLD) {
181  selectionSort(firstIndex, lastIndex, compare, swap);
182  return;
183  }
184 
185  // We'll use the midpoint as a pivot. Later on, we can switch to more
186  // elaborate pivot selection schemes if their usefulness for our use case
187  // (pseudorandom events with thread-originated reordering) is demonstrated.
188  std::size_t pivotIndex = firstIndex + (lastIndex - firstIndex) / 2;
189 
190  // Partition the data around the pivot using Hoare's scheme
191  std::size_t splitIndex;
192  {
193  // Start with two indices one step beyond each side of the array
194  std::size_t i = firstIndex - 1;
195  std::size_t j = lastIndex + 1;
196  while (true) {
197  // Move left index forward at least once, and until an element which is
198  // greater than or equal to the pivot is detected.
199  do {
200  i = i + 1;
201  } while (compare(i, pivotIndex) == Ordering::SMALLER);
202 
203  // Move right index backward at least once, and until an element which is
204  // smaller than or equal to the pivot is detected
205  do {
206  j = j - 1;
207  } while (compare(j, pivotIndex) == Ordering::GREATER);
208 
209  // By transitivity of inequality, the element at location i is greater
210  // than or equal to the one at location j, and a swap could be required
211  if (i < j) {
212  // These elements are in the wrong order, swap them
213  swap(i, j);
214 
215  // Don't forget to keep track the pivot's index along the way, as this
216  // is currently the only way by which we can refer to the pivot element.
217  if (i == pivotIndex) {
218  pivotIndex = j;
219  } else if (j == pivotIndex) {
220  pivotIndex = i;
221  }
222  } else {
223  // If i and j went past each other, our partitioning is done
224  splitIndex = j;
225  break;
226  }
227  }
228  }
229 
230  // Now, we'll recursively sort both partitions using quicksort. We should
231  // recurse in the smaller range first, so as to leverage compiler tail call
232  // optimization if available.
233  if (splitIndex - firstIndex <= lastIndex - splitIndex - 1) {
234  quickSort(firstIndex, splitIndex, compare, swap);
235  quickSort(splitIndex + 1, lastIndex, compare, swap);
236  } else {
237  quickSort(splitIndex + 1, lastIndex, compare, swap);
238  quickSort(firstIndex, splitIndex, compare, swap);
239  }
240 }
241 
242 // === GENERIC TTREE BRANCH MANIPULATION MECHANISM ===
243 
244 // When comparing a pair of TTrees, we'll need to set up quite a few facilities
245 // for each branch. Since this setup is dependent on the branch data type, which
246 // is only known at runtime, it is quite involved, which is why we extracted it
247 // to a separate struct and its constructor.
249  // We'll keep track of the branch name for debugging purposes
251 
252  // Type-erased event data for the current branch, in both trees being compared
254 
255  // Function which loads the active event data for the current branch. This is
256  // to be performed for each branch and combined with TTreeReader-based event
257  // iteration on both trees.
258  void loadCurrentEvent() { (*m_eventLoaderPtr)(); }
259 
260  // Functors which compare two events within a given tree and order them
261  // with respect to one another, and which swap two events. By combining such
262  // functionality for each branch, a global tree order can be produced.
264 
265  // Functor which compares the current event data in *both* trees and tells
266  // whether it is identical. The comparison is order-sensitive, so events
267  // should previously have been sorted in a canonical order in both trees.
268  // By combining the results for each branch, global tree equality is defined.
269  using TreeComparator = std::function<bool()>;
271 
272  // Functor which dumps the event data for the active event side by side, in
273  // two columns. This enables manual comparison during debugging.
274  std::function<void()> dumpEventData;
275 
276  // General metadata about the tree which is identical for every branch
277  struct TreeMetadata {
278  TTreeReader& tree1Reader;
279  TTreeReader& tree2Reader;
280  const std::size_t entryCount;
281  };
282 
283  // This exception will be thrown if an unsupported branch type is encountered
284  class UnsupportedBranchType : public std::exception {};
285 
286  // Type-erased factory of branch comparison harnesses, taking ROOT run-time
287  // type information as input in order to select an appropriate C++ constructor
289  const std::string& branchName,
290  const EDataType dataType,
291  const std::string& className) {
292  switch (dataType) {
293  case kChar_t:
294  return BranchComparisonHarness::create<char>(treeMetadata, branchName);
295  case kUChar_t:
296  return BranchComparisonHarness::create<unsigned char>(treeMetadata,
297  branchName);
298  case kShort_t:
299  return BranchComparisonHarness::create<short>(treeMetadata, branchName);
300  case kUShort_t:
301  return BranchComparisonHarness::create<unsigned short>(treeMetadata,
302  branchName);
303  case kInt_t:
304  return BranchComparisonHarness::create<int>(treeMetadata, branchName);
305  case kUInt_t:
306  return BranchComparisonHarness::create<unsigned int>(treeMetadata,
307  branchName);
308  case kLong_t:
309  return BranchComparisonHarness::create<long>(treeMetadata, branchName);
310  case kULong_t:
311  return BranchComparisonHarness::create<unsigned long>(treeMetadata,
312  branchName);
313  case kULong64_t:
314  return BranchComparisonHarness::create<unsigned long long>(treeMetadata,
315  branchName);
316 
317  case kFloat_t:
318  return BranchComparisonHarness::create<float>(treeMetadata, branchName);
319  case kDouble_t:
320  return BranchComparisonHarness::create<double>(treeMetadata,
321  branchName);
322  case kBool_t:
323  return BranchComparisonHarness::create<bool>(treeMetadata, branchName);
324  case kOther_t:
325  if (className.substr(0, 6) == "vector") {
326  std::string elementType = className.substr(7, className.size() - 8);
327  return BranchComparisonHarness::createVector(treeMetadata, branchName,
328  std::move(elementType));
329  } else {
330  throw UnsupportedBranchType();
331  }
332  default:
333  throw UnsupportedBranchType();
334  }
335  }
336 
337  private:
338  // Under the hood, the top-level factory calls the following function
339  // template, parametrized with the proper C++ data type
340  template <typename T>
342  const std::string& branchName) {
343  // Our result will eventually go there
345 
346  // Save the branch name for debugging purposes
347  result.branchName = branchName;
348 
349  // Setup type-erased event data storage
350  auto tree1DataStorage = AnyVector::create<T>();
351  auto tree2DataStorage = AnyVector::create<T>();
352  result.eventData = std::make_pair(std::move(tree1DataStorage.first),
353  std::move(tree2DataStorage.first));
354  std::vector<T>& tree1Data = *tree1DataStorage.second;
355  std::vector<T>& tree2Data = *tree2DataStorage.second;
356 
357  // Use our advance knowledge of the event count to preallocate storage
358  tree1Data.reserve(treeMetadata.entryCount);
359  tree2Data.reserve(treeMetadata.entryCount);
360 
361  // Setup event data readout
362  result.m_eventLoaderPtr.reset(
363  new EventLoaderT<T>{treeMetadata.tree1Reader, treeMetadata.tree2Reader,
364  branchName, tree1Data, tree2Data});
365 
366  // Setup event comparison and swapping for each tree
367  result.sortHarness = std::make_pair(
368  std::make_pair(
369  [&tree1Data](std::size_t i, std::size_t j) -> Ordering {
370  return compare(tree1Data[i], tree1Data[j]);
371  },
372  [&tree1Data](std::size_t i, std::size_t j) {
373  std::swap(tree1Data[i], tree1Data[j]);
374  }),
375  std::make_pair(
376  [&tree2Data](std::size_t i, std::size_t j) -> Ordering {
377  return compare(tree2Data[i], tree2Data[j]);
378  },
379  [&tree2Data](std::size_t i, std::size_t j) {
380  std::swap(tree2Data[i], tree2Data[j]);
381  }));
382 
383  // Setup order-sensitive tree comparison
384  result.eventDataEqual = [&tree1Data, &tree2Data]() -> bool {
385  for (std::size_t i = 0; i < tree1Data.size(); ++i) {
386  if (compare(tree1Data[i], tree2Data[i]) != Ordering::EQUAL) {
387  return false;
388  }
389  }
390  return true;
391  };
392 
393  // Add a debugging method to dump event data
394  result.dumpEventData = [&tree1Data, &tree2Data] {
395  std::cout << "File 1 \tFile 2" << std::endl;
396  for (std::size_t i = 0; i < tree1Data.size(); ++i) {
397  std::cout << toString(tree1Data[i]) << " \t"
398  << toString(tree2Data[i]) << std::endl;
399  }
400  };
401 
402  // ...and we're good to go!
403  return std::move(result);
404  }
405 
406  // Because the people who created TTreeReaderValue could not bother to make it
407  // movable (for moving it into a lambda), or even just virtually destructible
408  // (for moving a unique_ptr into the lambda), loadEventData can only be
409  // implemented through lots of unpleasant C++98-ish boilerplate.
410  class IEventLoader {
411  public:
412  virtual ~IEventLoader() = default;
413  virtual void operator()() = 0;
414  };
415 
416  template <typename T>
417  class EventLoaderT : public IEventLoader {
418  public:
419  EventLoaderT(TTreeReader& tree1Reader, TTreeReader& tree2Reader,
420  const std::string& branchName, std::vector<T>& tree1Data,
421  std::vector<T>& tree2Data)
422  : branch1Reader{tree1Reader, branchName.c_str()},
423  branch2Reader{tree2Reader, branchName.c_str()},
424  branch1Data(tree1Data),
425  branch2Data(tree2Data) {}
426 
427  void operator()() override {
428  branch1Data.push_back(*branch1Reader);
429  branch2Data.push_back(*branch2Reader);
430  }
431 
432  private:
433  TTreeReaderValue<T> branch1Reader, branch2Reader;
434  std::vector<T>& branch1Data;
435  std::vector<T>& branch2Data;
436  };
437 
438  std::unique_ptr<IEventLoader> m_eventLoaderPtr;
439 
440  // This helper factory helps building branches associated with std::vectors
441  // of data, which are the only STL collection that we support at the moment.
443  const std::string& branchName,
444  const std::string elemType) {
445 // We support vectors of different types by switching across type (strings)
446 #define CREATE_VECTOR__HANDLE_TYPE(type_name) \
447  if (elemType == #type_name) { \
448  return BranchComparisonHarness::create<std::vector<type_name>>( \
449  treeMetadata, branchName); \
450  }
451 
452  // Handle vectors of booleans
454 
455  // Handle vectors of all standard floating-point types
457  double)
458 
459 // For integer types, we'll want to handle both signed and unsigned versions
460 #define CREATE_VECTOR__HANDLE_INTEGER_TYPE(integer_type_name) \
461  CREATE_VECTOR__HANDLE_TYPE(integer_type_name) \
462  else CREATE_VECTOR__HANDLE_TYPE(unsigned integer_type_name)
463 
464  // Handle vectors of all standard integer types
467 
468  // Throw an exception if the vector element type is not recognized
469  else throw UnsupportedBranchType();
470  }
471 
472  // This helper method provides general string conversion for all supported
473  // branch event data types.
474  template <typename T>
475  static std::string toString(const T& data) {
476  std::ostringstream oss;
477  oss << data;
478  return oss.str();
479  }
480 
481  template <typename U>
482  static std::string toString(const std::vector<U>& vector) {
483  std::ostringstream oss{"{ "};
484  for (const auto& data : vector) {
485  oss << data << " \t";
486  }
487  oss << " }";
488  return oss.str();
489  }
490 };