Analysis Software
Documentation for sPHENIX simulation software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
hash_root.py
Go to the documentation of this file, or view the newest version of hash_root.py in the sPHENIX GitHub repository.
1 #!/usr/bin/env python3
2 import hashlib
3 from pathlib import Path
4 import sys
5 from typing import Optional
6 import argparse
7 
8 import uproot
9 import numpy as np
10 import awkward as ak
11 
12 
def hash_root_file(path: Path, ordering_invariant: bool = True) -> str:
    """Return a SHA-256 hex digest of the TTree content of a ROOT file.

    Every key of the file (cycle numbers stripped, in sorted order)
    contributes its name to the digest. Keys that resolve to a
    ``uproot.TTree`` additionally contribute the content of all their
    branches, visited in sorted branch-name order.

    Args:
        path: Location of the ROOT file to hash.
        ordering_invariant: If True (default), each entry (row) of a tree is
            hashed on its own and the per-entry digests are sorted before
            being combined, so the result does not depend on the order of
            the entries. If False, each branch is flattened and hashed in
            stored order, which is faster but sensitive to entry order.

    Returns:
        The hexadecimal SHA-256 digest as a string.
    """
    gh = hashlib.sha256()

    # Use the file as a context manager so the handle is released once all
    # arrays have been read, instead of leaking it until garbage collection.
    with uproot.open(path) as rf:
        for tree_name in sorted(rf.keys(cycle=False)):
            # The key name is mixed in before the type check on purpose:
            # non-tree objects still contribute their name to the digest.
            gh.update(tree_name.encode("utf8"))

            try:
                tree = rf[tree_name]
                if not isinstance(tree, uproot.TTree):
                    continue
            except NotImplementedError:
                # Objects uproot cannot interpret are skipped (name only).
                continue

            keys = sorted(tree.keys())
            branches = tree.arrays(library="ak")

            if not ordering_invariant:
                # Order-sensitive mode: hash each branch as one flat buffer.
                h = hashlib.sha256()
                for name in keys:
                    h.update(name.encode("utf8"))
                    arr = branches[name]
                    arr = ak.flatten(arr, axis=None)
                    arr = np.array(arr)
                    h.update(arr.tobytes())
                gh.update(h.digest())
            else:
                # Order-invariant mode: one MD5 digest per entry, sorted
                # afterwards so that entry order does not matter.
                row_digests = []
                for row in zip(*[branches[b] for b in keys]):
                    h = hashlib.md5()
                    for obj in row:
                        if isinstance(obj, ak.highlevel.Array):
                            if obj.ndim == 1:
                                h.update(ak.to_numpy(obj).tobytes())
                            else:
                                arr = ak.to_numpy(ak.flatten(obj, axis=None))
                                h.update(arr.tobytes())
                        else:
                            h.update(np.array([obj]).tobytes())
                    row_digests.append(h.digest())

                # Append all digests in a single call rather than once per
                # entry (which re-allocates the array every time). Starting
                # from the same empty array keeps NumPy's dtype promotion,
                # and therefore the bytes fed into the hash, the same as
                # with per-entry appends.
                items = np.append(np.array([]), row_digests)
                items.sort()

                h = hashlib.sha256()
                h.update("".join(keys).encode("utf8"))
                h.update(items.tobytes())

                gh.update(h.digest())

    return gh.hexdigest()
66 
67 
if "__main__" == __name__:
    # Command-line entry point: print the hash of a single ROOT file.
    p = argparse.ArgumentParser(
        description="Calculate a hash of the numeric content of a root file"
    )

    p.add_argument(
        "input_file", type=Path, help="The input ROOT file to calculate a hash for"
    )
    p.add_argument(
        "--no-ordering-invariant",
        "-n",
        action="store_true",
        help="Calculate a hash that is not invariant under reordering of entries? (faster than invariant)",
    )

    args = p.parse_args()

    # The flag is phrased negatively on the command line, so it is inverted
    # when passed on as ``ordering_invariant``.
    print(
        hash_root_file(
            path=args.input_file,
            ordering_invariant=not args.no_ordering_invariant,
        )
    )