Analysis Software
Documentation for sPHENIX simulation software
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CsvMeasurementReader.cpp
Go to the documentation of this file. Or view the newest version in sPHENIX GitHub for file CsvMeasurementReader.cpp
1 // This file is part of the Acts project.
2 //
3 // Copyright (C) 2021 CERN for the benefit of the Acts project
4 //
5 // This Source Code Form is subject to the terms of the Mozilla Public
6 // License, v. 2.0. If a copy of the MPL was not distributed with this
7 // file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 
10 
22 
23 #include <algorithm>
24 #include <array>
25 #include <cstdint>
26 #include <functional>
27 #include <iterator>
28 #include <list>
29 #include <stdexcept>
30 #include <vector>
31 
32 #include <dfe/dfe_io_dsv.hpp>
33 
34 #include "CsvOutputData.hpp"
35 
39  : m_cfg(config),
40  m_eventsRange(
41  determineEventFilesRange(m_cfg.inputDir, "measurements.csv")),
42  m_logger(Acts::getDefaultLogger("CsvMeasurementReader", level)) {
43  if (m_cfg.outputMeasurements.empty()) {
44  throw std::invalid_argument("Missing measurement output collection");
45  }
46 
54 
55  // Check if event ranges match (should also catch missing files)
56  auto checkRange = [&](const std::string& fileStem) {
57  const auto hitmapRange = determineEventFilesRange(m_cfg.inputDir, fileStem);
58  if (hitmapRange.first > m_eventsRange.first or
59  hitmapRange.second < m_eventsRange.second) {
60  throw std::runtime_error("event range mismatch for 'event**-" + fileStem +
61  "'");
62  }
63  };
64 
65  checkRange("measurement-simhit-map.csv");
66  if (not m_cfg.outputClusters.empty()) {
67  checkRange("cells.csv");
68  }
69 }
70 
72  const {
73  return "CsvMeasurementReader";
74 }
75 
77  const {
78  return m_eventsRange;
79 }
80 
81 namespace {
82 struct CompareHitId {
83  // support transparent comparison between identifiers and full objects
84  using is_transparent = void;
85  template <typename T>
86  constexpr bool operator()(const T& left, const T& right) const {
87  return left.hit_id < right.hit_id;
88  }
89  template <typename T>
90  constexpr bool operator()(uint64_t left_id, const T& right) const {
91  return left_id < right.hit_id;
92  }
93  template <typename T>
94  constexpr bool operator()(const T& left, uint64_t right_id) const {
95  return left.hit_id < right_id;
96  }
97 };
98 
99 struct CompareGeometryId {
100  bool operator()(const ActsExamples::MeasurementData& left,
101  const ActsExamples::MeasurementData& right) const {
102  return left.geometry_id < right.geometry_id;
103  }
104 };
105 
106 template <typename Data>
107 inline std::vector<Data> readEverything(
108  const std::string& inputDir, const std::string& filename,
109  const std::vector<std::string>& optionalColumns, size_t event) {
110  std::string path = ActsExamples::perEventFilepath(inputDir, filename, event);
111  dfe::NamedTupleCsvReader<Data> reader(path, optionalColumns);
112 
113  std::vector<Data> everything;
114  Data one;
115  while (reader.read(one)) {
116  everything.push_back(one);
117  }
118 
119  return everything;
120 }
121 
122 std::vector<ActsExamples::MeasurementData> readMeasurementsByGeometryId(
123  const std::string& inputDir, size_t event) {
124  // geometry_id and t are optional columns
125  auto measurements = readEverything<ActsExamples::MeasurementData>(
126  inputDir, "measurements.csv", {"geometry_id", "t"}, event);
127  // sort same way they will be sorted in the output container
128  std::sort(measurements.begin(), measurements.end(), CompareGeometryId{});
129  return measurements;
130 }
131 
132 ActsExamples::ClusterContainer makeClusters(
133  const std::unordered_multimap<std::size_t, ActsExamples::CellData>&
134  cellDataMap,
135  std::size_t nMeasurements) {
136  using namespace ActsExamples;
138 
139  for (auto index = 0ul; index < nMeasurements; ++index) {
140  auto [begin, end] = cellDataMap.equal_range(index);
141 
142  // Fill the channels with the iterators
143  Cluster cluster;
144  cluster.channels.reserve(std::distance(begin, end));
145 
146  for (auto it = begin; it != end; ++it) {
147  const auto& cellData = it->second;
148  ActsFatras::Channelizer::Segment2D dummySegment = {Acts::Vector2::Zero(),
149  Acts::Vector2::Zero()};
150 
152  static_cast<unsigned int>(cellData.channel0),
153  static_cast<unsigned int>(cellData.channel1)};
154 
155  cluster.channels.emplace_back(bin, dummySegment, cellData.value);
156  }
157 
158  // update the iterator
159 
160  // Compute cluster size
161  if (not cluster.channels.empty()) {
162  auto compareX = [](const auto& a, const auto& b) {
163  return a.bin[0] < b.bin[0];
164  };
165  auto compareY = [](const auto& a, const auto& b) {
166  return a.bin[1] < b.bin[1];
167  };
168 
169  auto [minX, maxX] = std::minmax_element(cluster.channels.begin(),
170  cluster.channels.end(), compareX);
171  auto [minY, maxY] = std::minmax_element(cluster.channels.begin(),
172  cluster.channels.end(), compareY);
173  cluster.sizeLoc0 = 1 + maxX->bin[0] - minX->bin[0];
174  cluster.sizeLoc1 = 1 + maxY->bin[1] - minY->bin[1];
175  }
176 
177  clusters.push_back(cluster);
178  }
179  return clusters;
180 }
181 
182 } // namespace
183 
185  const ActsExamples::AlgorithmContext& ctx) {
186  // hit_id in the files is not required to be neither continuous nor
187  // monotonic. internally, we want continuous indices within [0,#hits)
188  // to simplify data handling. to be able to perform this mapping we first
189  // read all data into memory before converting to the internal event data
190  // types.
191  //
192  // Note: the cell data is optional
193  auto measurementData =
194  readMeasurementsByGeometryId(m_cfg.inputDir, ctx.eventNumber);
195 
196  // Prepare containers for the hit data using the framework event data types
197  GeometryIdMultimap<Measurement> orderedMeasurements;
198  IndexMultimap<Index> measurementSimHitsMap;
199  IndexSourceLinkContainer sourceLinks;
200  // need list here for stable addresses
201  std::list<IndexSourceLink> sourceLinkStorage;
202  orderedMeasurements.reserve(measurementData.size());
203  // Safe long as we have single particle to sim hit association
204  measurementSimHitsMap.reserve(measurementData.size());
205  sourceLinks.reserve(measurementData.size());
206 
207  auto measurementSimHitLinkData =
208  readEverything<ActsExamples::MeasurementSimHitLink>(
209  m_cfg.inputDir, "measurement-simhit-map.csv", {}, ctx.eventNumber);
210  for (auto mshLink : measurementSimHitLinkData) {
211  measurementSimHitsMap.emplace_hint(measurementSimHitsMap.end(),
212  mshLink.measurement_id, mshLink.hit_id);
213  }
214 
215  for (const MeasurementData& m : measurementData) {
216  Acts::GeometryIdentifier geoId = m.geometry_id;
217 
218  // Create the measurement
219  DigitizedParameters dParameters;
220  for (unsigned int ipar = 0;
221  ipar < static_cast<unsigned int>(Acts::eBoundSize); ++ipar) {
222  if (((m.local_key) & (1 << (ipar + 1))) != 0) {
223  dParameters.indices.push_back(static_cast<Acts::BoundIndices>(ipar));
224  switch (ipar) {
225  case static_cast<unsigned int>(Acts::eBoundLoc0): {
226  dParameters.values.push_back(m.local0);
227  dParameters.variances.push_back(m.var_local0);
228  }; break;
229  case static_cast<unsigned int>(Acts::eBoundLoc1): {
230  dParameters.values.push_back(m.local1);
231  dParameters.variances.push_back(m.var_local1);
232  }; break;
233  case static_cast<unsigned int>(Acts::eBoundPhi): {
234  dParameters.values.push_back(m.phi);
235  dParameters.variances.push_back(m.var_phi);
236  }; break;
237  case static_cast<unsigned int>(Acts::eBoundTheta): {
238  dParameters.values.push_back(m.theta);
239  dParameters.variances.push_back(m.var_theta);
240  }; break;
241  case static_cast<unsigned int>(Acts::eBoundTime): {
242  dParameters.values.push_back(m.time);
243  dParameters.variances.push_back(m.var_time);
244  }; break;
245  default:
246  break;
247  }
248  }
249  }
250 
251  // The measurement container is unordered and the index under which
252  // the measurement will be stored is known before adding it.
253  const Index index = orderedMeasurements.size();
254  IndexSourceLink& sourceLink = sourceLinkStorage.emplace_back(geoId, index);
255  auto measurement = createMeasurement(dParameters, sourceLink);
256 
257  // Due to the previous sorting of the raw hit data by geometry id, new
258  // measurements should always end up at the end of the container. previous
259  // elements were not touched; cluster indices remain stable and can
260  // be used to identify the m.
261  auto inserted = orderedMeasurements.emplace_hint(
262  orderedMeasurements.end(), geoId, std::move(measurement));
263  if (std::next(inserted) != orderedMeasurements.end()) {
264  ACTS_FATAL("Something went horribly wrong with the hit sorting");
265  return ProcessCode::ABORT;
266  }
267 
268  sourceLinks.insert(sourceLinks.end(), std::cref(sourceLink));
269  }
270 
271  MeasurementContainer measurements;
272  for (auto& [_, meas] : orderedMeasurements) {
273  measurements.emplace_back(std::move(meas));
274  }
275 
276  // Generate measurement-particles-map
277  if (m_inputHits.isInitialized() &&
278  m_outputMeasurementParticlesMap.isInitialized()) {
279  const auto hits = m_inputHits(ctx);
280 
282 
283  for (const auto& [measIdx, hitIdx] : measurementSimHitsMap) {
284  const auto& hit = hits.nth(hitIdx);
285  outputMap.emplace(measIdx, hit->particleId());
286  }
287 
288  m_outputMeasurementParticlesMap(ctx, std::move(outputMap));
289  }
290 
291  // Write the data to the EventStore
292  m_outputMeasurements(ctx, std::move(measurements));
293  m_outputMeasurementSimHitsMap(ctx, std::move(measurementSimHitsMap));
294  m_outputSourceLinks(ctx, std::move(sourceLinks));
295 
297  // Cluster information //
299 
300  if (m_cfg.outputClusters.empty()) {
302  }
303 
304  std::vector<ActsExamples::CellData> cellData;
305 
306  // This allows seamless import of files created with an older version where
307  // the measurement_id-column is still named hit_id
308  try {
309  cellData = readEverything<ActsExamples::CellData>(
310  m_cfg.inputDir, "cells.csv", {"timestamp"}, ctx.eventNumber);
311  } catch (std::runtime_error& e) {
312  // Rethrow exception if it is not about the measurement_id-column
313  if (std::string(e.what()).find("Missing header column 'measurement_id'") ==
314  std::string::npos) {
315  throw;
316  }
317 
318  const auto oldCellData = readEverything<ActsExamples::CellDataLegacy>(
319  m_cfg.inputDir, "cells.csv", {"timestamp"}, ctx.eventNumber);
320 
321  auto fromLegacy = [](const CellDataLegacy& old) {
322  return CellData{old.geometry_id, old.hit_id, old.channel0,
323  old.channel1, old.timestamp, old.value};
324  };
325 
326  cellData.resize(oldCellData.size());
327  std::transform(oldCellData.begin(), oldCellData.end(), cellData.begin(),
328  fromLegacy);
329  }
330 
331  std::unordered_multimap<std::size_t, ActsExamples::CellData> cellDataMap;
332  for (const auto& cd : cellData) {
333  cellDataMap.emplace(cd.measurement_id, cd);
334  }
335 
336  auto clusters = makeClusters(cellDataMap, orderedMeasurements.size());
337  m_outputClusters(ctx, std::move(clusters));
338 
340 }