20 #include <type_traits>
// Empty inline-asm statement used as a compiler-level barrier: the template
// string is "", so no instruction is emitted. `volatile` keeps the statement
// from being removed, the "g" input operand makes the address of `clobber`
// count as used, and the "memory" clobber tells the compiler that memory may
// be read or written here.
// NOTE(review): the enclosing function and the declaration of `clobber` are
// not visible in this excerpt.
122 __asm__
volatile(
"" : :
"g"(&clobber) :
"memory");
127 template <
typename T>
// Unconditional compile-time failure carrying a diagnostic message.
// NOTE(review): presumably this sits in the fallback branch of a
// compiler-detection #if chain that is not visible here -- confirm. Before
// C++23 a non-dependent `false` condition inside a template may be diagnosed
// even if the template is never instantiated, so it must only be reachable
// when the build is supposed to fail.
140 static_assert(
false,
"No optimization barrier available for this compiler");
150 template <
typename T>
166 template <
typename T>
// Duration type used for the timing samples: a double tick count with a
// std::nano period, i.e. (possibly fractional) nanoseconds.
184 using Duration = std::chrono::duration<double, std::nano>;
// Work on a copy so that `run_timings` itself keeps its order, sort the copy
// in ascending order (std::sort with the default comparison) and return it
// by value.
238 std::vector<Duration> sorted_timings =
run_timings;
239 std::sort(sorted_timings.begin(), sorted_timings.end());
240 return sorted_timings;
// Median of `sorted_timings`. `midpoint` is size / 2, i.e. the central index
// for an odd count and the upper of the two central indices for an even one.
// NOTE(review): for an empty vector `midpoint - 1` wraps around (size_t is
// unsigned); any guard against that is outside this excerpt.
251 const size_t midpoint = sorted_timings.size() / 2;
// Even count: average the two central elements.
252 if (sorted_timings.size() % 2 == 0) {
253 return (sorted_timings[midpoint - 1] + sorted_timings[midpoint]) / 2;
// Odd count: the single central element.
255 return sorted_timings[midpoint];
// Quartile estimate from `sorted_timings` (n = sorted_timings.size()).
// `first_point` is the index (n - 2) / 4, `offset` is the remainder
// (n - 2) % 4, and `third_point` mirrors `first_point` from the last index.
// NOTE(review): `size() - 2` wraps around for fewer than two samples (size_t
// is unsigned); any guard against that is outside this excerpt.
277 const size_t first_point = (sorted_timings.size() - 2) / 4;
278 const size_t offset = (sorted_timings.size() - 2) % 4;
279 const size_t third_point = (sorted_timings.size() - 1) - first_point;
// Direct lookup of both quartiles, without interpolation.
// NOTE(review): the condition guarding this return is not visible here --
// presumably `offset == 0`; confirm.
281 return {sorted_timings[first_point], sorted_timings[third_point]};
// Otherwise each quartile is a weighted blend of its sample and the next
// sample toward the centre, with weights (4 - offset) and offset. The
// divisor of each expression is on a line that is not visible here.
283 const auto first_quartile = ((4 -
offset) * sorted_timings[first_point] +
284 offset * sorted_timings[first_point + 1]) /
286 const auto third_quartile = ((4 -
offset) * sorted_timings[third_point] +
287 offset * sorted_timings[third_point - 1]) /
289 return {first_quartile, third_quartile};
// Divides the interquartile range (thirdq - firstq) by
// 2 * sqrt(2) * 0.4769..., which evaluates to roughly 1.349.
// NOTE(review): 0.4769362762... matches erfinv(0.5), which would make the
// divisor the interquartile range of a unit normal distribution, i.e. this
// presumably converts the IQR into a standard-deviation estimate -- confirm.
305 return (thirdq - firstq) / (2. * std::sqrt(2.) * 0.4769362762044698733814);
// Writes a one-line summary of `res` to `os`: the number of runs, the
// iterations per run, the total time (count divided by one million, labelled
// as ms) and the per-run median plus an error margin (counts divided by one
// thousand, labelled as microseconds). The error margin is runTimeError()
// scaled by 1.96.
// The stream precision and flags are captured first because std::fixed and
// std::setprecision persist on the stream; the precision is put back at the
// end.
// NOTE(review): the restore of `old_flags` and the rest of the output chain
// are not visible in this excerpt -- confirm the flags are restored as well.
328 auto old_precision = os.precision();
329 auto old_flags = os.flags();
330 os << std::fixed << res.
run_timings.size() <<
" runs of "
331 << res.
iters_per_run <<
" iteration(s), " << std::setprecision(1)
332 << res.
totalTime().count() / 1
'000'000 <<
"ms total, "
333 << std::setprecision(4) << res.
runTimeMedian().count() / 1
'000 << "+/-"
334 << 1.96 * res.runTimeError().count() / 1'000 <<
"µs per run, "
337 os.precision(old_precision);
344 namespace benchmark_tools_internal {
347 template <
typename Callable,
typename Input,
typename Result>
// Runs one iteration of the callable on `input` and keeps the returned value
// in `result`.
// NOTE(review): what happens to `result` afterwards (presumably it is passed
// to an optimization barrier) is not visible here, nor is whatever selects
// this overload over the one without a Result parameter.
349 static inline void iter(
const Callable& iteration,
const Input&
input) {
352 const auto result = iteration(input);
358 template <
typename Callable,
typename Input>
// Overload with the same function parameters but no Result template
// parameter.
// NOTE(review): the body is not visible here -- presumably this is the
// variant for callables that return void; confirm.
360 static inline void iter(
const Callable& iteration,
const Input&
input) {
368 template <
typename Callable,
typename Result>
// Input-less counterpart: runs one iteration of the callable with no
// arguments and keeps the returned value in `result`.
// NOTE(review): the later use of `result` is not visible in this excerpt.
370 static inline void iter(
const Callable& iteration) {
372 const auto result = iteration();
378 template <
typename Callable>
// Input-less overload without a Result template parameter.
// NOTE(review): the body is not visible here -- presumably the variant for
// callables that return void; confirm.
380 static inline void iter(
const Callable& iteration) {
386 template <
typename T,
typename I>
389 template <
typename T>
394 template <
typename Callable,
typename Input =
void>
// Tail of a declaration whose beginning is not visible: it evaluates
// Concepts::exists for call_with_input_t with Callable and Input.
// NOTE(review): presumably this initialises the `is_callable` flag checked
// below -- confirm.
397 Concepts ::exists<call_with_input_t, Callable, Input>;
// With-input entry point: the input is received by pointer.
398 static inline void iter(
const Callable& iteration,
const Input*
input) {
// Reject, at compile time and with a readable message, a callable that
// cannot be invoked with the input.
399 static_assert(
is_callable,
"Gave callable that is not callable with input");
// Type returned by the callable when invoked with a `const Input&`.
401 using Result = std::invoke_result_t<Callable, const Input&>;
409 template <
typename Callable>
// Tail of a declaration whose beginning is not visible: it evaluates
// Concepts::exists for call_without_input_t with Callable.
412 Concepts ::exists<call_without_input_t, Callable>;
// No-input entry point: the second parameter is an unnamed `const void*`
// defaulting to nullptr, so it can be called with or without a pointer
// argument; the pointer has no name and therefore cannot be used in the body.
414 static inline void iter(
const Callable& iteration,
415 const void* =
nullptr) {
417 "Gave callable that is not callable without input");
// Type returned by the callable when invoked with no arguments.
419 using Result = std::invoke_result_t<Callable>;
426 template <
typename Callable>
429 std::chrono::milliseconds warmup_time) {
// steady_clock is monotonic, so measured intervals are not affected by
// adjustments of the system clock.
430 using Clock = std::chrono::steady_clock;
// Warm-up phase: keep looping until `warmup_time` has elapsed since
// `warmup_start`. The loop body is not visible in this excerpt.
436 const auto warmup_start = Clock::now();
437 while (Clock::now() - warmup_start < warmup_time) {
// Measurement phase: `num_runs` runs, each with a timestamp taken at its
// start.
441 for (
size_t i = 0;
i < num_runs; ++
i) {
442 const auto start = Clock::now();
519 template <
typename Callable>
// Parameter list of a benchmark entry point (its name and return type are
// not visible in this excerpt):
//   iteration     - the callable, taken as a forwarding reference
//   iters_per_run - number of iterations executed per run
//   num_runs      - defaults to 20000
//   warmup_time   - defaults to 2000 milliseconds
521 Callable&& iteration,
size_t iters_per_run,
size_t num_runs = 20000,
522 std::chrono::milliseconds warmup_time = std::chrono::milliseconds(2000)) {
// One run: `iters_per_run` passes through the loop. The loop body is not
// visible in this excerpt.
525 for (
size_t iter = 0; iter < iters_per_run; ++iter) {
// Trailing arguments of a call whose beginning is not visible here.
530 iters_per_run, num_runs, warmup_time);
539 template <
typename Callable,
typename Input>
// Parameter list of the input-driven benchmark entry point (its name and
// return type are not visible in this excerpt):
//   iterationWithInput - the callable, taken as a forwarding reference
//   inputs             - the inputs visited during one run
//   num_runs           - defaults to 20000
//   warmup_time        - defaults to 2000 milliseconds
541 Callable&& iterationWithInput,
const std::vector<Input>& inputs,
542 size_t num_runs = 20000,
543 std::chrono::milliseconds warmup_time = std::chrono::milliseconds(2000)) {
// One run visits every element of `inputs` in order; each element is handed
// on by address.
546 for (
const auto&
input : inputs) {
548 iterationWithInput, &
input);
// `inputs.size()` is passed first, ahead of num_runs and warmup_time, in a
// call whose beginning is not visible here.
551 inputs.size(), num_runs, warmup_time);