Barretenberg
The ZK-SNARK library at the core of Aztec
Loading...
Searching...
No Matches
bb_bench.hpp
Go to the documentation of this file.
1
2#pragma once
3
5#include <iostream>
6#include <map>
7#include <memory>
8#include <ostream>
9#include <string_view>
10#include <tracy/Tracy.hpp>
11#include <unordered_map>
12#include <vector>
13
19namespace bb::detail {
20// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
21extern bool use_bb_bench;
22
// Compile-time string that can be passed as a non-type template argument.
// See e.g. https://www.reddit.com/r/cpp_questions/comments/pumi9r/does_c20_not_support_string_literals_as_template/
//
// N is the extent of the source character array, so for a string literal it includes the
// terminating '\0'.
template <std::size_t N> struct OperationLabel {
    // Number of characters stored in `value` (equal to N).
    constexpr static std::size_t size() { return N; }

    // Copies all N characters of `str` into `value`.
    // Left implicit so a character array can be converted to a label without spelling out the type.
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays)
    constexpr OperationLabel(const char (&str)[N])
    {
        for (std::size_t i = 0; i < N; ++i) {
            value[i] = str[i];
        }
    }

    // Value-initialized so the member is never indeterminate inside the constexpr constructor;
    // this also keeps the constructor usable in constant expressions before C++20.
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays)
    char value[N] = {};
};
38
39template <OperationLabel op1, OperationLabel op2> constexpr auto concat()
40{
41 // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays)
42 char result_cstr[op1.size() + op2.size() - 1] = {};
43 std::copy(op1.value, op1.value + op1.size() - 1, result_cstr);
44 std::copy(op2.value, op2.value + op2.size(), result_cstr + op1.size() - 1);
45 return OperationLabel{ result_cstr };
46}
47struct TimeStats;
48struct TimeStatsEntry;
49using OperationKey = std::string_view;
50
52 uint64_t time = 0;
53 uint64_t count = 0;
54};
55
56// Normalized benchmark entry - each represents a unique (function, parent) pair
58 // For convenience, even though redundant with map store
61 uint64_t time = 0;
62 uint64_t count = 0;
63 size_t num_threads = 0;
64 double time_mean = 0;
65 uint64_t time_max = 0;
66 double time_stddev = 0;
67
68 // Welford's algorithm state
69 double time_m2 = 0; // sum of squared differences from mean
70
71 void add_thread_time_sample(const TimeAndCount& stats);
72 double get_std_dev() const;
73};
74
75// AggregateData: Result of normalizing benchmark data
76// entries: Key -> ParentKey -> Entry
77// Empty string is used as key if the entry has no parent.
79
80// Contains all statically known op counts
82 public:
83 static inline thread_local TimeStatsEntry* parent = nullptr;
85 std::mutex mutex;
87 void print() const;
88 // NOTE: Should be called when other threads aren't active
89 void clear();
90 void add_entry(const char* key, const std::shared_ptr<TimeStatsEntry>& entry);
91 void print_stats_recursive(const OperationKey& key, const TimeStats* stats, const std::string& indent) const;
92 void print_aggregate_counts(std::ostream&, size_t) const;
93 void print_aggregate_counts_hierarchical(std::ostream&) const;
94 void serialize_aggregate_data_json(std::ostream&) const;
95
96 // Normalize the raw benchmark data into a clean structure for display
98};
99
100// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
102
// Tracks operation statistics and links them to their immediate parent context.
// Each stat is associated only with its direct parent, not the full call hierarchy.
// This allows measuring the direct contribution of nested operations to their parent,
// but doesn't provide recursive parent-child relationships through the entire call stack.
//
// A TimeStats object is the head of a singly linked list: the head holds the totals for the most
// recently seen parent, older parent contexts are pushed down the `next` chain.
struct TimeStats {
    // Parent context this node accumulates for.
    TimeStatsEntry* parent = nullptr;
    // Number of samples added to this node.
    uint64_t count = 0;
    // Sum of the time values added to this node.
    uint64_t time = 0;
    // Used if the parent changes from last call - chains to handle multiple parent contexts
    std::unique_ptr<TimeStats> next;

    TimeStats() = default;
    // Builds a node with explicit totals; used by track() to archive the head's current values.
    TimeStats(TimeStatsEntry* parent_ptr, uint64_t count_val, uint64_t time_val)
        : parent(parent_ptr)
        , count(count_val)
        , time(time_val)
    {}

    // Records one sample of `time_val` attributed to `current_parent`.
    //
    // The sample is added to the head if its parent matches, otherwise to the node directly after
    // the head if that one matches. If neither matches, the head's totals are moved into a new node
    // inserted right after the head and the head restarts with this single sample.
    // Only these two nodes are inspected, so a parent stored deeper in the chain gets an additional
    // node rather than being reused.
    void track(TimeStatsEntry* current_parent, uint64_t time_val)
    {
        // Try to track with current stats if parent matches
        // Check if 'next' already handles this parent to avoid creating duplicates
        if (raw_track(current_parent, time_val) || (next && next->raw_track(current_parent, time_val))) {
            return;
        }
        // Create new TimeStats at the front of this linked list.
        auto new_next = std::make_unique<TimeStats>(parent, count, time);
        new_next->next = std::move(next);
        next = std::move(new_next);

        // Reset this node.
        parent = current_parent;
        count = 1;
        time = time_val;
    }

  private:
    // Returns true if successfully tracked (parent matches), false otherwise
    bool raw_track(TimeStatsEntry* expected_parent, uint64_t time_val)
    {
        if (parent != expected_parent) {
            return false;
        }
        count++;
        time += time_val;
        return true;
    }
};
151
152// Each key will appear at most once *per thread*.
153// Each thread has its own count for thread-safety.
158
159// The stat entry associated with a certain label AND a certain thread.
160// These will later be aggregated, and the TimeStats itself contains stat
161// entries for each caller context change (for later summarization).
162template <OperationLabel Op> struct ThreadBenchStats {
163 public:
164 // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
165 static inline thread_local std::shared_ptr<TimeStatsEntry> stats;
166
167 static void init_entry(TimeStatsEntry& entry);
168 // returns null if use_bb_bench not enabled
177};
178
179// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions)
180// no-op if passed null stats
188} // namespace bb::detail
189
// Define macros. We use (void)0 for the empty ones as we want every macro to be a statement that
// needs a semicolon.
//
// Three configurations are selected below:
//  - TRACY_INSTRUMENTED: the *_TRACY* macros forward to Tracy's ZoneScopedN, the BB_BENCH_ONLY*
//    and nesting macros are empty.
//  - __wasm__ without ENABLE_WASM_BENCH: every macro is empty.
//  - otherwise: the BB_BENCH_* macros declare a local bb::detail::BenchReporter named
//    _bb_bench_reporter, and BB_BENCH_ENABLE_NESTING() makes that reporter's stats the current
//    thread's parent when the stats pointer is non-null.
#ifdef TRACY_INSTRUMENTED
#define BB_TRACY() ZoneScopedN(__func__)
#define BB_TRACY_NAME(name) ZoneScopedN(name)
#define BB_BENCH_TRACY() ZoneScopedN(__func__)
#define BB_BENCH_TRACY_NAME(name) ZoneScopedN(name)
#define BB_BENCH_ONLY_NAME(name) (void)0
#define BB_BENCH_ENABLE_NESTING() (void)0
#define BB_BENCH_ONLY() (void)0
#elif defined __wasm__ && !defined ENABLE_WASM_BENCH
#define BB_TRACY() (void)0
#define BB_TRACY_NAME(name) (void)0
#define BB_BENCH_TRACY() (void)0
#define BB_BENCH_TRACY_NAME(name) (void)0
#define BB_BENCH_ONLY_NAME(name) (void)0
#define BB_BENCH_ENABLE_NESTING() (void)0
#define BB_BENCH_ONLY() (void)0
#else
#define BB_TRACY() (void)0
#define BB_TRACY_NAME(name) (void)0
#define BB_BENCH_TRACY() BB_BENCH_ONLY_NAME(__func__)
#define BB_BENCH_TRACY_NAME(name) BB_BENCH_ONLY_NAME(name)
#define BB_BENCH_ONLY_NAME(name)                                                                                       \
    bb::detail::BenchReporter _bb_bench_reporter((bb::detail::ThreadBenchStats<name>::ensure_stats().get()))
// Written as a single void expression rather than an unbraced `if`, so that an `else` following
// the macro at the call site cannot bind to a conditional hidden inside the expansion.
#define BB_BENCH_ENABLE_NESTING()                                                                                      \
    ((_bb_bench_reporter.stats)                                                                                        \
         ? (void)(bb::detail::GlobalBenchStatsContainer::parent = _bb_bench_reporter.stats)                            \
         : (void)0)
#define BB_BENCH_ONLY() BB_BENCH_ONLY_NAME(__func__)
#endif
// BB_BENCH_NAME / BB_BENCH expand to two statements (the scope marker, then the nesting step), so
// they must be used directly inside a block, not as the body of an unbraced if/else or loop.
#define BB_BENCH_NAME(name)                                                                                            \
    BB_BENCH_TRACY_NAME(name);                                                                                         \
    BB_BENCH_ENABLE_NESTING()

#define BB_BENCH()                                                                                                     \
    BB_BENCH_TRACY();                                                                                                  \
    BB_BENCH_ENABLE_NESTING()
#define BB_UNLIKELY(x)
GlobalBenchStatsContainer GLOBAL_BENCH_STATS
Definition bb_bench.cpp:621
constexpr auto concat()
Definition bb_bench.hpp:39
std::unordered_map< OperationKey, std::map< OperationKey, AggregateEntry > > AggregateData
Definition bb_bench.hpp:78
bool use_bb_bench
Definition bb_bench.cpp:173
std::string_view OperationKey
Definition bb_bench.cpp:175
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13
Definition bb_bench.hpp:57
void add_thread_time_sample(const TimeAndCount &stats)
Definition bb_bench.cpp:177
double time_stddev
Definition bb_bench.hpp:66
uint64_t time
Definition bb_bench.hpp:61
double time_m2
Definition bb_bench.hpp:69
double time_mean
Definition bb_bench.hpp:64
OperationKey parent
Definition bb_bench.hpp:60
size_t num_threads
Definition bb_bench.hpp:63
uint64_t time_max
Definition bb_bench.hpp:65
double get_std_dev() const
Definition bb_bench.cpp:194
OperationKey key
Definition bb_bench.hpp:59
uint64_t count
Definition bb_bench.hpp:62
TimeStatsEntry * parent
Definition bb_bench.hpp:182
TimeStatsEntry * stats
Definition bb_bench.hpp:183
void print_stats_recursive(const OperationKey &key, const TimeStats *stats, const std::string &indent) const
Definition bb_bench.cpp:258
void print_aggregate_counts_hierarchical(std::ostream &) const
Definition bb_bench.cpp:351
void print_aggregate_counts(std::ostream &, size_t) const
Definition bb_bench.cpp:274
void serialize_aggregate_data_json(std::ostream &) const
Definition bb_bench.cpp:313
void add_entry(const char *key, const std::shared_ptr< TimeStatsEntry > &entry)
Definition bb_bench.cpp:242
std::vector< std::shared_ptr< TimeStatsEntry > > entries
Definition bb_bench.hpp:86
static thread_local TimeStatsEntry * parent
Definition bb_bench.hpp:83
static constexpr std::size_t size()
Definition bb_bench.hpp:26
constexpr OperationLabel(const char(&str)[N])
Definition bb_bench.hpp:28
static std::shared_ptr< TimeStatsEntry > ensure_stats()
Definition bb_bench.hpp:169
static void init_entry(TimeStatsEntry &entry)
static thread_local std::shared_ptr< TimeStatsEntry > stats
Definition bb_bench.hpp:165
Definition bb_bench.hpp:154
OperationKey key
Definition bb_bench.hpp:155
TimeStats count
Definition bb_bench.hpp:156
bool raw_track(TimeStatsEntry *expected_parent, uint64_t time_val)
Definition bb_bench.hpp:141
TimeStats(TimeStatsEntry *parent_ptr, uint64_t count_val, uint64_t time_val)
Definition bb_bench.hpp:115
void track(TimeStatsEntry *current_parent, uint64_t time_val)
Definition bb_bench.hpp:121
TimeStatsEntry * parent
Definition bb_bench.hpp:108
std::unique_ptr< TimeStats > next
Definition bb_bench.hpp:112