Barretenberg
The ZK-SNARK library at the core of Aztec
Loading...
Searching...
No Matches
fr.bench.cpp
Go to the documentation of this file.
1#include "fr.hpp"
2#include <benchmark/benchmark.h>
3
4using namespace benchmark;
5
6/*
7ADX asm
8------------------------------------------------------------
9Benchmark Time CPU Iterations
10------------------------------------------------------------
11sqr_assign_bench 58897282 ns 59659091 ns 11
12sqr_bench 80042911 ns 79861111 ns 9
13unary_minus_bench 33735725 ns 34375000 ns 20
14mul_assign_bench 61236664 ns 61079545 ns 11
15mul_bench 81490829 ns 80357143 ns 7
16add_bench 31052175 ns 31250000 ns 24
17sub_bench 33843720 ns 33593750 ns 20
18invert_bench 5008074600 ns 5000000000 ns 1
19pow_bench 6020118300 ns 6015625000 ns 1
20field_bench 916661900 ns 921875000 ns 1
21-------------------------------------------------------------------------------------
22Benchmark Time CPU Iterations
23-------------------------------------------------------------------------------------
24pippenger_bench/8192 16347884 ns 16047297 ns 37
25pippenger_bench/16384 29240762 ns 29296875 ns 24
26pippenger_bench/32768 57061918 ns 55397727 ns 11
27pippenger_bench/65536 106352929 ns 104910714 ns 7
28pippenger_bench/131072 205578500 ns 203125000 ns 4
29pippenger_bench/262144 352079750 ns 351562500 ns 2
30pippenger_bench/524288 691483800 ns 687500000 ns 1
31pippenger_bench/1048576 1333215400 ns 1250000000 ns 1
32unsafe pippenger. 1048576 points. clock cycles = 1898152911
33unsafe pippenger clock cycles per mul = 1810
34unsafe_pippenger_bench/1048576 898753700 ns 875000000 ns 1
35*/
36/*
37Generic
38------------------------------------------------------------
39Benchmark Time CPU Iterations
40------------------------------------------------------------
41sqr_assign_bench 109600860 ns 109375000 ns 5
42sqr_bench 112329383 ns 111979167 ns 6
43unary_minus_bench 29771167 ns 29296875 ns 24
44mul_assign_bench 111395033 ns 111979167 ns 6
45mul_bench 109264250 ns 109375000 ns 6
46add_bench 29478508 ns 29947917 ns 24
47sub_bench 32852481 ns 32738095 ns 21
48invert_bench 10354704900 ns 10328125000 ns 1
49pow_bench 12036579200 ns 12031250000 ns 1
50field_bench 1557337500 ns 1562500000 ns 1
51-------------------------------------------------------------------------------------
52Benchmark Time CPU Iterations
53-------------------------------------------------------------------------------------
54pippenger_bench/8192 27198528 ns 25862069 ns 29
55pippenger_bench/16384 51719409 ns 48295455 ns 11
56pippenger_bench/32768 87673922 ns 86805556 ns 9
57pippenger_bench/65536 169227125 ns 160156250 ns 4
58pippenger_bench/131072 322899500 ns 328125000 ns 2
59pippenger_bench/262144 615274500 ns 546875000 ns 1
60pippenger_bench/524288 1119308100 ns 1078125000 ns 1
61pippenger_bench/1048576 2145468700 ns 2078125000 ns 1
62unsafe pippenger. 1048576 points. clock cycles = 3626871186
63unsafe pippenger clock cycles per mul = 3458
64unsafe_pippenger_bench/1048576 1717275300 ns 1640625000 ns 1
65*/
66using namespace bb;
67
68void field_mixed_add(const fr& x1, const fr& y1, const fr& z1, const fr& x2, const fr& y2, fr& x3, fr& y3, fr& z3)
69{
70 fr T0;
71 fr T1;
72 fr T2;
73 fr T3;
74
75 // 3 sqr // 90 cycles
76 // 1 self sqr // 30 cycles
77 // 2 * // 72 cycles
78 // 5 *= // 180 cycles
79 // 1 - // 6 cycles
80 // 5 -= // 36 cycles
81 // 2 + // 12 cycles
82 // 6 += // 36 cycles
83 // 22 + 340 + 100 = 462 cycles
84 T0 = z1.sqr();
85 T1 = x2 * T0;
86 T1 -= x1;
87 T2 = z1 * T0;
88 T2 *= y2;
89 T2 -= y1;
90 z3 = z1 + T1;
91 T2 += T2;
92 T3 = T1.sqr();
93 T0 += T3;
94 z3.self_sqr();
95 z3 -= T0;
96 T3 += T3;
97 T3 += T3;
98 T1 *= T3;
99 T3 *= x1;
100 T0 = T3 + T3;
101 T0 += T1;
102 x3 = T2.sqr();
103 x3 -= T0;
104 T3 -= x3;
105 T1 *= y1;
106 T1 += T1;
107 T3 *= T2;
108 y3 = T3 - T1;
109}
110
111uint64_t rdtsc()
112{
113#ifdef __aarch64__
114 uint64_t pmccntr;
115 __asm__ __volatile__("mrs %0, pmccntr_el0" : "=r"(pmccntr));
116 return pmccntr;
117#elif __x86_64__
118 unsigned int lo = 0;
119 unsigned int hi = 0;
120 __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi));
121 return (static_cast<uint64_t>(hi) << 32) | lo;
122#else
123 return 0;
124#endif
125}
126
127constexpr size_t NUM_POINTS = 1 << 24;
128constexpr size_t NUM_INVERSIONS = 1 << 20;
129std::vector<bb::fr> oldx;
130std::vector<bb::fr> oldy;
131
135const auto init = []() {
136 fr seed_x = fr::random_element();
137 fr seed_y = fr::random_element();
138 fr seed_z = fr::random_element();
139 accx = seed_x;
140 accy = seed_y;
141 accz = seed_z;
142 for (size_t i = 0; i < NUM_POINTS; ++i) {
143 oldx.emplace_back(accx);
144 oldy.emplace_back(accy);
145
146 accx = accx * seed_x;
147 accy = accy * seed_y;
148 accz = accz * seed_z;
149 }
150 return 1;
151}();
152
154{
155 fr acc = x;
156 for (size_t i = 0; i < NUM_POINTS; ++i) {
157 acc.self_sqr();
158 }
159 return acc;
160}
161void sqr_assign_bench(State& state) noexcept
162{
163 uint64_t clocks = 0;
164 uint64_t count = 0;
165 for (auto _ : state) {
166 uint64_t before = rdtsc();
167 DoNotOptimize(sqr_assign_impl(accx));
168 clocks += (rdtsc() - before);
169 ++count;
170 }
171 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
172 std::cout << "sqr_assign clocks per operation = " << average << std::endl;
173}
175
176fr sqr_impl(const fr& x)
177{
178 fr acc = x;
179 for (size_t i = 0; i < NUM_POINTS; ++i) {
180 acc = acc.sqr();
181 }
182 return acc;
183}
184void sqr_bench(State& state) noexcept
185{
186 uint64_t clocks = 0;
187 uint64_t count = 0;
188 for (auto _ : state) {
189 uint64_t before = rdtsc();
190 DoNotOptimize(sqr_impl(accx));
191 clocks += (rdtsc() - before);
192 ++count;
193 }
194 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
195 std::cout << "sqr clocks per operation = " << average << std::endl;
196}
198
200{
201 fr acc = x;
202 for (size_t i = 0; i < NUM_POINTS; ++i) {
203 acc = -acc;
204 }
205 return acc;
206}
207void unary_minus_bench(State& state) noexcept
208{
209 uint64_t clocks = 0;
210 uint64_t count = 0;
211 for (auto _ : state) {
212 uint64_t before = rdtsc();
213 DoNotOptimize(unary_minus_impl(accx));
214 clocks += (rdtsc() - before);
215 ++count;
216 }
217 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
218 std::cout << "unary minus clocks per operation = " << average << std::endl;
219}
221
222fr mul_assign_impl(const fr& x, const fr& y)
223{
224 fr acc = x;
225 for (size_t i = 0; i < NUM_POINTS; ++i) {
226 acc *= y;
227 }
228 return acc;
229}
230void mul_assign_bench(State& state) noexcept
231{
232 uint64_t clocks = 0;
233 uint64_t count = 0;
234 for (auto _ : state) {
235 uint64_t before = rdtsc();
236 DoNotOptimize(mul_assign_impl(accx, accy));
237 clocks += (rdtsc() - before);
238 ++count;
239 }
240 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
241 std::cout << "mul assign clocks per operation = " << average << std::endl;
242}
244
245fr mul_impl(const fr& x, const fr& y)
246{
247 fr acc = x;
248 for (size_t i = 0; i < NUM_POINTS; ++i) {
249 acc = acc * y;
250 }
251 return acc;
252}
253
254void mul_bench(State& state) noexcept
255{
256 uint64_t clocks = 0;
257 uint64_t count = 0;
258 for (auto _ : state) {
259 uint64_t before = rdtsc();
260 DoNotOptimize(mul_impl(accx, accy));
261 clocks += (rdtsc() - before);
262 ++count;
263 }
264 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
265 std::cout << "mul clocks per operation = " << average << std::endl;
266}
268
269fr self_add_impl(const fr& x, fr& y)
270{
271 fr acc = x;
272 for (size_t i = 0; i < NUM_POINTS; ++i) {
273 acc += y;
274 }
275 return acc;
276}
277
278void self_add_bench(State& state) noexcept
279{
280 uint64_t clocks = 0;
281 uint64_t count = 0;
282 for (auto _ : state) {
283 uint64_t before = rdtsc();
284 DoNotOptimize(self_add_impl(accx, accy));
285 clocks += (rdtsc() - before);
286 ++count;
287 }
288 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
289 std::cout << "self add clocks per operation = " << average << std::endl;
290}
292
293fr add_impl(const fr& x, fr& y)
294{
295 fr acc = x;
296 for (size_t i = 0; i < NUM_POINTS; ++i) {
297 acc = acc + y;
298 }
299 return acc;
300}
301
302void add_bench(State& state) noexcept
303{
304 uint64_t clocks = 0;
305 uint64_t count = 0;
306 for (auto _ : state) {
307 uint64_t before = rdtsc();
308 DoNotOptimize(add_impl(accx, accy));
309 clocks += (rdtsc() - before);
310 ++count;
311 }
312 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
313 std::cout << "add clocks per operation = " << average << std::endl;
314}
316
317fr sub_impl(const fr& x, fr& y)
318{
319 fr acc = x;
320 for (size_t i = 0; i < NUM_POINTS; ++i) {
321 acc = acc - y;
322 }
323 return acc;
324}
325
326void sub_bench(State& state) noexcept
327{
328 uint64_t clocks = 0;
329 uint64_t count = 0;
330 for (auto _ : state) {
331 uint64_t before = rdtsc();
332 DoNotOptimize(sub_impl(accx, accy));
333 clocks += (rdtsc() - before);
334 ++count;
335 }
336 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
337 std::cout << "sub clocks per operation = " << average << std::endl;
338}
340
341fr addaddmul_impl(const fr& x, const fr& y, const fr& z)
342{
343 fr acc = x;
344 for (size_t i = 0; i < NUM_POINTS; ++i) {
345 acc *= y;
346 acc += z;
347 acc += y;
348 }
349 return acc;
350}
351
352void addaddmul_bench(State& state) noexcept
353{
354 uint64_t clocks = 0;
355 uint64_t count = 0;
356 for (auto _ : state) {
357 uint64_t before = rdtsc();
358 DoNotOptimize(addaddmul_impl(accx, accy, accz));
359 clocks += (rdtsc() - before);
360 ++count;
361 }
362 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
363 std::cout << "field clocks per call = " << average << std::endl;
364}
366
367fr subaddmul_impl(const fr& x, const fr& y, const fr& z)
368{
369 fr acc = x;
370 for (size_t i = 0; i < NUM_POINTS; ++i) {
371 acc *= y;
372 acc -= z;
373 acc += y;
374 }
375 return acc;
376}
377
378void subaddmul_bench(State& state) noexcept
379{
380 uint64_t clocks = 0;
381 uint64_t count = 0;
382 for (auto _ : state) {
383 uint64_t before = rdtsc();
384 DoNotOptimize(subaddmul_impl(accx, accy, accz));
385 clocks += (rdtsc() - before);
386 ++count;
387 }
388 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
389 std::cout << "field clocks per call = " << average << std::endl;
390}
392
393void field_bench(State& state) noexcept
394{
395 uint64_t clocks = 0;
396 uint64_t count = 0;
397 for (auto _ : state) {
398 uint64_t before = rdtsc();
399 fr x = accx;
400 fr y = accy;
401 fr z = accz;
402 for (size_t i = 0; i < NUM_POINTS; ++i) {
403 field_mixed_add(x, y, z, oldx[i], oldy[i], x, y, z);
404 }
405 DoNotOptimize(z);
406 clocks += (rdtsc() - before);
407 ++count;
408 }
409 double average = static_cast<double>(clocks) / (static_cast<double>(count) * static_cast<double>(NUM_POINTS));
410 std::cout << "field clocks per call = " << average << std::endl;
411}
413
414void invert_bench(State& state) noexcept
415{
416 for (auto _ : state) {
417 fr x = accx;
418 for (size_t i = 0; i < NUM_INVERSIONS; ++i) {
419 x = x.invert();
420 }
421 DoNotOptimize(x);
422 }
423}
425
426void pow_bench(State& state) noexcept
427{
428 for (auto _ : state) {
429 constexpr uint256_t exponent = fr::modulus - uint256_t(2);
430 fr x = accx;
431 for (size_t i = 0; i < NUM_INVERSIONS; ++i) {
432 x = x.pow(exponent);
433 }
434 DoNotOptimize(x);
435 }
436}
438
439void hash_bench(State& state) noexcept
440{
441 for (auto _ : state) {
442 state.PauseTiming();
444 state.ResumeTiming();
445 DoNotOptimize(std::hash<fr>{}(a));
446 }
447}
449
450// NOLINTNEXTLINE macro invokation triggers style guideline errors from googletest code
FF a
fr add_impl(const fr &x, fr &y)
Definition fr.bench.cpp:293
std::vector< bb::fr > oldx
Definition fr.bench.cpp:129
void sqr_bench(State &state) noexcept
Definition fr.bench.cpp:184
void addaddmul_bench(State &state) noexcept
Definition fr.bench.cpp:352
fr accy
Definition fr.bench.cpp:133
void field_bench(State &state) noexcept
Definition fr.bench.cpp:393
std::vector< bb::fr > oldy
Definition fr.bench.cpp:130
fr sqr_impl(const fr &x)
Definition fr.bench.cpp:176
void sub_bench(State &state) noexcept
Definition fr.bench.cpp:326
void self_add_bench(State &state) noexcept
Definition fr.bench.cpp:278
fr unary_minus_impl(const fr &x)
Definition fr.bench.cpp:199
fr accz
Definition fr.bench.cpp:134
void invert_bench(State &state) noexcept
Definition fr.bench.cpp:414
const auto init
Definition fr.bench.cpp:135
uint64_t rdtsc()
Definition fr.bench.cpp:111
void sqr_assign_bench(State &state) noexcept
Definition fr.bench.cpp:161
BENCHMARK_MAIN()
fr subaddmul_impl(const fr &x, const fr &y, const fr &z)
Definition fr.bench.cpp:367
fr mul_impl(const fr &x, const fr &y)
Definition fr.bench.cpp:245
void add_bench(State &state) noexcept
Definition fr.bench.cpp:302
fr addaddmul_impl(const fr &x, const fr &y, const fr &z)
Definition fr.bench.cpp:341
void field_mixed_add(const fr &x1, const fr &y1, const fr &z1, const fr &x2, const fr &y2, fr &x3, fr &y3, fr &z3)
Definition fr.bench.cpp:68
fr sub_impl(const fr &x, fr &y)
Definition fr.bench.cpp:317
fr mul_assign_impl(const fr &x, const fr &y)
Definition fr.bench.cpp:222
void mul_bench(State &state) noexcept
Definition fr.bench.cpp:254
void unary_minus_bench(State &state) noexcept
Definition fr.bench.cpp:207
constexpr size_t NUM_INVERSIONS
Definition fr.bench.cpp:128
void mul_assign_bench(State &state) noexcept
Definition fr.bench.cpp:230
fr accx
Definition fr.bench.cpp:132
fr self_add_impl(const fr &x, fr &y)
Definition fr.bench.cpp:269
constexpr size_t NUM_POINTS
Definition fr.bench.cpp:127
fr sqr_assign_impl(const fr &x)
Definition fr.bench.cpp:153
void pow_bench(State &state) noexcept
Definition fr.bench.cpp:426
void subaddmul_bench(State &state) noexcept
Definition fr.bench.cpp:378
void hash_bench(State &state) noexcept
Definition fr.bench.cpp:439
Entry point for Barretenberg command-line interface.
Definition api.hpp:5
BENCHMARK(bench_commit_structured_random_poly< curve::BN254 >) -> Unit(benchmark::kMillisecond)
constexpr decltype(auto) get(::tuplet::tuple< T... > &&t) noexcept
Definition tuple.hpp:13
static constexpr uint256_t modulus
BB_INLINE constexpr field pow(const uint256_t &exponent) const noexcept
BB_INLINE constexpr void self_sqr() &noexcept
constexpr field invert() const noexcept
static field random_element(numeric::RNG *engine=nullptr) noexcept
BB_INLINE constexpr field sqr() const noexcept