benchmark  1.9.2
benchmark.h
1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Support for registering benchmarks for functions.
16 
17 /* Example usage:
18 // Define a function that executes the code to be measured a
19 // specified number of times:
20 static void BM_StringCreation(benchmark::State& state) {
21  for (auto _ : state)
22  std::string empty_string;
23 }
24 
25 // Register the function as a benchmark
26 BENCHMARK(BM_StringCreation);
27 
28 // Define another benchmark
29 static void BM_StringCopy(benchmark::State& state) {
30  std::string x = "hello";
31  for (auto _ : state)
32  std::string copy(x);
33 }
34 BENCHMARK(BM_StringCopy);
35 
36 // Augment the main() program to invoke benchmarks if specified
37 // via the --benchmark_filter command line flag. E.g.,
38 // my_unittest --benchmark_filter=all
39 // my_unittest --benchmark_filter=BM_StringCreation
40 // my_unittest --benchmark_filter=String
41 // my_unittest --benchmark_filter='Copy|Creation'
42 int main(int argc, char** argv) {
43  benchmark::Initialize(&argc, argv);
44  benchmark::RunSpecifiedBenchmarks();
45  benchmark::Shutdown();
46  return 0;
47 }
48 
49 // Sometimes a family of microbenchmarks can be implemented with
50 // just one routine that takes an extra argument to specify which
51 // one of the family of benchmarks to run. For example, the following
52 // code defines a family of microbenchmarks for measuring the speed
53 // of memcpy() calls of different lengths:
54 
55 static void BM_memcpy(benchmark::State& state) {
56  char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
57  memset(src, 'x', state.range(0));
58  for (auto _ : state)
59  memcpy(dst, src, state.range(0));
60  state.SetBytesProcessed(state.iterations() * state.range(0));
61  delete[] src; delete[] dst;
62 }
63 BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
64 
65 // The preceding code is quite repetitive, and can be replaced with the
66 // following short-hand. The following invocation will pick a few
67 // appropriate arguments in the specified range and will generate a
68 // microbenchmark for each such argument.
69 BENCHMARK(BM_memcpy)->Range(8, 8<<10);
70 
71 // You might have a microbenchmark that depends on two inputs. For
72 // example, the following code defines a family of microbenchmarks for
73 // measuring the speed of set insertion.
74 static void BM_SetInsert(benchmark::State& state) {
75  set<int> data;
76  for (auto _ : state) {
77  state.PauseTiming();
78  data = ConstructRandomSet(state.range(0));
79  state.ResumeTiming();
80  for (int j = 0; j < state.range(1); ++j)
81  data.insert(RandomNumber());
82  }
83 }
84 BENCHMARK(BM_SetInsert)
85  ->Args({1<<10, 128})
86  ->Args({2<<10, 128})
87  ->Args({4<<10, 128})
88  ->Args({8<<10, 128})
89  ->Args({1<<10, 512})
90  ->Args({2<<10, 512})
91  ->Args({4<<10, 512})
92  ->Args({8<<10, 512});
93 
94 // The preceding code is quite repetitive, and can be replaced with
95 // the following short-hand. The following macro will pick a few
96 // appropriate arguments in the product of the two specified ranges
97 // and will generate a microbenchmark for each such pair.
98 BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
99 
100 // For more complex patterns of inputs, passing a custom function
101 // to Apply allows programmatic specification of an
102 // arbitrary set of arguments to run the microbenchmark on.
103 // The following example enumerates a dense range on
104 // one parameter, and a sparse range on the second.
105 static void CustomArguments(benchmark::internal::Benchmark* b) {
106  for (int i = 0; i <= 10; ++i)
107  for (int j = 32; j <= 1024*1024; j *= 8)
108  b->Args({i, j});
109 }
110 BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
111 
112 // Templated microbenchmarks work the same way:
113 // Produce then consume 'size' messages 'iters' times
114 // Measures throughput in the absence of multiprogramming.
115 template <class Q> int BM_Sequential(benchmark::State& state) {
116  Q q;
117  typename Q::value_type v;
118  for (auto _ : state) {
119  for (int i = state.range(0); i--; )
120  q.push(v);
121  for (int e = state.range(0); e--; )
122  q.Wait(&v);
123  }
124  // actually messages, not bytes:
125  state.SetBytesProcessed(state.iterations() * state.range(0));
126 }
127 BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
128 
129 Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
130 benchmark. This option overrides the `benchmark_min_time` flag.
131 
132 void BM_test(benchmark::State& state) {
133  ... body ...
134 }
135 BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.
136 
137 In a multithreaded test, it is guaranteed that none of the threads will start
138 until all have reached the loop start, and all will have finished before any
139 thread exits the loop body. As such, any global setup or teardown you want to
140 do can be wrapped in a check against the thread index:
141 
142 static void BM_MultiThreaded(benchmark::State& state) {
143  if (state.thread_index() == 0) {
144  // Setup code here.
145  }
146  for (auto _ : state) {
147  // Run the test as normal.
148  }
149  if (state.thread_index() == 0) {
150  // Teardown code here.
151  }
152 }
153 BENCHMARK(BM_MultiThreaded)->Threads(4);
154 
155 
156 If a benchmark runs a few milliseconds it may be hard to visually compare the
157 measured times, since the output data is given in nanoseconds per default. In
158 order to manually set the time unit, you can specify it manually:
159 
160 BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
161 */
162 
163 #ifndef BENCHMARK_BENCHMARK_H_
164 #define BENCHMARK_BENCHMARK_H_
165 
166 #include <stdint.h>
167 
168 #include <algorithm>
169 #include <atomic>
170 #include <cassert>
171 #include <cstddef>
172 #include <functional>
173 #include <initializer_list>
174 #include <iosfwd>
175 #include <limits>
176 #include <map>
177 #include <memory>
178 #include <set>
179 #include <string>
180 #include <type_traits>
181 #include <utility>
182 #include <vector>
183 
184 #include "benchmark/export.h"
185 
186 #if defined(_MSC_VER)
187 #include <intrin.h> // for _ReadWriteBarrier
188 #endif
189 
190 #define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
191  TypeName(const TypeName&) = delete; \
192  TypeName& operator=(const TypeName&) = delete
193 
194 #ifdef BENCHMARK_HAS_CXX17
195 #define BENCHMARK_UNUSED [[maybe_unused]]
196 #elif defined(__GNUC__) || defined(__clang__)
197 #define BENCHMARK_UNUSED __attribute__((unused))
198 #else
199 #define BENCHMARK_UNUSED
200 #endif
201 
202 // Used to annotate functions, methods and classes so they
203 // are not optimized by the compiler. Useful for tests
204 // where you expect loops to stay in place churning cycles
205 #if defined(__clang__)
206 #define BENCHMARK_DONT_OPTIMIZE __attribute__((optnone))
207 #elif defined(__GNUC__) || defined(__GNUG__)
208 #define BENCHMARK_DONT_OPTIMIZE __attribute__((optimize(0)))
209 #else
210 // MSVC & Intel do not have a no-optimize attribute, only line pragmas
211 #define BENCHMARK_DONT_OPTIMIZE
212 #endif
213 
214 #if defined(__GNUC__) || defined(__clang__)
215 #define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
216 #elif defined(_MSC_VER) && !defined(__clang__)
217 #define BENCHMARK_ALWAYS_INLINE __forceinline
218 #define __func__ __FUNCTION__
219 #else
220 #define BENCHMARK_ALWAYS_INLINE
221 #endif
222 
223 #define BENCHMARK_INTERNAL_TOSTRING2(x) #x
224 #define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)
225 
226 // clang-format off
227 #if (defined(__GNUC__) && !defined(__NVCC__) && !defined(__NVCOMPILER)) || defined(__clang__)
228 #define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
229 #define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
230 #define BENCHMARK_DISABLE_DEPRECATED_WARNING \
231  _Pragma("GCC diagnostic push") \
232  _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
233 #define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop")
234 #elif defined(__NVCOMPILER)
235 #define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
236 #define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
237 #define BENCHMARK_DISABLE_DEPRECATED_WARNING \
238  _Pragma("diagnostic push") \
239  _Pragma("diag_suppress deprecated_entity_with_custom_message")
240 #define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("diagnostic pop")
241 #else
242 #define BENCHMARK_BUILTIN_EXPECT(x, y) x
243 #define BENCHMARK_DEPRECATED_MSG(msg)
244 #define BENCHMARK_WARNING_MSG(msg) \
245  __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
246  __LINE__) ") : warning note: " msg))
247 #define BENCHMARK_DISABLE_DEPRECATED_WARNING
248 #define BENCHMARK_RESTORE_DEPRECATED_WARNING
249 #endif
250 // clang-format on
251 
252 #if defined(__GNUC__) && !defined(__clang__)
253 #define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
254 #endif
255 
256 #ifndef __has_builtin
257 #define __has_builtin(x) 0
258 #endif
259 
260 #if defined(__GNUC__) || __has_builtin(__builtin_unreachable)
261 #define BENCHMARK_UNREACHABLE() __builtin_unreachable()
262 #elif defined(_MSC_VER)
263 #define BENCHMARK_UNREACHABLE() __assume(false)
264 #else
265 #define BENCHMARK_UNREACHABLE() ((void)0)
266 #endif
267 
268 #if defined(__GNUC__)
269 // Determine the cacheline size based on architecture
270 #if defined(__i386__) || defined(__x86_64__)
271 #define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
272 #elif defined(__powerpc64__)
273 #define BENCHMARK_INTERNAL_CACHELINE_SIZE 128
274 #elif defined(__aarch64__)
275 #define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
276 #elif defined(__arm__)
277 // Cache line sizes for ARM: These values are not strictly correct since
278 // cache line sizes depend on implementations, not architectures. There
279 // are even implementations with cache line sizes configurable at boot
280 // time.
281 #if defined(__ARM_ARCH_5T__)
282 #define BENCHMARK_INTERNAL_CACHELINE_SIZE 32
283 #elif defined(__ARM_ARCH_7A__)
284 #define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
285 #endif // ARM_ARCH
286 #endif // arches
287 #endif // __GNUC__
288 
289 #ifndef BENCHMARK_INTERNAL_CACHELINE_SIZE
290 // A reasonable default guess. Note that overestimates tend to waste more
291 // space, while underestimates tend to waste more time.
292 #define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
293 #endif
294 
295 #if defined(__GNUC__)
296 // Indicates that the declared object be cache aligned using
297 // `BENCHMARK_INTERNAL_CACHELINE_SIZE` (see above).
298 #define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \
299  __attribute__((aligned(BENCHMARK_INTERNAL_CACHELINE_SIZE)))
300 #elif defined(_MSC_VER)
301 #define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \
302  __declspec(align(BENCHMARK_INTERNAL_CACHELINE_SIZE))
303 #else
304 #define BENCHMARK_INTERNAL_CACHELINE_ALIGNED
305 #endif
306 
307 #if defined(_MSC_VER)
308 #pragma warning(push)
309 // C4251: <symbol> needs to have dll-interface to be used by clients of class
310 #pragma warning(disable : 4251)
311 #endif // _MSC_VER_
312 
313 namespace benchmark {
314 
315 namespace internal {
316 #if (__cplusplus < 201402L || (defined(_MSC_VER) && _MSVC_LANG < 201402L))
// Minimal C++11 stand-in for C++14's std::make_unique: constructs a T from
// the forwarded arguments and hands ownership to a std::unique_ptr.
template <typename T, typename... Ts>
std::unique_ptr<T> make_unique(Ts&&... params) {
  return std::unique_ptr<T>(new T(std::forward<Ts>(params)...));
}
321 #else
322 using ::std::make_unique;
323 #endif
324 } // namespace internal
325 
326 class BenchmarkReporter;
327 class State;
328 
329 using IterationCount = int64_t;
330 
331 // Define alias of Setup/Teardown callback function type
332 using callback_function = std::function<void(const benchmark::State&)>;
333 
// Default minimum benchmark running time, in seconds.
335 const char kDefaultMinTimeStr[] = "0.5s";
336 
337 // Returns the version of the library.
338 BENCHMARK_EXPORT std::string GetBenchmarkVersion();
339 
340 BENCHMARK_EXPORT void PrintDefaultHelp();
341 
342 BENCHMARK_EXPORT void Initialize(int* argc, char** argv,
343  void (*HelperPrintf)() = PrintDefaultHelp);
344 BENCHMARK_EXPORT void Shutdown();
345 
346 // Report to stdout all arguments in 'argv' as unrecognized except the first.
// Returns true if there is at least one unrecognized argument (i.e. 'argc' > 1).
348 BENCHMARK_EXPORT bool ReportUnrecognizedArguments(int argc, char** argv);
349 
350 // Returns the current value of --benchmark_filter.
351 BENCHMARK_EXPORT std::string GetBenchmarkFilter();
352 
353 // Sets a new value to --benchmark_filter. (This will override this flag's
354 // current value).
355 // Should be called after `benchmark::Initialize()`, as
356 // `benchmark::Initialize()` will override the flag's value.
357 BENCHMARK_EXPORT void SetBenchmarkFilter(std::string value);
358 
359 // Returns the current value of --v (command line value for verbosity).
360 BENCHMARK_EXPORT int32_t GetBenchmarkVerbosity();
361 
362 // Creates a default display reporter. Used by the library when no display
363 // reporter is provided, but also made available for external use in case a
364 // custom reporter should respect the `--benchmark_format` flag as a fallback
365 BENCHMARK_EXPORT BenchmarkReporter* CreateDefaultDisplayReporter();
366 
367 // Generate a list of benchmarks matching the specified --benchmark_filter flag
368 // and if --benchmark_list_tests is specified return after printing the name
369 // of each matching benchmark. Otherwise run each matching benchmark and
370 // report the results.
371 //
372 // spec : Specify the benchmarks to run. If users do not specify this arg,
373 // then the value of FLAGS_benchmark_filter
374 // will be used.
375 //
// The second and third overload use the specified 'display_reporter' and
// 'file_reporter' respectively. 'file_reporter' will write to the file
// specified by '--benchmark_out'. If '--benchmark_out' is not given the
// 'file_reporter' is ignored.
381 //
382 // RETURNS: The number of matching benchmarks.
383 BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks();
384 BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(std::string spec);
385 
386 BENCHMARK_EXPORT size_t
387 RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter);
388 BENCHMARK_EXPORT size_t
389 RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, std::string spec);
390 
391 BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(
392  BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter);
393 BENCHMARK_EXPORT size_t
394 RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
395  BenchmarkReporter* file_reporter, std::string spec);
396 
397 // TimeUnit is passed to a benchmark in order to specify the order of magnitude
398 // for the measured time.
399 enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond };
400 
401 BENCHMARK_EXPORT TimeUnit GetDefaultTimeUnit();
402 
403 // Sets the default time unit the benchmarks use
404 // Has to be called before the benchmark loop to take effect
405 BENCHMARK_EXPORT void SetDefaultTimeUnit(TimeUnit unit);
406 
407 // If a MemoryManager is registered (via RegisterMemoryManager()),
408 // it can be used to collect and report allocation metrics for a run of the
409 // benchmark.
411  public:
412  static constexpr int64_t TombstoneValue = std::numeric_limits<int64_t>::max();
413 
414  struct Result {
415  Result()
416  : num_allocs(0),
417  max_bytes_used(0),
418  total_allocated_bytes(TombstoneValue),
419  net_heap_growth(TombstoneValue),
420  memory_iterations(0) {}
421 
422  // The number of allocations made in total between Start and Stop.
423  int64_t num_allocs;
424 
425  // The peak memory use between Start and Stop.
426  int64_t max_bytes_used;
427 
428  // The total memory allocated, in bytes, between Start and Stop.
429  // Init'ed to TombstoneValue if metric not available.
430  int64_t total_allocated_bytes;
431 
432  // The net changes in memory, in bytes, between Start and Stop.
433  // ie., total_allocated_bytes - total_deallocated_bytes.
434  // Init'ed to TombstoneValue if metric not available.
435  int64_t net_heap_growth;
436 
437  IterationCount memory_iterations;
438  };
439 
440  virtual ~MemoryManager() {}
441 
442  // Implement this to start recording allocation information.
443  virtual void Start() = 0;
444 
445  // Implement this to stop recording and fill out the given Result structure.
446  virtual void Stop(Result& result) = 0;
447 };
448 
449 // Register a MemoryManager instance that will be used to collect and report
450 // allocation measurements for benchmark runs.
451 BENCHMARK_EXPORT
452 void RegisterMemoryManager(MemoryManager* memory_manager);
453 
454 // If a ProfilerManager is registered (via RegisterProfilerManager()), the
455 // benchmark will be run an additional time under the profiler to collect and
456 // report profile metrics for the run of the benchmark.
// NOTE(review): the class-opening line was missing from this file (the
// surrounding comment and RegisterProfilerManager(ProfilerManager*) below
// establish the name); restored here.
class ProfilerManager {
 public:
  virtual ~ProfilerManager() {}

  // This is called after `Setup()` code and right before the benchmark is run.
  virtual void AfterSetupStart() = 0;

  // This is called before `Teardown()` code and right after the benchmark
  // completes.
  virtual void BeforeTeardownStop() = 0;
};
468 
469 // Register a ProfilerManager instance that will be used to collect and report
470 // profile measurements for benchmark runs.
471 BENCHMARK_EXPORT
472 void RegisterProfilerManager(ProfilerManager* profiler_manager);
473 
474 // Add a key-value pair to output as part of the context stanza in the report.
475 BENCHMARK_EXPORT
476 void AddCustomContext(const std::string& key, const std::string& value);
477 
478 namespace internal {
479 class Benchmark;
480 class BenchmarkImp;
481 class BenchmarkFamilies;
482 
483 BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext();
484 
485 BENCHMARK_EXPORT
486 void UseCharPointer(char const volatile*);
487 
488 // Take ownership of the pointer and register the benchmark. Return the
489 // registered benchmark.
490 BENCHMARK_EXPORT Benchmark* RegisterBenchmarkInternal(
491  std::unique_ptr<Benchmark>);
492 
493 // Ensure that the standard streams are properly initialized in every TU.
494 BENCHMARK_EXPORT int InitializeStreams();
495 BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
496 
497 } // namespace internal
498 
499 #if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \
500  defined(__EMSCRIPTEN__)
501 #define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
502 #endif
503 
504 // Force the compiler to flush pending writes to global memory. Acts as an
505 // effective read/write barrier
506 inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
507  std::atomic_signal_fence(std::memory_order_acq_rel);
508 }
509 
// The DoNotOptimize(...) function can be used to prevent a value or
// expression from being optimized away by the compiler. This function is
// intended to add little to no overhead.
// See: https://youtu.be/nXaxk27zwlk?t=2441
#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
#if !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER)
// The empty asm statement takes `value` as an operand of code the compiler
// cannot see through, so the value must actually be materialized.
// "r,m" lets the compiler keep it in either a register or memory; the
// "+" (read-write) form on the non-const overloads additionally forces the
// compiler to assume `value` may have been modified.
template <class Tp>
BENCHMARK_DEPRECATED_MSG(
    "The const-ref version of this method can permit "
    "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  asm volatile("" : : "r,m"(value) : "memory");
}

template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
// NOTE(review): the constraint-alternative order differs between clang and
// other compilers here — presumably to favor each compiler's better codegen;
// confirm against upstream history before changing.
#if defined(__clang__)
  asm volatile("" : "+r,m"(value) : : "memory");
#else
  asm volatile("" : "+m,r"(value) : : "memory");
#endif
}

template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
#if defined(__clang__)
  asm volatile("" : "+r,m"(value) : : "memory");
#else
  asm volatile("" : "+m,r"(value) : : "memory");
#endif
}
// !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER)
#elif (__GNUC__ >= 5)
// Workaround for a bug with full argument copy overhead with GCC.
// See: #1340 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105519
// Each overload below is split in two by enable_if: small trivially-copyable
// types may use the register alternative ("r,m"), while larger or
// non-trivially-copyable types are pinned to memory ("m") so GCC does not
// make a full copy of the argument for the register alternative.
template <class Tp>
BENCHMARK_DEPRECATED_MSG(
    "The const-ref version of this method can permit "
    "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE
    typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
                            (sizeof(Tp) <= sizeof(Tp*))>::type
    DoNotOptimize(Tp const& value) {
  asm volatile("" : : "r,m"(value) : "memory");
}

template <class Tp>
BENCHMARK_DEPRECATED_MSG(
    "The const-ref version of this method can permit "
    "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE
    typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
                            (sizeof(Tp) > sizeof(Tp*))>::type
    DoNotOptimize(Tp const& value) {
  asm volatile("" : : "m"(value) : "memory");
}

template <class Tp>
inline BENCHMARK_ALWAYS_INLINE
    typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
                            (sizeof(Tp) <= sizeof(Tp*))>::type
    DoNotOptimize(Tp& value) {
  asm volatile("" : "+m,r"(value) : : "memory");
}

template <class Tp>
inline BENCHMARK_ALWAYS_INLINE
    typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
                            (sizeof(Tp) > sizeof(Tp*))>::type
    DoNotOptimize(Tp& value) {
  asm volatile("" : "+m"(value) : : "memory");
}

template <class Tp>
inline BENCHMARK_ALWAYS_INLINE
    typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
                            (sizeof(Tp) <= sizeof(Tp*))>::type
    DoNotOptimize(Tp&& value) {
  asm volatile("" : "+m,r"(value) : : "memory");
}

template <class Tp>
inline BENCHMARK_ALWAYS_INLINE
    typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
                            (sizeof(Tp) > sizeof(Tp*))>::type
    DoNotOptimize(Tp&& value) {
  asm volatile("" : "+m"(value) : : "memory");
}
// (__GNUC__ >= 5)
#endif

#elif defined(_MSC_VER)
// MSVC has no usable inline asm on x64: escape the address through an opaque
// function call, then use the compiler-only _ReadWriteBarrier intrinsic.
template <class Tp>
BENCHMARK_DEPRECATED_MSG(
    "The const-ref version of this method can permit "
    "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
  _ReadWriteBarrier();
}

#else
// Fallback for compilers with neither GNU inline asm nor MSVC intrinsics:
// escaping the address is the only portable way to inhibit optimization.
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11.
#endif
618 
// This class is used for user-defined counters.
class Counter {
 public:
  // Bit-flags describing how `value` is post-processed before reporting.
  enum Flags {
    kDefaults = 0,
    // Mark the counter as a rate. It will be presented divided
    // by the duration of the benchmark.
    kIsRate = 1 << 0,
    // Mark the counter as a thread-average quantity. It will be
    // presented divided by the number of threads.
    kAvgThreads = 1 << 1,
    // Mark the counter as a thread-average rate. See above.
    kAvgThreadsRate = kIsRate | kAvgThreads,
    // Mark the counter as a constant value, valid/same for *every* iteration.
    // When reporting, it will be *multiplied* by the iteration count.
    kIsIterationInvariant = 1 << 2,
    // Mark the counter as a constant rate.
    // When reporting, it will be *multiplied* by the iteration count
    // and then divided by the duration of the benchmark.
    kIsIterationInvariantRate = kIsRate | kIsIterationInvariant,
    // Mark the counter as an iteration-average quantity.
    // It will be presented divided by the number of iterations.
    kAvgIterations = 1 << 3,
    // Mark the counter as an iteration-average rate. See above.
    kAvgIterationsRate = kIsRate | kAvgIterations,

    // In the end, invert the result. This is always done last!
    // NOTE(review): `1 << 31` shifts into the sign bit of int (UB before
    // C++20) — presumably `1u << 31` was intended; confirm against upstream.
    kInvert = 1 << 31
  };

  // Base used when scaling the value for human-readable display.
  enum OneK {
    // 1'000 items per 1k
    kIs1000 = 1000,
    // 1'024 items per 1k
    kIs1024 = 1024
  };

  double value;  // the raw counter value
  Flags flags;   // post-processing applied when reporting
  OneK oneK;     // 1000- vs 1024-based display scaling

  BENCHMARK_ALWAYS_INLINE
  Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000)
      : value(v), flags(f), oneK(k) {}

  // Implicit conversions so a Counter can be read and written like a double.
  BENCHMARK_ALWAYS_INLINE operator double const&() const { return value; }
  BENCHMARK_ALWAYS_INLINE operator double&() { return value; }
};
667 
668 // A helper for user code to create unforeseen combinations of Flags, without
669 // having to do this cast manually each time, or providing this operator.
670 Counter::Flags inline operator|(const Counter::Flags& LHS,
671  const Counter::Flags& RHS) {
672  return static_cast<Counter::Flags>(static_cast<int>(LHS) |
673  static_cast<int>(RHS));
674 }
675 
676 // This is the container for the user-defined counters.
677 typedef std::map<std::string, Counter> UserCounters;
678 
// BigO is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark. In case oAuto is selected,
// complexity will be calculated automatically to the best fit.
683 enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };
684 
685 typedef int64_t ComplexityN;
686 
687 enum StatisticUnit { kTime, kPercentage };
688 
689 // BigOFunc is passed to a benchmark in order to specify the asymptotic
690 // computational complexity for the benchmark.
691 typedef double(BigOFunc)(ComplexityN);
692 
693 // StatisticsFunc is passed to a benchmark in order to compute some descriptive
694 // statistics over all the measurements of some type
695 typedef double(StatisticsFunc)(const std::vector<double>&);
696 
697 namespace internal {
// Bundles a user-supplied descriptive statistic: its display name, the
// function that computes it over a set of measurements, and the unit of
// the computed result.
struct Statistics {
  std::string name_;         // name used when reporting the statistic
  StatisticsFunc* compute_;  // computes the statistic from the samples
  StatisticUnit unit_;       // kTime (default) or kPercentage

  Statistics(const std::string& name, StatisticsFunc* compute,
             StatisticUnit unit = kTime)
      : name_(name), compute_(compute), unit_(unit) {}
};
707 
708 class BenchmarkInstance;
709 class ThreadTimer;
710 class ThreadManager;
712 
// Bit-flags controlling whether the file and/or display reporter outputs
// only aggregated results.
enum AggregationReportMode : unsigned {
  // The mode has not been manually specified
  ARM_Unspecified = 0,
  // The mode is user-specified.
  // This may or may not be set when the following bit-flags are set.
  ARM_Default = 1U << 0U,
  // File reporter should only output aggregates.
  ARM_FileReportAggregatesOnly = 1U << 1U,
  // Display reporter should only output aggregates
  ARM_DisplayReportAggregatesOnly = 1U << 2U,
  // Both reporters should only display aggregates.
  ARM_ReportAggregatesOnly =
      ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
};
727 
// Records whether (and how) a benchmark run was skipped, via
// State::SkipWithMessage() or State::SkipWithError().
enum Skipped : unsigned {
  NotSkipped = 0,
  SkippedWithMessage,  // skipped with an informational message
  SkippedWithError     // skipped because an error was reported
};
733 
734 } // namespace internal
735 
736 #if defined(_MSC_VER)
737 #pragma warning(push)
738 // C4324: 'benchmark::State': structure was padded due to alignment specifier
739 #pragma warning(disable : 4324)
740 #endif // _MSC_VER_
741 // State is passed to a running Benchmark and contains state for the
742 // benchmark to use.
743 class BENCHMARK_EXPORT BENCHMARK_INTERNAL_CACHELINE_ALIGNED State {
744  public:
745  struct StateIterator;
746  friend struct StateIterator;
747 
748  // Returns iterators used to run each iteration of a benchmark using a
749  // C++11 ranged-based for loop. These functions should not be called directly.
750  //
751  // REQUIRES: The benchmark has not started running yet. Neither begin nor end
752  // have been called previously.
753  //
754  // NOTE: KeepRunning may not be used after calling either of these functions.
755  inline BENCHMARK_ALWAYS_INLINE StateIterator begin();
756  inline BENCHMARK_ALWAYS_INLINE StateIterator end();
757 
758  // Returns true if the benchmark should continue through another iteration.
759  // NOTE: A benchmark may not return from the test until KeepRunning() has
760  // returned false.
761  inline bool KeepRunning();
762 
763  // Returns true iff the benchmark should run n more iterations.
764  // REQUIRES: 'n' > 0.
765  // NOTE: A benchmark must not return from the test until KeepRunningBatch()
766  // has returned false.
767  // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations.
768  //
769  // Intended usage:
770  // while (state.KeepRunningBatch(1000)) {
771  // // process 1000 elements
772  // }
773  inline bool KeepRunningBatch(IterationCount n);
774 
775  // REQUIRES: timer is running and 'SkipWithMessage(...)' or
776  // 'SkipWithError(...)' has not been called by the current thread.
777  // Stop the benchmark timer. If not called, the timer will be
778  // automatically stopped after the last iteration of the benchmark loop.
779  //
780  // For threaded benchmarks the PauseTiming() function only pauses the timing
781  // for the current thread.
782  //
783  // NOTE: The "real time" measurement is per-thread. If different threads
784  // report different measurements the largest one is reported.
785  //
786  // NOTE: PauseTiming()/ResumeTiming() are relatively
787  // heavyweight, and so their use should generally be avoided
788  // within each benchmark iteration, if possible.
789  void PauseTiming();
790 
791  // REQUIRES: timer is not running and 'SkipWithMessage(...)' or
792  // 'SkipWithError(...)' has not been called by the current thread.
793  // Start the benchmark timer. The timer is NOT running on entrance to the
794  // benchmark function. It begins running after control flow enters the
795  // benchmark loop.
796  //
797  // NOTE: PauseTiming()/ResumeTiming() are relatively
798  // heavyweight, and so their use should generally be avoided
799  // within each benchmark iteration, if possible.
800  void ResumeTiming();
801 
802  // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
803  // called previously by the current thread.
804  // Report the benchmark as resulting in being skipped with the specified
805  // 'msg'.
806  // After this call the user may explicitly 'return' from the benchmark.
807  //
808  // If the ranged-for style of benchmark loop is used, the user must explicitly
809  // break from the loop, otherwise all future iterations will be run.
810  // If the 'KeepRunning()' loop is used the current thread will automatically
811  // exit the loop at the end of the current iteration.
812  //
813  // For threaded benchmarks only the current thread stops executing and future
814  // calls to `KeepRunning()` will block until all threads have completed
815  // the `KeepRunning()` loop. If multiple threads report being skipped only the
816  // first skip message is used.
817  //
818  // NOTE: Calling 'SkipWithMessage(...)' does not cause the benchmark to exit
819  // the current scope immediately. If the function is called from within
820  // the 'KeepRunning()' loop the current iteration will finish. It is the users
821  // responsibility to exit the scope as needed.
822  void SkipWithMessage(const std::string& msg);
823 
824  // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
825  // called previously by the current thread.
826  // Report the benchmark as resulting in an error with the specified 'msg'.
827  // After this call the user may explicitly 'return' from the benchmark.
828  //
829  // If the ranged-for style of benchmark loop is used, the user must explicitly
830  // break from the loop, otherwise all future iterations will be run.
831  // If the 'KeepRunning()' loop is used the current thread will automatically
832  // exit the loop at the end of the current iteration.
833  //
834  // For threaded benchmarks only the current thread stops executing and future
835  // calls to `KeepRunning()` will block until all threads have completed
836  // the `KeepRunning()` loop. If multiple threads report an error only the
837  // first error message is used.
838  //
839  // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
840  // the current scope immediately. If the function is called from within
  // the 'KeepRunning()' loop the current iteration will finish. It is the
  // user's responsibility to exit the scope as needed.
843  void SkipWithError(const std::string& msg);
844 
845  // Returns true if 'SkipWithMessage(...)' or 'SkipWithError(...)' was called.
846  bool skipped() const { return internal::NotSkipped != skipped_; }
847 
848  // Returns true if an error has been reported with 'SkipWithError(...)'.
849  bool error_occurred() const { return internal::SkippedWithError == skipped_; }
850 
851  // REQUIRES: called exactly once per iteration of the benchmarking loop.
852  // Set the manually measured time for this benchmark iteration, which
853  // is used instead of automatically measured time if UseManualTime() was
854  // specified.
855  //
856  // For threaded benchmarks the final value will be set to the largest
857  // reported values.
858  void SetIterationTime(double seconds);
859 
  // Set the number of bytes processed by the current benchmark
  // execution. This routine is typically called once at the end of a
  // throughput oriented benchmark.
  //
  // REQUIRES: a benchmark has exited its benchmarking loop.
  // NOTE: overwrites any user counter named "bytes_per_second"; the value is
  // reported as a rate using 1024-based units (kIs1024).
  BENCHMARK_ALWAYS_INLINE
  void SetBytesProcessed(int64_t bytes) {
    counters["bytes_per_second"] =
        Counter(static_cast<double>(bytes), Counter::kIsRate, Counter::kIs1024);
  }
870 
871  BENCHMARK_ALWAYS_INLINE
872  int64_t bytes_processed() const {
873  if (counters.find("bytes_per_second") != counters.end())
874  return static_cast<int64_t>(counters.at("bytes_per_second"));
875  return 0;
876  }
877 
  // If this routine is called with complexity_n > 0 and a complexity report
  // is requested for the family benchmark, then the current benchmark will
  // be part of the computation and complexity_n will represent the length
  // of N.
  BENCHMARK_ALWAYS_INLINE
  void SetComplexityN(ComplexityN complexity_n) {
    complexity_n_ = complexity_n;
  }
887 
  // Returns the value previously recorded via SetComplexityN().
  BENCHMARK_ALWAYS_INLINE
  ComplexityN complexity_length_n() const { return complexity_n_; }
890 
  // If this routine is called with items > 0, then an items/s
  // label is printed on the benchmark report line for the currently
  // executing benchmark. It is typically called at the end of a processing
  // benchmark where a processing items/second output is desired.
  //
  // REQUIRES: a benchmark has exited its benchmarking loop.
  // NOTE: overwrites any user counter named "items_per_second".
  BENCHMARK_ALWAYS_INLINE
  void SetItemsProcessed(int64_t items) {
    counters["items_per_second"] =
        Counter(static_cast<double>(items), benchmark::Counter::kIsRate);
  }
902 
903  BENCHMARK_ALWAYS_INLINE
904  int64_t items_processed() const {
905  if (counters.find("items_per_second") != counters.end())
906  return static_cast<int64_t>(counters.at("items_per_second"));
907  return 0;
908  }
909 
910  // If this routine is called, the specified label is printed at the
911  // end of the benchmark report line for the currently executing
912  // benchmark. Example:
913  // static void BM_Compress(benchmark::State& state) {
914  // ...
915  // double compress = input_size / output_size;
916  // state.SetLabel(StrFormat("compress:%.1f%%", 100.0*compression));
917  // }
918  // Produces output that looks like:
919  // BM_Compress 50 50 14115038 compress:27.3%
920  //
921  // REQUIRES: a benchmark has exited its benchmarking loop.
922  void SetLabel(const std::string& label);
923 
  // Range arguments for this run. Asserts (debug builds only) that the
  // requested argument has been set; an out-of-range 'pos' in a release
  // build indexes past the end of range_ (undefined behavior).
  BENCHMARK_ALWAYS_INLINE
  int64_t range(std::size_t pos = 0) const {
    assert(range_.size() > pos);
    return range_[pos];
  }
930 
  // Legacy accessors for the first two range arguments; prefer range(0)
  // and range(1).
  BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
  int64_t range_x() const { return range(0); }

  BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
  int64_t range_y() const { return range(1); }
936 
  // Number of threads concurrently executing the benchmark.
  BENCHMARK_ALWAYS_INLINE
  int threads() const { return threads_; }

  // Zero-based index of the executing thread. Values from [0, threads).
  BENCHMARK_ALWAYS_INLINE
  int thread_index() const { return thread_index_; }
944 
  // Number of iterations this thread has completed so far.
  // Returns 0 until the benchmarking loop has actually started.
  BENCHMARK_ALWAYS_INLINE
  IterationCount iterations() const {
    if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
      return 0;
    }
    // total_iterations_ counts down from max_iterations; batch_leftover_
    // compensates for a KeepRunningBatch() overshoot past max_iterations.
    return max_iterations - total_iterations_ + batch_leftover_;
  }
952 
  // Name of the running benchmark, as stored in name_. Returned by value.
  BENCHMARK_ALWAYS_INLINE
  std::string name() const { return name_; }
955 
956  private:
957  // items we expect on the first cache line (ie 64 bytes of the struct)
958  // When total_iterations_ is 0, KeepRunning() and friends will return false.
959  // May be larger than max_iterations.
960  IterationCount total_iterations_;
961 
962  // When using KeepRunningBatch(), batch_leftover_ holds the number of
963  // iterations beyond max_iters that were run. Used to track
964  // completed_iterations_ accurately.
965  IterationCount batch_leftover_;
966 
967  public:
968  const IterationCount max_iterations;
969 
970  private:
971  bool started_;
972  bool finished_;
973  internal::Skipped skipped_;
974 
975  // items we don't need on the first cache line
976  std::vector<int64_t> range_;
977 
978  ComplexityN complexity_n_;
979 
980  public:
981  // Container for user-defined counters.
982  UserCounters counters;
983 
984  private:
985  State(std::string name, IterationCount max_iters,
986  const std::vector<int64_t>& ranges, int thread_i, int n_threads,
988  internal::PerfCountersMeasurement* perf_counters_measurement,
989  ProfilerManager* profiler_manager);
990 
991  void StartKeepRunning();
992  // Implementation of KeepRunning() and KeepRunningBatch().
993  // is_batch must be true unless n is 1.
994  inline bool KeepRunningInternal(IterationCount n, bool is_batch);
995  void FinishKeepRunning();
996 
997  const std::string name_;
998  const int thread_index_;
999  const int threads_;
1000 
1001  internal::ThreadTimer* const timer_;
1002  internal::ThreadManager* const manager_;
1003  internal::PerfCountersMeasurement* const perf_counters_measurement_;
1004  ProfilerManager* const profiler_manager_;
1005 
1006  friend class internal::BenchmarkInstance;
1007 };
1008 #if defined(_MSC_VER)
1009 #pragma warning(pop)
#endif  // _MSC_VER
1011 
// Consumes one iteration; returns true while iterations remain.
inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
  return KeepRunningInternal(1, /*is_batch=*/false);
}
1015 
// Consumes 'n' iterations at once; returns true while iterations remain.
inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(IterationCount n) {
  return KeepRunningInternal(n, /*is_batch=*/true);
}
1019 
inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n,
                                                               bool is_batch) {
  // total_iterations_ is set to 0 by the constructor, and always set to a
  // nonzero value by StartKeepRunning().
  assert(n > 0);
  // n must be 1 unless is_batch is true.
  assert(is_batch || n == 1);
  // Fast path: enough iterations remain, consume n of them.
  if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) {
    total_iterations_ -= n;
    return true;
  }
  // First call on this thread: start the timers/budget, then retry once.
  if (!started_) {
    StartKeepRunning();
    if (!skipped() && total_iterations_ >= n) {
      total_iterations_ -= n;
      return true;
    }
  }
  // For non-batch runs, total_iterations_ must be 0 by now.
  // A batch may overshoot the budget: record the overshoot so iterations()
  // stays accurate and grant one final (partial) batch.
  if (is_batch && total_iterations_ != 0) {
    batch_leftover_ = n - total_iterations_;
    total_iterations_ = 0;
    return true;
  }
  FinishKeepRunning();
  return false;
}
1047 
1049  struct BENCHMARK_UNUSED Value {};
1050  typedef std::forward_iterator_tag iterator_category;
1051  typedef Value value_type;
1052  typedef Value reference;
1053  typedef Value pointer;
1054  typedef std::ptrdiff_t difference_type;
1055 
1056  private:
1057  friend class State;
1058  BENCHMARK_ALWAYS_INLINE
1059  StateIterator() : cached_(0), parent_() {}
1060 
1061  BENCHMARK_ALWAYS_INLINE
1062  explicit StateIterator(State* st)
1063  : cached_(st->skipped() ? 0 : st->max_iterations), parent_(st) {}
1064 
1065  public:
1066  BENCHMARK_ALWAYS_INLINE
1067  Value operator*() const { return Value(); }
1068 
  // Consumes one iteration from the cached budget.
  BENCHMARK_ALWAYS_INLINE
  StateIterator& operator++() {
    assert(cached_ > 0);
    --cached_;
    return *this;
  }
1075 
  // Loop-termination test for the ranged-for benchmark loop.
  // NOTE: deliberately side-effecting — when the iteration budget is
  // exhausted it calls FinishKeepRunning() on the parent State before
  // reporting that the end was reached. The argument is ignored.
  BENCHMARK_ALWAYS_INLINE
  bool operator!=(StateIterator const&) const {
    if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true;
    parent_->FinishKeepRunning();
    return false;
  }
1082 
1083  private:
1084  IterationCount cached_;
1085  State* const parent_;
1086 };
1087 
// Returns an iterator carrying this thread's full iteration budget
// (zero if the benchmark was already skipped).
inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() {
  return StateIterator(this);
}
// NOTE: the ranged-for protocol evaluates end() before the first comparison,
// so StartKeepRunning() here is what actually starts the measurement before
// the first iteration runs.
inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
  StartKeepRunning();
  return StateIterator();
}
1095 
1096 namespace internal {
1097 
1098 typedef void(Function)(State&);
1099 
1100 // ------------------------------------------------------
1101 // Benchmark registration object. The BENCHMARK() macro expands
1102 // into an internal::Benchmark* object. Various methods can
1103 // be called on this object to change the properties of the benchmark.
// Each method returns "this" so that multiple method calls can be
// chained into one expression.
1106 class BENCHMARK_EXPORT Benchmark {
1107  public:
1108  virtual ~Benchmark();
1109 
1110  // Note: the following methods all return "this" so that multiple
1111  // method calls can be chained together in one expression.
1112 
1113  // Specify the name of the benchmark
1114  Benchmark* Name(const std::string& name);
1115 
1116  // Run this benchmark once with "x" as the extra argument passed
1117  // to the function.
1118  // REQUIRES: The function passed to the constructor must accept an arg1.
1119  Benchmark* Arg(int64_t x);
1120 
1121  // Run this benchmark with the given time unit for the generated output report
1122  Benchmark* Unit(TimeUnit unit);
1123 
1124  // Run this benchmark once for a number of values picked from the
1125  // range [start..limit]. (start and limit are always picked.)
1126  // REQUIRES: The function passed to the constructor must accept an arg1.
1127  Benchmark* Range(int64_t start, int64_t limit);
1128 
1129  // Run this benchmark once for all values in the range [start..limit] with
1130  // specific step
1131  // REQUIRES: The function passed to the constructor must accept an arg1.
1132  Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1);
1133 
1134  // Run this benchmark once with "args" as the extra arguments passed
1135  // to the function.
1136  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
1137  Benchmark* Args(const std::vector<int64_t>& args);
1138 
1139  // Equivalent to Args({x, y})
1140  // NOTE: This is a legacy C++03 interface provided for compatibility only.
1141  // New code should use 'Args'.
1142  Benchmark* ArgPair(int64_t x, int64_t y) {
1143  std::vector<int64_t> args;
1144  args.push_back(x);
1145  args.push_back(y);
1146  return Args(args);
1147  }
1148 
1149  // Run this benchmark once for a number of values picked from the
1150  // ranges [start..limit]. (starts and limits are always picked.)
1151  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
1152  Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t>>& ranges);
1153 
1154  // Run this benchmark once for each combination of values in the (cartesian)
1155  // product of the supplied argument lists.
1156  // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
1157  Benchmark* ArgsProduct(const std::vector<std::vector<int64_t>>& arglists);
1158 
1159  // Equivalent to ArgNames({name})
1160  Benchmark* ArgName(const std::string& name);
1161 
1162  // Set the argument names to display in the benchmark name. If not called,
1163  // only argument values will be shown.
1164  Benchmark* ArgNames(const std::vector<std::string>& names);
1165 
1166  // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
1167  // NOTE: This is a legacy C++03 interface provided for compatibility only.
1168  // New code should use 'Ranges'.
1169  Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
1170  std::vector<std::pair<int64_t, int64_t>> ranges;
1171  ranges.push_back(std::make_pair(lo1, hi1));
1172  ranges.push_back(std::make_pair(lo2, hi2));
1173  return Ranges(ranges);
1174  }
1175 
1176  // Have "setup" and/or "teardown" invoked once for every benchmark run.
1177  // If the benchmark is multi-threaded (will run in k threads concurrently),
  // the setup callback will be invoked exactly once (not k times) before
1179  // each run with k threads. Time allowing (e.g. for a short benchmark), there
1180  // may be multiple such runs per benchmark, each run with its own
1181  // "setup"/"teardown".
1182  //
1183  // If the benchmark uses different size groups of threads (e.g. via
1184  // ThreadRange), the above will be true for each size group.
1185  //
1186  // The callback will be passed a State object, which includes the number
1187  // of threads, thread-index, benchmark arguments, etc.
1188  Benchmark* Setup(callback_function&&);
1189  Benchmark* Setup(const callback_function&);
1190  Benchmark* Teardown(callback_function&&);
1191  Benchmark* Teardown(const callback_function&);
1192 
1193  // Pass this benchmark object to *func, which can customize
1194  // the benchmark by calling various methods like Arg, Args,
1195  // Threads, etc.
1196  Benchmark* Apply(void (*custom_arguments)(Benchmark* benchmark));
1197 
1198  // Set the range multiplier for non-dense range. If not called, the range
1199  // multiplier kRangeMultiplier will be used.
1200  Benchmark* RangeMultiplier(int multiplier);
1201 
1202  // Set the minimum amount of time to use when running this benchmark. This
1203  // option overrides the `benchmark_min_time` flag.
1204  // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
1205  Benchmark* MinTime(double t);
1206 
1207  // Set the minimum amount of time to run the benchmark before taking runtimes
1208  // of this benchmark into account. This
1209  // option overrides the `benchmark_min_warmup_time` flag.
1210  // REQUIRES: `t >= 0` and `Iterations` has not been called on this benchmark.
1211  Benchmark* MinWarmUpTime(double t);
1212 
1213  // Specify the amount of iterations that should be run by this benchmark.
1214  // This option overrides the `benchmark_min_time` flag.
1215  // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
1216  //
1217  // NOTE: This function should only be used when *exact* iteration control is
1218  // needed and never to control or limit how long a benchmark runs, where
1219  // `--benchmark_min_time=<N>s` or `MinTime(...)` should be used instead.
1220  Benchmark* Iterations(IterationCount n);
1221 
1222  // Specify the amount of times to repeat this benchmark. This option overrides
1223  // the `benchmark_repetitions` flag.
1224  // REQUIRES: `n > 0`
1225  Benchmark* Repetitions(int n);
1226 
1227  // Specify if each repetition of the benchmark should be reported separately
1228  // or if only the final statistics should be reported. If the benchmark
1229  // is not repeated then the single result is always reported.
1230  // Applies to *ALL* reporters (display and file).
1231  Benchmark* ReportAggregatesOnly(bool value = true);
1232 
1233  // Same as ReportAggregatesOnly(), but applies to display reporter only.
1234  Benchmark* DisplayAggregatesOnly(bool value = true);
1235 
1236  // By default, the CPU time is measured only for the main thread, which may
1237  // be unrepresentative if the benchmark uses threads internally. If called,
1238  // the total CPU time spent by all the threads will be measured instead.
1239  // By default, only the main thread CPU time will be measured.
1240  Benchmark* MeasureProcessCPUTime();
1241 
1242  // If a particular benchmark should use the Wall clock instead of the CPU time
1243  // (be it either the CPU time of the main thread only (default), or the
1244  // total CPU usage of the benchmark), call this method. If called, the elapsed
1245  // (wall) time will be used to control how many iterations are run, and in the
1246  // printing of items/second or MB/seconds values.
1247  // If not called, the CPU time used by the benchmark will be used.
1248  Benchmark* UseRealTime();
1249 
1250  // If a benchmark must measure time manually (e.g. if GPU execution time is
1251  // being
1252  // measured), call this method. If called, each benchmark iteration should
1253  // call
1254  // SetIterationTime(seconds) to report the measured time, which will be used
1255  // to control how many iterations are run, and in the printing of items/second
1256  // or MB/second values.
1257  Benchmark* UseManualTime();
1258 
1259  // Set the asymptotic computational complexity for the benchmark. If called
1260  // the asymptotic computational complexity will be shown on the output.
1261  Benchmark* Complexity(BigO complexity = benchmark::oAuto);
1262 
1263  // Set the asymptotic computational complexity for the benchmark. If called
1264  // the asymptotic computational complexity will be shown on the output.
1265  Benchmark* Complexity(BigOFunc* complexity);
1266 
1267  // Add this statistics to be computed over all the values of benchmark run
1268  Benchmark* ComputeStatistics(const std::string& name,
1269  StatisticsFunc* statistics,
1270  StatisticUnit unit = kTime);
1271 
1272  // Support for running multiple copies of the same benchmark concurrently
1273  // in multiple threads. This may be useful when measuring the scaling
1274  // of some piece of code.
1275 
1276  // Run one instance of this benchmark concurrently in t threads.
1277  Benchmark* Threads(int t);
1278 
1279  // Pick a set of values T from [min_threads,max_threads].
1280  // min_threads and max_threads are always included in T. Run this
1281  // benchmark once for each value in T. The benchmark run for a
1282  // particular value t consists of t threads running the benchmark
1283  // function concurrently. For example, consider:
1284  // BENCHMARK(Foo)->ThreadRange(1,16);
1285  // This will run the following benchmarks:
1286  // Foo in 1 thread
1287  // Foo in 2 threads
1288  // Foo in 4 threads
1289  // Foo in 8 threads
1290  // Foo in 16 threads
1291  Benchmark* ThreadRange(int min_threads, int max_threads);
1292 
1293  // For each value n in the range, run this benchmark once using n threads.
1294  // min_threads and max_threads are always included in the range.
1295  // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
1296  // a benchmark with 1, 4, 7 and 8 threads.
1297  Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);
1298 
1299  // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
1300  Benchmark* ThreadPerCpu();
1301 
1302  virtual void Run(State& state) = 0;
1303 
1304  TimeUnit GetTimeUnit() const;
1305 
1306  protected:
1307  explicit Benchmark(const std::string& name);
1308  void SetName(const std::string& name);
1309 
1310  public:
1311  const char* GetName() const;
1312  int ArgsCnt() const;
1313  const char* GetArgName(int arg) const;
1314 
1315  private:
1316  friend class BenchmarkFamilies;
1317  friend class BenchmarkInstance;
1318 
1319  std::string name_;
1320  AggregationReportMode aggregation_report_mode_;
1321  std::vector<std::string> arg_names_; // Args for all benchmark runs
1322  std::vector<std::vector<int64_t>> args_; // Args for all benchmark runs
1323 
1324  TimeUnit time_unit_;
1325  bool use_default_time_unit_;
1326 
1327  int range_multiplier_;
1328  double min_time_;
1329  double min_warmup_time_;
1330  IterationCount iterations_;
1331  int repetitions_;
1332  bool measure_process_cpu_time_;
1333  bool use_real_time_;
1334  bool use_manual_time_;
1335  BigO complexity_;
1336  BigOFunc* complexity_lambda_;
1337  std::vector<Statistics> statistics_;
1338  std::vector<int> thread_counts_;
1339 
1340  callback_function setup_;
1341  callback_function teardown_;
1342 
1343  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(Benchmark);
1344 };
1345 
1346 } // namespace internal
1347 
1348 // Create and register a benchmark with the specified 'name' that invokes
1349 // the specified functor 'fn'.
1350 //
1351 // RETURNS: A pointer to the registered benchmark.
1352 internal::Benchmark* RegisterBenchmark(const std::string& name,
1353  internal::Function* fn);
1354 
1355 template <class Lambda>
1356 internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn);
1357 
1358 // Remove all registered benchmarks. All pointers to previously registered
1359 // benchmarks are invalidated.
1360 BENCHMARK_EXPORT void ClearRegisteredBenchmarks();
1361 
1362 namespace internal {
// The class used to hold all Benchmarks created from static functions
// (i.e. those created using the BENCHMARK(...) macros).
class BENCHMARK_EXPORT FunctionBenchmark : public Benchmark {
 public:
  // Wraps the free function 'func' (signature void(State&)) as a benchmark
  // registered under 'name'.
  FunctionBenchmark(const std::string& name, Function* func)
      : Benchmark(name), func_(func) {}

  void Run(State& st) override;

 private:
  Function* func_;  // Pointer to the benchmarked function.
};
1375 
// Benchmark backed by an arbitrary callable stored by value.
template <class Lambda>
class LambdaBenchmark : public Benchmark {
 public:
  // Invokes the stored callable with the benchmark State.
  void Run(State& st) override { lambda_(st); }

  // Perfect-forwards 'lam' into the stored Lambda.
  template <class OLambda>
  LambdaBenchmark(const std::string& name, OLambda&& lam)
      : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}

 private:
  LambdaBenchmark(LambdaBenchmark const&) = delete;
  Lambda lambda_;
};
1389 } // namespace internal
1390 
// Create and register a benchmark with the specified 'name' that invokes
// the free function 'fn'. Returns the registered Benchmark* for chaining.
inline internal::Benchmark* RegisterBenchmark(const std::string& name,
                                              internal::Function* fn) {
  return internal::RegisterBenchmarkInternal(
      benchmark::internal::make_unique<internal::FunctionBenchmark>(name, fn));
}
1396 
// Create and register a benchmark with the specified 'name' that invokes
// the callable 'fn'. The callable type is decayed and the callable is
// forwarded into a LambdaBenchmark which stores it by value.
template <class Lambda>
internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn) {
  using BenchType =
      internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
  return internal::RegisterBenchmarkInternal(
      benchmark::internal::make_unique<BenchType>(name,
                                                  std::forward<Lambda>(fn)));
}
1405 
// Create and register a benchmark named 'name' that invokes 'fn' with the
// extra arguments 'args...' after the State& parameter.
// NOTE: 'fn' and all arguments are captured BY COPY ([=]) into the closure,
// so the copies live as long as the registered benchmark; the forwarding
// references are not preserved past registration.
template <class Lambda, class... Args>
internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn,
                                       Args&&... args) {
  return benchmark::RegisterBenchmark(
      name, [=](benchmark::State& st) { fn(st, args...); });
}
1412 
1413 // The base class for all fixture tests.
1415  public:
1416  Fixture() : internal::Benchmark("") {}
1417 
  // Template method: brackets the user's BenchmarkCase() with the fixture's
  // SetUp()/TearDown() hooks on every run.
  void Run(State& st) override {
    this->SetUp(st);
    this->BenchmarkCase(st);
    this->TearDown(st);
  }
1423 
  // These will be deprecated ...
  virtual void SetUp(const State&) {}
  virtual void TearDown(const State&) {}
  // ... In favor of these. The non-const overloads default to forwarding to
  // the const ones, so existing fixtures that only override the const
  // versions keep working.
  virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
  virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }
1430 
1431  protected:
1432  virtual void BenchmarkCase(State&) = 0;
1433 };
1434 } // namespace benchmark
1435 
1436 // ------------------------------------------------------
1437 // Macro to register benchmarks
1438 
1439 // Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
1440 // every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
1441 // empty. If X is empty the expression becomes (+1 == +0).
1442 #if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
1443 #define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
1444 #else
1445 #define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
1446 #endif
1447 
1448 // Helpers for generating unique variable names
1449 #define BENCHMARK_PRIVATE_NAME(...) \
1450  BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, \
1451  __VA_ARGS__)
1452 
1453 #define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
1454 #define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
1455 // Helper for concatenation with macro name expansion
1456 #define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \
1457  BaseClass##_##Method##_Benchmark
1458 
1459 #define BENCHMARK_PRIVATE_DECLARE(n) \
1460  /* NOLINTNEXTLINE(misc-use-anonymous-namespace) */ \
1461  static ::benchmark::internal::Benchmark const* const BENCHMARK_PRIVATE_NAME( \
1462  n) BENCHMARK_UNUSED
1463 
1464 #define BENCHMARK(...) \
1465  BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
1466  (::benchmark::internal::RegisterBenchmarkInternal( \
1467  benchmark::internal::make_unique< \
1468  ::benchmark::internal::FunctionBenchmark>(#__VA_ARGS__, \
1469  __VA_ARGS__)))
1470 
1471 // Old-style macros
1472 #define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
1473 #define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
1474 #define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
1475 #define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
1476 #define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
1477  BENCHMARK(n)->RangePair({{(l1), (h1)}, {(l2), (h2)}})
1478 
1479 // Register a benchmark which invokes the function specified by `func`
1480 // with the additional arguments specified by `...`.
1481 //
1482 // For example:
1483 //
// template <class ...ExtraArgs>
// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
//   [...]
// }
// /* Registers a benchmark named "BM_takes_args/int_string_test" */
// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
1490 #define BENCHMARK_CAPTURE(func, test_case_name, ...) \
1491  BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
1492  (::benchmark::internal::RegisterBenchmarkInternal( \
1493  benchmark::internal::make_unique< \
1494  ::benchmark::internal::FunctionBenchmark>( \
1495  #func "/" #test_case_name, \
1496  [](::benchmark::State& st) { func(st, __VA_ARGS__); })))
1497 
1498 // This will register a benchmark for a templatized function. For example:
1499 //
1500 // template<int arg>
1501 // void BM_Foo(int iters);
1502 //
1503 // BENCHMARK_TEMPLATE(BM_Foo, 1);
1504 //
1505 // will register BM_Foo<1> as a benchmark.
1506 #define BENCHMARK_TEMPLATE1(n, a) \
1507  BENCHMARK_PRIVATE_DECLARE(n) = \
1508  (::benchmark::internal::RegisterBenchmarkInternal( \
1509  benchmark::internal::make_unique< \
1510  ::benchmark::internal::FunctionBenchmark>(#n "<" #a ">", n<a>)))
1511 
1512 #define BENCHMARK_TEMPLATE2(n, a, b) \
1513  BENCHMARK_PRIVATE_DECLARE(n) = \
1514  (::benchmark::internal::RegisterBenchmarkInternal( \
1515  benchmark::internal::make_unique< \
1516  ::benchmark::internal::FunctionBenchmark>(#n "<" #a "," #b ">", \
1517  n<a, b>)))
1518 
1519 #define BENCHMARK_TEMPLATE(n, ...) \
1520  BENCHMARK_PRIVATE_DECLARE(n) = \
1521  (::benchmark::internal::RegisterBenchmarkInternal( \
1522  benchmark::internal::make_unique< \
1523  ::benchmark::internal::FunctionBenchmark>( \
1524  #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
1525 
1526 // This will register a benchmark for a templatized function,
1527 // with the additional arguments specified by `...`.
1528 //
1529 // For example:
1530 //
1531 // template <typename T, class ...ExtraArgs>`
1532 // void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
1533 // [...]
1534 //}
1535 // /* Registers a benchmark named "BM_takes_args<void>/int_string_test` */
1536 // BENCHMARK_TEMPLATE1_CAPTURE(BM_takes_args, void, int_string_test, 42,
1537 // std::string("abc"));
1538 #define BENCHMARK_TEMPLATE1_CAPTURE(func, a, test_case_name, ...) \
1539  BENCHMARK_CAPTURE(func<a>, test_case_name, __VA_ARGS__)
1540 
1541 #define BENCHMARK_TEMPLATE2_CAPTURE(func, a, b, test_case_name, ...) \
1542  BENCHMARK_PRIVATE_DECLARE(func) = \
1543  (::benchmark::internal::RegisterBenchmarkInternal( \
1544  benchmark::internal::make_unique< \
1545  ::benchmark::internal::FunctionBenchmark>( \
1546  #func "<" #a "," #b ">" \
1547  "/" #test_case_name, \
1548  [](::benchmark::State& st) { func<a, b>(st, __VA_ARGS__); })))
1549 
1550 #define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1551  class BaseClass##_##Method##_Benchmark : public BaseClass { \
1552  public: \
1553  BaseClass##_##Method##_Benchmark() { \
1554  this->SetName(#BaseClass "/" #Method); \
1555  } \
1556  \
1557  protected: \
1558  void BenchmarkCase(::benchmark::State&) override; \
1559  };
1560 
1561 #define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1562  class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \
1563  public: \
1564  BaseClass##_##Method##_Benchmark() { \
1565  this->SetName(#BaseClass "<" #a ">/" #Method); \
1566  } \
1567  \
1568  protected: \
1569  void BenchmarkCase(::benchmark::State&) override; \
1570  };
1571 
1572 #define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1573  class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \
1574  public: \
1575  BaseClass##_##Method##_Benchmark() { \
1576  this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \
1577  } \
1578  \
1579  protected: \
1580  void BenchmarkCase(::benchmark::State&) override; \
1581  };
1582 
1583 #define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) \
1584  class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
1585  public: \
1586  BaseClass##_##Method##_Benchmark() { \
1587  this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); \
1588  } \
1589  \
1590  protected: \
1591  void BenchmarkCase(::benchmark::State&) override; \
1592  };
1593 
1594 #define BENCHMARK_DEFINE_F(BaseClass, Method) \
1595  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1596  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1597 
1598 #define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a) \
1599  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1600  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1601 
1602 #define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b) \
1603  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1604  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1605 
1606 #define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...) \
1607  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
1608  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1609 
1610 #define BENCHMARK_REGISTER_F(BaseClass, Method) \
1611  BENCHMARK_PRIVATE_REGISTER_F(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method))
1612 
1613 #define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
1614  BENCHMARK_PRIVATE_DECLARE(TestName) = \
1615  (::benchmark::internal::RegisterBenchmarkInternal( \
1616  benchmark::internal::make_unique<TestName>()))
1617 
// This macro will define and register a benchmark within a fixture class.
// The user's benchmark body follows the macro invocation, e.g.
//   BENCHMARK_F(MyFixture, FooTest)(benchmark::State& st) { ... }
#define BENCHMARK_F(BaseClass, Method)           \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  BENCHMARK_REGISTER_F(BaseClass, Method);       \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

// Same as BENCHMARK_F, for a fixture class template with one template
// argument.
#define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)           \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                    \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

// Same as BENCHMARK_F, for a fixture class template with two template
// arguments.
#define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b)           \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                       \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

// Same as BENCHMARK_F, for a fixture class template with any number of
// template arguments.
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...)                   \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                             \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
1638 
// Helper macro to create a main routine in a test that runs the benchmarks.
// It initializes the library from the command line, returns 1 if any
// arguments are unrecognized, then runs the selected benchmarks and shuts
// the library down. The trailing 'int main(int, char**)' swallows the
// semicolon the user writes after BENCHMARK_MAIN(); by turning it into a
// (re)declaration of main.
// Note the workaround for Hexagon simulator passing argc != 0, argv = NULL.
#define BENCHMARK_MAIN()                                                \
  int main(int argc, char** argv) {                                    \
    char arg0_default[] = "benchmark";                                 \
    char* args_default = reinterpret_cast<char*>(arg0_default);        \
    if (!argv) {                                                       \
      argc = 1;                                                        \
      argv = &args_default;                                            \
    }                                                                  \
    ::benchmark::Initialize(&argc, argv);                              \
    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
    ::benchmark::RunSpecifiedBenchmarks();                             \
    ::benchmark::Shutdown();                                           \
    return 0;                                                          \
  }                                                                    \
  int main(int, char**)
1656 
1657 // ------------------------------------------------------
1658 // Benchmark Reporters
1659 
1660 namespace benchmark {
1661 
// Information about the host CPU, exposed as a process-wide singleton
// via Get().
struct BENCHMARK_EXPORT CPUInfo {
  // Description of one CPU cache.
  struct CacheInfo {
    std::string type;  // cache kind as a string (e.g. "Unified") — see Get()'s implementation for exact values
    int level;         // cache level (L1, L2, ...)
    int size;          // size in bytes — presumably; confirm against sysinfo implementation
    int num_sharing;   // how many logical CPUs share this cache
  };

  // Whether CPU frequency scaling was detected on the host.
  enum Scaling { UNKNOWN, ENABLED, DISABLED };

  int num_cpus;
  Scaling scaling;
  double cycles_per_second;
  std::vector<CacheInfo> caches;
  std::vector<double> load_avg;

  // Returns the singleton instance.
  static const CPUInfo& Get();

 private:
  // Construction is restricted to Get(); instances cannot be copied.
  CPUInfo();
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
};
1684 
// System (host) information, exposed as a process-wide singleton via Get().
struct BENCHMARK_EXPORT SystemInfo {
  std::string name;  // host name — presumably; confirm against sysinfo implementation
  // Returns the singleton instance.
  static const SystemInfo& Get();

 private:
  // Construction is restricted to Get(); instances cannot be copied.
  SystemInfo();
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo);
};
1694 
// BenchmarkName contains the components of the Benchmark's name
// which allows individual fields to be modified or cleared before
// building the final name using 'str()'.
struct BENCHMARK_EXPORT BenchmarkName {
  std::string function_name;    // base name of the benchmark function/fixture
  std::string args;             // formatted argument list, if any
  std::string min_time;         // "min_time:..." component, if set
  std::string min_warmup_time;  // "min_warmup_time:..." component, if set
  std::string iterations;       // "iterations:..." component, if set
  std::string repetitions;      // "repeats:..." component, if set — confirm exact label in implementation
  std::string time_type;        // e.g. real/manual time marker — confirm in implementation
  std::string threads;          // "threads:..." component, if set

  // Return the full name of the benchmark with each non-empty
  // field separated by a '/'
  std::string str() const;
};
1712 
1713 // Interface for custom benchmark result printers.
1714 // By default, benchmark reports are printed to stdout. However an application
1715 // can control the destination of the reports by calling
1716 // RunSpecifiedBenchmarks and passing it a custom reporter object.
1717 // The reporter object must implement the following interface.
1718 class BENCHMARK_EXPORT BenchmarkReporter {
1719  public:
1720  struct Context {
1721  CPUInfo const& cpu_info;
1722  SystemInfo const& sys_info;
1723  // The number of chars in the longest benchmark name.
1724  size_t name_field_width = 0;
1725  static const char* executable_name;
1726  Context();
1727  };
1728 
1729  struct BENCHMARK_EXPORT Run {
1730  static const int64_t no_repetition_index = -1;
1731  enum RunType { RT_Iteration, RT_Aggregate };
1732 
1733  Run()
1734  : run_type(RT_Iteration),
1735  aggregate_unit(kTime),
1736  skipped(internal::NotSkipped),
1737  iterations(1),
1738  threads(1),
1739  time_unit(GetDefaultTimeUnit()),
1740  real_accumulated_time(0),
1741  cpu_accumulated_time(0),
1742  max_heapbytes_used(0),
1743  use_real_time_for_initial_big_o(false),
1744  complexity(oNone),
1745  complexity_lambda(),
1746  complexity_n(0),
1747  report_big_o(false),
1748  report_rms(false),
1749  allocs_per_iter(0.0) {}
1750 
1751  std::string benchmark_name() const;
1752  BenchmarkName run_name;
1753  int64_t family_index;
1754  int64_t per_family_instance_index;
1755  RunType run_type;
1756  std::string aggregate_name;
1757  StatisticUnit aggregate_unit;
1758  std::string report_label; // Empty if not set by benchmark.
1759  internal::Skipped skipped;
1760  std::string skip_message;
1761 
1762  IterationCount iterations;
1763  int64_t threads;
1764  int64_t repetition_index;
1765  int64_t repetitions;
1766  TimeUnit time_unit;
1767  double real_accumulated_time;
1768  double cpu_accumulated_time;
1769 
1770  // Return a value representing the real time per iteration in the unit
1771  // specified by 'time_unit'.
1772  // NOTE: If 'iterations' is zero the returned value represents the
1773  // accumulated time.
1774  double GetAdjustedRealTime() const;
1775 
1776  // Return a value representing the cpu time per iteration in the unit
1777  // specified by 'time_unit'.
1778  // NOTE: If 'iterations' is zero the returned value represents the
1779  // accumulated time.
1780  double GetAdjustedCPUTime() const;
1781 
1782  // This is set to 0.0 if memory tracing is not enabled.
1783  double max_heapbytes_used;
1784 
1785  // By default Big-O is computed for CPU time, but that is not what you want
1786  // to happen when manual time was requested, which is stored as real time.
1787  bool use_real_time_for_initial_big_o;
1788 
1789  // Keep track of arguments to compute asymptotic complexity
1790  BigO complexity;
1791  BigOFunc* complexity_lambda;
1792  ComplexityN complexity_n;
1793 
1794  // what statistics to compute from the measurements
1795  const std::vector<internal::Statistics>* statistics;
1796 
1797  // Inform print function whether the current run is a complexity report
1798  bool report_big_o;
1799  bool report_rms;
1800 
1801  UserCounters counters;
1802 
1803  // Memory metrics.
1804  MemoryManager::Result memory_result;
1805  double allocs_per_iter;
1806  };
1807 
1809  PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {}
1810 
1811  // How many runs will all instances of this benchmark perform?
1812  int num_runs_total;
1813 
1814  // How many runs have happened already?
1815  int num_runs_done;
1816 
1817  // The reports about (non-errneous!) runs of this family.
1818  std::vector<BenchmarkReporter::Run> Runs;
1819  };
1820 
1821  // Construct a BenchmarkReporter with the output stream set to 'std::cout'
1822  // and the error stream set to 'std::cerr'
1824 
1825  // Called once for every suite of benchmarks run.
1826  // The parameter "context" contains information that the
1827  // reporter may wish to use when generating its report, for example the
1828  // platform under which the benchmarks are running. The benchmark run is
1829  // never started if this function returns false, allowing the reporter
1830  // to skip runs based on the context information.
1831  virtual bool ReportContext(const Context& context) = 0;
1832 
1833  // Called once for each group of benchmark runs, gives information about
1834  // the configurations of the runs.
1835  virtual void ReportRunsConfig(double /*min_time*/,
1836  bool /*has_explicit_iters*/,
1837  IterationCount /*iters*/) {}
1838 
1839  // Called once for each group of benchmark runs, gives information about
1840  // cpu-time and heap memory usage during the benchmark run. If the group
1841  // of runs contained more than two entries then 'report' contains additional
1842  // elements representing the mean and standard deviation of those runs.
1843  // Additionally if this group of runs was the last in a family of benchmarks
1844  // 'reports' contains additional entries representing the asymptotic
1845  // complexity and RMS of that benchmark family.
1846  virtual void ReportRuns(const std::vector<Run>& report) = 0;
1847 
1848  // Called once and only once after ever group of benchmarks is run and
1849  // reported.
1850  virtual void Finalize() {}
1851 
1852  // REQUIRES: The object referenced by 'out' is valid for the lifetime
1853  // of the reporter.
1854  void SetOutputStream(std::ostream* out) {
1855  assert(out);
1856  output_stream_ = out;
1857  }
1858 
1859  // REQUIRES: The object referenced by 'err' is valid for the lifetime
1860  // of the reporter.
1861  void SetErrorStream(std::ostream* err) {
1862  assert(err);
1863  error_stream_ = err;
1864  }
1865 
1866  std::ostream& GetOutputStream() const { return *output_stream_; }
1867 
1868  std::ostream& GetErrorStream() const { return *error_stream_; }
1869 
1870  virtual ~BenchmarkReporter();
1871 
1872  // Write a human readable string to 'out' representing the specified
1873  // 'context'.
1874  // REQUIRES: 'out' is non-null.
1875  static void PrintBasicContext(std::ostream* out, Context const& context);
1876 
1877  private:
1878  std::ostream* output_stream_;
1879  std::ostream* error_stream_;
1880 };
1881 
// Simple reporter that outputs benchmark data to the console. This is the
// default reporter used by RunSpecifiedBenchmarks().
class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter {
 public:
  // Bit flags controlling console formatting; OO_ColorTabular is the
  // bitwise-or of OO_Color and OO_Tabular.
  enum OutputOptions {
    OO_None = 0,
    OO_Color = 1,
    OO_Tabular = 2,
    OO_ColorTabular = OO_Color | OO_Tabular,
    OO_Defaults = OO_ColorTabular
  };
  explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
      : output_options_(opts_), name_field_width_(0), printed_header_(false) {}

  bool ReportContext(const Context& context) override;
  void ReportRuns(const std::vector<Run>& reports) override;

 protected:
  // Prints a single run; virtual so subclasses can customize one row.
  virtual void PrintRunData(const Run& result);
  // Prints the column header row; virtual for the same reason.
  virtual void PrintHeader(const Run& run);

  OutputOptions output_options_;
  size_t name_field_width_;       // width used to align the name column
  UserCounters prev_counters_;    // counters from the previous run — presumably to detect header changes; verify in PrintRunData
  bool printed_header_;           // true once the header row has been emitted
};
1908 
// Reporter that emits benchmark results as a JSON document.
class BENCHMARK_EXPORT JSONReporter : public BenchmarkReporter {
 public:
  JSONReporter() : first_report_(true) {}
  bool ReportContext(const Context& context) override;
  void ReportRuns(const std::vector<Run>& reports) override;
  // Called once after all benchmarks are reported (closes the document).
  void Finalize() override;

 private:
  void PrintRunData(const Run& run);

  // Starts true; presumably cleared after the first run is printed so the
  // implementation can manage element separators — verify in ReportRuns.
  bool first_report_;
};
1921 
// Reporter that emits benchmark results in CSV form. Deprecated: prefer the
// JSON reporter for machine-readable output.
class BENCHMARK_EXPORT BENCHMARK_DEPRECATED_MSG(
    "The CSV Reporter will be removed in a future release") CSVReporter
    : public BenchmarkReporter {
 public:
  CSVReporter() : printed_header_(false) {}
  bool ReportContext(const Context& context) override;
  void ReportRuns(const std::vector<Run>& reports) override;

 private:
  void PrintRunData(const Run& run);

  // True once the CSV header row has been emitted.
  bool printed_header_;
  // Names of user counters seen so far — presumably used to keep counter
  // columns stable across rows; verify in ReportRuns.
  std::set<std::string> user_counter_names_;
};
1936 
1937 inline const char* GetTimeUnitString(TimeUnit unit) {
1938  switch (unit) {
1939  case kSecond:
1940  return "s";
1941  case kMillisecond:
1942  return "ms";
1943  case kMicrosecond:
1944  return "us";
1945  case kNanosecond:
1946  return "ns";
1947  }
1948  BENCHMARK_UNREACHABLE();
1949 }
1950 
1951 inline double GetTimeUnitMultiplier(TimeUnit unit) {
1952  switch (unit) {
1953  case kSecond:
1954  return 1;
1955  case kMillisecond:
1956  return 1e3;
1957  case kMicrosecond:
1958  return 1e6;
1959  case kNanosecond:
1960  return 1e9;
1961  }
1962  BENCHMARK_UNREACHABLE();
1963 }
1964 
// Creates a list of integer values for the given range and multiplier,
// i.e. values from 'lo' to 'hi' with a factor of 'multi' between
// consecutive entries.
// This can be used together with ArgsProduct() to allow multiple ranges
// with different multipliers.
// Example:
// ArgsProduct({
//   CreateRange(0, 1024, /*multi=*/32),
//   CreateRange(0, 100, /*multi=*/4),
//   CreateDenseRange(0, 4, /*step=*/1),
// });
BENCHMARK_EXPORT
std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi);

// Creates a list of integer values for the given range and step, i.e.
// values from 'start' to 'limit' advancing by 'step'.
BENCHMARK_EXPORT
std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step);
1980 
1981 } // namespace benchmark
1982 
1983 #if defined(_MSC_VER)
1984 #pragma warning(pop)
1985 #endif
1986 
1987 #endif // BENCHMARK_BENCHMARK_H_
Definition: benchmark.h:1718
Definition: benchmark.h:1884
Definition: benchmark.h:620
Definition: benchmark.h:1414
Definition: benchmark.h:1909
Definition: benchmark.h:410
Definition: benchmark.h:457
Definition: benchmark.h:743
Definition: benchmark_register.cc:73
Definition: benchmark_api_internal.h:18
Definition: benchmark.h:1106
Definition: benchmark.h:1365
Definition: benchmark.h:1377
Definition: perf_counters.h:149
Definition: thread_manager.h:12
Definition: thread_timer.h:10
Definition: benchmark.h:1698
Definition: benchmark.h:1720
Definition: benchmark.h:1729
Definition: benchmark.h:1663
Definition: benchmark.h:1662
Definition: benchmark.h:414
Definition: benchmark.h:1049
Definition: benchmark.h:1048
Definition: benchmark.h:1686
Definition: benchmark.h:698