benchmark 1.7.0
Loading...
Searching...
No Matches
perf_counters.h
1// Copyright 2021 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#ifndef BENCHMARK_PERF_COUNTERS_H
16#define BENCHMARK_PERF_COUNTERS_H
17
18#include <array>
19#include <cstdint>
20#include <memory>
21#include <vector>
22
23#include "benchmark/benchmark.h"
24#include "check.h"
25#include "log.h"
26#include "mutex.h"
27
28#ifndef BENCHMARK_OS_WINDOWS
29#include <unistd.h>
30#endif
31
32#if defined(_MSC_VER)
33#pragma warning(push)
34// C4251: <symbol> needs to have dll-interface to be used by clients of class
35#pragma warning(disable : 4251)
36#endif
37
38namespace benchmark {
39namespace internal {
40
41// Typically, we can only read a small number of counters. There is also a
42// padding preceding counter values, when reading multiple counters with one
43// syscall (which is desirable). PerfCounterValues abstracts these details.
44// The implementation ensures the storage is inlined, and allows 0-based
45// indexing into the counter values.
46// The object is used in conjunction with a PerfCounters object, by passing it
47// to Snapshot(). The values are populated such that
48// perfCounters->names()[i]'s value is obtained at position i (as given by
49// operator[]) of this object.
51 public:
52 explicit PerfCounterValues(size_t nr_counters) : nr_counters_(nr_counters) {
53 BM_CHECK_LE(nr_counters_, kMaxCounters);
54 }
55
56 uint64_t operator[](size_t pos) const { return values_[kPadding + pos]; }
57
58 static constexpr size_t kMaxCounters = 3;
59
60 private:
61 friend class PerfCounters;
62 // Get the byte buffer in which perf counters can be captured.
63 // This is used by PerfCounters::Read
64 std::pair<char*, size_t> get_data_buffer() {
65 return {reinterpret_cast<char*>(values_.data()),
66 sizeof(uint64_t) * (kPadding + nr_counters_)};
67 }
68
69 static constexpr size_t kPadding = 1;
70 std::array<uint64_t, kPadding + kMaxCounters> values_;
71 const size_t nr_counters_;
72};
73
// Collect PMU counters. The object, once constructed, is ready to be used by
// calling Snapshot(). PMU counter collection is enabled from the time Create()
// is called, to obtain the object, until the object's destructor is called.
77class BENCHMARK_EXPORT PerfCounters final {
78 public:
79 // True iff this platform supports performance counters.
80 static const bool kSupported;
81
82 bool IsValid() const { return !counter_names_.empty(); }
83 static PerfCounters NoCounters() { return PerfCounters(); }
84
85 ~PerfCounters() { CloseCounters(); }
86 PerfCounters(PerfCounters&&) = default;
87 PerfCounters(const PerfCounters&) = delete;
88 PerfCounters& operator=(PerfCounters&&) noexcept;
89 PerfCounters& operator=(const PerfCounters&) = delete;
90
91 // Platform-specific implementations may choose to do some library
92 // initialization here.
93 static bool Initialize();
94
95 // Return a PerfCounters object ready to read the counters with the names
96 // specified. The values are user-mode only. The counter name format is
97 // implementation and OS specific.
98 // TODO: once we move to C++-17, this should be a std::optional, and then the
99 // IsValid() boolean can be dropped.
100 static PerfCounters Create(const std::vector<std::string>& counter_names);
101
102 // Take a snapshot of the current value of the counters into the provided
103 // valid PerfCounterValues storage. The values are populated such that:
104 // names()[i]'s value is (*values)[i]
105 BENCHMARK_ALWAYS_INLINE bool Snapshot(PerfCounterValues* values) const {
106#ifndef BENCHMARK_OS_WINDOWS
107 assert(values != nullptr);
108 assert(IsValid());
109 auto buffer = values->get_data_buffer();
110 auto read_bytes = ::read(counter_ids_[0], buffer.first, buffer.second);
111 return static_cast<size_t>(read_bytes) == buffer.second;
112#else
113 (void)values;
114 return false;
115#endif
116 }
117
118 const std::vector<std::string>& names() const { return counter_names_; }
119 size_t num_counters() const { return counter_names_.size(); }
120
121 private:
122 PerfCounters(const std::vector<std::string>& counter_names,
123 std::vector<int>&& counter_ids)
124 : counter_ids_(std::move(counter_ids)), counter_names_(counter_names) {}
125 PerfCounters() = default;
126
127 void CloseCounters() const;
128
129 std::vector<int> counter_ids_;
130 std::vector<std::string> counter_names_;
131};
132
133// Typical usage of the above primitives.
134class BENCHMARK_EXPORT PerfCountersMeasurement final {
135 public:
136 PerfCountersMeasurement(const std::vector<std::string>& counter_names);
138
139 // The only way to get to `counters_` is after ctor-ing a
140 // `PerfCountersMeasurement`, which means that `counters_`'s state is, here,
141 // decided (either invalid or valid) and won't change again even if a ctor is
142 // concurrently running with this. This is preferring efficiency to
143 // maintainability, because the address of the static can be known at compile
144 // time.
145 bool IsValid() const {
146 MutexLock l(mutex_);
147 return counters_.IsValid();
148 }
149
150 BENCHMARK_ALWAYS_INLINE void Start() {
151 assert(IsValid());
152 MutexLock l(mutex_);
153 // Tell the compiler to not move instructions above/below where we take
154 // the snapshot.
155 ClobberMemory();
156 valid_read_ &= counters_.Snapshot(&start_values_);
157 ClobberMemory();
158 }
159
160 BENCHMARK_ALWAYS_INLINE bool Stop(
161 std::vector<std::pair<std::string, double>>& measurements) {
162 assert(IsValid());
163 MutexLock l(mutex_);
164 // Tell the compiler to not move instructions above/below where we take
165 // the snapshot.
166 ClobberMemory();
167 valid_read_ &= counters_.Snapshot(&end_values_);
168 ClobberMemory();
169
170 for (size_t i = 0; i < counters_.names().size(); ++i) {
171 double measurement = static_cast<double>(end_values_[i]) -
172 static_cast<double>(start_values_[i]);
173 measurements.push_back({counters_.names()[i], measurement});
174 }
175
176 return valid_read_;
177 }
178
179 private:
180 static Mutex mutex_;
181 GUARDED_BY(mutex_) static int ref_count_;
182 GUARDED_BY(mutex_) static PerfCounters counters_;
183 bool valid_read_ = true;
184 PerfCounterValues start_values_;
185 PerfCounterValues end_values_;
186};
187
188BENCHMARK_UNUSED static bool perf_init_anchor = PerfCounters::Initialize();
189
190} // namespace internal
191} // namespace benchmark
192
193#if defined(_MSC_VER)
194#pragma warning(pop)
195#endif
196
197#endif // BENCHMARK_PERF_COUNTERS_H
Definition: mutex.h:87
Definition: perf_counters.h:50
Definition: perf_counters.h:134
Definition: perf_counters.h:77