Skip to content

Commit fc38817

Browse files
authored
CPP-964 Add refresh-interval support for histogram metrics (#561)
1 parent 8d65d1e commit fc38817

File tree

7 files changed

+210
-33
lines changed

7 files changed

+210
-33
lines changed

include/cassandra.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2934,6 +2934,28 @@ CASS_EXPORT void
29342934
cass_cluster_set_monitor_reporting_interval(CassCluster* cluster,
29352935
unsigned interval_secs);
29362936

2937+
/**
2938+
* Sets the amount of time after which metric histograms should be refreshed.
2939+
* Upon refresh histograms are reset to zero, effectively dropping any history to
2940+
* that point. Refresh occurs when a snapshot is requested so ths value should
2941+
* be thought of as a minimum time to refresh.
2942+
*
2943+
* If refresh is not enabled the driver will continue to accumulate histogram
2944+
* data over the life of a session; this is the default behaviour and replicates
2945+
* the behaviour of previous versions.
2946+
*
2947+
* Note that the specified interval must be > 0 otherwise CASS_ERROR_LIB_BAD_PARAMS
2948+
* will be returned.
2949+
*
2950+
* @public @memberof CassCluster
2951+
*
2952+
* @param cluster
2953+
* @param refresh_interval Minimum interval (in milliseconds) for refresh interval
2954+
*/
2955+
CASS_EXPORT CassError
2956+
cass_cluster_set_histogram_refresh_interval(CassCluster* cluster,
2957+
unsigned refresh_interval);
2958+
29372959
/***********************************************************************************
29382960
*
29392961
* Session

src/cluster_config.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -577,6 +577,14 @@ void cass_cluster_set_monitor_reporting_interval(CassCluster* cluster, unsigned
577577
cluster->config().set_monitor_reporting_interval_secs(interval_secs);
578578
}
579579

580+
CassError cass_cluster_set_histogram_refresh_interval(CassCluster* cluster, unsigned refresh_interval) {
581+
if (refresh_interval <= 0) {
582+
return CASS_ERROR_LIB_BAD_PARAMS;
583+
}
584+
cluster->config().set_cluster_histogram_refresh_interval(refresh_interval);
585+
return CASS_OK;
586+
}
587+
580588
void cass_cluster_free(CassCluster* cluster) { delete cluster->from(); }
581589

582590
} // extern "C"

src/config.hpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ class Config {
7474
, is_client_id_set_(false)
7575
, host_listener_(new DefaultHostListener())
7676
, monitor_reporting_interval_secs_(CASS_DEFAULT_CLIENT_MONITOR_EVENTS_INTERVAL_SECS)
77-
, cluster_metadata_resolver_factory_(new DefaultClusterMetadataResolverFactory()) {
77+
, cluster_metadata_resolver_factory_(new DefaultClusterMetadataResolverFactory())
78+
, histogram_refresh_interval_(CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_REFRESH) {
7879
profiles_.set_empty_key(String());
7980

8081
// Assign the defaults to the cluster profile
@@ -392,6 +393,12 @@ class Config {
392393
}
393394
}
394395

396+
unsigned cluster_histogram_refresh_interval() const { return histogram_refresh_interval_; }
397+
398+
void set_cluster_histogram_refresh_interval(unsigned refresh_interval) {
399+
histogram_refresh_interval_ = refresh_interval;
400+
}
401+
395402
private:
396403
void init_profiles();
397404

@@ -441,6 +448,7 @@ class Config {
441448
unsigned monitor_reporting_interval_secs_;
442449
CloudSecureConnectionConfig cloud_secure_connection_config_;
443450
ClusterMetadataResolverFactory::Ptr cluster_metadata_resolver_factory_;
451+
unsigned histogram_refresh_interval_;
444452
};
445453

446454
}}} // namespace datastax::internal::core

src/constants.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@
142142
#define CASS_DEFAULT_MAX_TRACING_DATA_WAIT_TIME_MS 15
143143
#define CASS_DEFAULT_RETRY_TRACING_DATA_WAIT_TIME_MS 3
144144
#define CASS_DEFAULT_TRACING_CONSISTENCY CASS_CONSISTENCY_ONE
145+
#define CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_REFRESH 0
145146

146147
// Request-level defaults
147148
#define CASS_DEFAULT_CONSISTENCY CASS_CONSISTENCY_LOCAL_ONE

src/metrics.hpp

Lines changed: 83 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "allocated.hpp"
2424
#include "atomic.hpp"
2525
#include "constants.hpp"
26+
#include "get_time.hpp"
2627
#include "scoped_lock.hpp"
2728
#include "scoped_ptr.hpp"
2829
#include "utils.hpp"
@@ -268,9 +269,15 @@ class Metrics : public Allocated {
268269
int64_t percentile_999th;
269270
};
270271

271-
Histogram(ThreadState* thread_state)
272+
Histogram(ThreadState* thread_state, unsigned refresh_interval = CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_REFRESH)
272273
: thread_state_(thread_state)
273-
, histograms_(new PerThreadHistogram[thread_state->max_threads()]) {
274+
, histograms_(new PerThreadHistogram[thread_state->max_threads()])
275+
, zero_snapshot_(Snapshot {0,0,0,0,0,0,0,0,0,0}) {
276+
277+
refresh_interval_ = refresh_interval;
278+
refresh_timestamp_ = get_time_since_epoch_ms();
279+
cached_snapshot_ = zero_snapshot_;
280+
274281
hdr_init(1LL, HIGHEST_TRACKABLE_VALUE, 3, &histogram_);
275282
uv_mutex_init(&mutex_);
276283
}
@@ -286,38 +293,76 @@ class Metrics : public Allocated {
286293

287294
void get_snapshot(Snapshot* snapshot) const {
288295
ScopedMutex l(&mutex_);
289-
hdr_histogram* h = histogram_;
290-
for (size_t i = 0; i < thread_state_->max_threads(); ++i) {
291-
histograms_[i].add(h);
296+
297+
// In the "no refresh" case (the default) fall back to the old behaviour; add per-thread
298+
// timestamps to histogram_ (without any clearing of data) and return what's there.
299+
if (refresh_interval_ == CASS_DEFAULT_HISTOGRAM_REFRESH_INTERVAL_NO_REFRESH) {
300+
301+
for (size_t i = 0; i < thread_state_->max_threads(); ++i) {
302+
histograms_[i].add(histogram_);
303+
}
304+
305+
if (histogram_->total_count == 0) {
306+
// There is no data; default to 0 for the stats.
307+
copy_snapshot(zero_snapshot_, snapshot);
308+
} else {
309+
histogram_to_snapshot(histogram_, snapshot);
310+
}
311+
return;
292312
}
293313

294-
if (h->total_count == 0) {
295-
// There is no data; default to 0 for the stats.
296-
snapshot->max = 0;
297-
snapshot->min = 0;
298-
snapshot->mean = 0;
299-
snapshot->stddev = 0;
300-
snapshot->median = 0;
301-
snapshot->percentile_75th = 0;
302-
snapshot->percentile_95th = 0;
303-
snapshot->percentile_98th = 0;
304-
snapshot->percentile_99th = 0;
305-
snapshot->percentile_999th = 0;
306-
} else {
307-
snapshot->max = hdr_max(h);
308-
snapshot->min = hdr_min(h);
309-
snapshot->mean = static_cast<int64_t>(hdr_mean(h));
310-
snapshot->stddev = static_cast<int64_t>(hdr_stddev(h));
311-
snapshot->median = hdr_value_at_percentile(h, 50.0);
312-
snapshot->percentile_75th = hdr_value_at_percentile(h, 75.0);
313-
snapshot->percentile_95th = hdr_value_at_percentile(h, 95.0);
314-
snapshot->percentile_98th = hdr_value_at_percentile(h, 98.0);
315-
snapshot->percentile_99th = hdr_value_at_percentile(h, 99.0);
316-
snapshot->percentile_999th = hdr_value_at_percentile(h, 99.9);
314+
// Refresh interval is in use. If we've exceeded the interval clear histogram_,
315+
// compute a new aggregate histogram and build (and cache) a new snapshot. Otherwise
316+
// just return the cached version.
317+
uint64_t now = get_time_since_epoch_ms();
318+
if (now - refresh_timestamp_ >= refresh_interval_) {
319+
320+
hdr_reset(histogram_);
321+
322+
for (size_t i = 0; i < thread_state_->max_threads(); ++i) {
323+
histograms_[i].add(histogram_);
324+
}
325+
326+
if (histogram_->total_count == 0) {
327+
copy_snapshot(zero_snapshot_, &cached_snapshot_);
328+
} else {
329+
histogram_to_snapshot(histogram_, &cached_snapshot_);
330+
}
331+
refresh_timestamp_ = now;
317332
}
333+
334+
copy_snapshot(cached_snapshot_, snapshot);
318335
}
319336

320337
private:
338+
339+
void copy_snapshot(Snapshot from, Snapshot* to) const {
340+
to->min = from.min;
341+
to->max = from.max;
342+
to->mean = from.mean;
343+
to->stddev = from.stddev;
344+
to->median = from.median;
345+
to->percentile_75th = from.percentile_75th;
346+
to->percentile_95th = from.percentile_95th;
347+
to->percentile_98th = from.percentile_98th;
348+
to->percentile_99th = from.percentile_99th;
349+
to->percentile_999th = from.percentile_999th;
350+
}
351+
352+
void histogram_to_snapshot(hdr_histogram* h, Snapshot* to) const {
353+
to->min = hdr_min(h);
354+
to->max = hdr_max(h);
355+
to->mean = static_cast<int64_t>(hdr_mean(h));
356+
to->stddev = static_cast<int64_t>(hdr_stddev(h));
357+
to->median = hdr_value_at_percentile(h, 50.0);
358+
to->percentile_75th = hdr_value_at_percentile(h, 75.0);
359+
to->percentile_95th = hdr_value_at_percentile(h, 95.0);
360+
to->percentile_98th = hdr_value_at_percentile(h, 98.0);
361+
to->percentile_99th = hdr_value_at_percentile(h, 99.0);
362+
to->percentile_999th = hdr_value_at_percentile(h, 99.9);
363+
}
364+
365+
321366
class WriterReaderPhaser {
322367
public:
323368
WriterReaderPhaser()
@@ -409,14 +454,19 @@ class Metrics : public Allocated {
409454
hdr_histogram* histogram_;
410455
mutable uv_mutex_t mutex_;
411456

457+
unsigned refresh_interval_;
458+
mutable uint64_t refresh_timestamp_;
459+
mutable Snapshot cached_snapshot_;
460+
const Snapshot zero_snapshot_;
461+
412462
private:
413463
DISALLOW_COPY_AND_ASSIGN(Histogram);
414464
};
415465

416-
Metrics(size_t max_threads)
466+
Metrics(size_t max_threads, unsigned histogram_refresh_interval)
417467
: thread_state_(max_threads)
418-
, request_latencies(&thread_state_)
419-
, speculative_request_latencies(&thread_state_)
468+
, request_latencies(&thread_state_, histogram_refresh_interval)
469+
, speculative_request_latencies(&thread_state_, histogram_refresh_interval)
420470
, request_rates(&thread_state_)
421471
, total_connections(&thread_state_)
422472
, connection_timeouts(&thread_state_)
@@ -447,6 +497,8 @@ class Metrics : public Allocated {
447497
Counter connection_timeouts;
448498
Counter request_timeouts;
449499

500+
unsigned histogram_refresh_interval;
501+
450502
private:
451503
DISALLOW_COPY_AND_ASSIGN(Metrics);
452504
};

src/session_base.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ Future::Ptr SessionBase::connect(const Config& config, const String& keyspace) {
9797
random_.reset();
9898
}
9999

100-
metrics_.reset(new Metrics(config.thread_count_io() + 1));
100+
metrics_.reset(new Metrics(config.thread_count_io() + 1, config.cluster_histogram_refresh_interval()));
101101

102102
cluster_.reset();
103103
ClusterConnector::Ptr connector(

tests/src/unit/tests/test_metrics.cpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,92 @@ TEST(MetricsUnitTest, HistogramEmpty) {
144144
EXPECT_EQ(snapshot.percentile_999th, 0);
145145
}
146146

147+
TEST(MetricsUnitTest, HistogramWithRefreshInterval) {
148+
unsigned refresh_interval = 1000;
149+
Metrics::ThreadState thread_state(1);
150+
Metrics::Histogram histogram(&thread_state, refresh_interval);
151+
152+
Metrics::Histogram::Snapshot snapshot;
153+
154+
// Retrieval before the first interval runs will simply return zeros
155+
histogram.get_snapshot(&snapshot);
156+
EXPECT_EQ(snapshot.min, 0);
157+
EXPECT_EQ(snapshot.max, 0);
158+
EXPECT_EQ(snapshot.median, 0);
159+
EXPECT_EQ(snapshot.percentile_75th, 0);
160+
EXPECT_EQ(snapshot.percentile_95th, 0);
161+
EXPECT_EQ(snapshot.percentile_98th, 0);
162+
EXPECT_EQ(snapshot.percentile_99th, 0);
163+
EXPECT_EQ(snapshot.percentile_999th, 0);
164+
EXPECT_EQ(snapshot.mean, 0);
165+
EXPECT_EQ(snapshot.stddev, 0);
166+
167+
// Values added during the first interval (or for that matter any
168+
// interval) will be buffered in per-thread counters and will be
169+
// included in the next generated snapshot
170+
for (uint64_t i = 1; i <= 100; ++i) {
171+
histogram.record_value(i);
172+
}
173+
test::Utils::msleep(1.2 * refresh_interval);
174+
175+
histogram.get_snapshot(&snapshot);
176+
EXPECT_EQ(snapshot.min, 1);
177+
EXPECT_EQ(snapshot.max, 100);
178+
EXPECT_EQ(snapshot.median, 50);
179+
EXPECT_EQ(snapshot.percentile_75th, 75);
180+
EXPECT_EQ(snapshot.percentile_95th, 95);
181+
EXPECT_EQ(snapshot.percentile_98th, 98);
182+
EXPECT_EQ(snapshot.percentile_99th, 99);
183+
EXPECT_EQ(snapshot.percentile_999th, 100);
184+
EXPECT_EQ(snapshot.mean, 50);
185+
EXPECT_EQ(snapshot.stddev, 28);
186+
187+
// Generated snapshot should only include values added within
188+
// the current interval
189+
test::Utils::msleep(1.2 * refresh_interval);
190+
for (uint64_t i = 101; i <= 200; ++i) {
191+
histogram.record_value(i);
192+
}
193+
194+
histogram.get_snapshot(&snapshot);
195+
EXPECT_EQ(snapshot.min, 101);
196+
EXPECT_EQ(snapshot.max, 200);
197+
EXPECT_EQ(snapshot.median, 150);
198+
EXPECT_EQ(snapshot.percentile_75th, 175);
199+
EXPECT_EQ(snapshot.percentile_95th, 195);
200+
EXPECT_EQ(snapshot.percentile_98th, 198);
201+
EXPECT_EQ(snapshot.percentile_99th, 199);
202+
EXPECT_EQ(snapshot.percentile_999th, 200);
203+
EXPECT_EQ(snapshot.mean, 150);
204+
EXPECT_EQ(snapshot.stddev, 28);
205+
}
206+
207+
// Variant of the case above. If we have no requests for the entirety
208+
// of the refresh interval make sure the stats return zero
209+
TEST(MetricsUnitTest, HistogramWithRefreshIntervalNoActivity) {
210+
unsigned refresh_interval = 1000;
211+
Metrics::ThreadState thread_state(1);
212+
Metrics::Histogram histogram(&thread_state, refresh_interval);
213+
214+
Metrics::Histogram::Snapshot snapshot;
215+
216+
// Initial refresh interval (where we always return zero) + another interval of
217+
// no activity
218+
test::Utils::msleep(2.2 * refresh_interval);
219+
220+
histogram.get_snapshot(&snapshot);
221+
EXPECT_EQ(snapshot.min, 0);
222+
EXPECT_EQ(snapshot.max, 0);
223+
EXPECT_EQ(snapshot.median, 0);
224+
EXPECT_EQ(snapshot.percentile_75th, 0);
225+
EXPECT_EQ(snapshot.percentile_95th, 0);
226+
EXPECT_EQ(snapshot.percentile_98th, 0);
227+
EXPECT_EQ(snapshot.percentile_99th, 0);
228+
EXPECT_EQ(snapshot.percentile_999th, 0);
229+
EXPECT_EQ(snapshot.mean, 0);
230+
EXPECT_EQ(snapshot.stddev, 0);
231+
}
232+
147233
TEST(MetricsUnitTest, HistogramWithThreads) {
148234
HistogramThreadArgs args[NUM_THREADS];
149235

0 commit comments

Comments
 (0)