Skip to content

Commit 32e4da0

Browse files
lfittlCommitfest Bot
authored andcommitted
pg_test_timing: Add --fast flag to test fast timing, report time source
In passing also reduce the per-loop overhead caused by repeated divisions in INSTR_TIME_GET_NANOSEC when the ticks variable has become very large, instead diff first and then turn it into nanosecs.
1 parent 319d60a commit 32e4da0

File tree

2 files changed

+65
-24
lines changed

2 files changed

+65
-24
lines changed

src/bin/pg_test_timing/pg_test_timing.c

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ static const char *progname;
1616

1717
static unsigned int test_duration = 3;
1818
static double max_rprct = 99.99;
19+
static bool fast_timing = false;
1920

2021
/* record duration in powers of 2 nanoseconds */
2122
static long long int histogram[32];
@@ -56,6 +57,7 @@ handle_args(int argc, char *argv[])
5657
static struct option long_options[] = {
5758
{"duration", required_argument, NULL, 'd'},
5859
{"cutoff", required_argument, NULL, 'c'},
60+
{"fast", no_argument, NULL, 'f'},
5961
{NULL, 0, NULL, 0}
6062
};
6163

@@ -68,7 +70,7 @@ handle_args(int argc, char *argv[])
6870
{
6971
if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
7072
{
71-
printf(_("Usage: %s [-d DURATION] [-c CUTOFF]\n"), progname);
73+
printf(_("Usage: %s [-d DURATION] [-c CUTOFF] [--fast]\n"), progname);
7274
exit(0);
7375
}
7476
if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
@@ -78,7 +80,7 @@ handle_args(int argc, char *argv[])
7880
}
7981
}
8082

81-
while ((option = getopt_long(argc, argv, "d:c:",
83+
while ((option = getopt_long(argc, argv, "d:c:f:",
8284
long_options, &optindex)) != -1)
8385
{
8486
switch (option)
@@ -125,6 +127,10 @@ handle_args(int argc, char *argv[])
125127
}
126128
break;
127129

130+
case 'f':
131+
fast_timing = true;
132+
break;
133+
128134
default:
129135
fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
130136
progname);
@@ -155,11 +161,31 @@ test_timing(unsigned int duration)
155161
uint64 total_time;
156162
int64 time_elapsed = 0;
157163
uint64 loop_count = 0;
158-
uint64 prev,
159-
cur;
160164
instr_time start_time,
161165
end_time,
162-
temp;
166+
prev,
167+
cur;
168+
char *time_source = NULL;
169+
bool fast_timing_used = false;
170+
171+
INSTR_TIME_INITIALIZE();
172+
173+
#if !defined(WIN32) && defined(__x86_64__) && defined(__linux__)
174+
if (fast_timing && has_rdtsc)
175+
{
176+
time_source = "RDTSC";
177+
fast_timing_used = true;
178+
}
179+
else if (has_rdtscp)
180+
time_source = "RDTSCP";
181+
else
182+
time_source = PG_INSTR_CLOCK_NAME;
183+
#else
184+
time_source = PG_INSTR_CLOCK_NAME;
185+
#endif
186+
if (fast_timing && !fast_timing_used)
187+
printf(_("Warning: Fast timing requested, but not available - regular timing source will be used\n"));
188+
printf(_("Time source: %s\n"), time_source);
163189

164190
/*
165191
* Pre-zero the statistics data structures. They're already zero by
@@ -173,18 +199,23 @@ test_timing(unsigned int duration)
173199

174200
total_time = duration > 0 ? duration * INT64CONST(1000000000) : 0;
175201

176-
INSTR_TIME_SET_CURRENT(start_time);
177-
cur = INSTR_TIME_GET_NANOSEC(start_time);
202+
if (fast_timing)
203+
INSTR_TIME_SET_CURRENT_FAST(start_time);
204+
else
205+
INSTR_TIME_SET_CURRENT(start_time);
206+
cur = start_time;
178207

179208
while (time_elapsed < total_time)
180209
{
181210
int32 diff,
182211
bits;
183212

184213
prev = cur;
185-
INSTR_TIME_SET_CURRENT(temp);
186-
cur = INSTR_TIME_GET_NANOSEC(temp);
187-
diff = cur - prev;
214+
if (fast_timing)
215+
INSTR_TIME_SET_CURRENT_FAST(cur);
216+
else
217+
INSTR_TIME_SET_CURRENT(cur);
218+
diff = INSTR_TIME_DIFF_NANOSEC(cur, prev);
188219

189220
/* Did time go backwards? */
190221
if (unlikely(diff < 0))
@@ -217,11 +248,13 @@ test_timing(unsigned int duration)
217248
largest_diff_count++;
218249

219250
loop_count++;
220-
INSTR_TIME_SUBTRACT(temp, start_time);
221-
time_elapsed = INSTR_TIME_GET_NANOSEC(temp);
251+
time_elapsed = INSTR_TIME_DIFF_NANOSEC(cur, start_time);
222252
}
223253

224-
INSTR_TIME_SET_CURRENT(end_time);
254+
if (fast_timing)
255+
INSTR_TIME_SET_CURRENT_FAST(end_time);
256+
else
257+
INSTR_TIME_SET_CURRENT(end_time);
225258

226259
INSTR_TIME_SUBTRACT(end_time, start_time);
227260

src/include/portability/instr_time.h

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,13 @@ extern int64 max_ticks_no_overflow;
112112
*/
113113
#if defined(__darwin__) && defined(CLOCK_MONOTONIC_RAW)
114114
#define PG_INSTR_CLOCK CLOCK_MONOTONIC_RAW
115+
#define PG_INSTR_CLOCK_NAME "clock_gettime (CLOCK_MONOTONIC_RAW)"
115116
#elif defined(CLOCK_MONOTONIC)
116117
#define PG_INSTR_CLOCK CLOCK_MONOTONIC
118+
#define PG_INSTR_CLOCK_NAME "clock_gettime (CLOCK_MONOTONIC)"
117119
#else
118120
#define PG_INSTR_CLOCK CLOCK_REALTIME
121+
#define PG_INSTR_CLOCK_NAME "clock_gettime (CLOCK_REALTIME)"
119122
#endif
120123

121124
#if defined(__x86_64__) && defined(__linux__)
@@ -174,7 +177,7 @@ pg_get_ticks(void)
174177
}
175178

176179
static inline int64_t
177-
pg_ticks_to_ns(instr_time t)
180+
pg_ticks_to_ns(int64 ticks)
178181
{
179182
/*
180183
* Would multiplication overflow? If so perform computation in two parts.
@@ -183,7 +186,7 @@ pg_ticks_to_ns(instr_time t)
183186
*/
184187
int64 ns = 0;
185188

186-
if (unlikely(t.ticks > max_ticks_no_overflow))
189+
if (unlikely(ticks > max_ticks_no_overflow))
187190
{
188191
/*
189192
* Compute how often the maximum number of ticks fits completely into
@@ -192,7 +195,7 @@ pg_ticks_to_ns(instr_time t)
192195
* value. In a 2nd step we adjust the number of elapsed ticks and
193196
* convert the remaining ticks.
194197
*/
195-
int64 count = t.ticks / max_ticks_no_overflow;
198+
int64 count = ticks / max_ticks_no_overflow;
196199
int64 max_ns = max_ticks_no_overflow * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION;
197200

198201
ns = max_ns * count;
@@ -201,11 +204,11 @@ pg_ticks_to_ns(instr_time t)
201204
* Subtract the ticks that we now already accounted for, so that they
202205
* don't get counted twice.
203206
*/
204-
t.ticks -= count * max_ticks_no_overflow;
205-
Assert(t.ticks >= 0);
207+
ticks -= count * max_ticks_no_overflow;
208+
Assert(ticks >= 0);
206209
}
207210

208-
ns += t.ticks * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION;
211+
ns += ticks * ticks_per_ns_scaled / TICKS_TO_NS_PRECISION;
209212
return ns;
210213
}
211214

@@ -226,14 +229,14 @@ pg_initialize_get_ticks()
226229
#define INSTR_TIME_SET_CURRENT(t) \
227230
((t) = pg_get_ticks())
228231

229-
#define INSTR_TIME_GET_NANOSEC(t) \
230-
pg_ticks_to_ns(t)
231-
232+
#define INSTR_TIME_TICKS_TO_NANOSEC(ticks) \
233+
(pg_ticks_to_ns(ticks))
232234

233235
#else /* WIN32 */
234236

235237

236238
/* Use QueryPerformanceCounter() */
239+
#define PG_INSTR_CLOCK_NAME "QueryPerformanceCounter"
237240

238241
/* helper for INSTR_TIME_SET_CURRENT / INSTR_TIME_SET_CURRENT_FAST */
239242
static inline instr_time
@@ -265,8 +268,8 @@ GetTimerFrequency(void)
265268
#define INSTR_TIME_SET_CURRENT(t) \
266269
((t) = pg_query_performance_counter())
267270

268-
#define INSTR_TIME_GET_NANOSEC(t) \
269-
((int64) ((t).ticks * ((double) NS_PER_S / GetTimerFrequency())))
271+
#define INSTR_TIME_TICKS_TO_NANOSEC(ticks) \
272+
((int64) ((ticks) * ((double) NS_PER_S / GetTimerFrequency())))
270273

271274
#endif /* WIN32 */
272275

@@ -285,9 +288,14 @@ GetTimerFrequency(void)
285288
#define INSTR_TIME_SUBTRACT(x,y) \
286289
((x).ticks -= (y).ticks)
287290

291+
#define INSTR_TIME_DIFF_NANOSEC(x,y) \
292+
(INSTR_TIME_TICKS_TO_NANOSEC((x).ticks - (y).ticks))
293+
288294
#define INSTR_TIME_ACCUM_DIFF(x,y,z) \
289295
((x).ticks += (y).ticks - (z).ticks)
290296

297+
#define INSTR_TIME_GET_NANOSEC(t) \
298+
(INSTR_TIME_TICKS_TO_NANOSEC((t).ticks))
291299

292300
#define INSTR_TIME_GET_DOUBLE(t) \
293301
((double) INSTR_TIME_GET_NANOSEC(t) / NS_PER_S)

0 commit comments

Comments
 (0)