
Commit 28ecd66

Basic benchmark (accuracy & performance) for percentile aggregations.
1 parent 27c5dba · commit 28ecd66

1 file changed: 218 additions, 0 deletions

@@ -0,0 +1,218 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.benchmark.search.aggregations;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchType;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.StopWatch;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.SizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.node.Node;
import org.elasticsearch.search.aggregations.metrics.percentile.Percentiles;
import org.elasticsearch.search.aggregations.metrics.percentile.Percentiles.ExecutionHint;
import org.elasticsearch.search.aggregations.metrics.percentile.Percentiles.Percentile;

import java.util.*;
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.client.Requests.createIndexRequest;
import static org.elasticsearch.client.Requests.getRequest;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;
import static org.elasticsearch.search.aggregations.AggregationBuilders.percentiles;

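/**
 * Benchmark that compares the accuracy and the performance of the frugal, qdigest and tdigest
 * percentile implementations on uniform, gaussian and log-normal distributions of integer values.
 */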
public class PercentileAggregationSearchBenchmark {

    private static final int AMPLITUDE = 10000;
    private static final Map<String, ExecutionHint> EXECUTION_HINTS = ImmutableMap.of(
            "frugal", ExecutionHint.frugal(),
            "qdigest", ExecutionHint.qDigest(),
            "tdigest", ExecutionHint.tDigest());
    private static final int NUM_DOCS = (int) SizeValue.parseSizeValue("1m").singles();
    private static final int BATCH = 100;
    private static final String CLUSTER_NAME = PercentileAggregationSearchBenchmark.class.getSimpleName();
    private static final double[] PERCENTILES = new double[] { 0, 0.01, 0.1, 1, 10, 25, 50, 75, 90, 99, 99.9, 99.99, 100 };
    private static final int QUERY_WARMUP = 10;
    private static final int QUERY_COUNT = 20;

    private static Random R = new Random(0);

    // we generate ints to not disadvantage qdigest which only works with integers
    private enum Distribution {
        UNIFORM {
            @Override
            int next() {
                return (int) (R.nextDouble() * AMPLITUDE);
            }
        },
        GAUSS {
            @Override
            int next() {
                // truncated normal centered on AMPLITUDE / 2 and clipped to [0, AMPLITUDE)
                // so that generated values remain non-negative integers
                final double value = AMPLITUDE / 2 + R.nextGaussian() * (AMPLITUDE / 6);
                return (int) Math.min(Math.max(value, 0), AMPLITUDE - 1);
            }
        },
        LOG_NORMAL {
            @Override
            int next() {
                return (int) Math.exp(R.nextDouble() * Math.log(AMPLITUDE));
            }
        };

        String indexName() {
            return name().toLowerCase(Locale.ROOT);
        }

        abstract int next();
    }

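    /**
     * Exact percentile of the sorted values using linear interpolation between the closest ranks:
     * for instance the 50th percentile of [1, 2, 3, 4] maps to index 1.5 and is interpolated
     * as 2 * 0.5 + 3 * 0.5 = 2.5. These values are the reference that the approximate
     * implementations are measured against.
     */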
    private static double accuratePercentile(double percentile, int[] sortedValues) {
        final double index = percentile / 100 * (sortedValues.length - 1);
        final int intIndex = (int) index;
        final double delta = index - intIndex;
        if (delta == 0) {
            return sortedValues[intIndex];
        } else {
            return sortedValues[intIndex] * (1 - delta) + sortedValues[intIndex + 1] * delta;
        }
    }

    public static void main(String[] args) throws Exception {
        Settings settings = settingsBuilder()
                .put("index.refresh_interval", "-1")
                .put(SETTING_NUMBER_OF_SHARDS, 100) // to also test performance and accuracy of the reduce phase
                .put(SETTING_NUMBER_OF_REPLICAS, 0)
                .build();

        Node[] nodes = new Node[1];
        for (int i = 0; i < nodes.length; i++) {
            nodes[i] = nodeBuilder().clusterName(CLUSTER_NAME)
                    .settings(settingsBuilder().put(settings).put("name", "node" + i))
                    .node();
        }

        Node clientNode = nodeBuilder()
                .clusterName(CLUSTER_NAME)
                .settings(settingsBuilder().put(settings).put("name", "client")).client(true).node();

        Client client = clientNode.client();

        for (Distribution d : Distribution.values()) {
            try {
                client.admin().indices().create(createIndexRequest(d.indexName()).settings(settings)).actionGet();
            } catch (Exception e) {
                System.out.println("Index " + d.indexName() + " already exists, skipping index creation");
                continue;
            }

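            // Generate NUM_DOCS values for the current distribution, index them in bulk requests
            // of BATCH documents, then store the exact percentiles computed from the sorted array
            // in a dedicated "percentiles" document for the accuracy comparison below.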
            final int[] values = new int[NUM_DOCS];
            for (int i = 0; i < NUM_DOCS; ++i) {
                values[i] = d.next();
            }
            System.out.println("Indexing " + NUM_DOCS + " documents into " + d.indexName());
            StopWatch stopWatch = new StopWatch().start();
            for (int i = 0; i < NUM_DOCS; ) {
                BulkRequestBuilder request = client.prepareBulk();
                for (int j = 0; j < BATCH && i < NUM_DOCS; ++j) {
                    request.add(client.prepareIndex(d.indexName(), "values", Integer.toString(i)).setSource("v", values[i]));
                    ++i;
                }
                BulkResponse response = request.execute().actionGet();
                if (response.hasFailures()) {
                    System.err.println("--> failures...");
                    System.err.println(response.buildFailureMessage());
                }
                if ((i % 100000) == 0) {
                    System.out.println("--> Indexed " + i + " took " + stopWatch.stop().lastTaskTime());
                    stopWatch.start();
                }
            }
            Arrays.sort(values);
            XContentBuilder builder = JsonXContent.contentBuilder().startObject();
            for (double percentile : PERCENTILES) {
                builder.field(Double.toString(percentile), accuratePercentile(percentile, values));
            }
            client.prepareIndex(d.indexName(), "values", "percentiles").setSource(builder.endObject()).execute().actionGet();
            client.admin().indices().prepareRefresh(d.indexName()).execute().actionGet();
        }

        ClusterHealthResponse clusterHealthResponse = client.admin().cluster().prepareHealth().setWaitForGreenStatus().setTimeout("10m").execute().actionGet();
        if (clusterHealthResponse.isTimedOut()) {
            System.err.println("--> Timed out waiting for cluster health");
        }

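        // Accuracy: for every distribution and execution hint, run the percentiles aggregation
        // and report the sum of squared errors against the exact percentiles stored above
        // (the count check accounts for the extra "percentiles" document).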
System.out.println("## Precision");
171+
for (Distribution d : Distribution.values()) {
172+
System.out.println("#### " + d);
173+
final long count = client.prepareCount(d.indexName()).setQuery(matchAllQuery()).execute().actionGet().getCount();
174+
if (count != NUM_DOCS + 1) {
175+
throw new Error("Expected " + NUM_DOCS + " documents, got " + (count - 1));
176+
}
177+
Map<String, Object> percentilesUnsorted = client.get(getRequest(d.indexName()).type("values").id("percentiles")).actionGet().getSourceAsMap();
178+
SortedMap<Double, Double> percentiles = Maps.newTreeMap();
179+
for (Map.Entry<String, Object> entry : percentilesUnsorted.entrySet()) {
180+
percentiles.put(Double.parseDouble(entry.getKey()), (Double) entry.getValue());
181+
}
182+
System.out.println("Expected percentiles: " + percentiles);
183+
System.out.println();
184+
for (Map.Entry<String, ExecutionHint> executionHint : EXECUTION_HINTS.entrySet()) {
185+
SearchResponse resp = client.prepareSearch(d.indexName()).setSearchType(SearchType.COUNT).addAggregation(percentiles("pcts").executionHint(executionHint.getValue()).field("v").percentiles(PERCENTILES)).execute().actionGet();
186+
Percentiles pcts = resp.getAggregations().get("pcts");
187+
Map<Double, Double> asMap = Maps.newLinkedHashMap();
188+
double sumOfErrorSquares = 0;
189+
for (Percentile percentile : pcts) {
190+
asMap.put(percentile.getPercent(), percentile.getValue());
191+
double error = percentile.getValue() - percentiles.get(percentile.getPercent());
192+
sumOfErrorSquares += error * error;
193+
}
194+
System.out.println(executionHint.getKey() + ": " + asMap);
195+
System.out.println("Sum of error squares: " + sumOfErrorSquares);
196+
System.out.println();
197+
}
198+
}
199+
200+
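        // Performance: repeat the whole suite 3 times; for each distribution and execution hint,
        // issue QUERY_WARMUP warmup requests and then report the average response time over
        // QUERY_COUNT timed requests.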
System.out.println("## Performance");
201+
for (int i = 0; i < 3; ++i) {
202+
for (Distribution d : Distribution.values()) {
203+
System.out.println("#### " + d);
204+
for (Map.Entry<String, ExecutionHint> executionHint : EXECUTION_HINTS.entrySet()) {
205+
for (int j = 0; j < QUERY_WARMUP; ++j) {
206+
client.prepareSearch(d.indexName()).setSearchType(SearchType.COUNT).addAggregation(percentiles("pcts").executionHint(executionHint.getValue()).field("v").percentiles(PERCENTILES)).execute().actionGet();
207+
}
208+
long start = System.nanoTime();
209+
for (int j = 0; j < QUERY_COUNT; ++j) {
210+
client.prepareSearch(d.indexName()).setSearchType(SearchType.COUNT).addAggregation(percentiles("pcts").executionHint(executionHint.getValue()).field("v").percentiles(PERCENTILES)).execute().actionGet();
211+
}
212+
System.out.println(executionHint.getKey() + ": " + new TimeValue((System.nanoTime() - start) / QUERY_COUNT, TimeUnit.NANOSECONDS));
213+
}
214+
}
215+
}
216+
}
217+
218+
}
