Skip to content

Commit f5896f3

Browse files
kimchy authored and Matt Hartzler committed
add and/not/or docid sets, not just docsets, and improve caching behavior
1 parent 660d036 commit f5896f3

File tree

9 files changed

+492
-22
lines changed

9 files changed

+492
-22
lines changed
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
/*
2+
* Licensed to Elastic Search and Shay Banon under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. Elastic Search licenses this
6+
* file to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.common.lucene.docset;
21+
22+
import org.apache.lucene.search.DocIdSet;
23+
import org.apache.lucene.search.DocIdSetIterator;
24+
25+
import java.io.IOException;
26+
import java.util.List;
27+
28+
/**
29+
* @author kimchy (shay.banon)
30+
*/
31+
public class AndDocIdSet extends DocIdSet {
32+
33+
private final List<DocIdSet> sets;
34+
35+
public AndDocIdSet(List<DocIdSet> sets) {
36+
this.sets = sets;
37+
}
38+
39+
@Override public boolean isCacheable() {
40+
// not cacheable, the reason is that by default, when constructing the filter, it is not cacheable,
41+
// so if someone wants it to be cacheable, we might as well construct a cached version of the result
42+
return false;
43+
// for (DocIdSet set : sets) {
44+
// if (!set.isCacheable()) {
45+
// return false;
46+
// }
47+
// }
48+
// return true;
49+
}
50+
51+
@Override public DocIdSetIterator iterator() throws IOException {
52+
return new AndDocIdSetIterator();
53+
}
54+
55+
class AndDocIdSetIterator extends DocIdSetIterator {
56+
int lastReturn = -1;
57+
private DocIdSetIterator[] iterators = null;
58+
59+
AndDocIdSetIterator() throws IOException {
60+
iterators = new DocIdSetIterator[sets.size()];
61+
int j = 0;
62+
for (DocIdSet set : sets) {
63+
if (set != null) {
64+
DocIdSetIterator dcit = set.iterator();
65+
iterators[j++] = dcit;
66+
}
67+
}
68+
lastReturn = (iterators.length > 0 ? -1 : DocIdSetIterator.NO_MORE_DOCS);
69+
}
70+
71+
@Override
72+
public final int docID() {
73+
return lastReturn;
74+
}
75+
76+
@Override
77+
public final int nextDoc() throws IOException {
78+
79+
if (lastReturn == DocIdSetIterator.NO_MORE_DOCS) return DocIdSetIterator.NO_MORE_DOCS;
80+
81+
DocIdSetIterator dcit = iterators[0];
82+
int target = dcit.nextDoc();
83+
int size = iterators.length;
84+
int skip = 0;
85+
int i = 1;
86+
while (i < size) {
87+
if (i != skip) {
88+
dcit = iterators[i];
89+
int docid = dcit.advance(target);
90+
if (docid > target) {
91+
target = docid;
92+
if (i != 0) {
93+
skip = i;
94+
i = 0;
95+
continue;
96+
} else
97+
skip = 0;
98+
}
99+
}
100+
i++;
101+
}
102+
return (lastReturn = target);
103+
}
104+
105+
@Override
106+
public final int advance(int target) throws IOException {
107+
108+
if (lastReturn == DocIdSetIterator.NO_MORE_DOCS) return DocIdSetIterator.NO_MORE_DOCS;
109+
110+
DocIdSetIterator dcit = iterators[0];
111+
target = dcit.advance(target);
112+
int size = iterators.length;
113+
int skip = 0;
114+
int i = 1;
115+
while (i < size) {
116+
if (i != skip) {
117+
dcit = iterators[i];
118+
int docid = dcit.advance(target);
119+
if (docid > target) {
120+
target = docid;
121+
if (i != 0) {
122+
skip = i;
123+
i = 0;
124+
continue;
125+
} else {
126+
skip = 0;
127+
}
128+
}
129+
}
130+
i++;
131+
}
132+
return (lastReturn = target);
133+
}
134+
}
135+
}

modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/docset/AndDocSet.java

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,15 @@ public AndDocSet(List<DocSet> sets) {
4444
}
4545

4646
@Override public boolean isCacheable() {
47-
for (DocSet set : sets) {
48-
if (!set.isCacheable()) {
49-
return false;
50-
}
51-
}
52-
return true;
47+
// not cacheable, the reason is that by default, when constructing the filter, it is not cacheable,
48+
// so if someone wants it to be cacheable, we might as well construct a cached version of the result
49+
return false;
50+
// for (DocSet set : sets) {
51+
// if (!set.isCacheable()) {
52+
// return false;
53+
// }
54+
// }
55+
// return true;
5356
}
5457

5558
@Override public DocIdSetIterator iterator() throws IOException {
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/*
2+
* Licensed to Elastic Search and Shay Banon under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. Elastic Search licenses this
6+
* file to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.common.lucene.docset;
21+
22+
import org.apache.lucene.search.DocIdSet;
23+
import org.apache.lucene.search.DocIdSetIterator;
24+
25+
import java.io.IOException;
26+
27+
/**
28+
* @author kimchy (shay.banon)
29+
*/
30+
public class NotDocIdSet extends DocIdSet {
31+
32+
private final DocIdSet set;
33+
34+
private final int max;
35+
36+
public NotDocIdSet(DocIdSet set, int max) {
37+
this.max = max;
38+
this.set = set;
39+
}
40+
41+
@Override public boolean isCacheable() {
42+
// not cacheable, the reason is that by default, when constructing the filter, it is not cacheable,
43+
// so if someone wants it to be cacheable, we might as well construct a cached version of the result
44+
return false;
45+
// return set.isCacheable();
46+
}
47+
48+
@Override public DocIdSetIterator iterator() throws IOException {
49+
return new NotDocIdSetIterator();
50+
}
51+
52+
class NotDocIdSetIterator extends DocIdSetIterator {
53+
int lastReturn = -1;
54+
private DocIdSetIterator it1 = null;
55+
private int innerDocid = -1;
56+
57+
NotDocIdSetIterator() throws IOException {
58+
initialize();
59+
}
60+
61+
private void initialize() throws IOException {
62+
it1 = set.iterator();
63+
64+
if ((innerDocid = it1.nextDoc()) == DocIdSetIterator.NO_MORE_DOCS) it1 = null;
65+
}
66+
67+
@Override
68+
public int docID() {
69+
return lastReturn;
70+
}
71+
72+
@Override
73+
public int nextDoc() throws IOException {
74+
return advance(0);
75+
}
76+
77+
@Override
78+
public int advance(int target) throws IOException {
79+
80+
if (lastReturn == DocIdSetIterator.NO_MORE_DOCS) {
81+
return DocIdSetIterator.NO_MORE_DOCS;
82+
}
83+
84+
if (target <= lastReturn) target = lastReturn + 1;
85+
86+
if (it1 != null && innerDocid < target) {
87+
if ((innerDocid = it1.advance(target)) == DocIdSetIterator.NO_MORE_DOCS) {
88+
it1 = null;
89+
}
90+
}
91+
92+
while (it1 != null && innerDocid == target) {
93+
target++;
94+
if (target >= max) {
95+
return (lastReturn = DocIdSetIterator.NO_MORE_DOCS);
96+
}
97+
if ((innerDocid = it1.advance(target)) == DocIdSetIterator.NO_MORE_DOCS) {
98+
it1 = null;
99+
}
100+
}
101+
102+
// ADDED THIS, bug in original code
103+
if (target >= max) {
104+
return (lastReturn = DocIdSetIterator.NO_MORE_DOCS);
105+
}
106+
107+
return (lastReturn = target);
108+
}
109+
}
110+
}

modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/docset/NotDocSet.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,10 @@ public NotDocSet(DocSet set, int max) {
3434
}
3535

3636
@Override public boolean isCacheable() {
37-
return set.isCacheable();
37+
// not cacheable, the reason is that by default, when constructing the filter, it is not cacheable,
38+
// so if someone wants it to be cacheable, we might as well construct a cached version of the result
39+
return false;
40+
// return set.isCacheable();
3841
}
3942

4043
@Override public boolean get(int doc) throws IOException {

0 commit comments

Comments
 (0)