Skip to content

Commit fb46f6b

Browse files
committed
Build on aferreira's improvements to handle empty parameters with no '=', and improve the component decoding logic
1 parent 33ae7cb commit fb46f6b

File tree

3 files changed

+166
-34
lines changed

3 files changed

+166
-34
lines changed

.idea/dictionaries/kimchy.xml

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

modules/elasticsearch/src/main/java/org/elasticsearch/rest/support/RestUtils.java

Lines changed: 154 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -19,53 +19,181 @@
1919

2020
package org.elasticsearch.rest.support;
2121

22-
import java.io.UnsupportedEncodingException;
23-
import java.net.URLDecoder;
24-
import java.nio.charset.UnsupportedCharsetException;
22+
import org.elasticsearch.common.base.Charsets;
23+
24+
import java.nio.charset.Charset;
2525
import java.util.Map;
2626

2727
/**
2828
* @author kimchy (shay.banon)
2929
*/
3030
public class RestUtils {
3131

32-
public static void decodeQueryString(String queryString, int fromIndex, Map<String, String> params) {
32+
public static void decodeQueryString(String s, int fromIndex, Map<String, String> params) {
3333
if (fromIndex < 0) {
3434
return;
3535
}
36-
if (fromIndex >= queryString.length()) {
36+
if (fromIndex >= s.length()) {
3737
return;
3838
}
39-
int toIndex;
40-
while ((toIndex = queryString.indexOf('&', fromIndex)) >= 0) {
41-
int idx = queryString.indexOf('=', fromIndex);
42-
if (fromIndex < idx && idx < toIndex) {
43-
params.put(decodeComponent(queryString.substring(fromIndex, idx)), decodeComponent(queryString.substring(idx + 1, toIndex)));
39+
40+
String name = null;
41+
int pos = fromIndex; // Beginning of the unprocessed region
42+
int i; // End of the unprocessed region
43+
char c = 0; // Current character
44+
for (i = fromIndex; i < s.length(); i++) {
45+
c = s.charAt(i);
46+
if (c == '=' && name == null) {
47+
if (pos != i) {
48+
name = decodeComponent(s.substring(pos, i));
49+
}
50+
pos = i + 1;
51+
} else if (c == '&') {
52+
if (name == null && pos != i) {
53+
// We haven't seen an `=' so far but moved forward.
54+
// Must be a param of the form '&a&' so add it with
55+
// an empty value.
56+
addParam(params, decodeComponent(s.substring(pos, i)), "");
57+
} else if (name != null) {
58+
addParam(params, name, decodeComponent(s.substring(pos, i)));
59+
name = null;
60+
}
61+
pos = i + 1;
4462
}
45-
fromIndex = toIndex + 1;
4663
}
47-
int idx = queryString.indexOf('=', fromIndex);
48-
if (idx < 0) {
49-
return;
64+
65+
if (pos != i) { // Are there characters we haven't dealt with?
66+
if (name == null) { // Yes and we haven't seen any `='.
67+
addParam(params, decodeComponent(s.substring(pos, i)), "");
68+
} else { // Yes and this must be the last value.
69+
addParam(params, name, decodeComponent(s.substring(pos, i)));
70+
}
71+
} else if (name != null) { // Have we seen a name without value?
72+
addParam(params, name, "");
5073
}
51-
params.put(decodeComponent(queryString.substring(fromIndex, idx)), decodeComponent(queryString.substring(idx + 1)));
5274
}
5375

54-
public static String decodeComponent(String s) {
76+
private static void addParam(Map<String, String> params, String name, String value) {
77+
params.put(name, value);
78+
}
79+
80+
/**
81+
* Decodes a bit of a URL encoded by a browser.
82+
* <p>
83+
* This is equivalent to calling {@link #decodeComponent(String, Charset)}
84+
* with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
85+
*
86+
* @param s The string to decode (can be empty).
87+
* @return The decoded string, or {@code s} if there's nothing to decode.
88+
* If the string to decode is {@code null}, returns an empty string.
89+
* @throws IllegalArgumentException if the string contains a malformed
90+
* escape sequence.
91+
*/
92+
public static String decodeComponent(final String s) {
93+
return decodeComponent(s, Charsets.UTF_8);
94+
}
95+
96+
/**
97+
* Decodes a bit of a URL encoded by a browser.
98+
* <p>
99+
* The string is expected to be encoded as per RFC 3986, Section 2.
100+
* This is the encoding used by JavaScript functions {@code encodeURI}
101+
* and {@code encodeURIComponent}, but not {@code escape}. For example
102+
* in this encoding, &eacute; (in Unicode {@code U+00E9} or in UTF-8
103+
* {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
104+
* <p>
105+
* This is essentially equivalent to calling
106+
* <code>{@link java.net.URLDecoder URLDecoder}.{@link
107+
* java.net.URLDecoder#decode(String, String)}</code>
108+
* except that it's over 2x faster and generates less garbage for the GC.
109+
* Actually this function doesn't allocate any memory if there's nothing
110+
* to decode, the argument itself is returned.
111+
*
112+
* @param s The string to decode (can be empty).
113+
* @param charset The charset to use to decode the string (should really
114+
* be {@link Charsets#UTF_8}).
115+
* @return The decoded string, or {@code s} if there's nothing to decode.
116+
* If the string to decode is {@code null}, returns an empty string.
117+
* @throws IllegalArgumentException if the string contains a malformed
118+
* escape sequence.
119+
*/
120+
@SuppressWarnings("fallthrough")
121+
public static String decodeComponent(final String s, final Charset charset) {
55122
if (s == null) {
56123
return "";
57124
}
58-
int numChars = s.length();
59-
for (int i = 0; i < numChars; i++) {
60-
// do an initial check if it requires decoding do it and return
61-
if (s.charAt(i) == '+' || s.charAt(i) == '%') {
62-
try {
63-
return URLDecoder.decode(s, "UTF8");
64-
} catch (UnsupportedEncodingException e) {
65-
throw new UnsupportedCharsetException("UTF8");
66-
}
125+
final int size = s.length();
126+
boolean modified = false;
127+
for (int i = 0; i < size; i++) {
128+
final char c = s.charAt(i);
129+
switch (c) {
130+
case '%':
131+
i++; // We can skip at least one char, e.g. `%%'.
132+
// Fall through.
133+
case '+':
134+
modified = true;
135+
break;
67136
}
68137
}
69-
return s;
138+
if (!modified) {
139+
return s;
140+
}
141+
final byte[] buf = new byte[size];
142+
int pos = 0; // position in `buf'.
143+
for (int i = 0; i < size; i++) {
144+
char c = s.charAt(i);
145+
switch (c) {
146+
case '+':
147+
buf[pos++] = ' '; // "+" -> " "
148+
break;
149+
case '%':
150+
if (i == size - 1) {
151+
throw new IllegalArgumentException("unterminated escape"
152+
+ " sequence at end of string: " + s);
153+
}
154+
c = s.charAt(++i);
155+
if (c == '%') {
156+
buf[pos++] = '%'; // "%%" -> "%"
157+
break;
158+
} else if (i == size - 1) {
159+
throw new IllegalArgumentException("partial escape"
160+
+ " sequence at end of string: " + s);
161+
}
162+
c = decodeHexNibble(c);
163+
final char c2 = decodeHexNibble(s.charAt(++i));
164+
if (c == Character.MAX_VALUE || c2 == Character.MAX_VALUE) {
165+
throw new IllegalArgumentException(
166+
"invalid escape sequence `%" + s.charAt(i - 1)
167+
+ s.charAt(i) + "' at index " + (i - 2)
168+
+ " of: " + s);
169+
}
170+
c = (char) (c * 16 + c2);
171+
// Fall through.
172+
default:
173+
buf[pos++] = (byte) c;
174+
break;
175+
}
176+
}
177+
return new String(buf, 0, pos, charset);
178+
}
179+
180+
/**
181+
* Helper to decode half of a hexadecimal number from a string.
182+
*
183+
* @param c The ASCII character of the hexadecimal number to decode.
184+
* Expected to be in the range {@code [0-9a-fA-F]}.
185+
* @return The hexadecimal value represented in the ASCII character
186+
* given, or {@link Character#MAX_VALUE} if the character is invalid.
187+
*/
188+
private static char decodeHexNibble(final char c) {
189+
if ('0' <= c && c <= '9') {
190+
return (char) (c - '0');
191+
} else if ('a' <= c && c <= 'f') {
192+
return (char) (c - 'a' + 10);
193+
} else if ('A' <= c && c <= 'F') {
194+
return (char) (c - 'A' + 10);
195+
} else {
196+
return Character.MAX_VALUE;
197+
}
70198
}
71199
}

modules/elasticsearch/src/test/java/org/elasticsearch/rest/util/RestUtilsTests.java

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -84,37 +84,40 @@ public void testDecodeQueryStringEdgeCases() {
8484
params.clear();
8585
uri = "something?=";
8686
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
87-
assertThat(params.size(), equalTo(1));
88-
assertThat(params.get(""), equalTo(""));
87+
assertThat(params.size(), equalTo(0));
8988

9089
params.clear();
9190
uri = "something?&=";
9291
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
93-
assertThat(params.size(), equalTo(1));
94-
assertThat(params.get(""), equalTo(""));
92+
assertThat(params.size(), equalTo(0));
9593

9694
params.clear();
9795
uri = "something?a";
9896
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
99-
assertThat(params.size(), equalTo(0));
97+
assertThat(params.size(), equalTo(1));
98+
assertThat(params.get("a"), equalTo(""));
10099

101100
params.clear();
102101
uri = "something?p=v&a";
103102
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
104-
assertThat(params.size(), equalTo(1));
103+
assertThat(params.size(), equalTo(2));
104+
assertThat(params.get("a"), equalTo(""));
105105
assertThat(params.get("p"), equalTo("v"));
106106

107107
params.clear();
108108
uri = "something?p=v&a&p1=v1";
109109
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
110-
assertThat(params.size(), equalTo(2));
110+
assertThat(params.size(), equalTo(3));
111+
assertThat(params.get("a"), equalTo(""));
111112
assertThat(params.get("p"), equalTo("v"));
112113
assertThat(params.get("p1"), equalTo("v1"));
113114

114115
params.clear();
115116
uri = "something?p=v&a&b&p1=v1";
116117
RestUtils.decodeQueryString(uri, uri.indexOf('?') + 1, params);
117-
assertThat(params.size(), equalTo(2));
118+
assertThat(params.size(), equalTo(4));
119+
assertThat(params.get("a"), equalTo(""));
120+
assertThat(params.get("b"), equalTo(""));
118121
assertThat(params.get("p"), equalTo("v"));
119122
assertThat(params.get("p1"), equalTo("v1"));
120123
}

0 commit comments

Comments
 (0)