|
19 | 19 |
|
20 | 20 | package org.elasticsearch.rest.support;
|
21 | 21 |
|
22 |
| -import java.io.UnsupportedEncodingException; |
23 |
| -import java.net.URLDecoder; |
24 |
| -import java.nio.charset.UnsupportedCharsetException; |
| 22 | +import org.elasticsearch.common.base.Charsets; |
| 23 | + |
| 24 | +import java.nio.charset.Charset; |
25 | 25 | import java.util.Map;
|
26 | 26 |
|
/**
 * Static helpers for decoding the query-string part of a REST request URI.
 *
 * @author kimchy (shay.banon)
 */
public class RestUtils {

    /** UTF-8 is one of the charsets every JVM is required to support, so this lookup cannot fail. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /**
     * Parses the {@code name=value} pairs of a query string, separated by
     * {@code '&'}, and stores them URL-decoded in {@code params}.
     * <p>
     * A name without {@code '='} (e.g. {@code "&a&"}) or with nothing after
     * the {@code '='} (e.g. {@code "a="}) is stored with an empty value.
     * Only the first {@code '='} of a pair separates the name from the value,
     * so {@code "a=b=c"} yields the value {@code "b=c"}. When a name appears
     * more than once the last value wins.
     *
     * @param s         The string holding the query string; must not be {@code null}.
     * @param fromIndex Index of the first character of the query string in
     *                  {@code s}. Nothing is parsed if it is negative or
     *                  not smaller than the length of {@code s}.
     * @param params    The map that receives the decoded parameters.
     * @throws IllegalArgumentException if a name or value contains a
     *                                  malformed escape sequence.
     */
    public static void decodeQueryString(String s, int fromIndex, Map<String, String> params) {
        if (fromIndex < 0) {
            return;
        }
        final int length = s.length();
        if (fromIndex >= length) {
            return;
        }

        String name = null;  // Decoded name of the pair being parsed, once its '=' was seen
        int pos = fromIndex; // Beginning of the unprocessed region
        int i;               // End of the unprocessed region
        for (i = fromIndex; i < length; i++) {
            final char c = s.charAt(i);
            if (c == '=' && name == null) {
                if (pos != i) {
                    name = decodeComponent(s.substring(pos, i));
                }
                pos = i + 1;
            } else if (c == '&') {
                if (name == null && pos != i) {
                    // We haven't seen an `=' so far but moved forward.
                    // Must be a param of the form '&a&' so add it with
                    // an empty value.
                    addParam(params, decodeComponent(s.substring(pos, i)), "");
                } else if (name != null) {
                    addParam(params, name, decodeComponent(s.substring(pos, i)));
                    name = null;
                }
                pos = i + 1;
            }
        }

        if (pos != i) { // Are there characters we haven't dealt with?
            if (name == null) { // Yes and we haven't seen any `='.
                addParam(params, decodeComponent(s.substring(pos, i)), "");
            } else { // Yes and this must be the last value.
                addParam(params, name, decodeComponent(s.substring(pos, i)));
            }
        } else if (name != null) { // Have we seen a name without value?
            addParam(params, name, "");
        }
    }

    /**
     * Stores one decoded parameter; a previous value for the same name is
     * replaced.
     */
    private static void addParam(Map<String, String> params, String name, String value) {
        params.put(name, value);
    }

    /**
     * Decodes a part of a URL encoded by a browser.
     * <p>
     * This is equivalent to calling {@link #decodeComponent(String, Charset)}
     * with the UTF-8 charset (recommended to comply with RFC 3986, Section 2).
     *
     * @param s The string to decode (can be empty).
     * @return The decoded string, or {@code s} if there's nothing to decode.
     *         If the string to decode is {@code null}, returns an empty string.
     * @throws IllegalArgumentException if the string contains a malformed
     *                                  escape sequence.
     */
    public static String decodeComponent(final String s) {
        return decodeComponent(s, UTF_8);
    }

    /**
     * Decodes a part of a URL encoded by a browser.
     * <p>
     * The string is expected to be encoded as per RFC 3986, Section 2.
     * This is the encoding used by JavaScript functions {@code encodeURI}
     * and {@code encodeURIComponent}, but not {@code escape}. For example
     * in this encoding, &eacute; (in Unicode {@code U+00E9} or in UTF-8
     * {@code 0xC3 0xA9}) is encoded as {@code %C3%A9} or {@code %c3%a9}.
     * In addition {@code '+'} is decoded as a space and {@code "%%"} as a
     * single {@code '%'}.
     * <p>
     * This is essentially equivalent to calling
     * <code>{@link java.net.URLDecoder URLDecoder}.{@link
     * java.net.URLDecoder#decode(String, String)}</code>.
     * If there is nothing to decode (no {@code '%'} and no {@code '+'}),
     * no memory is allocated and the argument itself is returned.
     * <p>
     * Characters outside of US-ASCII that appear unescaped in the input are
     * preserved: they are encoded with {@code charset} before the final
     * decoding step instead of being truncated to their low 8 bits.
     *
     * @param s       The string to decode (can be empty).
     * @param charset The charset to use to decode the string (should really
     *                be UTF-8).
     * @return The decoded string, or {@code s} if there's nothing to decode.
     *         If the string to decode is {@code null}, returns an empty string.
     * @throws IllegalArgumentException if the string contains a malformed
     *                                  escape sequence.
     */
    public static String decodeComponent(final String s, final Charset charset) {
        if (s == null) {
            return "";
        }
        if (s.indexOf('%') < 0 && s.indexOf('+') < 0) {
            return s; // Nothing to decode, hand back the argument itself.
        }
        final int size = s.length();
        // Every ASCII character, '+' and escape sequence yields at most one
        // byte per input character, so `size' bytes are enough unless
        // unescaped non-ASCII characters show up (handled below).
        byte[] buf = new byte[size];
        int pos = 0; // position in `buf'.
        for (int i = 0; i < size; i++) {
            char c = s.charAt(i);
            if (c == '+') {
                buf[pos++] = ' '; // "+" -> " "
            } else if (c == '%') {
                if (i == size - 1) {
                    throw new IllegalArgumentException("unterminated escape"
                            + " sequence at end of string: " + s);
                }
                c = s.charAt(++i);
                if (c == '%') {
                    buf[pos++] = '%'; // "%%" -> "%"
                    continue;
                } else if (i == size - 1) {
                    throw new IllegalArgumentException("partial escape"
                            + " sequence at end of string: " + s);
                }
                final char high = decodeHexNibble(c);
                final char low = decodeHexNibble(s.charAt(++i));
                if (high == Character.MAX_VALUE || low == Character.MAX_VALUE) {
                    throw new IllegalArgumentException(
                            "invalid escape sequence `%" + s.charAt(i - 1)
                                    + s.charAt(i) + "' at index " + (i - 2)
                                    + " of: " + s);
                }
                buf[pos++] = (byte) (high * 16 + low);
            } else if (c <= 0x7F) {
                buf[pos++] = (byte) c; // Plain ASCII is copied verbatim.
            } else {
                // Unescaped non-ASCII characters: a cast to byte would keep
                // only the low 8 bits and corrupt them. Encode the whole run
                // at once so surrogate pairs are never split.
                int end = i + 1;
                while (end < size && s.charAt(end) > 0x7F) {
                    end++;
                }
                final byte[] encoded = s.substring(i, end).getBytes(charset);
                // Room for what is already written, this run and at most one
                // byte for each character still to be processed.
                final int needed = pos + encoded.length + (size - end);
                if (needed > buf.length) {
                    buf = java.util.Arrays.copyOf(buf, needed);
                }
                System.arraycopy(encoded, 0, buf, pos, encoded.length);
                pos += encoded.length;
                i = end - 1;
            }
        }
        return new String(buf, 0, pos, charset);
    }

    /**
     * Helper to decode half of a hexadecimal number from a string.
     *
     * @param c The ASCII character of the hexadecimal number to decode.
     *          Must be in the range {@code [0-9a-fA-F]}.
     * @return The hexadecimal value represented in the ASCII character
     *         given, or {@link Character#MAX_VALUE} if the character is invalid.
     */
    private static char decodeHexNibble(final char c) {
        if ('0' <= c && c <= '9') {
            return (char) (c - '0');
        } else if ('a' <= c && c <= 'f') {
            return (char) (c - 'a' + 10);
        } else if ('A' <= c && c <= 'F') {
            return (char) (c - 'A' + 10);
        } else {
            return Character.MAX_VALUE;
        }
    }
}
|
0 commit comments