Skip to content

Commit 26fd70e

Browse files
committed
Merge pull request AsyncHttpClient#357 from chadselph/fix-utf8
Fix utf8 URL encoding
2 parents 1d965d7 + 95877bd commit 26fd70e

File tree

2 files changed

+21
-6
lines changed

2 files changed

+21
-6
lines changed

src/main/java/com/ning/http/util/UTF8UrlEncoder.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ public class UTF8UrlEncoder {
2424

2525
/**
2626
* Encoding table used for figuring out ascii characters that must be escaped
27-
* (all non-Ascii characers need to be encoded anyway)
27+
* (all non-Ascii characters need to be encoded anyway)
2828
*/
2929
private final static int[] SAFE_ASCII = new int[128];
3030

@@ -58,11 +58,11 @@ public static String encode(String input) {
5858
public static StringBuilder appendEncoded(StringBuilder sb, String input) {
5959
final int[] safe = SAFE_ASCII;
6060

61-
for (int i = 0, len = input.length(); i < len; ++i) {
62-
char c = input.charAt(i);
61+
for (int c, i = 0, len = input.length(); i < len; i+= Character.charCount(c)) {
62+
c = input.codePointAt(i);
6363
if (c <= 127) {
6464
if (safe[c] != 0) {
65-
sb.append(c);
65+
sb.append((char) c);
6666
} else {
6767
appendSingleByteEncoded(sb, c);
6868
}
@@ -86,14 +86,18 @@ private final static void appendSingleByteEncoded(StringBuilder sb, int value) {
8686
}
8787

8888
private final static void appendMultiByteEncoded(StringBuilder sb, int value) {
89-
// two or three bytes? (ignoring surrogate pairs for now, which would yield 4 bytes)
9089
if (value < 0x800) {
9190
appendSingleByteEncoded(sb, (0xc0 | (value >> 6)));
9291
appendSingleByteEncoded(sb, (0x80 | (value & 0x3f)));
93-
} else {
92+
} else if (value < 0x10000) {
9493
appendSingleByteEncoded(sb, (0xe0 | (value >> 12)));
9594
appendSingleByteEncoded(sb, (0x80 | ((value >> 6) & 0x3f)));
9695
appendSingleByteEncoded(sb, (0x80 | (value & 0x3f)));
96+
} else {
97+
appendSingleByteEncoded(sb, (0xf0 | (value >> 18)));
98+
appendSingleByteEncoded(sb, (0x80 | (value >> 12) & 0x3f));
99+
appendSingleByteEncoded(sb, (0x80 | (value >> 6) & 0x3f));
100+
appendSingleByteEncoded(sb, (0x80 | (value & 0x3f)));
97101
}
98102
}
99103

src/test/java/com/ning/http/util/TestUTF8UrlCodec.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,15 @@ public void testBasics()
2727
Assert.assertEquals(UTF8UrlEncoder.encode("a&b"), "a%26b");
2828
Assert.assertEquals(UTF8UrlEncoder.encode("a+b"), "a%2Bb");
2929
}
30+
31+
@Test(groups="fast")
32+
public void testNonBmp()
33+
{
34+
// Plane 1
35+
Assert.assertEquals(UTF8UrlEncoder.encode("\uD83D\uDCA9"), "%F0%9F%92%A9");
36+
// Plane 2
37+
Assert.assertEquals(UTF8UrlEncoder.encode("\ud84c\uddc8 \ud84f\udfef"), "%F0%A3%87%88%20%F0%A3%BF%AF");
38+
// Plane 15
39+
Assert.assertEquals(UTF8UrlEncoder.encode("\udb80\udc01"), "%F3%B0%80%81");
40+
}
3041
}

0 commit comments

Comments
 (0)