Skip to content

Commit 6f125c7

Browse files
committed
Extract UTF-8 decoding logic
1 parent 9566dfb commit 6f125c7

File tree

2 files changed

+84
-65
lines changed

2 files changed

+84
-65
lines changed

client/src/main/java/org/asynchttpclient/util/Utf8ByteBufDecoder.java

Lines changed: 2 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -18,83 +18,20 @@
1818

1919
import java.nio.charset.CharacterCodingException;
2020

21-
public class Utf8ByteBufDecoder {
21+
public class Utf8ByteBufDecoder extends Utf8Decoder {
2222

2323
private static final FastThreadLocal<Utf8ByteBufDecoder> DECODERS = new FastThreadLocal<Utf8ByteBufDecoder>() {
2424
protected Utf8ByteBufDecoder initialValue() {
2525
return new Utf8ByteBufDecoder();
2626
};
2727
};
28-
28+
2929
public static Utf8ByteBufDecoder getCachedDecoder() {
3030
Utf8ByteBufDecoder cached = DECODERS.get();
3131
cached.reset();
3232
return cached;
3333
}
3434

35-
private static final byte[] TYPES = new byte[] {//
36-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
37-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
38-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
39-
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
40-
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,/**/
41-
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,/**/
42-
8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,/**/
43-
10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 /**/
44-
};
45-
46-
private static final byte[] STATES = new byte[] {//
47-
0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,/**/
48-
12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12,/**/
49-
12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12,/**/
50-
12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12,/**/
51-
12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 //
52-
};
53-
54-
private static final int UTF8_ACCEPT = 0;
55-
private static final int UTF8_REJECT = 12;
56-
57-
private StringBuilder sb = new StringBuilder();
58-
private int state = UTF8_ACCEPT;
59-
private int codePoint = 0;
60-
61-
private void write(byte b) throws CharacterCodingException {
62-
int t = TYPES[b & 0xFF];
63-
64-
codePoint = state != UTF8_ACCEPT ? (b & 0x3f) | (codePoint << 6) : (0xff >> t) & b;
65-
state = STATES[state + t];
66-
67-
if (state == UTF8_ACCEPT) {
68-
if (codePoint < Character.MIN_HIGH_SURROGATE) {
69-
sb.append((char) codePoint);
70-
} else {
71-
appendCodePointChars();
72-
}
73-
} else if (state == UTF8_REJECT) {
74-
throw new CharacterCodingException();
75-
}
76-
}
77-
78-
private void appendCodePointChars() {
79-
if (Character.isBmpCodePoint(codePoint)) {
80-
sb.append((char) codePoint);
81-
82-
} else if (Character.isValidCodePoint(codePoint)) {
83-
char charIndexPlus1 = Character.lowSurrogate(codePoint);
84-
char charIndex = Character.highSurrogate(codePoint);
85-
sb.append(charIndex).append(charIndexPlus1);
86-
87-
} else {
88-
throw new IllegalArgumentException();
89-
}
90-
}
91-
92-
public void reset() {
93-
sb.setLength(0);
94-
state = UTF8_ACCEPT;
95-
codePoint = 0;
96-
}
97-
9835
public String decode(Iterable<ByteBuf> bufs) throws CharacterCodingException {
9936

10037
for (ByteBuf buf : bufs) {
client/src/main/java/org/asynchttpclient/util/Utf8Decoder.java

Lines changed: 82 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* Copyright (c) 2016 AsyncHttpClient Project. All rights reserved.
3+
*
4+
* This program is licensed to you under the Apache License Version 2.0,
5+
* and you may not use this file except in compliance with the Apache License Version 2.0.
6+
* You may obtain a copy of the Apache License Version 2.0 at
7+
* http://www.apache.org/licenses/LICENSE-2.0.
8+
*
9+
* Unless required by applicable law or agreed to in writing,
10+
* software distributed under the Apache License Version 2.0 is distributed on an
11+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the Apache License Version 2.0 for the specific language governing permissions and limitations there under.
13+
*/
14+
package org.asynchttpclient.util;
15+
16+
import java.nio.charset.CharacterCodingException;
17+
18+
/**
 * Table-driven, byte-at-a-time UTF-8 decoder that accumulates its output in a
 * {@link StringBuilder}.
 *
 * <p>Subclasses feed bytes through {@link #write(byte)} and read the decoded
 * text from {@link #sb}. Instances carry mutable decoding state, so
 * {@link #reset()} must be called before an instance is reused for new input.
 */
public abstract class Utf8Decoder {

    /**
     * Character class (0..11) of every possible byte value, indexed by
     * {@code b & 0xFF}.
     */
    private static final byte[] TYPES = new byte[] {//
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,/**/
            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,/**/
            7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,/**/
            8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,/**/
            10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 /**/
    };

    /**
     * Transition table, indexed by {@code currentState + characterClass}.
     * States are stored pre-multiplied by 12 (the number of character
     * classes), so every entry is itself a row offset into this table.
     */
    private static final byte[] STATES = new byte[] {//
            0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,/**/
            12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12,/**/
            12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12,/**/
            12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12,/**/
            12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12 //
    };

    /** State in which a complete code point has just been decoded (also the initial state). */
    protected static final int UTF8_ACCEPT = 0;

    /** State entered when the byte sequence seen so far is not valid. */
    protected static final int UTF8_REJECT = 12;

    /** Receives the decoded characters. */
    protected StringBuilder sb = new StringBuilder();

    /** Current automaton state; any value other than the two constants means "mid-sequence". */
    protected int state = UTF8_ACCEPT;

    /** Bits of the code point that is currently being assembled. */
    private int codePoint = 0;

    /**
     * Feeds a single byte into the decoder. When the byte completes a code
     * point, the matching char (or surrogate pair) is appended to {@link #sb}.
     *
     * @param b the next input byte
     * @throws CharacterCodingException if the automaton reaches {@link #UTF8_REJECT}
     */
    protected void write(byte b) throws CharacterCodingException {
        final int type = TYPES[b & 0xFF];

        if (state == UTF8_ACCEPT) {
            // First byte of a sequence: the character class tells how many
            // leading marker bits have to be masked away.
            codePoint = (0xff >> type) & b;
        } else {
            // Continuation byte: shift in its six payload bits.
            codePoint = (codePoint << 6) | (b & 0x3f);
        }
        state = STATES[state + type];

        if (state == UTF8_REJECT) {
            throw new CharacterCodingException();
        }
        if (state != UTF8_ACCEPT) {
            // Sequence still incomplete; wait for more bytes.
            return;
        }

        if (codePoint >= Character.MIN_HIGH_SURROGATE) {
            appendCodePointChars();
        } else {
            // Fast path: everything below the surrogate range is a single char.
            sb.append((char) codePoint);
        }
    }

    /**
     * Appends the current code point to {@link #sb}, as one char when it lies
     * in the BMP and as a high/low surrogate pair otherwise.
     *
     * @throws IllegalArgumentException if the value is not a valid code point
     */
    private void appendCodePointChars() {
        if (Character.isBmpCodePoint(codePoint)) {
            sb.append((char) codePoint);
            return;
        }
        if (!Character.isValidCodePoint(codePoint)) {
            throw new IllegalArgumentException();
        }
        final char high = Character.highSurrogate(codePoint);
        final char low = Character.lowSurrogate(codePoint);
        sb.append(high).append(low);
    }

    /**
     * Discards any buffered output and returns the decoder to its initial
     * state so it can be reused for new input.
     */
    public void reset() {
        codePoint = 0;
        state = UTF8_ACCEPT;
        sb.setLength(0);
    }
}

0 commit comments

Comments
 (0)