Skip to content

Commit 667eafb

Browse files
committed
Have Utf8ByteBufCharsetDecoder replace invalid characters, close AsyncHttpClient#1411
1 parent 942f805 commit 667eafb

File tree

3 files changed

+210
-161
lines changed

3 files changed

+210
-161
lines changed

client/src/main/java/org/asynchttpclient/netty/ws/NettyWebSocket.java

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import io.netty.util.concurrent.ImmediateEventExecutor;
3030

3131
import java.net.SocketAddress;
32-
import java.nio.charset.CharacterCodingException;
3332
import java.util.ArrayList;
3433
import java.util.Collection;
3534
import java.util.List;
@@ -301,16 +300,12 @@ public void onTextFrame(TextWebSocketFrame frame) {
301300
}
302301

303302
private void onTextFrame0(WebSocketFrame frame) {
304-
try {
305-
// faster than frame.text();
306-
String text = Utf8ByteBufCharsetDecoder.decodeUtf8(frame.content());
307-
frame.isFinalFragment();
308-
frame.rsv();
309-
for (WebSocketListener listener : listeners) {
310-
listener.onTextFrame(text, frame.isFinalFragment(), frame.rsv());
311-
}
312-
} catch (CharacterCodingException e) {
313-
throw new IllegalArgumentException(e);
303+
// faster than frame.text();
304+
String text = Utf8ByteBufCharsetDecoder.decodeUtf8(frame.content());
305+
frame.isFinalFragment();
306+
frame.rsv();
307+
for (WebSocketListener listener : listeners) {
308+
listener.onTextFrame(text, frame.isFinalFragment(), frame.rsv());
314309
}
315310
}
316311

netty-utils/src/main/java/org/asynchttpclient/netty/util/Utf8ByteBufCharsetDecoder.java

Lines changed: 172 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -18,191 +18,214 @@
1818

1919
import java.nio.ByteBuffer;
2020
import java.nio.CharBuffer;
21-
import java.nio.charset.CharacterCodingException;
2221
import java.nio.charset.CharsetDecoder;
2322
import java.nio.charset.CoderResult;
23+
import java.nio.charset.CodingErrorAction;
2424

2525
public class Utf8ByteBufCharsetDecoder {
2626

27-
private static final int INITIAL_CHAR_BUFFER_SIZE = 1024;
28-
private static final int UTF_8_MAX_BYTES_PER_CHAR = 4;
27+
private static final int INITIAL_CHAR_BUFFER_SIZE = 1024;
28+
private static final int UTF_8_MAX_BYTES_PER_CHAR = 4;
29+
private static final char INVALID_CHAR_REPLACEMENT = '�';
2930

30-
private static final ThreadLocal<Utf8ByteBufCharsetDecoder> POOL = new ThreadLocal<Utf8ByteBufCharsetDecoder>() {
31-
protected Utf8ByteBufCharsetDecoder initialValue() {
32-
return new Utf8ByteBufCharsetDecoder();
33-
}
34-
};
35-
36-
private static Utf8ByteBufCharsetDecoder pooledDecoder() {
37-
Utf8ByteBufCharsetDecoder decoder = POOL.get();
38-
decoder.reset();
39-
return decoder;
40-
}
41-
42-
public static String decodeUtf8(ByteBuf buf) throws CharacterCodingException {
43-
return pooledDecoder().decode(buf);
31+
private static final ThreadLocal<Utf8ByteBufCharsetDecoder> POOL = new ThreadLocal<Utf8ByteBufCharsetDecoder>() {
32+
protected Utf8ByteBufCharsetDecoder initialValue() {
33+
return new Utf8ByteBufCharsetDecoder();
4434
}
45-
46-
public static String decodeUtf8(ByteBuf... bufs) throws CharacterCodingException {
47-
return pooledDecoder().decode(bufs);
48-
}
49-
50-
private final CharsetDecoder decoder = UTF_8.newDecoder();
51-
protected CharBuffer charBuffer = allocateCharBuffer(INITIAL_CHAR_BUFFER_SIZE);
52-
private ByteBuffer splitCharBuffer = ByteBuffer.allocate(UTF_8_MAX_BYTES_PER_CHAR);
53-
54-
protected CharBuffer allocateCharBuffer(int l) {
55-
return CharBuffer.allocate(l);
35+
};
36+
37+
private static Utf8ByteBufCharsetDecoder pooledDecoder() {
38+
Utf8ByteBufCharsetDecoder decoder = POOL.get();
39+
decoder.reset();
40+
return decoder;
41+
}
42+
43+
public static String decodeUtf8(ByteBuf buf) {
44+
return pooledDecoder().decode(buf);
45+
}
46+
47+
public static String decodeUtf8(ByteBuf... bufs) {
48+
return pooledDecoder().decode(bufs);
49+
}
50+
51+
private static CharsetDecoder configureReplaceCodingErrorActions(CharsetDecoder decoder) {
52+
return decoder.onMalformedInput(CodingErrorAction.REPLACE).onUnmappableCharacter(CodingErrorAction.REPLACE);
53+
}
54+
55+
private final CharsetDecoder decoder = configureReplaceCodingErrorActions(UTF_8.newDecoder());
56+
protected CharBuffer charBuffer = allocateCharBuffer(INITIAL_CHAR_BUFFER_SIZE);
57+
private ByteBuffer splitCharBuffer = ByteBuffer.allocate(UTF_8_MAX_BYTES_PER_CHAR);
58+
59+
protected CharBuffer allocateCharBuffer(int l) {
60+
return CharBuffer.allocate(l);
61+
}
62+
63+
private void ensureCapacity(int l) {
64+
if (charBuffer.position() == 0) {
65+
if (charBuffer.capacity() < l) {
66+
charBuffer = allocateCharBuffer(l);
67+
}
68+
} else if (charBuffer.remaining() < l) {
69+
CharBuffer newCharBuffer = allocateCharBuffer(charBuffer.position() + l);
70+
charBuffer.flip();
71+
newCharBuffer.put(charBuffer);
72+
charBuffer = newCharBuffer;
5673
}
57-
58-
private void ensureCapacity(int l) {
59-
if (charBuffer.position() == 0) {
60-
if (charBuffer.capacity() < l) {
61-
charBuffer = allocateCharBuffer(l);
62-
}
63-
} else if (charBuffer.remaining() < l) {
64-
CharBuffer newCharBuffer = allocateCharBuffer(charBuffer.position() + l);
65-
charBuffer.flip();
66-
newCharBuffer.put(charBuffer);
67-
charBuffer = newCharBuffer;
68-
}
74+
}
75+
76+
public void reset() {
77+
configureReplaceCodingErrorActions(decoder.reset());
78+
charBuffer.clear();
79+
splitCharBuffer.clear();
80+
}
81+
82+
private static int moreThanOneByteCharSize(byte firstByte) {
83+
if (firstByte >> 5 == -2 && (firstByte & 0x1e) != 0) {
84+
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
85+
return 2;
86+
87+
} else if (firstByte >> 4 == -2) {
88+
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
89+
return 3;
90+
91+
} else if (firstByte >> 3 == -2) {
92+
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
93+
return 4;
94+
95+
} else {
96+
// charSize isn't supposed to be called for regular bytes
97+
// is that even possible?
98+
return -1;
6999
}
70-
71-
public void reset() {
72-
decoder.reset();
73-
charBuffer.clear();
100+
}
101+
102+
private static boolean isContinuation(byte b) {
103+
// 10xxxxxx
104+
return b >> 6 == -2;
105+
}
106+
107+
private boolean stashContinuationBytes(ByteBuffer nioBuffer, int missingBytes) {
108+
for (int i = 0; i < missingBytes; i++) {
109+
byte b = nioBuffer.get();
110+
// make sure we only add continuation bytes in buffer
111+
if (isContinuation(b)) {
112+
splitCharBuffer.put(b);
113+
} else {
114+
// we hit a non-continuation byte
115+
// push it back and flush
116+
nioBuffer.position(nioBuffer.position() - 1);
117+
charBuffer.append(INVALID_CHAR_REPLACEMENT);
74118
splitCharBuffer.clear();
119+
return false;
120+
}
75121
}
122+
return true;
123+
}
76124

77-
private static int charSize(byte firstByte) throws CharacterCodingException {
78-
if ((firstByte >> 5) == -2 && (firstByte & 0x1e) != 0) {
79-
// 2 bytes, 11 bits: 110xxxxx 10xxxxxx
80-
return 2;
125+
private void handlePendingSplitCharBuffer(ByteBuffer nioBuffer, boolean endOfInput) {
81126

82-
} else if ((firstByte >> 4) == -2) {
83-
// 3 bytes, 16 bits: 1110xxxx 10xxxxxx 10xxxxxx
84-
return 3;
127+
int charSize = moreThanOneByteCharSize(splitCharBuffer.get(0));
85128

86-
} else if ((firstByte >> 3) == -2) {
87-
// 4 bytes, 21 bits: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
88-
return 4;
129+
if (charSize > 0) {
130+
int missingBytes = charSize - splitCharBuffer.position();
89131

132+
if (nioBuffer.remaining() < missingBytes) {
133+
if (endOfInput) {
134+
charBuffer.append(INVALID_CHAR_REPLACEMENT);
90135
} else {
91-
// charSize isn't supposed to be called for regular bytes
92-
throw new CharacterCodingException();
136+
stashContinuationBytes(nioBuffer, nioBuffer.remaining());
93137
}
94-
}
95138

96-
private void handleSplitCharBuffer(ByteBuffer nioBuffer, boolean endOfInput) throws CharacterCodingException {
97-
// TODO we could save charSize
98-
int missingBytes = charSize(splitCharBuffer.get(0)) - splitCharBuffer.position();
99-
100-
if (nioBuffer.remaining() < missingBytes) {
101-
if (endOfInput) {
102-
throw new CharacterCodingException();
103-
}
104-
105-
// still not enough bytes
106-
splitCharBuffer.put(nioBuffer);
107-
108-
} else {
109-
// FIXME better way?
110-
for (int i = 0; i < missingBytes; i++) {
111-
splitCharBuffer.put(nioBuffer.get());
112-
}
113-
114-
splitCharBuffer.flip();
115-
CoderResult res = decoder.decode(splitCharBuffer, charBuffer, endOfInput && !nioBuffer.hasRemaining());
116-
if (res.isError()) {
117-
res.throwException();
118-
}
119-
splitCharBuffer.clear();
120-
}
139+
} else if (stashContinuationBytes(nioBuffer, missingBytes)) {
140+
splitCharBuffer.flip();
141+
decoder.decode(splitCharBuffer, charBuffer, endOfInput && !nioBuffer.hasRemaining());
142+
splitCharBuffer.clear();
143+
}
144+
} else {
145+
// drop chars until we hit a non continuation one
146+
charBuffer.append(INVALID_CHAR_REPLACEMENT);
147+
splitCharBuffer.clear();
121148
}
149+
}
122150

123-
protected void decodePartial(ByteBuffer nioBuffer, boolean endOfInput) throws CharacterCodingException {
124-
// deal with pending splitCharBuffer
125-
if (splitCharBuffer.position() > 0 && nioBuffer.hasRemaining()) {
126-
handleSplitCharBuffer(nioBuffer, endOfInput);
127-
}
151+
protected void decodePartial(ByteBuffer nioBuffer, boolean endOfInput) {
152+
// deal with pending splitCharBuffer
153+
if (splitCharBuffer.position() > 0 && nioBuffer.hasRemaining()) {
154+
handlePendingSplitCharBuffer(nioBuffer, endOfInput);
155+
}
128156

129-
// decode remaining buffer
130-
if (nioBuffer.hasRemaining()) {
131-
CoderResult res = decoder.decode(nioBuffer, charBuffer, endOfInput);
132-
if (res.isUnderflow()) {
133-
if (nioBuffer.remaining() > 0) {
134-
splitCharBuffer.put(nioBuffer);
135-
}
136-
} else if (res.isError()) {
137-
res.throwException();
138-
}
157+
// decode remaining buffer
158+
if (nioBuffer.hasRemaining()) {
159+
CoderResult res = decoder.decode(nioBuffer, charBuffer, endOfInput);
160+
if (res.isUnderflow()) {
161+
if (nioBuffer.remaining() > 0) {
162+
splitCharBuffer.put(nioBuffer);
139163
}
164+
}
140165
}
166+
}
141167

142-
private void decode(ByteBuffer[] nioBuffers, int length) throws CharacterCodingException {
143-
int count = nioBuffers.length;
144-
for (int i = 0; i < count; i++) {
145-
decodePartial(nioBuffers[i].duplicate(), i == count - 1);
146-
}
168+
private void decode(ByteBuffer[] nioBuffers, int length) {
169+
int count = nioBuffers.length;
170+
for (int i = 0; i < count; i++) {
171+
decodePartial(nioBuffers[i].duplicate(), i == count - 1);
147172
}
173+
}
148174

149-
private void decodeSingleNioBuffer(ByteBuffer nioBuffer, int length) throws CharacterCodingException {
150-
CoderResult res = decoder.decode(nioBuffer, charBuffer, true);
151-
if (res.isError()) {
152-
res.throwException();
153-
}
175+
private void decodeSingleNioBuffer(ByteBuffer nioBuffer, int length) {
176+
decoder.decode(nioBuffer, charBuffer, true);
177+
}
178+
179+
public String decode(ByteBuf buf) {
180+
if (buf.isDirect()) {
181+
return buf.toString(UTF_8);
154182
}
155183

156-
public String decode(ByteBuf buf) throws CharacterCodingException {
157-
if (buf.isDirect()) {
158-
return buf.toString(UTF_8);
159-
}
184+
int length = buf.readableBytes();
185+
ensureCapacity(length);
160186

161-
int length = buf.readableBytes();
162-
ensureCapacity(length);
187+
if (buf.nioBufferCount() == 1) {
188+
decodeSingleNioBuffer(buf.internalNioBuffer(buf.readerIndex(), length).duplicate(), length);
189+
} else {
190+
decode(buf.nioBuffers(), buf.readableBytes());
191+
}
163192

164-
if (buf.nioBufferCount() == 1) {
165-
decodeSingleNioBuffer(buf.internalNioBuffer(buf.readerIndex(), length).duplicate(), length);
166-
} else {
167-
decode(buf.nioBuffers(), buf.readableBytes());
168-
}
193+
return charBuffer.flip().toString();
194+
}
169195

170-
return charBuffer.flip().toString();
196+
public String decode(ByteBuf... bufs) {
197+
if (bufs.length == 1) {
198+
return decode(bufs[0]);
171199
}
172200

173-
public String decode(ByteBuf... bufs) throws CharacterCodingException {
174-
if (bufs.length == 1) {
175-
return decode(bufs[0]);
176-
}
201+
int totalSize = 0;
202+
int totalNioBuffers = 0;
203+
boolean withoutArray = false;
204+
for (ByteBuf buf : bufs) {
205+
if (!buf.hasArray()) {
206+
withoutArray = true;
207+
break;
208+
}
209+
totalSize += buf.readableBytes();
210+
totalNioBuffers += buf.nioBufferCount();
211+
}
212+
213+
if (withoutArray) {
214+
return ByteBufUtils.byteBuf2StringDefault(UTF_8, bufs);
177215

178-
int totalSize = 0;
179-
int totalNioBuffers = 0;
180-
boolean withoutArray = false;
181-
for (ByteBuf buf : bufs) {
182-
if (!buf.hasArray()) {
183-
withoutArray = true;
184-
break;
185-
}
186-
totalSize += buf.readableBytes();
187-
totalNioBuffers += buf.nioBufferCount();
216+
} else {
217+
ByteBuffer[] nioBuffers = new ByteBuffer[totalNioBuffers];
218+
int i = 0;
219+
for (ByteBuf buf : bufs) {
220+
for (ByteBuffer nioBuffer : buf.nioBuffers()) {
221+
nioBuffers[i++] = nioBuffer;
188222
}
223+
}
189224

190-
if (withoutArray) {
191-
return ByteBufUtils.byteBuf2StringDefault(UTF_8, bufs);
225+
ensureCapacity(totalSize);
226+
decode(nioBuffers, totalSize);
192227

193-
} else {
194-
ByteBuffer[] nioBuffers = new ByteBuffer[totalNioBuffers];
195-
int i = 0;
196-
for (ByteBuf buf : bufs) {
197-
for (ByteBuffer nioBuffer : buf.nioBuffers()) {
198-
nioBuffers[i++] = nioBuffer;
199-
}
200-
}
201-
202-
ensureCapacity(totalSize);
203-
decode(nioBuffers, totalSize);
204-
205-
return charBuffer.flip().toString();
206-
}
228+
return charBuffer.flip().toString();
207229
}
230+
}
208231
}

0 commit comments

Comments
 (0)