Skip to content

Commit 3e743e9

Browse files
committed
Merge branch 'PHP-8.2'
* PHP-8.2: For UTF-7, flag unnecessary extra trailing byte in Base64 section as error
2 parents 20a1d5d + a618682 commit 3e743e9

File tree

4 files changed

+34
-4
lines changed

4 files changed

+34
-4
lines changed

ext/mbstring/libmbfl/filters/mbfilter_utf7.c

+8-2
Original file line numberDiff line numberDiff line change
@@ -530,9 +530,12 @@ static size_t mb_utf7_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf
530530
}
531531

532532
unsigned char n4 = decode_base64(*p++);
533-
if (is_base64_end(n4) || p == e) {
533+
if (is_base64_end(n4)) {
534534
out = handle_base64_end(n4, &p, out, &base64, n3 & 0x3, &surrogate1);
535535
continue;
536+
} else if (p == e) {
537+
out = handle_base64_end(n4, &p, out, &base64, true, &surrogate1);
538+
continue;
536539
}
537540
unsigned char n5 = decode_base64(*p++);
538541
if (is_base64_end(n5) || p == e) {
@@ -552,9 +555,12 @@ static size_t mb_utf7_to_wchar(unsigned char **in, size_t *in_len, uint32_t *buf
552555
}
553556

554557
unsigned char n7 = decode_base64(*p++);
555-
if (is_base64_end(n7) || p == e) {
558+
if (is_base64_end(n7)) {
556559
out = handle_base64_end(n7, &p, out, &base64, n6 & 0xF, &surrogate1);
557560
continue;
561+
} else if (p == e) {
562+
out = handle_base64_end(n7, &p, out, &base64, true, &surrogate1);
563+
continue;
558564
}
559565
unsigned char n8 = decode_base64(*p++);
560566
if (is_base64_end(n8)) {

ext/mbstring/libmbfl/filters/mbfilter_utf7imap.c

+8-2
Original file line numberDiff line numberDiff line change
@@ -558,9 +558,12 @@ static size_t mb_utf7imap_to_wchar(unsigned char **in, size_t *in_len, uint32_t
558558
}
559559

560560
unsigned char n4 = decode_base64(*p++);
561-
if (is_base64_end(n4) || p == e) {
561+
if (is_base64_end(n4)) {
562562
out = handle_base64_end(n4, out, &base64, n3 & 0x3, &surrogate1);
563563
continue;
564+
} else if (p == e) {
565+
out = handle_base64_end(n4, out, &base64, true, &surrogate1);
566+
continue;
564567
}
565568
unsigned char n5 = decode_base64(*p++);
566569
if (is_base64_end(n5) || p == e) {
@@ -580,9 +583,12 @@ static size_t mb_utf7imap_to_wchar(unsigned char **in, size_t *in_len, uint32_t
580583
}
581584

582585
unsigned char n7 = decode_base64(*p++);
583-
if (is_base64_end(n7) || p == e) {
586+
if (is_base64_end(n7)) {
584587
out = handle_base64_end(n7, out, &base64, n6 & 0xF, &surrogate1);
585588
continue;
589+
} else if (p == e) {
590+
out = handle_base64_end(n7, out, &base64, true, &surrogate1);
591+
continue;
586592
}
587593
unsigned char n8 = decode_base64(*p++);
588594
if (is_base64_end(n8)) {

ext/mbstring/tests/utf7imap_encoding.phpt

+8
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,14 @@ convertInvalidString("\x80", "%", "UTF7-IMAP", "UTF-8");
221221
convertInvalidString("abc&", "abc%", "UTF7-IMAP", "UTF-8"); // The & starts a Base-64 coded section, which is OK... but there's no data in it
222222
convertInvalidString("&**-", "%*-", "UTF7-IMAP", "UTF-8"); // When we hit the first bad byte in a Base-64 coded section, it drops us back into the default mode, so the following characters are literal
223223

224+
// Try strings where Base64 has an extra trailing byte which is not needed
225+
convertInvalidString('&RR8I', "\xE4\x94\x9F%", 'UTF7-IMAP', 'UTF-8');
226+
convertInvalidString('&RR8IAAA', "\xE4\x94\x9F\xE0\xA0\x80%", 'UTF7-IMAP', 'UTF-8');
227+
228+
// It is useless for a Base64 section to only contain a single 'A'
229+
// (which decodes to only zero bits)
230+
convertInvalidString("&A", "\x00\x00\x00%", 'UTF7-IMAP', 'UTF-32BE');
231+
224232
echo "Done!\n";
225233
?>
226234
--EXPECT--

ext/mbstring/tests/utf_encodings.phpt

+10
Original file line numberDiff line numberDiff line change
@@ -1063,6 +1063,16 @@ testInvalidString('+' . rawEncode("\xD8\x01"), "\x00\x00\x00%", 'UTF-7', 'UTF-32
10631063
testInvalidString('+' . rawEncode("\x01") . '-', "\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
10641064
testInvalidString('+l', "\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
10651065

1066+
// Base64 section should not have 4 ASCII characters; the first 3 can encode one
1067+
// UTF-16 character, so there is no need for the 4th
1068+
testInvalidString('+RR8I', "\xE4\x94\x9F%", 'UTF-7', 'UTF-8');
1069+
// Likewise with 7 characters
1070+
testInvalidString('+RR8IAAA', "\xE4\x94\x9F\xE0\xA0\x80%", 'UTF-7', 'UTF-8');
1071+
1072+
// Similarly, it is useless for a Base64 section to only contain a single 'A'
1073+
// (which decodes to only zero bits)
1074+
testInvalidString("+A", "\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
1075+
10661076
// And then, messed up Base64 encoding
10671077

10681078
// Bad padding on + section (not zeroes)

0 commit comments

Comments
 (0)