Skip to content

Commit 79ae309

Browse files
committed
Merge branch 'PHP-8.1' into PHP-8.2
* PHP-8.1:
  [ci skip] NEWS
  Fix GH-9535 (unintended behavior change for mb_strcut in PHP 8.1)
2 parents 9d0f5bc + 1562ba5 commit 79ae309

File tree

3 files changed

+294
-0
lines changed

3 files changed

+294
-0
lines changed

ext/mbstring/libmbfl/mbfl/mbfilter.c

+1
Original file line number | Diff line number | Diff line change
@@ -1174,6 +1174,7 @@ mbfl_strcut(
1174 1174
bk = _bk;
1175 1175
}
1176 1176

1177+
decoder->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1177 1178
(*encoder->filter_flush)(encoder);
1178 1179

1179 1180
if (bk.decoder.filter_dtor)

ext/mbstring/tests/gh9535.phpt

+187
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,187 @@
1+
--TEST--
2+
GH-9535 (mb_strcut(): The behavior of mb_strcut in mbstring has been changed in PHP8.1)
3+
--EXTENSIONS--
4+
mbstring
5+
--FILE--
6+
<?php
7+
$encodings = [
8+
'BASE64',
9+
'HTML-ENTITIES',
10+
'Quoted-Printable',
11+
'UTF-16',
12+
'UTF-16BE',
13+
'UTF-16LE',
14+
'UTF-7',
15+
'UTF7-IMAP',
16+
'ISO-2022-JP-MS',
17+
'GB18030',
18+
'HZ',
19+
'ISO-2022-KR',
20+
'ISO-2022-JP-MOBILE#KDDI',
21+
'CP50220',
22+
'CP50221',
23+
'CP50222',
24+
];
25+
26+
$input = '宛如繁星般宛如皎月般';
27+
$bytes_length = 15;
28+
foreach($encodings as $encoding) {
29+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
30+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
31+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
32+
echo $encoding.': '.$reconverted_str.PHP_EOL;
33+
}
34+
35+
echo PHP_EOL;
36+
37+
$input = '星のように月のように';
38+
$bytes_length = 20;
39+
foreach($encodings as $encoding) {
40+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
41+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
42+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
43+
echo $encoding.': '.$reconverted_str.PHP_EOL;
44+
}
45+
46+
echo PHP_EOL;
47+
48+
$input = 'あaいb';
49+
$bytes_length = 10;
50+
foreach($encodings as $encoding) {
51+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
52+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
53+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
54+
echo $encoding.': '.$reconverted_str.PHP_EOL;
55+
}
56+
57+
echo PHP_EOL;
58+
59+
$input = 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA';
60+
$bytes_length = 10;
61+
foreach($encodings as $encoding) {
62+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
63+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
64+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
65+
echo $encoding.': '.$reconverted_str.PHP_EOL;
66+
}
67+
68+
echo PHP_EOL;
69+
70+
$input = '???';
71+
$bytes_length = 2;
72+
foreach($encodings as $encoding) {
73+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
74+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
75+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
76+
echo $encoding.': '.$reconverted_str.PHP_EOL;
77+
}
78+
79+
echo PHP_EOL;
80+
81+
foreach($encodings as $encoding) {
82+
var_dump(mb_strcut($input, 0, $bytes_length, $encoding));
83+
}
84+
85+
?>
86+
--EXPECTF--
87+
BASE64: 宛如繁
88+
HTML-ENTITIES: 宛&#22914
89+
Quoted-Printable: %s
90+
UTF-16: 宛如繁星般宛如
91+
UTF-16BE: 宛如繁星般宛如
92+
UTF-16LE: 宛如繁星般宛如
93+
UTF-7: 宛如繁星
94+
UTF7-IMAP: 宛如繁星
95+
ISO-2022-JP-MS: 宛如繁星
96+
GB18030: 宛如繁星般宛如
97+
HZ: 宛如繁星般
98+
ISO-2022-KR: 宛如繁星
99+
ISO-2022-JP-MOBILE#KDDI: 宛如繁星
100+
CP50220: 宛如繁星
101+
CP50221: 宛如繁星
102+
CP50222: 宛如繁星
103+
104+
BASE64: 星のように
105+
HTML-ENTITIES: 星の&#12
106+
Quoted-Printable: 星の
107+
UTF-16: 星のように月のように
108+
UTF-16BE: 星のように月のように
109+
UTF-16LE: 星のように月のように
110+
UTF-7: 星のように月
111+
UTF7-IMAP: 星のように月
112+
ISO-2022-JP-MS: 星のように月の
113+
GB18030: 星のように月のように
114+
HZ: 星のように月のよ
115+
ISO-2022-KR: 星のように月の
116+
ISO-2022-JP-MOBILE#KDDI: 星のように月の
117+
CP50220: 星のように月の
118+
CP50221: 星のように月の
119+
CP50222: 星のように月の
120+
121+
BASE64: %s
122+
HTML-ENTITIES: あa&
123+
Quoted-Printable: あa
124+
UTF-16: あaいb
125+
UTF-16BE: あaいb
126+
UTF-16LE: あaいb
127+
UTF-7: あa
128+
UTF7-IMAP: あa
129+
ISO-2022-JP-MS: あa
130+
GB18030: あaいb
131+
HZ: あa
132+
ISO-2022-KR: あa
133+
ISO-2022-JP-MOBILE#KDDI: あa
134+
CP50220: あa
135+
CP50221: あa
136+
CP50222: あa
137+
138+
BASE64: AAAAAA
139+
HTML-ENTITIES: AAAAAAAAAA
140+
Quoted-Printable: AAAAAAAAAA
141+
UTF-16: AAAAA
142+
UTF-16BE: AAAAA
143+
UTF-16LE: AAAAA
144+
UTF-7: AAAAAAAAAA
145+
UTF7-IMAP: AAAAAAAAAA
146+
ISO-2022-JP-MS: AAAAAAAAAA
147+
GB18030: AAAAAAAAAA
148+
HZ: AAAAAAAAAA
149+
ISO-2022-KR: AAAAAAAAAA
150+
ISO-2022-JP-MOBILE#KDDI: AAAAAAAAAA
151+
CP50220: AAAAAAAAAA
152+
CP50221: AAAAAAAAAA
153+
CP50222: AAAAAAAAAA
154+
155+
BASE64:%s
156+
HTML-ENTITIES: ??
157+
Quoted-Printable: ??
158+
UTF-16: ?
159+
UTF-16BE: ?
160+
UTF-16LE: ?
161+
UTF-7: ??
162+
UTF7-IMAP: ??
163+
ISO-2022-JP-MS: ??
164+
GB18030: ??
165+
HZ: ??
166+
ISO-2022-KR: ??
167+
ISO-2022-JP-MOBILE#KDDI: ??
168+
CP50220: ??
169+
CP50221: ??
170+
CP50222: ??
171+
172+
string(0) ""
173+
string(2) "??"
174+
string(2) "??"
175+
string(2) "??"
176+
string(2) "??"
177+
string(2) "??"
178+
string(2) "??"
179+
string(2) "??"
180+
string(2) "??"
181+
string(2) "??"
182+
string(2) "??"
183+
string(2) "??"
184+
string(2) "??"
185+
string(2) "??"
186+
string(2) "??"
187+
string(2) "??"

ext/mbstring/tests/gh9535b.phpt

+106
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,106 @@
1+
--TEST--
2+
Output of mb_strcut covers requested range of bytes even when output contains ending escape sequences
3+
--EXTENSIONS--
4+
mbstring
5+
--FILE--
6+
<?php
7+
// The existing behavior of mb_strcut is wrong for these encodings, when they add an extra closing
8+
// escape sequence to a string which would otherwise end in a non-default conversion mode
9+
// See https://github.com/php/php-src/pull/9562 for details on the bug
10+
11+
// These tests were developed when fixing a different bug, but they don't pass because of
12+
// the bug involving the added closing escape sequences
13+
// When that bug is fixed, we can remove XFAIL (or combine this file with gh9535.phpt)
14+
15+
$encodings = [
16+
'JIS',
17+
'ISO-2022-JP',
18+
'ISO-2022-JP-2004',
19+
];
20+
21+
$input = '宛如繁星般宛如皎月般';
22+
$bytes_length = 15;
23+
foreach($encodings as $encoding) {
24+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
25+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
26+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
27+
echo $encoding.': '.$reconverted_str.PHP_EOL;
28+
}
29+
30+
echo PHP_EOL;
31+
32+
$input = '星のように月のように';
33+
$bytes_length = 20;
34+
foreach($encodings as $encoding) {
35+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
36+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
37+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
38+
echo $encoding.': '.$reconverted_str.PHP_EOL;
39+
}
40+
41+
echo PHP_EOL;
42+
43+
$input = 'あaいb';
44+
$bytes_length = 10;
45+
foreach($encodings as $encoding) {
46+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
47+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
48+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
49+
echo $encoding.': '.$reconverted_str.PHP_EOL;
50+
}
51+
52+
echo PHP_EOL;
53+
54+
$input = 'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA';
55+
$bytes_length = 10;
56+
foreach($encodings as $encoding) {
57+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
58+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
59+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
60+
echo $encoding.': '.$reconverted_str.PHP_EOL;
61+
}
62+
63+
echo PHP_EOL;
64+
65+
$input = '???';
66+
$bytes_length = 2;
67+
foreach($encodings as $encoding) {
68+
$converted_str = mb_convert_encoding($input, $encoding, mb_internal_encoding());
69+
$cut_str = mb_strcut($converted_str, 0, $bytes_length, $encoding);
70+
$reconverted_str = mb_convert_encoding($cut_str, mb_internal_encoding(), $encoding);
71+
echo $encoding.': '.$reconverted_str.PHP_EOL;
72+
}
73+
74+
echo PHP_EOL;
75+
76+
foreach($encodings as $encoding) {
77+
var_dump(mb_strcut($input, 0, $bytes_length, $encoding));
78+
}
79+
80+
?>
81+
--XFAIL--
82+
Discussion: https://github.com/php/php-src/pull/9562
83+
--EXPECTF--
84+
JIS: 宛如繁星般
85+
ISO-2022-JP: 宛如繁星般
86+
ISO-2022-JP-2004: 宛如繁星
87+
88+
JIS: 星のように月の
89+
ISO-2022-JP: 星のように月の
90+
ISO-2022-JP-2004: 星のように月の
91+
92+
JIS: あa
93+
ISO-2022-JP: あa
94+
ISO-2022-JP-2004: あa
95+
96+
JIS: AAAAAAAAAA
97+
ISO-2022-JP: AAAAAAAAAA
98+
ISO-2022-JP-2004: AAAAAAAAAA
99+
100+
JIS: ??
101+
ISO-2022-JP: ??
102+
ISO-2022-JP-2004: ??
103+
104+
string(2) "??"
105+
string(2) "??"
106+
string(2) "??"

0 commit comments

Comments
 (0)