Skip to content

Commit b158acc

Browse files
committed
Fixed bug and reduced code
1 parent 790b53c commit b158acc

File tree

1 file changed

+31
-27
lines changed

1 file changed

+31
-27
lines changed

functions/xml/utf8_decode.js

Lines changed: 31 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -9,45 +9,49 @@ function utf8_decode(str_data) {
99
// bugfixed by: Onno Marsman
1010
// bugfixed by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
1111
// bugfixed by: kirilloid
12+
// bugfixed by: w35l3y (http://www.wesley.eti.br)
1213
// example 1: utf8_decode('Kevin van Zonneveld');
1314
// returns 1: 'Kevin van Zonneveld'
1415

1516
var tmp_arr = [],
1617
i = 0,
17-
ac = 0,
1818
c1 = 0,
19-
c2 = 0,
20-
c3 = 0,
21-
c4 = 0;
19+
seqlen = 0;
2220

2321
str_data += '';
2422

2523
while (i < str_data.length) {
26-
c1 = str_data.charCodeAt(i);
27-
if (c1 <= 191) {
28-
tmp_arr[ac++] = String.fromCharCode(c1);
29-
i++;
30-
} else if (c1 <= 223) {
31-
c2 = str_data.charCodeAt(i + 1);
32-
tmp_arr[ac++] = String.fromCharCode(((c1 & 31) << 6) | (c2 & 63));
33-
i += 2;
34-
} else if (c1 <= 239) {
35-
// http://en.wikipedia.org/wiki/UTF-8#Codepage_layout
36-
c2 = str_data.charCodeAt(i + 1);
37-
c3 = str_data.charCodeAt(i + 2);
38-
tmp_arr[ac++] = String.fromCharCode(((c1 & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63));
39-
i += 3;
24+
c1 = str_data.charCodeAt(i) & 0xFF;
25+
seqlen = 0;
26+
27+
// http://en.wikipedia.org/wiki/UTF-8#Codepage_layout
28+
if (c1 <= 0xBF) {
29+
c1 = (c1 & 0x7F);
30+
seqlen = 1;
31+
} else if (c1 <= 0xDF) {
32+
c1 = (c1 & 0x1F);
33+
seqlen = 2;
34+
} else if (c1 <= 0xEF) {
35+
c1 = (c1 & 0x0F);
36+
seqlen = 3;
4037
} else {
41-
c2 = str_data.charCodeAt(i + 1);
42-
c3 = str_data.charCodeAt(i + 2);
43-
c4 = str_data.charCodeAt(i + 3);
44-
c1 = ((c1 & 7) << 18) | ((c2 & 63) << 12) | ((c3 & 63) << 6) | (c4 & 63);
38+
c1 = (c1 & 0x07);
39+
seqlen = 4;
40+
}
41+
42+
for (var ai = 1; ai < seqlen; ++ai) {
43+
c1 = ((c1 << 0x06) | (str_data.charCodeAt(ai + i) & 0x3F));
44+
}
45+
46+
if (seqlen == 4) {
4547
c1 -= 0x10000;
46-
tmp_arr[ac++] = String.fromCharCode(0xD800 | ((c1 >> 10) & 0x3FF));
47-
tmp_arr[ac++] = String.fromCharCode(0xDC00 | (c1 & 0x3FF));
48-
i += 4;
48+
tmp_arr.push(String.fromCharCode(0xD800 | ((c1 >> 10) & 0x3FF)), String.fromCharCode(0xDC00 | (c1 & 0x3FF)));
49+
} else {
50+
tmp_arr.push(String.fromCharCode(c1));
4951
}
52+
53+
i += seqlen;
5054
}
5155

52-
return tmp_arr.join('');
53-
}
56+
return tmp_arr.join("");
57+
}

0 commit comments

Comments
 (0)