@@ -9,45 +9,49 @@ function utf8_decode(str_data) {
9
9
// bugfixed by: Onno Marsman
10
10
// bugfixed by: Kevin van Zonneveld (http://kevin.vanzonneveld.net)
11
11
// bugfixed by: kirilloid
12
+ // bugfixed by: w35l3y (http://www.wesley.eti.br)
12
13
// example 1: utf8_decode('Kevin van Zonneveld');
13
14
// returns 1: 'Kevin van Zonneveld'
14
15
15
16
var tmp_arr = [ ] ,
16
17
i = 0 ,
17
- ac = 0 ,
18
18
c1 = 0 ,
19
- c2 = 0 ,
20
- c3 = 0 ,
21
- c4 = 0 ;
19
+ seqlen = 0 ;
22
20
23
21
str_data += '' ;
24
22
25
23
while ( i < str_data . length ) {
26
- c1 = str_data . charCodeAt ( i ) ;
27
- if ( c1 <= 191 ) {
28
- tmp_arr [ ac ++ ] = String . fromCharCode ( c1 ) ;
29
- i ++ ;
30
- } else if ( c1 <= 223 ) {
31
- c2 = str_data . charCodeAt ( i + 1 ) ;
32
- tmp_arr [ ac ++ ] = String . fromCharCode ( ( ( c1 & 31 ) << 6 ) | ( c2 & 63 ) ) ;
33
- i += 2 ;
34
- } else if ( c1 <= 239 ) {
35
- // http://en.wikipedia.org/wiki/UTF-8#Codepage_layout
36
- c2 = str_data . charCodeAt ( i + 1 ) ;
37
- c3 = str_data . charCodeAt ( i + 2 ) ;
38
- tmp_arr [ ac ++ ] = String . fromCharCode ( ( ( c1 & 15 ) << 12 ) | ( ( c2 & 63 ) << 6 ) | ( c3 & 63 ) ) ;
39
- i += 3 ;
24
+ c1 = str_data . charCodeAt ( i ) & 0xFF ;
25
+ seqlen = 0 ;
26
+
27
+ // http://en.wikipedia.org/wiki/UTF-8#Codepage_layout
28
+ if ( c1 <= 0xBF ) {
29
+ c1 = ( c1 & 0x7F ) ;
30
+ seqlen = 1 ;
31
+ } else if ( c1 <= 0xDF ) {
32
+ c1 = ( c1 & 0x1F ) ;
33
+ seqlen = 2 ;
34
+ } else if ( c1 <= 0xEF ) {
35
+ c1 = ( c1 & 0x0F ) ;
36
+ seqlen = 3 ;
40
37
} else {
41
- c2 = str_data . charCodeAt ( i + 1 ) ;
42
- c3 = str_data . charCodeAt ( i + 2 ) ;
43
- c4 = str_data . charCodeAt ( i + 3 ) ;
44
- c1 = ( ( c1 & 7 ) << 18 ) | ( ( c2 & 63 ) << 12 ) | ( ( c3 & 63 ) << 6 ) | ( c4 & 63 ) ;
38
+ c1 = ( c1 & 0x07 ) ;
39
+ seqlen = 4 ;
40
+ }
41
+
42
+ for ( var ai = 1 ; ai < seqlen ; ++ ai ) {
43
+ c1 = ( ( c1 << 0x06 ) | ( str_data . charCodeAt ( ai + i ) & 0x3F ) ) ;
44
+ }
45
+
46
+ if ( seqlen == 4 ) {
45
47
c1 -= 0x10000 ;
46
- tmp_arr [ ac ++ ] = String . fromCharCode ( 0xD800 | ( ( c1 >> 10 ) & 0x3FF ) ) ;
47
- tmp_arr [ ac ++ ] = String . fromCharCode ( 0xDC00 | ( c1 & 0x3FF ) ) ;
48
- i += 4 ;
48
+ tmp_arr . push ( String . fromCharCode ( 0xD800 | ( ( c1 >> 10 ) & 0x3FF ) ) , String . fromCharCode ( 0xDC00 | ( c1 & 0x3FF ) ) ) ;
49
+ } else {
50
+ tmp_arr . push ( String . fromCharCode ( c1 ) ) ;
49
51
}
52
+
53
+ i += seqlen ;
50
54
}
51
55
52
- return tmp_arr . join ( '' ) ;
53
- }
56
+ return tmp_arr . join ( "" ) ;
57
+ }
0 commit comments