From 5f41bff45b5c688d899493e5d7623ed9aacaebb6 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 12:42:19 +0900 Subject: [PATCH 01/24] added php_mb_check_encoding --- ext/mbstring/mbstring.c | 55 ++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index d2c9ace36ec8..231fc9d3079d 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4506,40 +4506,32 @@ PHP_FUNCTION(mb_get_info) } /* }}} */ -/* {{{ proto bool mb_check_encoding([string var[, string encoding]]) - Check if the string is valid for the specified encoding */ -PHP_FUNCTION(mb_check_encoding) +static inline zend_bool php_mb_check_encoding(const char *input, size_t length, const char *enc) { - char *var = NULL; - size_t var_len; - char *enc = NULL; - size_t enc_len; - mbfl_buffer_converter *convd; const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); + mbfl_buffer_converter *convd; mbfl_string string, result, *ret = NULL; long illegalchars = 0; - if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) { - return; - } - - if (var == NULL) { - RETURN_BOOL(MBSTRG(illegalchars) == 0); + if (input == NULL) { + return MBSTRG(illegalchars) == 0; } if (enc != NULL) { encoding = mbfl_name2encoding(enc); if (!encoding || encoding == &mbfl_encoding_pass) { php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", enc); - RETURN_FALSE; + return false; } } convd = mbfl_buffer_converter_new2(encoding, encoding, 0); + if (convd == NULL) { php_error_docref(NULL, E_WARNING, "Unable to create converter"); - RETURN_FALSE; + return false; } + mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); mbfl_buffer_converter_illegal_substchar(convd, 0); @@ -4547,19 +4539,42 @@ PHP_FUNCTION(mb_check_encoding) mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding); mbfl_string_init(&result); - string.val = (unsigned char *)var; - string.len = var_len; + string.val = (unsigned char *) input; + string.len = length; + ret = mbfl_buffer_converter_feed_result(convd, &string, &result); illegalchars = mbfl_buffer_illegalchars(convd); mbfl_buffer_converter_delete(convd); - RETVAL_FALSE; if (ret != NULL) { if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { - RETVAL_TRUE; + return true; } + mbfl_string_clear(&result); } + + return false; +} + +/* {{{ proto bool mb_check_encoding([string var[, string encoding]]) + Check if the string is valid for the specified encoding */ +PHP_FUNCTION(mb_check_encoding) +{ + char *var = NULL; + size_t var_len; + char *enc = NULL; + size_t enc_len; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) { + return; + } + + RETVAL_FALSE; + + if (php_mb_check_encoding(var, var_len, enc)) { + RETVAL_TRUE; + } } /* }}} */ From f6be936e103e2b673a8580bb3e2659929becda4b Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 13:18:39 +0900 Subject: [PATCH 02/24] added mb_ord --- ext/mbstring/mbstring.c | 73 +++++++++++++++++++++++++++++++++++++++++ ext/mbstring/mbstring.h | 1 + 2 files changed, 74 insertions(+) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 231fc9d3079d..3bcef5ac9993 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -430,6 +430,11 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1) + ZEND_ARG_INFO(0, str) + ZEND_ARG_INFO(0, encoding) +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() @@ -555,6 +560,7 @@ const zend_function_entry mbstring_functions[] = { PHP_FE(mb_send_mail, arginfo_mb_send_mail) PHP_FE(mb_get_info, arginfo_mb_get_info) PHP_FE(mb_check_encoding, arginfo_mb_check_encoding) + PHP_FE(mb_ord, arginfo_mb_ord) #if HAVE_MBREGEX PHP_MBREGEX_FUNCTION_ENTRIES #endif @@ -4578,6 +4584,73 @@ PHP_FUNCTION(mb_check_encoding) } /* }}} */ +static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) +{ + enum mbfl_no_encoding no_enc; + zend_bool supported = false; + char* ret; + size_t ret_len; + + if (enc == NULL) { + no_enc = MBSTRG(current_internal_encoding)->no_encoding; + } else { + no_enc = mbfl_name2no_encoding(enc); + + if (no_enc == mbfl_no_encoding_invalid) { + php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); + return -1; + } + } + + if (no_enc == mbfl_no_encoding_utf8 + || no_enc == mbfl_no_encoding_utf8_docomo + || no_enc == mbfl_no_encoding_utf8_kddi_a + || no_enc == mbfl_no_encoding_utf8_kddi_b + || no_enc == mbfl_no_encoding_utf8_sb + ) { + supported = true; + } + + if (!supported) { + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + return -1; + } + + ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len); + + if (ret == NULL) { + return -1; + } + + return (unsigned char) ret[0] << 24 | + (unsigned char) ret[1] << 16 | + (unsigned char) ret[2] << 8 | + (unsigned char) ret[3]; +} + +/* {{{ proto bool mb_ord([string str[, string encoding]]) */ +PHP_FUNCTION(mb_ord) +{ + char* str; + size_t str_len; + char* enc = NULL; + size_t enc_len; + long cp; + + if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &str, &str_len, &enc, &enc_len) == FAILURE) { + return; + } + + cp = php_mb_ord(str, str_len, enc); + + if (0 > cp) { + RETURN_FALSE; + } + + RETURN_LONG(cp); +} +/* }}} */ + /* {{{ php_mb_populate_current_detect_order_list */ static void php_mb_populate_current_detect_order_list(void) { diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index 9685c64d7b4a..8599e46881ea 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -127,6 +127,7 @@ PHP_FUNCTION(mb_decode_numericentity); PHP_FUNCTION(mb_send_mail); PHP_FUNCTION(mb_get_info); PHP_FUNCTION(mb_check_encoding); +PHP_FUNCTION(mb_ord); MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc); From f008db6f4c72050b163b4e45ece5d31db5175578 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 14:32:54 +0900 Subject: [PATCH 03/24] added utf32 and ucs4 for available encodings --- ext/mbstring/mbstring.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 3bcef5ac9993..6dfc669e6ab5 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4607,6 +4607,12 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) || no_enc == mbfl_no_encoding_utf8_kddi_a || no_enc == mbfl_no_encoding_utf8_kddi_b || no_enc == mbfl_no_encoding_utf8_sb + || no_enc == mbfl_no_encoding_ucs4 + || no_enc == mbfl_no_encoding_ucs4be + || no_enc == mbfl_no_encoding_ucs4le + || no_enc == mbfl_no_encoding_utf32 + || no_enc == mbfl_no_encoding_utf32be + || no_enc == mbfl_no_encoding_utf32le ) { supported = true; } From c55dea018f74212e74008270b2e5afe7469875da Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 14:37:12 +0900 Subject: [PATCH 04/24] added check for forbidden encodings --- ext/mbstring/mbstring.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 6dfc669e6ab5..e0753409e877 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4615,6 +4615,32 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) || no_enc == mbfl_no_encoding_utf32le ) { supported = true; + } else if (no_enc == mbfl_no_encoding_pass + || no_enc == mbfl_no_encoding_auto + || no_enc == mbfl_no_encoding_wchar + || no_enc == mbfl_no_encoding_byte2be + || no_enc == mbfl_no_encoding_byte2le + || no_enc == mbfl_no_encoding_byte4be + || no_enc == mbfl_no_encoding_byte4le + || no_enc == mbfl_no_encoding_base64 + || no_enc == mbfl_no_encoding_uuencode + || no_enc == mbfl_no_encoding_html_ent + || no_enc == mbfl_no_encoding_qprint + || no_enc == mbfl_no_encoding_utf7 + || no_enc == mbfl_no_encoding_utf7imap + || no_enc == mbfl_no_encoding_2022kr + || no_enc == mbfl_no_encoding_jis + || no_enc == mbfl_no_encoding_2022jp + || no_enc == mbfl_no_encoding_2022jpms + || no_enc == mbfl_no_encoding_jis_ms + || no_enc == mbfl_no_encoding_2022jp_2004 + || no_enc == mbfl_no_encoding_2022jp_kddi + || no_enc == mbfl_no_encoding_cp50220 + || no_enc == mbfl_no_encoding_cp50220raw + || no_enc == mbfl_no_encoding_cp50221 + || no_enc == mbfl_no_encoding_cp50222 + ) { + supported = false; } if (!supported) { From 590e6f0cda6fc207585c0c3ebd36a96f91168b7b Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 14:45:33 +0900 Subject: [PATCH 05/24] added utf16 and ucs2 for supported encodings --- ext/mbstring/mbstring.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index e0753409e877..59ace4c7fdb7 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4613,6 +4613,12 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) || no_enc == mbfl_no_encoding_utf32 || no_enc == mbfl_no_encoding_utf32be || no_enc == mbfl_no_encoding_utf32le + || no_enc == mbfl_no_encoding_ucs2 + || no_enc == mbfl_no_encoding_ucs2be + || no_enc == mbfl_no_encoding_ucs2le + || no_enc == mbfl_no_encoding_utf16 + || no_enc == mbfl_no_encoding_utf16be + || no_enc == mbfl_no_encoding_utf16le ) { supported = true; } else if (no_enc == mbfl_no_encoding_pass From 1c1c64a591f19f013df7ca7c3f656d863bc95264 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 15:03:35 +0900 Subject: [PATCH 06/24] added support for various encodings other than unicode --- ext/mbstring/mbstring.c | 56 +++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 13 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 59ace4c7fdb7..5c16a0cda10c 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4587,9 +4587,11 @@ PHP_FUNCTION(mb_check_encoding) static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) { enum mbfl_no_encoding no_enc; - zend_bool supported = false; char* ret; size_t ret_len; + const mbfl_encoding *encoding; + unsigned char char_len; + long cp; if (enc == NULL) { no_enc = MBSTRG(current_internal_encoding)->no_encoding; @@ -4620,7 +4622,18 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) || no_enc == mbfl_no_encoding_utf16be || no_enc == mbfl_no_encoding_utf16le ) { - supported = true; + + ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len); + + if (ret == NULL) { + return -1; + } + + return (unsigned char) ret[0] << 24 | + (unsigned char) ret[1] << 16 | + (unsigned char) ret[2] << 8 | + (unsigned char) ret[3]; + } else if (no_enc == mbfl_no_encoding_pass || no_enc == mbfl_no_encoding_auto || no_enc == mbfl_no_encoding_wchar @@ -4646,24 +4659,41 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) || no_enc == mbfl_no_encoding_cp50221 || no_enc == mbfl_no_encoding_cp50222 ) { - supported = false; - } - - if (!supported) { php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); return -1; } - ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len); + if (!php_mb_check_encoding(str, str_len, enc)) { - if (ret == NULL) { - return -1; + if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) { + cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; + } + + return cp; + } + + encoding = mbfl_no2encoding(no_enc); + char_len = php_mb_mbchar_bytes_ex(str, encoding); + + if (char_len == 1) { + cp = (unsigned char) str[0]; + } else if (char_len == 2) { + cp = ((unsigned char) str[0] << 8) | + (unsigned char) str[1]; + } else if (char_len == 3) { + cp = ((unsigned char) str[0] << 16) | + ((unsigned char) str[1] << 8) | + (unsigned char) str[2]; + } else { + cp = ((unsigned char) str[0] << 24) | + ((unsigned char) str[1] << 16) | + ((unsigned char) str[2] << 8) | + (unsigned char) str[3]; } - return (unsigned char) ret[0] << 24 | - (unsigned char) ret[1] << 16 | - (unsigned char) ret[2] << 8 | - (unsigned char) ret[3]; + return cp; } /* {{{ proto bool mb_ord([string str[, string encoding]]) */ From 421f7fcfa8c35c4686ff10ff83a5792dca4149ca Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 15:13:17 +0900 Subject: [PATCH 07/24] added php_mb_check_forbidden_encoding --- ext/mbstring/mbstring.c | 59 ++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 25 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 5c16a0cda10c..e7c2e90cabb8 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4584,6 +4584,39 @@ PHP_FUNCTION(mb_check_encoding) } /* }}} */ +static inline zend_bool php_mb_check_forbidden_encoding(enum mbfl_no_encoding no_enc) +{ + if (no_enc == mbfl_no_encoding_pass + || no_enc == mbfl_no_encoding_auto + || no_enc == mbfl_no_encoding_wchar + || no_enc == mbfl_no_encoding_byte2be + || no_enc == mbfl_no_encoding_byte2le + || no_enc == mbfl_no_encoding_byte4be + || no_enc == mbfl_no_encoding_byte4le + || no_enc == mbfl_no_encoding_base64 + || no_enc == mbfl_no_encoding_uuencode + || no_enc == mbfl_no_encoding_html_ent + || no_enc == mbfl_no_encoding_qprint + || no_enc == mbfl_no_encoding_utf7 + || no_enc == mbfl_no_encoding_utf7imap + || no_enc == mbfl_no_encoding_2022kr + || no_enc == mbfl_no_encoding_jis + || no_enc == mbfl_no_encoding_2022jp + || no_enc == mbfl_no_encoding_2022jpms + || no_enc == mbfl_no_encoding_jis_ms + || no_enc == mbfl_no_encoding_2022jp_2004 + || no_enc == mbfl_no_encoding_2022jp_kddi + || no_enc == mbfl_no_encoding_cp50220 + || no_enc == mbfl_no_encoding_cp50220raw + || no_enc == mbfl_no_encoding_cp50221 + || no_enc == mbfl_no_encoding_cp50222 + ) { + return true; + } + + return false; +} + static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) { enum mbfl_no_encoding no_enc; @@ -4634,31 +4667,7 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) (unsigned char) ret[2] << 8 | (unsigned char) ret[3]; - } else if (no_enc == mbfl_no_encoding_pass - || no_enc == mbfl_no_encoding_auto - || no_enc == mbfl_no_encoding_wchar - || no_enc == mbfl_no_encoding_byte2be - || no_enc == mbfl_no_encoding_byte2le - || no_enc == mbfl_no_encoding_byte4be - || no_enc == mbfl_no_encoding_byte4le - || no_enc == mbfl_no_encoding_base64 - || no_enc == mbfl_no_encoding_uuencode - || no_enc == mbfl_no_encoding_html_ent - || no_enc == mbfl_no_encoding_qprint - || no_enc == mbfl_no_encoding_utf7 - || no_enc == mbfl_no_encoding_utf7imap - || no_enc == mbfl_no_encoding_2022kr - || no_enc == mbfl_no_encoding_jis - || no_enc == mbfl_no_encoding_2022jp - || no_enc == mbfl_no_encoding_2022jpms - || no_enc == mbfl_no_encoding_jis_ms - || no_enc == mbfl_no_encoding_2022jp_2004 - || no_enc == mbfl_no_encoding_2022jp_kddi - || no_enc == mbfl_no_encoding_cp50220 - || no_enc == mbfl_no_encoding_cp50220raw - || no_enc == mbfl_no_encoding_cp50221 - || no_enc == mbfl_no_encoding_cp50222 - ) { + } else if (php_mb_check_forbidden_encoding(no_enc)) { php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); return -1; } From 32383114d540c642222307b1c08535301ea376f2 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 16:01:47 +0900 Subject: [PATCH 08/24] added mb_chr --- ext/mbstring/mbstring.c | 98 +++++++++++++++++++++++++++++++++++++++++ ext/mbstring/mbstring.h | 1 + 2 files changed, 99 insertions(+) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index e7c2e90cabb8..cc2e8e6d2ce9 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -435,6 +435,11 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ord, 0, 0, 1) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() +ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_chr, 0, 0, 1) + ZEND_ARG_INFO(0, cp) + ZEND_ARG_INFO(0, encoding) +ZEND_END_ARG_INFO() + ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0) ZEND_ARG_INFO(0, encoding) ZEND_END_ARG_INFO() @@ -561,6 +566,7 @@ const zend_function_entry mbstring_functions[] = { PHP_FE(mb_get_info, arginfo_mb_get_info) PHP_FE(mb_check_encoding, arginfo_mb_check_encoding) PHP_FE(mb_ord, arginfo_mb_ord) + PHP_FE(mb_chr, arginfo_mb_chr) #if HAVE_MBREGEX PHP_MBREGEX_FUNCTION_ENTRIES #endif @@ -4728,6 +4734,98 @@ PHP_FUNCTION(mb_ord) } /* }}} */ +static inline char* php_mb_chr(long cp, const char* enc) +{ + enum mbfl_no_encoding no_enc; + zend_bool supported = false; + zend_string *buf = zend_string_alloc(4, 0); + char* ret; + size_t ret_len; + + if (enc == NULL) { + no_enc = MBSTRG(current_internal_encoding)->no_encoding; + } else { + no_enc = mbfl_name2no_encoding(enc); + if (no_enc == mbfl_no_encoding_invalid) { + php_error_docref(NULL, E_WARNING, "Unknown encoding \"%s\"", enc); + return NULL; + } + } + + if (no_enc == mbfl_no_encoding_utf8 + || no_enc == mbfl_no_encoding_utf8_docomo + || no_enc == mbfl_no_encoding_utf8_kddi_a + || no_enc == mbfl_no_encoding_utf8_kddi_b + || no_enc == mbfl_no_encoding_utf8_sb + || no_enc == mbfl_no_encoding_ucs4 + || no_enc == mbfl_no_encoding_ucs4be + || no_enc == mbfl_no_encoding_ucs4le + || no_enc == mbfl_no_encoding_utf32 + || no_enc == mbfl_no_encoding_utf32be + || no_enc == mbfl_no_encoding_utf32le + || no_enc == mbfl_no_encoding_ucs2 + || no_enc == mbfl_no_encoding_ucs2be + || no_enc == mbfl_no_encoding_ucs2le + || no_enc == mbfl_no_encoding_utf16 + || no_enc == mbfl_no_encoding_utf16be + || no_enc == mbfl_no_encoding_utf16le + ) { + supported = true; + } + + if (!supported) { + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + return NULL; + } + + if (no_enc == mbfl_no_encoding_utf8 + || no_enc == mbfl_no_encoding_utf8_docomo + || no_enc == mbfl_no_encoding_utf8_kddi_a + || no_enc == mbfl_no_encoding_utf8_kddi_b + ) { + + if (0 > cp || (cp > 0xd7ff && 0xe000 > cp) || 0x10ffff < cp) { + if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) { + cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; + } + } + } + + buf->val[0] = (cp >> 24) & 0xff; + buf->val[1] = (cp >> 16) & 0xff; + buf->val[2] = (cp >> 8) & 0xff; + buf->val[3] = cp & 0xff; + buf->val[4] = 0; + + ret = php_mb_convert_encoding(buf->val, buf->len, enc, "UCS-4BE", &ret_len); + zend_string_release(buf); + + return ret; +} +/* {{{ proto bool mb_ord([int cp[, string encoding]]) */ +PHP_FUNCTION(mb_chr) +{ + long cp; + char* enc = NULL; + long enc_len; + char* ret; + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l|s", &cp, &enc, &enc_len) == FAILURE) { + return; + } + + ret = php_mb_chr(cp, enc); + + if (ret == NULL) { + RETURN_FALSE; + } + + RETURN_STRING(ret); +} +/* }}} */ + /* {{{ php_mb_populate_current_detect_order_list */ static void php_mb_populate_current_detect_order_list(void) { diff --git a/ext/mbstring/mbstring.h b/ext/mbstring/mbstring.h index 8599e46881ea..bf28c1b51d80 100644 --- a/ext/mbstring/mbstring.h +++ b/ext/mbstring/mbstring.h @@ -128,6 +128,7 @@ PHP_FUNCTION(mb_send_mail); PHP_FUNCTION(mb_get_info); PHP_FUNCTION(mb_check_encoding); PHP_FUNCTION(mb_ord); +PHP_FUNCTION(mb_chr); MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc); From 09b5bfc71a7e4199295a48e75ef220aa34376cd5 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 18:43:55 +0900 Subject: [PATCH 09/24] added check by php_mb_check_forbidden_encoding --- ext/mbstring/mbstring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index cc2e8e6d2ce9..5ef6eca9a83b 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4773,7 +4773,7 @@ static inline char* php_mb_chr(long cp, const char* enc) supported = true; } - if (!supported) { + if (!supported || php_mb_check_forbidden_encoding(no_enc)) { php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); return NULL; } From ae838e6e4c53013469c5a1c2b0c540cf150c66be Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 18:59:13 +0900 Subject: [PATCH 10/24] added various encoding support other than unicode --- ext/mbstring/mbstring.c | 73 ++++++++++++++++++++++++++++------------- 1 file changed, 51 insertions(+), 22 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 5ef6eca9a83b..47143b6616b5 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4737,8 +4737,7 @@ PHP_FUNCTION(mb_ord) static inline char* php_mb_chr(long cp, const char* enc) { enum mbfl_no_encoding no_enc; - zend_bool supported = false; - zend_string *buf = zend_string_alloc(4, 0); + zend_string *buf; char* ret; size_t ret_len; @@ -4752,6 +4751,11 @@ static inline char* php_mb_chr(long cp, const char* enc) } } + if (php_mb_check_forbidden_encoding(no_enc)) { + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + return NULL; + } + if (no_enc == mbfl_no_encoding_utf8 || no_enc == mbfl_no_encoding_utf8_docomo || no_enc == mbfl_no_encoding_utf8_kddi_a @@ -4770,19 +4774,6 @@ static inline char* php_mb_chr(long cp, const char* enc) || no_enc == mbfl_no_encoding_utf16be || no_enc == mbfl_no_encoding_utf16le ) { - supported = true; - } - - if (!supported || php_mb_check_forbidden_encoding(no_enc)) { - php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); - return NULL; - } - - if (no_enc == mbfl_no_encoding_utf8 - || no_enc == mbfl_no_encoding_utf8_docomo - || no_enc == mbfl_no_encoding_utf8_kddi_a - || no_enc == mbfl_no_encoding_utf8_kddi_b - ) { if (0 > cp || (cp > 0xd7ff && 0xe000 > cp) || 0x10ffff < cp) { if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) { @@ -4791,18 +4782,56 @@ static inline char* php_mb_chr(long cp, const char* enc) cp = 0x3f; } } + + buf = zend_string_alloc(4, 0); + buf->val[0] = (cp >> 24) & 0xff; + buf->val[1] = (cp >> 16) & 0xff; + buf->val[2] = (cp >> 8) & 0xff; + buf->val[3] = cp & 0xff; + buf->val[4] = 0; + + ret = php_mb_convert_encoding(buf->val, buf->len, enc, "UCS-4BE", &ret_len); + zend_string_release(buf); + + return ret; + } + + if (0 > cp || cp > 0x100000000) { + if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) { + cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; + } } - buf->val[0] = (cp >> 24) & 0xff; - buf->val[1] = (cp >> 16) & 0xff; - buf->val[2] = (cp >> 8) & 0xff; - buf->val[3] = cp & 0xff; - buf->val[4] = 0; + if (cp < 0x100) { + buf = zend_string_alloc(1, 0); + buf->val[0] = cp; + buf->val[1] = 0; + } else if (cp < 0x10000) { + buf = zend_string_alloc(2, 0); + buf->val[0] = cp >> 8; + buf->val[1] = cp & 0xff; + buf->val[2] = 0; + } else if (cp < 0x1000000) { + buf = zend_string_alloc(3, 0); + buf->val[0] = cp >> 16; + buf->val[1] = (cp >> 8) & 0xff; + buf->val[2] = cp & 0xff; + buf->val[3] = 0; + } else { + buf = zend_string_alloc(4, 0); + buf->val[0] = cp >> 24; + buf->val[1] = (cp >> 16) & 0xff; + buf->val[2] = (cp >> 8) & 0xff; + buf->val[3] = cp & 0xff; + buf->val[4] = 0; + } - ret = php_mb_convert_encoding(buf->val, buf->len, enc, "UCS-4BE", &ret_len); + ret = php_mb_convert_encoding(buf->val, buf->len, enc, enc, &ret_len); zend_string_release(buf); - return ret; + return ret; } /* {{{ proto bool mb_ord([int cp[, string encoding]]) */ PHP_FUNCTION(mb_chr) From 08443b2bab312f8cc292541bab6b444211c8a977 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 19:08:50 +0900 Subject: [PATCH 11/24] use php_mb_convert_encoding instead of php_mb_check_encoding --- ext/mbstring/mbstring.c | 47 +++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 47143b6616b5..4d014af906b0 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4668,46 +4668,47 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) return -1; } - return (unsigned char) ret[0] << 24 | - (unsigned char) ret[1] << 16 | - (unsigned char) ret[2] << 8 | - (unsigned char) ret[3]; + cp = (unsigned char) ret[0] << 24 | \ + (unsigned char) ret[1] << 16 | \ + (unsigned char) ret[2] << 8 | \ + (unsigned char) ret[3]; + + efree(ret); + + return cp; } else if (php_mb_check_forbidden_encoding(no_enc)) { php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); return -1; } - if (!php_mb_check_encoding(str, str_len, enc)) { - - if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) { - cp = MBSTRG(current_filter_illegal_substchar); - } else { - cp = 0x3f; - } + ret = php_mb_convert_encoding(str, str_len, enc, enc, &ret_len); - return cp; + if (ret == NULL) { + return -1; } encoding = mbfl_no2encoding(no_enc); - char_len = php_mb_mbchar_bytes_ex(str, encoding); + char_len = php_mb_mbchar_bytes_ex(ret, encoding); if (char_len == 1) { - cp = (unsigned char) str[0]; + cp = (unsigned char) ret[0]; } else if (char_len == 2) { - cp = ((unsigned char) str[0] << 8) | - (unsigned char) str[1]; + cp = ((unsigned char) ret[0] << 8) | \ + (unsigned char) ret[1]; } else if (char_len == 3) { - cp = ((unsigned char) str[0] << 16) | - ((unsigned char) str[1] << 8) | - (unsigned char) str[2]; + cp = ((unsigned char) ret[0] << 16) | \ + ((unsigned char) ret[1] << 8) | \ + (unsigned char) ret[2]; } else { - cp = ((unsigned char) str[0] << 24) | - ((unsigned char) str[1] << 16) | - ((unsigned char) str[2] << 8) | - (unsigned char) str[3]; + cp = ((unsigned char) ret[0] << 24) | \ + ((unsigned char) ret[1] << 16) | \ + ((unsigned char) ret[2] << 8) | \ + (unsigned char) ret[3]; } + efree(ret); + return cp; } From 2e97d7be3b1048a699afc31292b0316082becca7 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Thu, 19 Feb 2015 19:16:14 +0900 Subject: [PATCH 12/24] changed the position of calling php_mb_check_forbidden_encoding --- ext/mbstring/mbstring.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 4d014af906b0..ce692ae07652 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4678,7 +4678,7 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) return cp; } else if (php_mb_check_forbidden_encoding(no_enc)) { - php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); return -1; } @@ -4752,11 +4752,6 @@ static inline char* php_mb_chr(long cp, const char* enc) } } - if (php_mb_check_forbidden_encoding(no_enc)) { - php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); - return NULL; - } - if (no_enc == mbfl_no_encoding_utf8 || no_enc == mbfl_no_encoding_utf8_docomo || no_enc == mbfl_no_encoding_utf8_kddi_a @@ -4795,6 +4790,9 @@ static inline char* php_mb_chr(long cp, const char* enc) zend_string_release(buf); return ret; + } else if (php_mb_check_forbidden_encoding(no_enc)) { + php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); + return NULL; } if (0 > cp || cp > 0x100000000) { @@ -4832,7 +4830,7 @@ static inline char* php_mb_chr(long cp, const char* enc) ret = php_mb_convert_encoding(buf->val, buf->len, enc, enc, &ret_len); zend_string_release(buf); - return ret; + return ret; } /* {{{ proto bool mb_ord([int cp[, string encoding]]) */ PHP_FUNCTION(mb_chr) From 325faea56fcd91a18a663486de204b7c5db1ae0d Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Fri, 20 Feb 2015 18:29:34 +0900 Subject: [PATCH 13/24] rename php_mb_check_forbidden_encoding to php_mb_check_unsupported_encoding --- ext/mbstring/mbstring.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index ce692ae07652..d0dc42cfa660 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4590,7 +4590,7 @@ PHP_FUNCTION(mb_check_encoding) } /* }}} */ -static inline zend_bool php_mb_check_forbidden_encoding(enum mbfl_no_encoding no_enc) +static inline zend_bool php_mb_check_unsupported_encoding(enum mbfl_no_encoding no_enc) { if (no_enc == mbfl_no_encoding_pass || no_enc == mbfl_no_encoding_auto @@ -4677,7 +4677,7 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) return cp; - } else if (php_mb_check_forbidden_encoding(no_enc)) { + } else if (php_mb_check_unsupported_encoding(no_enc)) { php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); return -1; } @@ -4790,7 +4790,7 @@ static inline char* php_mb_chr(long cp, const char* enc) zend_string_release(buf); return ret; - } else if (php_mb_check_forbidden_encoding(no_enc)) { + } else if (php_mb_check_unsupported_encoding(no_enc)) { php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); return NULL; } From 00324a379c80c5f6eb69f1c53331de92d75ca1e8 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Fri, 20 Feb 2015 18:43:21 +0900 Subject: [PATCH 14/24] add test for mb_chr and mb_ord --- ext/mbstring/tests/mb_chr.phpt | 16 ++++++++++++++++ ext/mbstring/tests/mb_ord.phpt | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 ext/mbstring/tests/mb_chr.phpt create mode 100644 ext/mbstring/tests/mb_ord.phpt diff --git a/ext/mbstring/tests/mb_chr.phpt b/ext/mbstring/tests/mb_chr.phpt new file mode 100644 index 000000000000..8edc8a229cac --- /dev/null +++ b/ext/mbstring/tests/mb_chr.phpt @@ -0,0 +1,16 @@ +--TEST-- +mb_chr() +--SKIPIF-- + +--FILE-- + +--EXPECT-- +bool(true) +bool(true) +bool(true) \ No newline at end of file diff --git a/ext/mbstring/tests/mb_ord.phpt b/ext/mbstring/tests/mb_ord.phpt new file mode 100644 index 000000000000..8c9a40741e6c --- /dev/null +++ b/ext/mbstring/tests/mb_ord.phpt @@ -0,0 +1,16 @@ +--TEST-- +mb_ord() +--SKIPIF-- + +--FILE-- + +--EXPECT-- +bool(true) +bool(true) +bool(true) \ No newline at end of file From 1033bad01ec8d599082c3ddeb6ba4c54faf1b26b Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Sat, 21 Feb 2015 19:19:40 +0900 Subject: [PATCH 15/24] add php_mb_check_unicode_encoding --- ext/mbstring/mbstring.c | 92 ++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 37 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index d0dc42cfa660..a2ec16f7060e 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4623,6 +4623,32 @@ static inline zend_bool php_mb_check_unsupported_encoding(enum mbfl_no_encoding return false; } +static inline zend_bool php_mb_check_unicode_encoding(enum mbfl_no_encoding no_enc) +{ + if (no_enc == mbfl_no_encoding_utf8 + || no_enc == mbfl_no_encoding_utf8_docomo + || no_enc == mbfl_no_encoding_utf8_kddi_a + || no_enc == mbfl_no_encoding_utf8_kddi_b + || no_enc == mbfl_no_encoding_utf8_sb + || no_enc == mbfl_no_encoding_ucs4 + || no_enc == mbfl_no_encoding_ucs4be + || no_enc == mbfl_no_encoding_ucs4le + || no_enc == mbfl_no_encoding_utf32 + || no_enc == mbfl_no_encoding_utf32be + || no_enc == mbfl_no_encoding_utf32le + || no_enc == mbfl_no_encoding_ucs2 + || no_enc == mbfl_no_encoding_ucs2be + || no_enc == mbfl_no_encoding_ucs2le + || no_enc == mbfl_no_encoding_utf16 + || no_enc == mbfl_no_encoding_utf16be + || no_enc == mbfl_no_encoding_utf16le + ) { + return true; + } + + return false; +} + static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) { enum mbfl_no_encoding no_enc; @@ -4643,24 +4669,7 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) } } - if (no_enc == mbfl_no_encoding_utf8 - || no_enc == mbfl_no_encoding_utf8_docomo - || no_enc == mbfl_no_encoding_utf8_kddi_a - || no_enc == mbfl_no_encoding_utf8_kddi_b - || no_enc == mbfl_no_encoding_utf8_sb - || no_enc == mbfl_no_encoding_ucs4 - || no_enc == mbfl_no_encoding_ucs4be - || no_enc == mbfl_no_encoding_ucs4le - || no_enc == mbfl_no_encoding_utf32 - || no_enc == mbfl_no_encoding_utf32be - || no_enc == mbfl_no_encoding_utf32le - || no_enc == mbfl_no_encoding_ucs2 - || no_enc == mbfl_no_encoding_ucs2be - || no_enc == mbfl_no_encoding_ucs2le - || no_enc == mbfl_no_encoding_utf16 - || no_enc == mbfl_no_encoding_utf16be - || no_enc == mbfl_no_encoding_utf16le - ) { + if (php_mb_check_unicode_encoding(no_enc)) { ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len); @@ -4752,30 +4761,39 @@ static inline char* php_mb_chr(long cp, const char* enc) } } - if (no_enc == mbfl_no_encoding_utf8 + if (php_mb_check_unicode_encoding(no_enc)) { + + if (0 > cp || 0x10ffff < cp) { + + if (php_mb_check_unicode_encoding(MBSTRG(current_internal_encoding)->no_encoding)) { + cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; + } + + } + + if (no_enc == mbfl_no_encoding_utf8 || no_enc == mbfl_no_encoding_utf8_docomo || no_enc == mbfl_no_encoding_utf8_kddi_a || no_enc == mbfl_no_encoding_utf8_kddi_b || no_enc == mbfl_no_encoding_utf8_sb - || no_enc == mbfl_no_encoding_ucs4 - || no_enc == mbfl_no_encoding_ucs4be - || no_enc == mbfl_no_encoding_ucs4le - || no_enc == mbfl_no_encoding_utf32 - || no_enc == mbfl_no_encoding_utf32be - || no_enc == mbfl_no_encoding_utf32le - || no_enc == mbfl_no_encoding_ucs2 - || no_enc == mbfl_no_encoding_ucs2be - || no_enc == mbfl_no_encoding_ucs2le - || no_enc == mbfl_no_encoding_utf16 - || no_enc == mbfl_no_encoding_utf16be - || no_enc == mbfl_no_encoding_utf16le - ) { + ) { + if (cp > 0xd7ff && 0xe000 > cp) { + + if (php_mb_check_unicode_encoding(MBSTRG(current_internal_encoding)->no_encoding)) { + if (MBSTRG(current_filter_illegal_substchar) > 0xd7ff && + 0xe000 > MBSTRG(current_filter_illegal_substchar) + ) { + cp = 0x3f; + } else { + cp = MBSTRG(current_filter_illegal_substchar); + } + + } else { + cp = 0x3f; + } - if (0 > cp || (cp > 0xd7ff && 0xe000 > cp) || 0x10ffff < cp) { - if (no_enc == MBSTRG(current_internal_encoding)->no_encoding) { - cp = MBSTRG(current_filter_illegal_substchar); - } else { - cp = 0x3f; } } From 2a3c08b834e731592699b8dfc23590dbd92e1cf8 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Sat, 21 Feb 2015 20:48:06 +0900 Subject: [PATCH 16/24] fix php_mb_ord for better handling the value of MBSTRG(current_filter_illegal_substchar) --- ext/mbstring/mbstring.c | 51 ++++++++++++++++++++-------------- ext/mbstring/tests/mb_chr.phpt | 23 +++++++++++++-- 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index a2ec16f7060e..b7be8d5ead53 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4518,7 +4518,7 @@ PHP_FUNCTION(mb_get_info) } /* }}} */ -static inline zend_bool php_mb_check_encoding(const char *input, size_t length, const char *enc) +static inline int php_mb_check_encoding(const char *input, size_t length, const char *enc) { const mbfl_encoding *encoding = MBSTRG(current_internal_encoding); mbfl_buffer_converter *convd; @@ -4533,7 +4533,7 @@ static inline zend_bool php_mb_check_encoding(const char *input, size_t length, encoding = mbfl_name2encoding(enc); if (!encoding || encoding == &mbfl_encoding_pass) { php_error_docref(NULL, E_WARNING, "Invalid encoding \"%s\"", enc); - return false; + return 0; } } @@ -4541,7 +4541,7 @@ static inline zend_bool php_mb_check_encoding(const char *input, size_t length, if (convd == NULL) { php_error_docref(NULL, E_WARNING, "Unable to create converter"); - return false; + return 0; } mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE); @@ -4560,13 +4560,13 @@ static inline zend_bool php_mb_check_encoding(const char *input, size_t length, if (ret != NULL) { if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { - return true; + return 1; } mbfl_string_clear(&result); } - return false; + return 0; } /* {{{ proto bool mb_check_encoding([string var[, string encoding]]) @@ -4590,7 +4590,7 @@ PHP_FUNCTION(mb_check_encoding) } /* }}} */ -static inline zend_bool php_mb_check_unsupported_encoding(enum mbfl_no_encoding no_enc) +static inline int php_mb_check_unsupported_encoding(enum mbfl_no_encoding no_enc) { if (no_enc == mbfl_no_encoding_pass || no_enc == mbfl_no_encoding_auto @@ -4617,13 +4617,13 @@ static inline zend_bool php_mb_check_unsupported_encoding(enum mbfl_no_encoding || no_enc == mbfl_no_encoding_cp50221 || no_enc == mbfl_no_encoding_cp50222 ) { - return true; + return 1; } - return false; + return 0; } -static inline zend_bool php_mb_check_unicode_encoding(enum mbfl_no_encoding no_enc) +static inline int php_mb_check_unicode_encoding(enum mbfl_no_encoding no_enc) { if (no_enc == mbfl_no_encoding_utf8 || no_enc == mbfl_no_encoding_utf8_docomo @@ -4643,10 +4643,24 @@ static inline zend_bool php_mb_check_unicode_encoding(enum mbfl_no_encoding no_e || no_enc == mbfl_no_encoding_utf16be || no_enc == mbfl_no_encoding_utf16le ) { - return true; + return 1; } - return false; + return 0; +} + +static inline int php_mb_check_utf8_encoding(enum mbfl_no_encoding no_enc) +{ + if (no_enc == mbfl_no_encoding_utf8 + || no_enc == mbfl_no_encoding_utf8_docomo + || no_enc == mbfl_no_encoding_utf8_kddi_a + || no_enc == mbfl_no_encoding_utf8_kddi_b + || no_enc == mbfl_no_encoding_utf8_sb + ) { + return 1; + } + + return 0; } static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) @@ -4773,18 +4787,12 @@ static inline char* php_mb_chr(long cp, const char* enc) } - if (no_enc == mbfl_no_encoding_utf8 - || no_enc == mbfl_no_encoding_utf8_docomo - || no_enc == mbfl_no_encoding_utf8_kddi_a - || no_enc == mbfl_no_encoding_utf8_kddi_b - || no_enc == mbfl_no_encoding_utf8_sb - ) { - if (cp > 0xd7ff && 0xe000 > cp) { + if (php_mb_check_utf8_encoding(no_enc)) { + if (cp > 0xd7ff && 0xe000 > cp) { if (php_mb_check_unicode_encoding(MBSTRG(current_internal_encoding)->no_encoding)) { - if (MBSTRG(current_filter_illegal_substchar) > 0xd7ff && - 0xe000 > MBSTRG(current_filter_illegal_substchar) - ) { + + if (MBSTRG(current_filter_illegal_substchar) > 0xd7ff && 0xe000 > MBSTRG(current_filter_illegal_substchar)) { cp = 0x3f; } else { cp = MBSTRG(current_filter_illegal_substchar); @@ -4795,6 +4803,7 @@ static inline char* php_mb_chr(long cp, const char* enc) } } + } buf = zend_string_alloc(4, 0); diff --git a/ext/mbstring/tests/mb_chr.phpt b/ext/mbstring/tests/mb_chr.phpt index 8edc8a229cac..7047d1c2de97 100644 --- a/ext/mbstring/tests/mb_chr.phpt +++ b/ext/mbstring/tests/mb_chr.phpt @@ -6,11 +6,30 @@ mb_chr() --EXPECT-- bool(true) bool(true) +bool(true) +bool(true) +bool(true) bool(true) \ No newline at end of file From bdb7a59a4b4902a2baaf82531430672ee5ca3927 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Sat, 21 Feb 2015 22:58:34 +0900 Subject: [PATCH 17/24] fix memory leak --- ext/mbstring/mbstring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index b7be8d5ead53..1ca53b67b53d 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4877,7 +4877,8 @@ PHP_FUNCTION(mb_chr) RETURN_FALSE; } - RETURN_STRING(ret); + RETVAL_STRING(ret); + efree(ret); } /* }}} */ From 84100270fb2167a749bfce75b13a24d31eb5eb39 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Sat, 21 Feb 2015 23:39:59 +0900 Subject: [PATCH 18/24] fix memory leak in php_mb_check_encoding --- ext/mbstring/mbstring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 1ca53b67b53d..a6fedd32ff97 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4560,6 +4560,7 @@ static inline int php_mb_check_encoding(const char *input, size_t length, const if (ret != NULL) { if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) { + mbfl_string_clear(&result); return 1; } From 29d09b7d978871996c890ab4b85b156ae0d00050 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Fri, 27 Feb 2015 19:11:16 +0900 Subject: [PATCH 19/24] update the functions for checking the names of encodings --- ext/mbstring/mbstring.c | 154 ++++++++++++++++++++++++---------------- 1 file changed, 92 insertions(+), 62 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index a6fedd32ff97..2679a7c73a82 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4591,74 +4591,104 @@ PHP_FUNCTION(mb_check_encoding) } /* }}} */ -static inline int php_mb_check_unsupported_encoding(enum mbfl_no_encoding no_enc) +static const enum mbfl_no_encoding php_mb_unsupported_no_encoding_list[] = { + mbfl_no_encoding_pass, + mbfl_no_encoding_auto, + mbfl_no_encoding_wchar, + mbfl_no_encoding_byte2be, + mbfl_no_encoding_byte2le, + mbfl_no_encoding_byte4be, + mbfl_no_encoding_byte4le, + mbfl_no_encoding_base64, + mbfl_no_encoding_uuencode, + mbfl_no_encoding_html_ent, + mbfl_no_encoding_qprint, + mbfl_no_encoding_utf7, + mbfl_no_encoding_utf7imap, + mbfl_no_encoding_2022kr, + mbfl_no_encoding_jis, + mbfl_no_encoding_2022jp, + mbfl_no_encoding_2022jpms, + mbfl_no_encoding_jis_ms, + mbfl_no_encoding_2022jp_2004, + mbfl_no_encoding_2022jp_kddi, + mbfl_no_encoding_cp50220, + mbfl_no_encoding_cp50220raw, + mbfl_no_encoding_cp50221, + mbfl_no_encoding_cp50222 +}; + +static inline int php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc) { - if (no_enc == mbfl_no_encoding_pass - || no_enc == mbfl_no_encoding_auto - || no_enc == mbfl_no_encoding_wchar - || no_enc == mbfl_no_encoding_byte2be - || no_enc == mbfl_no_encoding_byte2le - || no_enc == mbfl_no_encoding_byte4be - || no_enc == mbfl_no_encoding_byte4le - || no_enc == mbfl_no_encoding_base64 - || no_enc == mbfl_no_encoding_uuencode - || no_enc == mbfl_no_encoding_html_ent - || no_enc == mbfl_no_encoding_qprint - || no_enc == mbfl_no_encoding_utf7 - || no_enc == mbfl_no_encoding_utf7imap - || no_enc == mbfl_no_encoding_2022kr - || no_enc == mbfl_no_encoding_jis - || no_enc == mbfl_no_encoding_2022jp - || no_enc == mbfl_no_encoding_2022jpms - || no_enc == mbfl_no_encoding_jis_ms - || no_enc == mbfl_no_encoding_2022jp_2004 - || no_enc == mbfl_no_encoding_2022jp_kddi - || no_enc == mbfl_no_encoding_cp50220 - || no_enc == mbfl_no_encoding_cp50220raw - || no_enc == mbfl_no_encoding_cp50221 - || no_enc == mbfl_no_encoding_cp50222 - ) { - return 1; + int i; + int size = sizeof(php_mb_unsupported_no_encoding_list)/sizeof(php_mb_unsupported_no_encoding_list[0]); + + for (i = 0; i < size; i++) { + + if (no_enc == php_mb_unsupported_no_encoding_list[i]) { + return 1; + } + } return 0; } -static inline int php_mb_check_unicode_encoding(enum mbfl_no_encoding no_enc) +static const enum mbfl_no_encoding php_mb_no_encoding_unicode_list[] = { + mbfl_no_encoding_utf8, + mbfl_no_encoding_utf8_docomo, + mbfl_no_encoding_utf8_kddi_a, + mbfl_no_encoding_utf8_kddi_b, + mbfl_no_encoding_utf8_sb, + mbfl_no_encoding_ucs4, + mbfl_no_encoding_ucs4be, + mbfl_no_encoding_ucs4le, + mbfl_no_encoding_utf32, + mbfl_no_encoding_utf32be, + mbfl_no_encoding_utf32le, + mbfl_no_encoding_ucs2, + mbfl_no_encoding_ucs2be, + mbfl_no_encoding_ucs2le, + mbfl_no_encoding_utf16, + mbfl_no_encoding_utf16be, + mbfl_no_encoding_utf16le +}; + +static inline int php_mb_is_no_encoding_unicode(enum mbfl_no_encoding no_enc) { - if (no_enc == mbfl_no_encoding_utf8 - || no_enc == mbfl_no_encoding_utf8_docomo - || no_enc == mbfl_no_encoding_utf8_kddi_a - || no_enc == mbfl_no_encoding_utf8_kddi_b - || no_enc == mbfl_no_encoding_utf8_sb - || no_enc == mbfl_no_encoding_ucs4 - || no_enc == mbfl_no_encoding_ucs4be - || no_enc == mbfl_no_encoding_ucs4le - || no_enc == mbfl_no_encoding_utf32 - || no_enc == mbfl_no_encoding_utf32be - || no_enc == mbfl_no_encoding_utf32le - || no_enc == mbfl_no_encoding_ucs2 - || no_enc == mbfl_no_encoding_ucs2be - || no_enc == mbfl_no_encoding_ucs2le - || no_enc == mbfl_no_encoding_utf16 - || no_enc == mbfl_no_encoding_utf16be - || no_enc == mbfl_no_encoding_utf16le - ) { - return 1; + int i; + int size = sizeof(php_mb_no_encoding_unicode_list)/sizeof(php_mb_no_encoding_unicode_list[0]); + + for (i = 0; i < size; i++) { + + if (no_enc == php_mb_no_encoding_unicode_list[i]) { + return 1; + } + } return 0; } -static inline int php_mb_check_utf8_encoding(enum mbfl_no_encoding no_enc) +static const enum mbfl_no_encoding php_mb_no_encoding_utf8_list[] = { + mbfl_no_encoding_utf8, + mbfl_no_encoding_utf8_docomo, + mbfl_no_encoding_utf8_kddi_a, + mbfl_no_encoding_utf8_kddi_b, + mbfl_no_encoding_utf8_sb +}; + +static inline int php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc) { - if (no_enc == mbfl_no_encoding_utf8 - || no_enc == mbfl_no_encoding_utf8_docomo - || no_enc == mbfl_no_encoding_utf8_kddi_a - || no_enc == mbfl_no_encoding_utf8_kddi_b - || no_enc == mbfl_no_encoding_utf8_sb - ) { - return 1; + int i; + int size = sizeof(php_mb_no_encoding_utf8_list)/sizeof(php_mb_no_encoding_utf8_list[0]); + + for (i = 0; i < size; i++) { + + if (no_enc == php_mb_no_encoding_utf8_list[i]) { + return 1; + } + } return 0; @@ -4684,7 +4714,7 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) } } - if (php_mb_check_unicode_encoding(no_enc)) { + if (php_mb_is_no_encoding_unicode(no_enc)) { ret = php_mb_convert_encoding(str, str_len, "UCS-4BE", enc, &ret_len); @@ -4701,7 +4731,7 @@ static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) return cp; - } else if (php_mb_check_unsupported_encoding(no_enc)) { + } else if (php_mb_is_unsupported_no_encoding(no_enc)) { php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); return -1; } @@ -4776,11 +4806,11 @@ static inline char* php_mb_chr(long cp, const char* enc) } } - if (php_mb_check_unicode_encoding(no_enc)) { + if (php_mb_is_no_encoding_unicode(no_enc)) { if (0 > cp || 0x10ffff < cp) { - if (php_mb_check_unicode_encoding(MBSTRG(current_internal_encoding)->no_encoding)) { + if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) { cp = MBSTRG(current_filter_illegal_substchar); } else { cp = 0x3f; @@ -4788,10 +4818,10 @@ static inline char* php_mb_chr(long cp, const char* enc) } - if (php_mb_check_utf8_encoding(no_enc)) { + if (php_mb_is_no_encoding_utf8(no_enc)) { if (cp > 0xd7ff && 0xe000 > cp) { - if (php_mb_check_unicode_encoding(MBSTRG(current_internal_encoding)->no_encoding)) { + if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) { if (MBSTRG(current_filter_illegal_substchar) > 0xd7ff && 0xe000 > MBSTRG(current_filter_illegal_substchar)) { cp = 0x3f; @@ -4818,7 +4848,7 @@ static inline char* php_mb_chr(long cp, const char* enc) zend_string_release(buf); return ret; - } else if (php_mb_check_unsupported_encoding(no_enc)) { + } else if (php_mb_is_unsupported_no_encoding(no_enc)) { php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); return NULL; } From 70e241f1a32f8e30ca4aac47e7e9a781d9c42eb1 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Sun, 1 Mar 2015 15:05:23 +0900 Subject: [PATCH 20/24] replace zend_string_alloc with safe_emalloc --- ext/mbstring/mbstring.c | 64 ++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 2679a7c73a82..a6c2575f0bfd 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4792,7 +4792,8 @@ PHP_FUNCTION(mb_ord) static inline char* php_mb_chr(long cp, const char* enc) { enum mbfl_no_encoding no_enc; - zend_string *buf; + char* buf; + size_t buf_len; char* ret; size_t ret_len; @@ -4837,15 +4838,16 @@ static inline char* php_mb_chr(long cp, const char* enc) } - buf = zend_string_alloc(4, 0); - buf->val[0] = (cp >> 24) & 0xff; - buf->val[1] = (cp >> 16) & 0xff; - buf->val[2] = (cp >> 8) & 0xff; - buf->val[3] = cp & 0xff; - buf->val[4] = 0; + buf_len = 4; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = (cp >> 24) & 0xff; + buf[1] = (cp >> 16) & 0xff; + buf[2] = (cp >> 8) & 0xff; + buf[3] = cp & 0xff; + buf[4] = 0; - ret = php_mb_convert_encoding(buf->val, buf->len, enc, "UCS-4BE", &ret_len); - zend_string_release(buf); + ret = php_mb_convert_encoding(buf, buf_len, enc, "UCS-4BE", &ret_len); + efree(buf); return ret; } else if (php_mb_is_unsupported_no_encoding(no_enc)) { @@ -4862,31 +4864,35 @@ static inline char* php_mb_chr(long cp, const char* enc) } if (cp < 0x100) { - buf = zend_string_alloc(1, 0); - buf->val[0] = cp; - buf->val[1] = 0; + buf_len = 1; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp; + buf[1] = 0; } else if (cp < 0x10000) { - buf = zend_string_alloc(2, 0); - buf->val[0] = cp >> 8; - buf->val[1] = cp & 0xff; - buf->val[2] = 0; + buf_len = 2; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp >> 8; + buf[1] = cp & 0xff; + buf[2] = 0; } else if (cp < 0x1000000) { - buf = zend_string_alloc(3, 0); - buf->val[0] = cp >> 16; - buf->val[1] = (cp >> 8) & 0xff; - buf->val[2] = cp & 0xff; - buf->val[3] = 0; + buf_len = 3; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp >> 16; + buf[1] = (cp >> 8) & 0xff; + buf[2] = cp & 0xff; + buf[3] = 0; } else { - buf = zend_string_alloc(4, 0); - buf->val[0] = cp >> 24; - buf->val[1] = (cp >> 16) & 0xff; - buf->val[2] = (cp >> 8) & 0xff; - buf->val[3] = cp & 0xff; - buf->val[4] = 0; + buf_len = 4; + buf = (char *) safe_emalloc(buf_len, 1, 1); + buf[0] = cp >> 24; + buf[1] = (cp >> 16) & 0xff; + buf[2] = (cp >> 8) & 0xff; + buf[3] = cp & 0xff; + buf[4] = 0; } - ret = php_mb_convert_encoding(buf->val, buf->len, enc, enc, &ret_len); - zend_string_release(buf); + ret = php_mb_convert_encoding(buf, buf_len, enc, enc, &ret_len); + efree(buf); return ret; } From 905dfb9179a570b51469febda825a52a529d6793 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Sun, 1 Mar 2015 15:26:42 +0900 Subject: [PATCH 21/24] add argument for output_len --- ext/mbstring/mbstring.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index a6c2575f0bfd..c17fc6555fb2 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4789,7 +4789,7 @@ PHP_FUNCTION(mb_ord) } /* }}} */ -static inline char* php_mb_chr(long cp, const char* enc) +static inline char* php_mb_chr(long cp, const char* enc, size_t *output_len) { enum mbfl_no_encoding no_enc; char* buf; @@ -4849,6 +4849,10 @@ static inline char* php_mb_chr(long cp, const char* enc) ret = php_mb_convert_encoding(buf, buf_len, enc, "UCS-4BE", &ret_len); efree(buf); + if (output_len) { + *output_len = ret_len; + } + return ret; } else if (php_mb_is_unsupported_no_encoding(no_enc)) { php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); @@ -4894,6 +4898,10 @@ static inline char* php_mb_chr(long cp, const char* enc) ret = php_mb_convert_encoding(buf, buf_len, enc, enc, &ret_len); efree(buf); + if (output_len) { + *output_len = ret_len; + } + return ret; } /* {{{ proto bool mb_ord([int cp[, string encoding]]) */ @@ -4903,12 +4911,13 @@ PHP_FUNCTION(mb_chr) char* enc = NULL; long enc_len; char* ret; - + size_t ret_len; + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l|s", &cp, &enc, &enc_len) == FAILURE) { return; } - ret = php_mb_chr(cp, enc); + ret = php_mb_chr(cp, enc, &ret_len); if (ret == NULL) { RETURN_FALSE; From d24dacc594cd82ebb6ac984a91ffbcd3ace2a602 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Sun, 1 Mar 2015 16:01:06 +0900 Subject: [PATCH 22/24] replace AND operator with OR operator --- ext/mbstring/mbstring.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index c17fc6555fb2..25982bdb162b 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4817,23 +4817,18 @@ static inline char* php_mb_chr(long cp, const char* enc, size_t *output_len) cp = 0x3f; } - } - - if (php_mb_is_no_encoding_utf8(no_enc)) { + } else if (php_mb_is_no_encoding_utf8(no_enc)) { if (cp > 0xd7ff && 0xe000 > cp) { if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) { - - if (MBSTRG(current_filter_illegal_substchar) > 0xd7ff && 0xe000 > MBSTRG(current_filter_illegal_substchar)) { - cp = 0x3f; - } else { + if (0xd800 > MBSTRG(current_filter_illegal_substchar) || MBSTRG(current_filter_illegal_substchar) > 0xdfff) { cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; } - } else { cp = 0x3f; } - } } @@ -4854,6 +4849,7 @@ static inline char* php_mb_chr(long cp, const char* enc, size_t *output_len) } return ret; + } else if (php_mb_is_unsupported_no_encoding(no_enc)) { php_error_docref(NULL, E_WARNING, "Unsupported encoding \"%s\"", enc); return NULL; From b8468adad6370a3363284f5f2bbfbf4eba7ebeb4 Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Mon, 2 Mar 2015 20:09:35 +0900 Subject: [PATCH 23/24] introduce fast zpp --- ext/mbstring/mbstring.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 25982bdb162b..95ee0f0d9e80 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4694,14 +4694,14 @@ static inline int php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc) return 0; } -static inline long php_mb_ord(const char* str, size_t str_len, const char* enc) +static inline zend_long php_mb_ord(const char* str, size_t str_len, const char* enc) { enum mbfl_no_encoding no_enc; char* ret; size_t ret_len; const mbfl_encoding *encoding; unsigned char char_len; - long cp; + zend_long cp; if (enc == NULL) { no_enc = MBSTRG(current_internal_encoding)->no_encoding; @@ -4773,11 +4773,19 @@ PHP_FUNCTION(mb_ord) size_t str_len; char* enc = NULL; size_t enc_len; - long cp; + zend_long cp; +#ifndef FAST_ZPP if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|s", &str, &str_len, &enc, &enc_len) == FAILURE) { return; } +#else + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_STRING(str, str_len) + Z_PARAM_OPTIONAL + Z_PARAM_STRING(enc, enc_len) + ZEND_PARSE_PARAMETERS_END(); +#endif cp = php_mb_ord(str, str_len, enc); @@ -4789,7 +4797,7 @@ PHP_FUNCTION(mb_ord) } /* }}} */ -static inline char* php_mb_chr(long cp, const char* enc, size_t *output_len) +static inline char* php_mb_chr(zend_long cp, const char* enc, size_t *output_len) { enum mbfl_no_encoding no_enc; char* buf; @@ -4903,15 +4911,23 @@ static inline char* php_mb_chr(long cp, const char* enc, size_t *output_len) /* {{{ proto bool mb_ord([int cp[, string encoding]]) */ PHP_FUNCTION(mb_chr) { - long cp; + zend_long cp; char* enc = NULL; - long enc_len; + size_t enc_len; char* ret; size_t ret_len; +#ifndef FAST_ZPP if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l|s", &cp, &enc, &enc_len) == FAILURE) { return; } +#else + ZEND_PARSE_PARAMETERS_START(1, 2) + Z_PARAM_LONG(cp) + Z_PARAM_OPTIONAL + Z_PARAM_STRING(enc, enc_len) + ZEND_PARSE_PARAMETERS_END(); +#endif ret = php_mb_chr(cp, enc, &ret_len); From 15e32fdc58d7570138590d078c007ccf64a34f2e Mon Sep 17 00:00:00 2001 From: Masaki Kagaya Date: Sun, 8 Mar 2015 02:17:17 +0900 Subject: [PATCH 24/24] generate utf-8 string directly instead of using php_mb_convert_encoding --- ext/mbstring/mbstring.c | 65 ++++++++++++++++++++++++++++++++--------- 1 file changed, 51 insertions(+), 14 deletions(-) diff --git a/ext/mbstring/mbstring.c b/ext/mbstring/mbstring.c index 95ee0f0d9e80..6cf91c094b6c 100644 --- a/ext/mbstring/mbstring.c +++ b/ext/mbstring/mbstring.c @@ -4815,28 +4815,65 @@ static inline char* php_mb_chr(zend_long cp, const char* enc, size_t *output_len } } - if (php_mb_is_no_encoding_unicode(no_enc)) { - if (0 > cp || 0x10ffff < cp) { + if (php_mb_is_no_encoding_utf8(no_enc)) { - if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) { + if (0 > cp || cp > 0x10ffff || (cp > 0xd7ff && 0xe000 > cp)) { + if (php_mb_is_no_encoding_utf8(MBSTRG(current_internal_encoding)->no_encoding)) { cp = MBSTRG(current_filter_illegal_substchar); + } else if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) { + if (0xd800 > MBSTRG(current_filter_illegal_substchar) || MBSTRG(current_filter_illegal_substchar) > 0xdfff) { + cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; + } } else { cp = 0x3f; } + } - } else if (php_mb_is_no_encoding_utf8(no_enc)) { + if (cp < 0x80) { + ret_len = 1; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = cp; + ret[1] = 0; + } else if (cp < 0x800) { + ret_len = 2; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = 0xc0 | (cp >> 6); + ret[1] = 0x80 | (cp & 0x3f); + ret[2] = 0; + } else if (cp < 0x10000) { + ret_len = 3; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = 0xe0 | (cp >> 12); + ret[1] = 0x80 | ((cp >> 6) & 0x3f); + ret[2] = 0x80 | (cp & 0x3f); + ret[3] = 0; + } else { + ret_len = 4; + ret = (char *) safe_emalloc(ret_len, 1, 1); + ret[0] = 0xf0 | (cp >> 18); + ret[1] = 0x80 | ((cp >> 12) & 0x3f); + ret[2] = 0x80 | ((cp >> 6) & 0x3f); + ret[3] = 0x80 | (cp & 0x3f); + ret[4] = 0; + } - if (cp > 0xd7ff && 0xe000 > cp) { - if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) { - if (0xd800 > MBSTRG(current_filter_illegal_substchar) || MBSTRG(current_filter_illegal_substchar) > 0xdfff) { - cp = MBSTRG(current_filter_illegal_substchar); - } else { - cp = 0x3f; - } - } else { - cp = 0x3f; - } + if (output_len) { + *output_len = ret_len; + } + + return ret; + + } else if (php_mb_is_no_encoding_unicode(no_enc)) { + + if (0 > cp || 0x10ffff < cp) { + + if (php_mb_is_no_encoding_unicode(MBSTRG(current_internal_encoding)->no_encoding)) { + cp = MBSTRG(current_filter_illegal_substchar); + } else { + cp = 0x3f; } }