Skip to content

PHP 5.6 RFC default encoding #568

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 39 additions & 5 deletions ext/iconv/iconv.c
Original file line number Diff line number Diff line change
Expand Up @@ -220,21 +220,55 @@ static char _generic_superset_name[] = ICONV_UCS4_ENCODING;
#define GENERIC_SUPERSET_NBYTES 4
/* }}} */

static PHP_INI_MH(OnUpdateStringIconvCharset)

static PHP_INI_MH(OnUpdateInputEncoding)
{
if (new_value_length >= ICONV_CSNMAXLEN) {
return FAILURE;
}
if (new_value_length) {
OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
} else {
OnUpdateString(entry, PG(input_encoding), strlen(PG(input_encoding))+1, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
}
return SUCCESS;
}


static PHP_INI_MH(OnUpdateOutputEncoding)
{
if(new_value_length >= ICONV_CSNMAXLEN) {
return FAILURE;
}
OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
if (new_value_length) {
OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
} else {
OnUpdateString(entry, PG(output_encoding), strlen(PG(output_encoding))+1, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
}
return SUCCESS;
}


static PHP_INI_MH(OnUpdateInternalEncoding)
{
if(new_value_length >= ICONV_CSNMAXLEN) {
return FAILURE;
}
if (new_value_length) {
OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
} else {
OnUpdateString(entry, PG(internal_encoding), strlen(PG(internal_encoding))+1, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
}
return SUCCESS;
}


/* {{{ PHP_INI
 * Registers the three iconv encoding settings, each exactly once.
 * Every setting defaults to the empty string and is bound to its own
 * modification handler and to the matching field of zend_iconv_globals.
 */
PHP_INI_BEGIN()
	STD_PHP_INI_ENTRY("iconv.input_encoding",    "", PHP_INI_ALL, OnUpdateInputEncoding,    input_encoding,    zend_iconv_globals, iconv_globals)
	STD_PHP_INI_ENTRY("iconv.output_encoding",   "", PHP_INI_ALL, OnUpdateOutputEncoding,   output_encoding,   zend_iconv_globals, iconv_globals)
	STD_PHP_INI_ENTRY("iconv.internal_encoding", "", PHP_INI_ALL, OnUpdateInternalEncoding, internal_encoding, zend_iconv_globals, iconv_globals)
PHP_INI_END()
/* }}} */

Expand Down
6 changes: 0 additions & 6 deletions ext/iconv/php_iconv.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,9 @@ ZEND_END_MODULE_GLOBALS(iconv)
#endif

/* Charset name spellings, selected by whether HAVE_IBM_ICONV is defined. */
#ifdef HAVE_IBM_ICONV
/* IBM iconv branch: Latin-1 is spelled without the first hyphen. */
# define ICONV_INPUT_ENCODING "ISO8859-1"
# define ICONV_OUTPUT_ENCODING "ISO8859-1"
# define ICONV_INTERNAL_ENCODING "ISO8859-1"
# define ICONV_ASCII_ENCODING "IBM-850"
# define ICONV_UCS4_ENCODING "UCS-4"
#else
/* All other iconv implementations. */
# define ICONV_INPUT_ENCODING "ISO-8859-1"
# define ICONV_OUTPUT_ENCODING "ISO-8859-1"
# define ICONV_INTERNAL_ENCODING "ISO-8859-1"
# define ICONV_ASCII_ENCODING "ASCII"
# define ICONV_UCS4_ENCODING "UCS-4LE"
#endif
Expand Down
68 changes: 68 additions & 0 deletions ext/iconv/tests/iconv_ini_encoding.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
--TEST--
Encoding INI test
--SKIPIF--
<?php
// This test reads and writes iconv.* INI settings, so skip it unless ext/iconv is loaded.
extension_loaded('iconv') or die('skip iconv not available');
?>
--INI--
default_charset=ISO-8859-1
internal_encoding=
input_encoding=
output_encoding=
iconv.internal_encoding=ISO-8859-1
iconv.http_input=ISO-8859-1
iconv.http_output=ISO-8859-1
--FILE--
<?php
// Dumps the core and iconv encoding INI settings, sets every one of them to
// UTF-8, then dumps them again so the three phases can be compared with --EXPECT--.
// NOTE(review): the --INI-- section sets iconv.http_input / iconv.http_output,
// which are never read here — confirm whether iconv.input_encoding /
// iconv.output_encoding were intended.

// Phase 1: values as configured by the --INI-- section.
echo "Getting INI\n";
var_dump(ini_get('default_charset'));
var_dump(ini_get('internal_encoding'));
var_dump(ini_get('input_encoding'));
var_dump(ini_get('output_encoding'));

var_dump(ini_get('iconv.internal_encoding'));
var_dump(ini_get('iconv.input_encoding'));
var_dump(ini_get('iconv.output_encoding'));

// Phase 2: ini_set() returns the previous value, so this repeats the phase 1 output.
echo "Setting INI\n";
var_dump(ini_set('default_charset', 'UTF-8'));
var_dump(ini_set('internal_encoding', 'UTF-8'));
var_dump(ini_set('input_encoding', 'UTF-8'));
var_dump(ini_set('output_encoding', 'UTF-8'));
var_dump(ini_set('iconv.internal_encoding', 'UTF-8'));
var_dump(ini_set('iconv.input_encoding', 'UTF-8'));
var_dump(ini_set('iconv.output_encoding', 'UTF-8'));

// Phase 3: every setting is expected to report the new value.
echo "Getting INI\n";
var_dump(ini_get('default_charset'));
var_dump(ini_get('internal_encoding'));
var_dump(ini_get('input_encoding'));
var_dump(ini_get('output_encoding'));

var_dump(ini_get('iconv.internal_encoding'));
var_dump(ini_get('iconv.input_encoding'));
var_dump(ini_get('iconv.output_encoding'));

--EXPECT--
Getting INI
string(10) "ISO-8859-1"
string(0) ""
string(0) ""
string(0) ""
string(10) "ISO-8859-1"
string(0) ""
string(0) ""
Setting INI
string(10) "ISO-8859-1"
string(0) ""
string(0) ""
string(0) ""
string(10) "ISO-8859-1"
string(0) ""
string(0) ""
Getting INI
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-8"
54 changes: 27 additions & 27 deletions ext/iconv/tests/iconv_set_encoding_variation.phpt
Original file line number Diff line number Diff line change
Expand Up @@ -183,17 +183,17 @@ string(3) "0.5"
bool(true)
bool(true)
bool(true)
string(0) ""
string(0) ""
string(0) ""
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-8"

-- Iteration 11 --
bool(true)
bool(true)
bool(true)
string(0) ""
string(0) ""
string(0) ""
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-8"

-- Iteration 12 --
bool(true)
Expand All @@ -207,9 +207,9 @@ string(1) "1"
bool(true)
bool(true)
bool(true)
string(0) ""
string(0) ""
string(0) ""
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-8"

-- Iteration 14 --
bool(true)
Expand All @@ -223,25 +223,25 @@ string(1) "1"
bool(true)
bool(true)
bool(true)
string(0) ""
string(0) ""
string(0) ""
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-8"

-- Iteration 16 --
bool(true)
bool(true)
bool(true)
string(0) ""
string(0) ""
string(0) ""
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-8"

-- Iteration 17 --
bool(true)
bool(true)
bool(true)
string(0) ""
string(0) ""
string(0) ""
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-8"

-- Iteration 18 --
bool(true)
Expand Down Expand Up @@ -279,17 +279,17 @@ string(5) "UTF-8"
bool(true)
bool(true)
bool(true)
string(0) ""
string(0) ""
string(0) ""
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-8"

-- Iteration 23 --
bool(true)
bool(true)
bool(true)
string(0) ""
string(0) ""
string(0) ""
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-8"

-- Iteration 24 --

Expand All @@ -301,7 +301,7 @@ NULL

Warning: iconv_set_encoding() expects parameter 2 to be string, resource given in %s on line %d
NULL
string(0) ""
string(0) ""
string(0) ""
string(5) "UTF-8"
string(5) "UTF-8"
string(5) "UTF-8"
Done
85 changes: 34 additions & 51 deletions ext/mbstring/mbstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -1236,6 +1236,11 @@ static PHP_INI_MH(OnUpdate_mbstring_http_input)
if (MBSTRG(http_input_list)) {
pefree(MBSTRG(http_input_list), 1);
}
if (SUCCESS == php_mb_parse_encoding_list(PG(input_encoding), strlen(PG(input_encoding))+1, &list, &size, 1 TSRMLS_CC)) {
MBSTRG(http_input_list) = list;
MBSTRG(http_input_list_size) = 0;
return SUCCESS;
}
MBSTRG(http_input_list) = NULL;
MBSTRG(http_input_list_size) = 0;
return SUCCESS;
Expand All @@ -1261,18 +1266,20 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output)
const mbfl_encoding *encoding;

if (new_value == NULL || new_value_length == 0) {
MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
return SUCCESS;
}

encoding = mbfl_name2encoding(new_value);
if (!encoding) {
MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
return FAILURE;
encoding = mbfl_name2encoding(PG(output_encoding));
if (!encoding) {
MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Raising an error might be better, since this code would pass through any encoding when an invalid encoding name is provided.

MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
return SUCCESS;
}
} else {
encoding = mbfl_name2encoding(new_value);
if (!encoding) {
MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above.

MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
return FAILURE;
}
}

MBSTRG(http_output_encoding) = encoding;
MBSTRG(current_http_output_encoding) = encoding;
return SUCCESS;
Expand All @@ -1285,47 +1292,17 @@ int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_v
const mbfl_encoding *encoding;

if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) {
switch (MBSTRG(language)) {
case mbfl_no_language_uni:
encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
break;
case mbfl_no_language_japanese:
encoding = mbfl_no2encoding(mbfl_no_encoding_euc_jp);
break;
case mbfl_no_language_korean:
encoding = mbfl_no2encoding(mbfl_no_encoding_euc_kr);
break;
case mbfl_no_language_simplified_chinese:
encoding = mbfl_no2encoding(mbfl_no_encoding_euc_cn);
break;
case mbfl_no_language_traditional_chinese:
encoding = mbfl_no2encoding(mbfl_no_encoding_euc_tw);
break;
case mbfl_no_language_russian:
encoding = mbfl_no2encoding(mbfl_no_encoding_koi8r);
break;
case mbfl_no_language_german:
encoding = mbfl_no2encoding(mbfl_no_encoding_8859_15);
break;
case mbfl_no_language_armenian:
encoding = mbfl_no2encoding(mbfl_no_encoding_armscii8);
break;
case mbfl_no_language_turkish:
encoding = mbfl_no2encoding(mbfl_no_encoding_8859_9);
break;
default:
encoding = mbfl_no2encoding(mbfl_no_encoding_8859_1);
break;
}
}
/* falls back to UTF-8 if an unknown encoding name is given */
encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
}
MBSTRG(internal_encoding) = encoding;
MBSTRG(current_internal_encoding) = encoding;
#if HAVE_MBREGEX
{
const char *enc_name = new_value;
if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
/* falls back to EUC-JP if an unknown encoding name is given */
enc_name = "EUC-JP";
/* falls back to UTF-8 if an unknown encoding name is given */
enc_name = "UTF-8";
php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
}
php_mb_regex_set_mbctype(new_value TSRMLS_CC);
Expand All @@ -1343,7 +1320,11 @@ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
}
if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN
|| stage == PHP_INI_STAGE_RUNTIME) {
return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
if (new_value_length) {
return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
} else {
return _php_mb_ini_mbstring_internal_encoding_set(PG(internal_encoding), strlen(PG(internal_encoding))+1 TSRMLS_CC);
}
} else {
/* the corresponding mbstring globals needs to be set according to the
* ini value in the later stage because it never falls back to the
Expand Down Expand Up @@ -1450,8 +1431,8 @@ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
PHP_INI_BEGIN()
PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
Expand Down Expand Up @@ -2162,8 +2143,10 @@ PHP_FUNCTION(mb_output_handler)

/* feed the string */
mbfl_string_init(&string);
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
/* these are not needed. convd has encoding info.
string.no_language = MBSTRG(language);
string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
*/
string.val = (unsigned char *)arg_string;
string.len = arg_string_len;
mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
Expand Down
Loading