Skip to content

Commit c7ae8ce

Browse files
committed
Merge pull request brianmario#482 from brianmario/error-encoding
Ensure error messages are always valid UTF-8
2 parents 3751d8b + cd089e9 commit c7ae8ce

File tree

7 files changed

+255
-163
lines changed

7 files changed

+255
-163
lines changed

.travis.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,6 @@ script:
1515
before_install:
1616
- gem update --system 2.1.11
1717
- gem --version
18+
matrix:
19+
allow_failures:
20+
- rvm: rbx

ext/mysql2/client.c

Lines changed: 6 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
VALUE cMysql2Client;
1313
extern VALUE mMysql2, cMysql2Error;
1414
static VALUE sym_id, sym_version, sym_async, sym_symbolize_keys, sym_as, sym_array, sym_stream;
15-
static ID intern_merge, intern_merge_bang, intern_error_number_eql, intern_sql_state_eql;
15+
static ID intern_merge, intern_merge_bang, intern_error_number_eql, intern_sql_state_eql, intern_server_version;
1616

1717
#ifndef HAVE_RB_HASH_DUP
1818
static VALUE rb_hash_dup(VALUE other) {
@@ -125,26 +125,13 @@ static VALUE rb_raise_mysql2_error(mysql_client_wrapper *wrapper) {
125125
VALUE rb_error_msg = rb_str_new2(mysql_error(wrapper->client));
126126
VALUE rb_sql_state = rb_tainted_str_new2(mysql_sqlstate(wrapper->client));
127127
VALUE e;
128+
128129
#ifdef HAVE_RUBY_ENCODING_H
129-
if (wrapper->server_version < 50500) {
130-
/* MySQL < 5.5 uses mixed encoding, just call it binary. */
131-
int err_enc = rb_ascii8bit_encindex();
132-
rb_enc_associate_index(rb_error_msg, err_enc);
133-
rb_enc_associate_index(rb_sql_state, err_enc);
134-
} else {
135-
/* MySQL >= 5.5 uses UTF-8 errors internally and converts them to the connection encoding. */
136-
rb_encoding *default_internal_enc = rb_default_internal_encoding();
137-
rb_encoding *conn_enc = rb_to_encoding(wrapper->encoding);
138-
rb_enc_associate(rb_error_msg, conn_enc);
139-
rb_enc_associate(rb_sql_state, conn_enc);
140-
if (default_internal_enc) {
141-
rb_error_msg = rb_str_export_to_enc(rb_error_msg, default_internal_enc);
142-
rb_sql_state = rb_str_export_to_enc(rb_sql_state, default_internal_enc);
143-
}
144-
}
130+
rb_enc_associate(rb_error_msg, rb_utf8_encoding());
131+
rb_enc_associate(rb_sql_state, rb_usascii_encoding());
145132
#endif
146133

147-
e = rb_exc_new3(cMysql2Error, rb_error_msg);
134+
e = rb_funcall(cMysql2Error, rb_intern("new"), 2, rb_error_msg, LONG2FIX(wrapper->server_version));
148135
rb_funcall(e, intern_error_number_eql, 1, UINT2NUM(mysql_errno(wrapper->client)));
149136
rb_funcall(e, intern_sql_state_eql, 1, rb_sql_state);
150137
rb_exc_raise(e);
@@ -1221,6 +1208,7 @@ void init_mysql2_client() {
12211208
intern_merge_bang = rb_intern("merge!");
12221209
intern_error_number_eql = rb_intern("error_number=");
12231210
intern_sql_state_eql = rb_intern("sql_state=");
1211+
intern_server_version = rb_intern("server_version=");
12241212

12251213
#ifdef CLIENT_LONG_PASSWORD
12261214
rb_const_set(cMysql2Client, rb_intern("LONG_PASSWORD"),

lib/mysql2/error.rb

Lines changed: 72 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,81 @@
1+
# encoding: UTF-8
2+
13
module Mysql2
24
class Error < StandardError
3-
attr_accessor :error_number, :sql_state
5+
REPLACEMENT_CHAR = '?'
6+
ENCODE_OPTS = {:undef => :replace, :invalid => :replace, :replace => REPLACEMENT_CHAR}
47

5-
def initialize msg
6-
super
7-
@error_number = nil
8-
@sql_state = nil
9-
end
8+
attr_accessor :error_number, :sql_state
9+
attr_writer :server_version
1010

1111
# Mysql gem compatibility
1212
alias_method :errno, :error_number
1313
alias_method :error, :message
14+
15+
def initialize(msg, server_version=nil)
16+
self.server_version = server_version
17+
18+
super(clean_message(msg))
19+
end
20+
21+
if "".respond_to? :encode
22+
def sql_state=(state)
23+
@sql_state = state.encode(ENCODE_OPTS)
24+
end
25+
end
26+
27+
private
28+
29+
# In MySQL 5.5+ error messages are always constructed server-side as UTF-8
30+
# then returned in the encoding set by the `character_set_results` system
31+
# variable.
32+
#
33+
# See http://dev.mysql.com/doc/refman/5.5/en/charset-errors.html for
34+
# more context.
35+
#
36+
# Before MySQL 5.5 error message template strings are in whatever encoding
37+
# is associated with the error message language.
38+
# See http://dev.mysql.com/doc/refman/5.1/en/error-message-language.html
39+
# for more information.
40+
#
41+
# The issue is that the user-data inserted in the message could potentially
42+
# be in any encoding MySQL supports and is inserted into the latin1, euckr or
43+
# koi8r string raw. Meaning there's a high probability the string will be
44+
# corrupt encoding-wise.
45+
#
46+
# See http://dev.mysql.com/doc/refman/5.1/en/charset-errors.html for
47+
# more information.
48+
#
49+
# So in an attempt to make sure the error message string is always in a valid
50+
# encoding, we'll assume UTF-8 and clean the string of anything that's not a
51+
# valid UTF-8 character.
52+
#
53+
# Except for if we're on 1.8, where we'll do nothing ;)
54+
#
55+
# Returns a valid UTF-8 string in Ruby 1.9+, the original string on Ruby 1.8
56+
def clean_message(message)
57+
return message if !message.respond_to?(:encoding)
58+
59+
if @server_version && @server_version > 50500
60+
message.encode(ENCODE_OPTS)
61+
else
62+
if message.respond_to? :scrub
63+
message.scrub(REPLACEMENT_CHAR).encode(ENCODE_OPTS)
64+
else
65+
# This is ugly as hell but Ruby 1.9 doesn't provide a way to clean a string
66+
# and retain its valid UTF-8 characters, that I know of.
67+
68+
new_message = "".force_encoding(Encoding::UTF_8)
69+
message.chars.each do |char|
70+
if char.valid_encoding?
71+
new_message << char
72+
else
73+
new_message << REPLACEMENT_CHAR
74+
end
75+
end
76+
new_message.encode(ENCODE_OPTS)
77+
end
78+
end
79+
end
1480
end
1581
end

spec/mysql2/client_spec.rb

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,14 @@
77

88
it "should not raise an exception for valid defaults group" do
99
lambda {
10-
@client = Mysql2::Client.new(:default_file => cnf_file, :default_group => "test")
10+
opts = DatabaseCredentials['root'].merge(:default_file => cnf_file, :default_group => "test")
11+
@client = Mysql2::Client.new(opts)
1112
}.should_not raise_error(Mysql2::Error)
1213
end
1314

1415
it "should not raise an exception without default group" do
1516
lambda {
16-
@client = Mysql2::Client.new(:default_file => cnf_file)
17+
@client = Mysql2::Client.new(DatabaseCredentials['root'].merge(:default_file => cnf_file))
1718
}.should_not raise_error(Mysql2::Error)
1819
end
1920
end
@@ -655,18 +656,22 @@ def connect *args
655656
if defined? Encoding
656657
context "strings returned by #info" do
657658
it "should default to the connection's encoding if Encoding.default_internal is nil" do
658-
Encoding.default_internal = nil
659-
@client.info[:version].encoding.should eql(Encoding.find('utf-8'))
659+
with_internal_encoding nil do
660+
@client.info[:version].encoding.should eql(Encoding.find('utf-8'))
660661

661-
client2 = Mysql2::Client.new(DatabaseCredentials['root'].merge(:encoding => 'ascii'))
662-
client2.info[:version].encoding.should eql(Encoding.find('us-ascii'))
662+
client2 = Mysql2::Client.new(DatabaseCredentials['root'].merge(:encoding => 'ascii'))
663+
client2.info[:version].encoding.should eql(Encoding.find('us-ascii'))
664+
end
663665
end
664666

665667
it "should use Encoding.default_internal" do
666-
Encoding.default_internal = Encoding.find('utf-8')
667-
@client.info[:version].encoding.should eql(Encoding.default_internal)
668-
Encoding.default_internal = Encoding.find('us-ascii')
669-
@client.info[:version].encoding.should eql(Encoding.default_internal)
668+
with_internal_encoding 'utf-8' do
669+
@client.info[:version].encoding.should eql(Encoding.default_internal)
670+
end
671+
672+
with_internal_encoding 'us-ascii' do
673+
@client.info[:version].encoding.should eql(Encoding.default_internal)
674+
end
670675
end
671676
end
672677
end
@@ -694,18 +699,22 @@ def connect *args
694699
if defined? Encoding
695700
context "strings returned by #server_info" do
696701
it "should default to the connection's encoding if Encoding.default_internal is nil" do
697-
Encoding.default_internal = nil
698-
@client.server_info[:version].encoding.should eql(Encoding.find('utf-8'))
702+
with_internal_encoding nil do
703+
@client.server_info[:version].encoding.should eql(Encoding.find('utf-8'))
699704

700-
client2 = Mysql2::Client.new(DatabaseCredentials['root'].merge(:encoding => 'ascii'))
701-
client2.server_info[:version].encoding.should eql(Encoding.find('us-ascii'))
705+
client2 = Mysql2::Client.new(DatabaseCredentials['root'].merge(:encoding => 'ascii'))
706+
client2.server_info[:version].encoding.should eql(Encoding.find('us-ascii'))
707+
end
702708
end
703709

704710
it "should use Encoding.default_internal" do
705-
Encoding.default_internal = Encoding.find('utf-8')
706-
@client.server_info[:version].encoding.should eql(Encoding.default_internal)
707-
Encoding.default_internal = Encoding.find('us-ascii')
708-
@client.server_info[:version].encoding.should eql(Encoding.default_internal)
711+
with_internal_encoding 'utf-8' do
712+
@client.server_info[:version].encoding.should eql(Encoding.default_internal)
713+
end
714+
715+
with_internal_encoding 'us-ascii' do
716+
@client.server_info[:version].encoding.should eql(Encoding.default_internal)
717+
end
709718
end
710719
end
711720
end

spec/mysql2/error_spec.rb

Lines changed: 58 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,86 +1,83 @@
11
# encoding: UTF-8
2-
require 'spec_helper'
3-
4-
# The matrix of error encoding tests:
5-
# ('Enc = X' means 'Encoding.default_internal = X')
6-
# MySQL < 5.5 MySQL >= 5.5
7-
# Ruby 1.8 N/A N/A
8-
# Ruby 1.9+
9-
# Enc = nil
10-
# :enc = nil BINARY UTF-8
11-
#
12-
# Enc = XYZ
13-
# :enc = XYZ BINARY XYZ
14-
#
15-
# Enc = FOO
16-
# :enc = BAR BINARY FOO
17-
#
182

3+
require 'spec_helper'
194

205
describe Mysql2::Error do
21-
shared_examples "mysql2 error" do
6+
let(:client) { Mysql2::Client.new(DatabaseCredentials['root']) }
7+
8+
let :error do
229
begin
23-
err_client = Mysql2::Client.new(DatabaseCredentials['root'])
24-
err_client.query("HAHAHA")
10+
client.query("HAHAHA")
2511
rescue Mysql2::Error => e
2612
error = e
2713
ensure
28-
err_client.close
14+
client.close
2915
end
3016

31-
subject { error }
32-
it { should respond_to(:error_number) }
33-
it { should respond_to(:sql_state) }
17+
error
18+
end
19+
20+
it "responds to error_number and sql_state, with aliases" do
21+
error.should respond_to(:error_number)
22+
error.should respond_to(:sql_state)
3423

3524
# Mysql gem compatibility
36-
it { should respond_to(:errno) }
37-
it { should respond_to(:error) }
25+
error.should respond_to(:errno)
26+
error.should respond_to(:error)
3827
end
3928

40-
shared_examples "mysql2 error encoding" do |db_enc, def_enc, err_enc|
41-
Encoding.default_internal = def_enc
29+
if "".respond_to? :encoding
30+
let :error do
31+
client = Mysql2::Client.new(DatabaseCredentials['root'])
32+
begin
33+
client.query("\xE9\x80\xA0\xE5\xAD\x97")
34+
rescue Mysql2::Error => e
35+
error = e
36+
ensure
37+
client.close
38+
end
4239

43-
begin
44-
err_client = Mysql2::Client.new(DatabaseCredentials['root'].merge(:encoding => db_enc))
45-
err_client.query("造字")
46-
rescue Mysql2::Error => e
47-
error = e
48-
ensure
49-
err_client.close
40+
error
5041
end
5142

52-
subject { error.message.encoding }
53-
it "#message should transcode from #{db_enc.inspect} to #{err_enc}" do should eql(err_enc) end
43+
let :bad_err do
44+
client = Mysql2::Client.new(DatabaseCredentials['root'])
45+
begin
46+
client.query("\xE5\xC6\x7D\x1F")
47+
rescue Mysql2::Error => e
48+
error = e
49+
ensure
50+
client.close
51+
end
5452

55-
subject { error.error.encoding }
56-
it "#error should transcode from #{db_enc.inspect} to #{err_enc}" do should eql(err_enc) end
53+
error
54+
end
5755

58-
subject { error.sql_state.encoding }
59-
it "#sql_state should transcode from #{db_enc.inspect} to #{err_enc}" do should eql(err_enc) end
60-
end
56+
it "returns error messages as UTF-8 by default" do
57+
with_internal_encoding nil do
58+
error.message.encoding.should eql(Encoding::UTF_8)
59+
error.message.valid_encoding?
6160

62-
shared_examples "mysql2 error encoding (MySQL < 5.5)" do |db_enc, def_enc, err_enc|
63-
include_examples "mysql2 error encoding", db_enc, def_enc, err_enc
64-
end
61+
bad_err.message.encoding.should eql(Encoding::UTF_8)
62+
bad_err.message.valid_encoding?
6563

66-
shared_examples "mysql2 error encoding (MySQL >= 5.5)" do |db_enc, def_enc, err_enc|
67-
include_examples "mysql2 error encoding", db_enc, def_enc, err_enc
68-
end
64+
bad_err.message.should include("??}\u001F")
65+
end
66+
end
67+
68+
it "returns sql state as ASCII" do
69+
error.sql_state.encoding.should eql(Encoding::US_ASCII)
70+
error.sql_state.valid_encoding?
71+
end
72+
73+
it "returns error messages and sql state in Encoding.default_internal if set" do
74+
with_internal_encoding 'UTF-16LE' do
75+
error.message.encoding.should eql(Encoding.default_internal)
76+
error.message.valid_encoding?
6977

70-
it_behaves_like "mysql2 error"
71-
72-
unless RUBY_VERSION =~ /1.8/
73-
mysql_ver = Mysql2::Client.new(DatabaseCredentials['root']).server_info[:id]
74-
if mysql_ver < 50505
75-
it_behaves_like "mysql2 error encoding (MySQL < 5.5)", nil, nil, Encoding::ASCII_8BIT
76-
it_behaves_like "mysql2 error encoding (MySQL < 5.5)", 'utf8', Encoding::UTF_8, Encoding::ASCII_8BIT
77-
it_behaves_like "mysql2 error encoding (MySQL < 5.5)", 'big5', Encoding::Big5, Encoding::ASCII_8BIT
78-
it_behaves_like "mysql2 error encoding (MySQL < 5.5)", 'big5', Encoding::US_ASCII, Encoding::ASCII_8BIT
79-
else
80-
it_behaves_like "mysql2 error encoding (MySQL >= 5.5)", nil, nil, Encoding::UTF_8
81-
it_behaves_like "mysql2 error encoding (MySQL >= 5.5)", 'utf8', Encoding::UTF_8, Encoding::UTF_8
82-
it_behaves_like "mysql2 error encoding (MySQL >= 5.5)", 'big5', Encoding::Big5, Encoding::Big5
83-
it_behaves_like "mysql2 error encoding (MySQL >= 5.5)", 'big5', Encoding::US_ASCII, Encoding::US_ASCII
78+
bad_err.message.encoding.should eql(Encoding.default_internal)
79+
bad_err.message.valid_encoding?
80+
end
8481
end
8582
end
8683
end

0 commit comments

Comments
 (0)