Skip to content

Commit 0078df6

Browse files
committed
Update template to allow handlers to more cleanly handle encodings (ht: nex3)
1 parent 19d8c8c commit 0078df6

File tree

4 files changed

+78
-69
lines changed

4 files changed

+78
-69
lines changed

actionpack/lib/action_view/template.rb

Lines changed: 39 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ class Template
2222
# users will see diamonds with question marks in them in
2323
# the browser.
2424
#
25+
# For the rest of this documentation, when we say "UTF-8",
26+
# we mean "UTF-8 or whatever the default_internal encoding
27+
# is set to". By default, it will be UTF-8.
28+
#
2529
# To mitigate this problem, we use a few strategies:
2630
# 1. If the source is not valid UTF-8, we raise an exception
2731
# when the template is compiled to alert the user
@@ -32,8 +36,7 @@ class Template
3236
# to the resulting compiled source returned by the
3337
# template handler.
3438
# 3. In all cases, we transcode the resulting String to
35-
# the <tt>default_internal</tt> encoding (which defaults
36-
# to UTF-8).
39+
# the UTF-8.
3740
#
3841
# This means that other parts of Rails can always assume
3942
# that templates are encoded in UTF-8, even if the original
@@ -60,14 +63,14 @@ class Template
6063
#
6164
# If you want to provide an alternate mechanism for
6265
# specifying encodings (like ERB does via <%# encoding: ... %>),
63-
# you may indicate that you are willing to accept
64-
# BINARY data by implementing <tt>self.accepts_binary?</tt>
66+
# you may indicate that you will handle encodings yourself
67+
# by implementing <tt>self.handles_encoding?</tt>
6568
# on your handler.
6669
#
67-
# If you do, Rails will not raise an exception if
68-
# the template's encoding could not be determined,
69-
# assuming that you have another mechanism for
70-
# making the determination.
70+
# If you do, Rails will not try to encode the String
71+
# into the default_internal, passing you the unaltered
72+
# bytes tagged with the assumed encoding (from
73+
# default_external).
7174
#
7275
# In this case, make sure you return a String from
7376
# your handler encoded in the default_internal. Since
@@ -171,7 +174,12 @@ def inspect
171174
# before passing the source on to the template engine, leaving a
172175
# blank line in its stead.
173176
#
174-
# Note that after we figure out the correct encoding, we then
177+
# If the template engine handles encodings, we send the encoded
178+
# String to the engine without further processing. This allows
179+
# the template engine to support additional mechanisms for
180+
# specifying the encoding. For instance, ERB supports <%# encoding: %>
181+
#
182+
# Otherwise, after we figure out the correct encoding, we then
175183
# encode the source into Encoding.default_internal. In general,
176184
# this means that templates will be UTF-8 inside of Rails,
177185
# regardless of the original source encoding.
@@ -182,8 +190,11 @@ def compile(locals, view, mod)
182190
locals_code = locals.keys.map! { |key| "#{key} = local_assigns[:#{key}];" }.join
183191

184192
if source.encoding_aware?
193+
# Look for # encoding: *. If we find one, we'll encode the
194+
# String in that encoding, otherwise, we'll use the
195+
# default external encoding.
185196
if source.sub!(/\A#{ENCODING_FLAG}/, '')
186-
encoding = $1
197+
encoding = magic_encoding = $1
187198
else
188199
encoding = Encoding.default_external
189200
end
@@ -192,34 +203,28 @@ def compile(locals, view, mod)
192203
# or the encoding specified in the file
193204
source.force_encoding(encoding)
194205

195-
# If the original encoding is BINARY, the actual
196-
# encoding is either stored out-of-band (such as
197-
# in ERB <%# %> style magic comments) or missing.
198-
# This is also true if the original encoding is
199-
# something other than BINARY, but it's invalid.
200-
if source.encoding != Encoding::BINARY && source.valid_encoding?
206+
# If the user didn't specify an encoding, and the handler
207+
# handles encodings, we simply pass the String as is to
208+
# the handler (with the default_external tag)
209+
if !magic_encoding && @handler.respond_to?(:handles_encoding?) && @handler.handles_encoding?
210+
source
211+
# Otherwise, if the String is valid in the encoding,
212+
# encode immediately to default_internal. This means
213+
# that if a handler doesn't handle encodings, it will
214+
# always get Strings in the default_internal
215+
elsif source.valid_encoding?
201216
source.encode!
202-
# If the assumed encoding is incorrect, check to
203-
# see whether the handler accepts BINARY. If it
204-
# does, it has another mechanism for determining
205-
# the true encoding of the String.
206-
elsif @handler.respond_to?(:accepts_binary?) && @handler.accepts_binary?
207-
source.force_encoding(Encoding::BINARY)
208-
# If the handler does not accept BINARY, the
209-
# assumed encoding (either the default_external,
210-
# or the explicit encoding specified by the user)
211-
# is incorrect. We raise an exception here.
217+
# Otherwise, since the String is invalid in the encoding
218+
# specified, raise an exception
212219
else
213220
raise WrongEncodingError.new(source, encoding)
214221
end
215-
216-
# Don't validate the encoding yet -- the handler
217-
# may treat the String as raw bytes and extract
218-
# the encoding some other way
219222
end
220223

221224
code = @handler.call(self)
222225

226+
# Make sure that the resulting String to be evalled is in the
227+
# encoding of the code
223228
source = <<-end_src
224229
def #{method_name}(local_assigns)
225230
_old_virtual_path, @_virtual_path = @_virtual_path, #{@virtual_path.inspect};_old_output_buffer = @output_buffer;#{locals_code};#{code}
@@ -229,20 +234,16 @@ def #{method_name}(local_assigns)
229234
end_src
230235

231236
if source.encoding_aware?
232-
# Handlers should return their source Strings in either the
233-
# default_internal or BINARY. If the handler returns a BINARY
234-
# String, we assume its encoding is the one we determined
235-
# earlier, and encode the resulting source in the default_internal.
236-
if source.encoding == Encoding::BINARY
237-
source.force_encoding(Encoding.default_internal)
238-
end
237+
# Make sure the source is in the encoding of the returned code
238+
source.force_encoding(code.encoding)
239239

240240
# In case we get back a String from a handler that is not in
241241
# BINARY or the default_internal, encode it to the default_internal
242242
source.encode!
243243

244244
# Now, validate that the source we got back from the template
245-
# handler is valid in the default_internal
245+
# handler is valid in the default_internal. This is for handlers
246+
# that handle encoding but screw up
246247
unless source.valid_encoding?
247248
raise WrongEncodingError.new(@source, Encoding.default_internal)
248249
end

actionpack/lib/action_view/template/error.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ def initialize(string, encoding)
1313
end
1414

1515
def message
16+
@string.force_encoding("BINARY")
1617
"Your template was not saved as valid #{@encoding}. Please " \
1718
"either specify #{@encoding} as the encoding for your template " \
1819
"in your text editor, or mark the template with its " \

actionpack/lib/action_view/template/handlers/erb.rb

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -79,51 +79,49 @@ class ERB < Handler
7979

8080
ENCODING_TAG = Regexp.new("\\A(<%#{ENCODING_FLAG}-?%>)[ \\t]*")
8181

82-
def self.accepts_binary?
82+
def self.handles_encoding?
8383
true
8484
end
8585

8686
def compile(template)
8787
if template.source.encoding_aware?
88-
# Even though Rails has given us a String tagged with the
89-
# default_internal encoding (likely UTF-8), it is possible
90-
# that the String is actually encoded using a different
91-
# encoding, specified via an ERB magic comment. If the
92-
# String is not actually UTF-8, the regular expression
93-
# engine will (correctly) raise an exception. For now,
94-
# we'll reset the String to BINARY so we can run regular
95-
# expressions against it
88+
# First, convert to BINARY, so in case the encoding is
89+
# wrong, we can still find an encoding tag
90+
# (<%# encoding %>) inside the String using a regular
91+
# expression
9692
template_source = template.source.dup.force_encoding("BINARY")
9793

98-
# Erubis does not have direct support for encodings.
99-
# As a result, we will extract the ERB-style magic
100-
# comment, give the String to Erubis as BINARY data,
101-
# and then tag the resulting String with the extracted
102-
# encoding later
10394
erb = template_source.gsub(ENCODING_TAG, '')
10495
encoding = $2
10596

106-
if !encoding && (template.source.encoding == Encoding::BINARY)
107-
raise WrongEncodingError.new(template_source, Encoding.default_external)
108-
end
97+
erb.force_encoding valid_encoding(template.source.dup, encoding)
98+
99+
# Always make sure we return a String in the default_internal
100+
erb.encode!
109101
else
110102
erb = template.source.dup
111103
end
112104

113-
result = self.class.erb_implementation.new(
105+
self.class.erb_implementation.new(
114106
erb,
115107
:trim => (self.class.erb_trim_mode == "-")
116108
).src
109+
end
110+
111+
private
112+
def valid_encoding(string, encoding)
113+
# If a magic encoding comment was found, tag the
114+
# String with this encoding. This is for a case
115+
# where the original String was assumed to be,
116+
# for instance, UTF-8, but a magic comment
117+
# proved otherwise
118+
string.force_encoding(encoding) if encoding
119+
120+
# If the String is valid, return the encoding we found
121+
return string.encoding if string.valid_encoding?
117122

118-
# If an encoding tag was found, tag the String
119-
# we're returning with that encoding. Otherwise,
120-
# return a BINARY String, which is what ERB
121-
# returns. Note that if a magic comment was
122-
# not specified, we will return the data to
123-
# Rails as BINARY, which will then use its
124-
# own encoding logic to create a UTF-8 String.
125-
result = "\n#{result}".force_encoding(encoding).encode if encoding
126-
result
123+
# Otherwise, raise an exception
124+
raise WrongEncodingError.new(string, string.encoding)
127125
end
128126
end
129127
end

actionpack/test/template/template_test.rb

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,10 +114,12 @@ def test_lying_with_magic_comment
114114
end
115115

116116
def test_encoding_can_be_specified_with_magic_comment_in_erb
117-
@template = new_template("<%# encoding: ISO-8859-1 %>hello \xFCmlat")
118-
result = render
119-
assert_equal Encoding::UTF_8, render.encoding
120-
assert_equal "hello \u{fc}mlat", render
117+
with_external_encoding Encoding::UTF_8 do
118+
@template = new_template("<%# encoding: ISO-8859-1 %>hello \xFCmlat")
119+
result = render
120+
assert_equal Encoding::UTF_8, render.encoding
121+
assert_equal "hello \u{fc}mlat", render
122+
end
121123
end
122124

123125
def test_error_when_template_isnt_valid_utf8
@@ -126,5 +128,12 @@ def test_error_when_template_isnt_valid_utf8
126128
render
127129
end
128130
end
131+
132+
def with_external_encoding(encoding)
133+
old, Encoding.default_external = Encoding.default_external, encoding
134+
yield
135+
ensure
136+
Encoding.default_external = old
137+
end
129138
end
130139
end

0 commit comments

Comments
 (0)