@@ -22,6 +22,10 @@ class Template
22
22
# users will see diamonds with question marks in them in
23
23
# the browser.
24
24
#
25
+ # For the rest of this documentation, when we say "UTF-8",
26
+ # we mean "UTF-8 or whatever the default_internal encoding
27
+ # is set to". By default, it will be UTF-8.
28
+ #
25
29
# To mitigate this problem, we use a few strategies:
26
30
# 1. If the source is not valid UTF-8, we raise an exception
27
31
# when the template is compiled to alert the user
@@ -32,8 +36,7 @@ class Template
32
36
# to the resulting compiled source returned by the
33
37
# template handler.
34
38
# 3. In all cases, we transcode the resulting String to
35
- # the <tt>default_internal</tt> encoding (which defaults
36
- # to UTF-8).
39
+ # UTF-8.
37
40
#
38
41
# This means that other parts of Rails can always assume
39
42
# that templates are encoded in UTF-8, even if the original
@@ -60,14 +63,14 @@ class Template
60
63
#
61
64
# If you want to provide an alternate mechanism for
62
65
# specifying encodings (like ERB does via <%# encoding: ... %>),
63
- # you may indicate that you are willing to accept
64
- # BINARY data by implementing <tt>self.accepts_binary ?</tt>
66
+ # you may indicate that you will handle encodings yourself
67
+ # by implementing <tt>self.handles_encoding?</tt>
65
68
# on your handler.
66
69
#
67
- # If you do, Rails will not raise an exception if
68
- # the template's encoding could not be determined,
69
- # assuming that you have another mechanism for
70
- # making the determination .
70
+ # If you do, Rails will not try to encode the String
71
+ # into the default_internal, passing you the unaltered
72
+ # bytes tagged with the assumed encoding (from
73
+ # default_external).
71
74
#
72
75
# In this case, make sure you return a String from
73
76
# your handler encoded in the default_internal. Since
@@ -171,7 +174,12 @@ def inspect
171
174
# before passing the source on to the template engine, leaving a
172
175
# blank line in its stead.
173
176
#
174
- # Note that after we figure out the correct encoding, we then
177
+ # If the template engine handles encodings, we send the encoded
178
+ # String to the engine without further processing. This allows
179
+ # the template engine to support additional mechanisms for
180
+ # specifying the encoding. For instance, ERB supports <%# encoding: ... %>.
181
+ #
182
+ # Otherwise, after we figure out the correct encoding, we then
175
183
# encode the source into Encoding.default_internal. In general,
176
184
# this means that templates will be UTF-8 inside of Rails,
177
185
# regardless of the original source encoding.
@@ -182,8 +190,11 @@ def compile(locals, view, mod)
182
190
locals_code = locals . keys . map! { |key | "#{ key } = local_assigns[:#{ key } ];" } . join
183
191
184
192
if source . encoding_aware?
193
+ # Look for # encoding: *. If we find one, we'll tag the
194
+ # String with that encoding; otherwise, we'll use the
195
+ # default external encoding.
185
196
if source . sub! ( /\A #{ ENCODING_FLAG } / , '' )
186
- encoding = $1
197
+ encoding = magic_encoding = $1
187
198
else
188
199
encoding = Encoding . default_external
189
200
end
@@ -192,34 +203,28 @@ def compile(locals, view, mod)
192
203
# or the encoding specified in the file
193
204
source . force_encoding ( encoding )
194
205
195
- # If the original encoding is BINARY, the actual
196
- # encoding is either stored out-of-band (such as
197
- # in ERB <%# %> style magic comments) or missing.
198
- # This is also true if the original encoding is
199
- # something other than BINARY, but it's invalid.
200
- if source . encoding != Encoding ::BINARY && source . valid_encoding?
206
+ # If the user didn't specify an encoding, and the handler
207
+ # handles encodings, we simply pass the String as is to
208
+ # the handler (with the default_external tag)
209
+ if !magic_encoding && @handler . respond_to? ( :handles_encoding? ) && @handler . handles_encoding?
210
+ source
211
+ # Otherwise, if the String is valid in the encoding,
212
+ # encode immediately to default_internal. This means
213
+ # that if a handler doesn't handle encodings, it will
214
+ # always get Strings in the default_internal
215
+ elsif source . valid_encoding?
201
216
source . encode!
202
- # If the assumed encoding is incorrect, check to
203
- # see whether the handler accepts BINARY. If it
204
- # does, it has another mechanism for determining
205
- # the true encoding of the String.
206
- elsif @handler . respond_to? ( :accepts_binary? ) && @handler . accepts_binary?
207
- source . force_encoding ( Encoding ::BINARY )
208
- # If the handler does not accept BINARY, the
209
- # assumed encoding (either the default_external,
210
- # or the explicit encoding specified by the user)
211
- # is incorrect. We raise an exception here.
217
+ # Otherwise, since the String is invalid in the encoding
218
+ # specified, raise an exception
212
219
else
213
220
raise WrongEncodingError . new ( source , encoding )
214
221
end
215
-
216
- # Don't validate the encoding yet -- the handler
217
- # may treat the String as raw bytes and extract
218
- # the encoding some other way
219
222
end
220
223
221
224
code = @handler . call ( self )
222
225
226
+ # Make sure that the resulting String to be evalled is in the
227
+ # encoding of the code
223
228
source = <<-end_src
224
229
def #{ method_name } (local_assigns)
225
230
_old_virtual_path, @_virtual_path = @_virtual_path, #{ @virtual_path . inspect } ;_old_output_buffer = @output_buffer;#{ locals_code } ;#{ code }
@@ -229,20 +234,16 @@ def #{method_name}(local_assigns)
229
234
end_src
230
235
231
236
if source . encoding_aware?
232
- # Handlers should return their source Strings in either the
233
- # default_internal or BINARY. If the handler returns a BINARY
234
- # String, we assume its encoding is the one we determined
235
- # earlier, and encode the resulting source in the default_internal.
236
- if source . encoding == Encoding ::BINARY
237
- source . force_encoding ( Encoding . default_internal )
238
- end
237
+ # Make sure the source is in the encoding of the returned code
238
+ source . force_encoding ( code . encoding )
239
239
240
240
# In case we get back a String from a handler that is not in
241
241
# the default_internal, encode it to the default_internal
242
242
source . encode!
243
243
244
244
# Now, validate that the source we got back from the template
245
- # handler is valid in the default_internal
245
+ # handler is valid in the default_internal. This catches handlers
246
+ # that handle encodings themselves but return an invalid String
246
247
unless source . valid_encoding?
247
248
raise WrongEncodingError . new ( @source , Encoding . default_internal )
248
249
end
0 commit comments