@@ -348,18 +348,16 @@ def test_convert_charrefs(self):
348
348
collector = lambda : EventCollectorCharrefs ()
349
349
self .assertTrue (collector ().convert_charrefs )
350
350
charrefs = ['&quot;' , '&#34;' , '&#x22;' , '&quot' , '&#34' , '&#x22' ]
351
- # check charrefs in the middle of the text/attributes
352
- expected = [('starttag' , 'a' , [('href' , 'foo"zar' )]),
353
- ('data' , 'a"z' ), ('endtag' , 'a' )]
351
+ # check charrefs in the middle of the text
352
+ expected = [('starttag' , 'a' , []), ('data' , 'a"z' ), ('endtag' , 'a' )]
354
353
for charref in charrefs :
355
- self ._run_check ('<a href="foo{0}zar">a{0}z</a>' .format (charref ),
354
+ self ._run_check ('<a>a{0}z</a>' .format (charref ),
356
355
expected , collector = collector ())
357
- # check charrefs at the beginning/end of the text/attributes
358
- expected = [('data' , '"' ),
359
- ('starttag' , 'a' , [('x' , '"' ), ('y' , '"X' ), ('z' , 'X"' )]),
356
+ # check charrefs at the beginning/end of the text
357
+ expected = [('data' , '"' ), ('starttag' , 'a' , []),
360
358
('data' , '"' ), ('endtag' , 'a' ), ('data' , '"' )]
361
359
for charref in charrefs :
362
- self ._run_check ('{0}<a x="{0}" y="{0}X" z="X{0}" >'
360
+ self ._run_check ('{0}<a>'
363
361
'{0}</a>{0}' .format (charref ),
364
362
expected , collector = collector ())
365
363
# check charrefs in <script>/<style> elements
@@ -382,6 +380,35 @@ def test_convert_charrefs(self):
382
380
self ._run_check ('no charrefs here' , [('data' , 'no charrefs here' )],
383
381
collector = collector ())
384
382
383
def test_convert_charrefs_in_attribute_values(self):
    # Check how character references inside attribute values are decoded
    # when convert_charrefs is enabled.
    # default value for convert_charrefs is now True
    collector = lambda: EventCollectorCharrefs()
    self.assertTrue(collector().convert_charrefs)

    # Every attribute is named "x"; each puts the reference in a different
    # position: alone, at the end, before a letter, between letters,
    # before a space, and before an equals sign.
    markup = ('<a x="{0}" x="z{0}" x="{0}z" '
              ' x="z{0}z" x="{0} z" x="{0}=z"></a>')

    # always unescape terminated entity refs, numeric and hex char refs:
    # - regardless whether they are at start, middle, end of attribute
    # - or followed by alphanumeric, non-alphanumeric, or equals char
    # The references must be spelled out as references here; writing the
    # literal cent sign would mean the parser is never asked to decode.
    charrefs = ['&cent;', '&#xa2;', '&#xa2', '&#162;', '&#162']
    expected = [('starttag', 'a',
                 [('x', '¢'), ('x', 'z¢'), ('x', '¢z'),
                  ('x', 'z¢z'), ('x', '¢ z'), ('x', '¢=z')]),
                ('endtag', 'a')]
    for charref in charrefs:
        self._run_check(markup.format(charref), expected,
                        collector=collector())

    # only unescape unterminated entity matches if they are not followed by
    # an alphanumeric or an equals sign
    charref = '&cent'
    expected = [('starttag', 'a',
                 [('x', '¢'), ('x', 'z¢'), ('x', '&centz'),
                  ('x', 'z&centz'), ('x', '¢ z'), ('x', '&cent=z')]),
                ('endtag', 'a')]
    self._run_check(markup.format(charref), expected,
                    collector=collector())

411
+
385
412
# the remaining tests were for the "tolerant" parser (which is now
386
413
# the default), and check various kind of broken markup
387
414
def test_tolerant_parsing (self ):
0 commit comments