4 files changed: +34 -3 lines changed
lines changed Original file line number Diff line number Diff line change 15
15
16
16
__all__ = ['Selector' , 'SelectorList' ]
17
17
18
+
19
class SafeXMLParser(etree.XMLParser):
    """``etree.XMLParser`` subclass with entity resolution off by default.

    Accepts the same positional and keyword arguments as
    ``etree.XMLParser``.  If the caller does not supply
    ``resolve_entities`` it is set to ``False`` before delegating to the
    parent constructor; an explicitly supplied value is passed through
    unchanged.
    """

    def __init__(self, *args, **kwargs):
        # Only fill in the default; never override the caller's choice.
        if 'resolve_entities' not in kwargs:
            kwargs['resolve_entities'] = False
        super(SafeXMLParser, self).__init__(*args, **kwargs)
23
+
18
24
# Maps each content type ('html', 'xml') to its parser class, its CSS
# translator instance and the name of its tostring method.
_ctgroup = dict(
    html=dict(
        _parser=etree.HTMLParser,
        _csstranslator=ScrapyHTMLTranslator(),
        _tostring_method='html',
    ),
    xml=dict(
        _parser=SafeXMLParser,
        _csstranslator=ScrapyGenericTranslator(),
        _tostring_method='xml',
    ),
)
Original file line number Diff line number Diff line change @@ -332,6 +332,16 @@ class SmartStringsSelector(Selector):
332
332
div_class = x .xpath ('//div/@class' )
333
333
self .assertTrue (all (map (lambda e : hasattr (e ._root , 'getparent' ), div_class )))
334
334
335
def test_xml_entity_expansion(self):
    # XML document declaring an external entity that points at a local
    # file and referencing it from the root element.
    payload = (
        '<?xml version="1.0" encoding="ISO-8859-1"?>'
        '<!DOCTYPE foo [ <!ELEMENT foo ANY > '
        '<!ENTITY xxe SYSTEM "file:///etc/passwd" >]>'
        '<foo>&xxe;</foo>'
    )
    selector = self.sscls(
        response=XmlResponse('http://example.com', body=payload))

    # The entity reference must come back verbatim, not expanded.
    self.assertEqual(selector.extract(), '<foo>&xxe;</foo>')
344
+
335
345
336
346
class DeprecatedXpathSelectorTest (unittest .TestCase ):
337
347
Original file line number Diff line number Diff line change @@ -188,13 +188,28 @@ def test_alternate(self):
188
188
<xhtml:link rel="alternate" hreflang="en"/><!-- wrong tag without href -->
189
189
</url>
190
190
</urlset>""" )
191
-
191
+
192
192
self .assertEqual (list (s ), [
193
193
{'loc' : 'http://www.example.com/english/' ,
194
194
'alternate' : ['http://www.example.com/deutsch/' , 'http://www.example.com/schweiz-deutsch/' , 'http://www.example.com/english/' ]
195
195
}
196
196
])
197
197
198
def test_xml_entity_expansion(self):
    # Sitemap whose <loc> references an external entity aimed at a local
    # file.
    sitemap_xml = """<?xml version="1.0" encoding="utf-8"?>
      <!DOCTYPE foo [
      <!ELEMENT foo ANY >
      <!ENTITY xxe SYSTEM "file:///etc/passwd" >
      ]>
      <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
        <url>
          <loc>http://127.0.0.1:8000/&xxe;</loc>
        </url>
      </urlset>
    """
    entries = list(Sitemap(sitemap_xml))

    # The entity must contribute nothing to the extracted location.
    self.assertEqual(entries, [{'loc': 'http://127.0.0.1:8000/'}])
212
+
198
213
199
214
if __name__ == '__main__' :
200
215
unittest .main ()
Original file line number Diff line number Diff line change @@ -12,7 +12,7 @@ class Sitemap(object):
12
12
(type=sitemapindex) files"""
13
13
14
14
def __init__ (self , xmltext ):
15
- xmlp = lxml .etree .XMLParser (recover = True , remove_comments = True )
15
+ xmlp = lxml .etree .XMLParser (recover = True , remove_comments = True , resolve_entities = False )
16
16
self ._root = lxml .etree .fromstring (xmltext , parser = xmlp )
17
17
rt = self ._root .tag
18
18
self .type = self ._root .tag .split ('}' , 1 )[1 ] if '}' in rt else rt
You can’t perform that action at this time.
0 commit comments