Skip to content

Commit da3c155

Browse files
Digenis authored and dangra committed
escape nodename in xmliter regex
1 parent 4418fc3 commit da3c155

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

scrapy/utils/iterators.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,16 +25,18 @@ def xmliter(obj, nodename):
2525
- a unicode string
2626
- a string encoded as utf-8
2727
"""
28-
HEADER_START_RE = re.compile(r'^(.*?)<\s*%s(?:\s|>)' % nodename, re.S)
29-
HEADER_END_RE = re.compile(r'<\s*/%s\s*>' % nodename, re.S)
28+
nodename_patt = re.escape(nodename)
29+
30+
HEADER_START_RE = re.compile(r'^(.*?)<\s*%s(?:\s|>)' % nodename_patt, re.S)
31+
HEADER_END_RE = re.compile(r'<\s*/%s\s*>' % nodename_patt, re.S)
3032
text = _body_or_str(obj)
3133

3234
header_start = re.search(HEADER_START_RE, text)
3335
header_start = header_start.group(1).strip() if header_start else ''
3436
header_end = re_rsearch(HEADER_END_RE, text)
3537
header_end = text[header_end[1]:].strip() if header_end else ''
3638

37-
r = re.compile(r"<%s[\s>].*?</%s>" % (nodename, nodename), re.DOTALL)
39+
r = re.compile(r"<{0}[\s>].*?</{0}>".format(nodename_patt), re.DOTALL)
3840
for match in r.finditer(text):
3941
nodetext = header_start + match.group() + header_end
4042
yield Selector(text=nodetext, type='xml').xpath('//' + nodename)[0]

0 commit comments

Comments
 (0)