Skip to content

Commit 07cb3e5

Browse files
Digenis authored and dangra committed
encode invalid xpath with unicode_escape under PY2
The exception quotes an xpath string which may be unicode.
1 parent 2c8e573 commit 07cb3e5

File tree

2 files changed

+14
-9
lines changed

2 files changed

+14
-9
lines changed

scrapy/selector/unified.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
"""
44

55
from lxml import etree
6+
import six
67

78
from scrapy.utils.misc import extract_regex
89
from scrapy.utils.trackref import object_ref
@@ -95,7 +96,8 @@ def xpath(self, query):
9596
result = xpathev(query, namespaces=self.namespaces,
9697
smart_strings=self._lxml_smart_strings)
9798
except etree.XPathError:
98-
raise ValueError("Invalid XPath: %s" % query)
99+
msg = u"Invalid XPath: %s" % query
100+
raise ValueError(msg if six.PY3 else msg.encode("unicode_escape"))
99101

100102
if type(result) is not list:
101103
result = [result]

scrapy/tests/test_selector.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import re
22
import warnings
33
import weakref
4+
import six
45
from twisted.trial import unittest
56
from scrapy.exceptions import ScrapyDeprecationWarning
67
from scrapy.http import TextResponse, HtmlResponse, XmlResponse
@@ -188,17 +189,19 @@ def test_selector_over_text(self):
188189
self.assertEqual(xs.xpath('.').extract(), [u'<root>lala</root>'])
189190

190191
def test_invalid_xpath(self):
192+
"Test invalid xpath raises ValueError with the invalid xpath"
191193
response = XmlResponse(url="http://example.com", body="<html></html>")
192194
x = self.sscls(response)
193195
xpath = "//test[@foo='bar]"
194-
try:
195-
x.xpath(xpath)
196-
except ValueError as e:
197-
assert xpath in str(e), "Exception message does not contain invalid xpath"
198-
except Exception:
199-
raise AssertionError("A invalid XPath does not raise ValueError")
200-
else:
201-
raise AssertionError("A invalid XPath does not raise an exception")
196+
self.assertRaisesRegexp(ValueError, re.escape(xpath), x.xpath, xpath)
197+
198+
def test_invalid_xpath_unicode(self):
199+
"Test *Unicode* invalid xpath raises ValueError with the invalid xpath"
200+
response = XmlResponse(url="http://example.com", body="<html></html>")
201+
x = self.sscls(response)
202+
xpath = u"//test[@foo='\u0431ar]"
203+
encoded = xpath if six.PY3 else xpath.encode('unicode_escape')
204+
self.assertRaisesRegexp(ValueError, re.escape(encoded), x.xpath, xpath)
202205

203206
def test_http_header_encoding_precedence(self):
204207
# u'\xa3' = pound symbol in unicode

0 commit comments

Comments
 (0)