Skip to content

Commit 0cb68af

Browse files
committed
Allow the http-equiv and content attributes of a Content-Type meta tag to appear in any order. scrapy#123
1 parent 4d17048 commit 0cb68af

File tree

2 files changed

+11
-2
lines changed

2 files changed

+11
-2
lines changed

scrapy/http/response/html.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class HtmlResponse(TextResponse):
1818
_content_re = _template % ('content', r'(?P<mime>[^;]+);\s*charset=(?P<charset>[\w-]+)')
1919
_content2_re = _template % ('charset', r'(?P<charset>[\w-]+)')
2020

21-
METATAG_RE = re.compile(r'<meta\s+%s\s+%s' % (_httpequiv_re, _content_re), re.I)
21+
METATAG_RE = re.compile(r'<meta(?:\s+(?:%s|%s)){2}' % (_httpequiv_re, _content_re), re.I)
2222
METATAG2_RE = re.compile(r'<meta\s+%s' % _content2_re, re.I)
2323

2424
@memoizemethod_noargs

scrapy/tests/test_http_response.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,6 @@ class HtmlResponseTest(TextResponseTest):
226226
response_class = HtmlResponse
227227

228228
def test_html_encoding(self):
229-
230229
body = """<html><head><title>Some page</title><meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
231230
</head><body>Price: \xa3100</body></html>'
232231
"""
@@ -257,6 +256,16 @@ def test_html5_meta_charset(self):
257256
r1 = self.response_class("http://www.example.com", body=body)
258257
self._assert_response_values(r1, 'gb2312', body)
259258

259+
def test_httpequiv_meta(self):
260+
body = '''<html><head><meta content="text/html; charset=gb18030" http-equiv="Content-Type" /></head></html>'''
261+
response = self.response_class('http://example.com', body=body)
262+
self._assert_response_values(response, 'gb18030', body)
263+
264+
body = '''<html><head><meta http-equiv="Content-Type" content="text/html; charset=gb18030" /></head></html>'''
265+
response = self.response_class('http://example.com', body=body)
266+
self._assert_response_values(response, 'gb18030', body)
267+
268+
260269

261270
class XmlResponseTest(TextResponseTest):
262271

0 commit comments

Comments
 (0)