Skip to content

Commit 95e6bd2

Browse files
committed
Merge pull request scrapy#1398 from scrapy/py3-cookies
PY3 port http cookies handling
2 parents cbfb24d + f4fc05c commit 95e6bd2

File tree

6 files changed: 142 additions and 41 deletions.

6 files changed: 142 additions and 41 deletions.

scrapy/downloadermiddlewares/cookies.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from scrapy.exceptions import NotConfigured
77
from scrapy.http import Response
88
from scrapy.http.cookies import CookieJar
9+
from scrapy.utils.python import to_native_str
910

1011
logger = logging.getLogger(__name__)
1112

@@ -52,18 +53,20 @@ def process_response(self, request, response, spider):
5253

5354
def _debug_cookie(self, request, spider):
5455
if self.debug:
55-
cl = request.headers.getlist('Cookie')
56+
cl = [to_native_str(c, errors='replace')
57+
for c in request.headers.getlist('Cookie')]
5658
if cl:
57-
msg = "Sending cookies to: %s" % request + os.linesep
58-
msg += os.linesep.join("Cookie: %s" % c for c in cl)
59+
cookies = "\n".join("Cookie: {}\n".format(c) for c in cl)
60+
msg = "Sending cookies to: {}\n{}".format(request, cookies)
5961
logger.debug(msg, extra={'spider': spider})
6062

6163
def _debug_set_cookie(self, response, spider):
6264
if self.debug:
63-
cl = response.headers.getlist('Set-Cookie')
65+
cl = [to_native_str(c, errors='replace')
66+
for c in response.headers.getlist('Set-Cookie')]
6467
if cl:
65-
msg = "Received cookies from: %s" % response + os.linesep
66-
msg += os.linesep.join("Set-Cookie: %s" % c for c in cl)
68+
cookies = "\n".join("Set-Cookie: {}\n".format(c) for c in cl)
69+
msg = "Received cookies from: {}\n{}".format(response, cookies)
6770
logger.debug(msg, extra={'spider': spider})
6871

6972
def _format_cookie(self, cookie):

scrapy/http/cookies.py

Lines changed: 23 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
import time
2-
from cookielib import CookieJar as _CookieJar, DefaultCookiePolicy, IPV4_RE
2+
from six.moves.http_cookiejar import (
3+
CookieJar as _CookieJar, DefaultCookiePolicy, IPV4_RE
4+
)
35
from scrapy.utils.httpobj import urlparse_cached
6+
from scrapy.utils.python import to_native_str
47

58

69
class CookieJar(object):
@@ -97,6 +100,7 @@ def potential_domain_matches(domain):
97100
pass
98101
return matches + ['.' + d for d in matches]
99102

103+
100104
class _DummyLock(object):
101105
def acquire(self):
102106
pass
@@ -133,21 +137,30 @@ def is_unverifiable(self):
133137
"""
134138
return self.request.meta.get('is_unverifiable', False)
135139

140+
# python3 uses request.unverifiable
141+
@property
142+
def unverifiable(self):
143+
return self.is_unverifiable()
144+
136145
def get_origin_req_host(self):
137146
return urlparse_cached(self.request).hostname
138147

139148
def has_header(self, name):
140149
return name in self.request.headers
141150

142151
def get_header(self, name, default=None):
143-
return self.request.headers.get(name, default)
152+
return to_native_str(self.request.headers.get(name, default),
153+
errors='replace')
144154

145155
def header_items(self):
146-
return self.request.headers.items()
156+
return [
157+
(to_native_str(k, errors='replace'),
158+
[to_native_str(x, errors='replace') for x in v])
159+
for k, v in self.request.headers.items()
160+
]
147161

148162
def add_unredirected_header(self, name, value):
149163
self.request.headers.appendlist(name, value)
150-
#print 'add_unredirected_header', self.request.headers
151164

152165

153166
class WrappedResponse(object):
@@ -158,5 +171,9 @@ def __init__(self, response):
158171
def info(self):
159172
return self
160173

161-
def getheaders(self, name):
162-
return self.response.headers.getlist(name)
174+
# python3 cookiejars calls get_all
175+
def get_all(self, name, default=None):
176+
return [to_native_str(v, errors='replace')
177+
for v in self.response.headers.getlist(name)]
178+
# python2 cookiejars calls getheaders
179+
getheaders = get_all

scrapy/mail.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
from email import encoders as Encoders
2121

2222
from twisted.internet import defer, reactor, ssl
23-
from twisted.mail.smtp import ESMTPSenderFactory
2423

2524
logger = logging.getLogger(__name__)
2625

@@ -102,6 +101,8 @@ def _sent_failed(self, failure, to, cc, subject, nattachs):
102101
'mailattachs': nattachs, 'mailerr': errstr})
103102

104103
def _sendmail(self, to_addrs, msg):
104+
# Import twisted.mail here because it is not available in python3
105+
from twisted.mail.smtp import ESMTPSenderFactory
105106
msg = StringIO(msg)
106107
d = defer.Deferred()
107108
factory = ESMTPSenderFactory(self.smtpuser, self.smtppass, self.mailfrom, \

tests/py3-ignores.txt

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@ tests/test_crawl.py
1111
tests/test_crawler.py
1212
tests/test_downloader_handlers.py
1313
tests/test_downloadermiddleware_ajaxcrawlable.py
14-
tests/test_downloadermiddleware_cookies.py
1514
tests/test_downloadermiddleware_defaultheaders.py
1615
tests/test_downloadermiddleware_downloadtimeout.py
1716
tests/test_downloadermiddleware_httpauth.py
@@ -24,7 +23,6 @@ tests/test_downloadermiddleware_retry.py
2423
tests/test_downloadermiddleware_stats.py
2524
tests/test_downloadermiddleware_useragent.py
2625
tests/test_engine.py
27-
tests/test_http_cookies.py
2826
tests/test_logformatter.py
2927
tests/test_mail.py
3028
tests/test_pipeline_files.py
@@ -51,7 +49,6 @@ scrapy/xlib/tx/endpoints.py
5149
scrapy/xlib/tx/client.py
5250
scrapy/xlib/tx/_newclient.py
5351
scrapy/xlib/tx/__init__.py
54-
scrapy/http/cookies.py
5552
scrapy/core/downloader/handlers/s3.py
5653
scrapy/core/downloader/handlers/http11.py
5754
scrapy/core/downloader/handlers/http.py

tests/test_downloadermiddleware_cookies.py

Lines changed: 94 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,19 @@
1-
from unittest import TestCase
21
import re
2+
import logging
3+
from unittest import TestCase
4+
from testfixtures import LogCapture
35

46
from scrapy.http import Response, Request
57
from scrapy.spiders import Spider
8+
from scrapy.utils.test import get_crawler
9+
from scrapy.exceptions import NotConfigured
610
from scrapy.downloadermiddlewares.cookies import CookiesMiddleware
711

812

913
class CookiesMiddlewareTest(TestCase):
1014

1115
def assertCookieValEqual(self, first, second, msg=None):
12-
cookievaleq = lambda cv: re.split(';\s*', cv)
16+
cookievaleq = lambda cv: re.split(';\s*', cv.decode('latin1'))
1317
return self.assertEqual(
1418
sorted(cookievaleq(first)),
1519
sorted(cookievaleq(second)), msg)
@@ -22,19 +26,91 @@ def tearDown(self):
2226
del self.mw
2327

2428
def test_basic(self):
25-
headers = {'Set-Cookie': 'C1=value1; path=/'}
2629
req = Request('http://scrapytest.org/')
2730
assert self.mw.process_request(req, self.spider) is None
2831
assert 'Cookie' not in req.headers
2932

33+
headers = {'Set-Cookie': 'C1=value1; path=/'}
3034
res = Response('http://scrapytest.org/', headers=headers)
3135
assert self.mw.process_response(req, res, self.spider) is res
3236

33-
#assert res.cookies
37+
req2 = Request('http://scrapytest.org/sub1/')
38+
assert self.mw.process_request(req2, self.spider) is None
39+
self.assertEquals(req2.headers.get('Cookie'), b"C1=value1")
40+
41+
def test_setting_false_cookies_enabled(self):
42+
self.assertRaises(
43+
NotConfigured,
44+
CookiesMiddleware.from_crawler,
45+
get_crawler(settings_dict={'COOKIES_ENABLED': False})
46+
)
47+
48+
def test_setting_default_cookies_enabled(self):
49+
self.assertIsInstance(
50+
CookiesMiddleware.from_crawler(get_crawler()),
51+
CookiesMiddleware
52+
)
53+
54+
def test_setting_true_cookies_enabled(self):
55+
self.assertIsInstance(
56+
CookiesMiddleware.from_crawler(
57+
get_crawler(settings_dict={'COOKIES_ENABLED': True})
58+
),
59+
CookiesMiddleware
60+
)
61+
62+
def test_setting_enabled_cookies_debug(self):
63+
crawler = get_crawler(settings_dict={'COOKIES_DEBUG': True})
64+
mw = CookiesMiddleware.from_crawler(crawler)
65+
with LogCapture('scrapy.downloadermiddlewares.cookies',
66+
propagate=False,
67+
level=logging.DEBUG) as l:
68+
req = Request('http://scrapytest.org/')
69+
res = Response('http://scrapytest.org/',
70+
headers={'Set-Cookie': 'C1=value1; path=/'})
71+
mw.process_response(req, res, crawler.spider)
72+
req2 = Request('http://scrapytest.org/sub1/')
73+
mw.process_request(req2, crawler.spider)
74+
75+
l.check(
76+
('scrapy.downloadermiddlewares.cookies',
77+
'DEBUG',
78+
'Received cookies from: <200 http://scrapytest.org/>\n'
79+
'Set-Cookie: C1=value1; path=/\n'),
80+
('scrapy.downloadermiddlewares.cookies',
81+
'DEBUG',
82+
'Sending cookies to: <GET http://scrapytest.org/sub1/>\n'
83+
'Cookie: C1=value1\n'),
84+
)
85+
86+
def test_setting_disabled_cookies_debug(self):
87+
crawler = get_crawler(settings_dict={'COOKIES_DEBUG': False})
88+
mw = CookiesMiddleware.from_crawler(crawler)
89+
with LogCapture('scrapy.downloadermiddlewares.cookies',
90+
propagate=False,
91+
level=logging.DEBUG) as l:
92+
req = Request('http://scrapytest.org/')
93+
res = Response('http://scrapytest.org/',
94+
headers={'Set-Cookie': 'C1=value1; path=/'})
95+
mw.process_response(req, res, crawler.spider)
96+
req2 = Request('http://scrapytest.org/sub1/')
97+
mw.process_request(req2, crawler.spider)
98+
99+
l.check()
100+
101+
def test_do_not_break_on_non_utf8_header(self):
102+
req = Request('http://scrapytest.org/')
103+
assert self.mw.process_request(req, self.spider) is None
104+
assert 'Cookie' not in req.headers
105+
106+
headers = {'Set-Cookie': b'C1=in\xa3valid; path=/',
107+
'Other': b'ignore\xa3me'}
108+
res = Response('http://scrapytest.org/', headers=headers)
109+
assert self.mw.process_response(req, res, self.spider) is res
34110

35111
req2 = Request('http://scrapytest.org/sub1/')
36112
assert self.mw.process_request(req2, self.spider) is None
37-
self.assertEquals(req2.headers.get('Cookie'), "C1=value1")
113+
self.assertIn('Cookie', req2.headers)
38114

39115
def test_dont_merge_cookies(self):
40116
# merge some cookies into jar
@@ -55,12 +131,12 @@ def test_dont_merge_cookies(self):
55131
# check that cookies are merged back
56132
req = Request('http://scrapytest.org/mergeme')
57133
assert self.mw.process_request(req, self.spider) is None
58-
self.assertEquals(req.headers.get('Cookie'), 'C1=value1')
134+
self.assertEquals(req.headers.get('Cookie'), b'C1=value1')
59135

60136
# check that cookies are merged when dont_merge_cookies is passed as 0
61137
req = Request('http://scrapytest.org/mergeme', meta={'dont_merge_cookies': 0})
62138
assert self.mw.process_request(req, self.spider) is None
63-
self.assertEquals(req.headers.get('Cookie'), 'C1=value1')
139+
self.assertEquals(req.headers.get('Cookie'), b'C1=value1')
64140

65141
def test_complex_cookies(self):
66142
# merge some cookies into jar
@@ -76,12 +152,12 @@ def test_complex_cookies(self):
76152
# embed C1 and C3 for scrapytest.org/foo
77153
req = Request('http://scrapytest.org/foo')
78154
self.mw.process_request(req, self.spider)
79-
assert req.headers.get('Cookie') in ('C1=value1; C3=value3', 'C3=value3; C1=value1')
155+
assert req.headers.get('Cookie') in (b'C1=value1; C3=value3', b'C3=value3; C1=value1')
80156

81157
# embed C2 for scrapytest.org/bar
82158
req = Request('http://scrapytest.org/bar')
83159
self.mw.process_request(req, self.spider)
84-
self.assertEquals(req.headers.get('Cookie'), 'C2=value2')
160+
self.assertEquals(req.headers.get('Cookie'), b'C2=value2')
85161

86162
# embed nothing for scrapytest.org/baz
87163
req = Request('http://scrapytest.org/baz')
@@ -91,7 +167,7 @@ def test_complex_cookies(self):
91167
def test_merge_request_cookies(self):
92168
req = Request('http://scrapytest.org/', cookies={'galleta': 'salada'})
93169
assert self.mw.process_request(req, self.spider) is None
94-
self.assertEquals(req.headers.get('Cookie'), 'galleta=salada')
170+
self.assertEquals(req.headers.get('Cookie'), b'galleta=salada')
95171

96172
headers = {'Set-Cookie': 'C1=value1; path=/'}
97173
res = Response('http://scrapytest.org/', headers=headers)
@@ -100,32 +176,32 @@ def test_merge_request_cookies(self):
100176
req2 = Request('http://scrapytest.org/sub1/')
101177
assert self.mw.process_request(req2, self.spider) is None
102178

103-
self.assertCookieValEqual(req2.headers.get('Cookie'), "C1=value1; galleta=salada")
179+
self.assertCookieValEqual(req2.headers.get('Cookie'), b"C1=value1; galleta=salada")
104180

105181
def test_cookiejar_key(self):
106182
req = Request('http://scrapytest.org/', cookies={'galleta': 'salada'}, meta={'cookiejar': "store1"})
107183
assert self.mw.process_request(req, self.spider) is None
108-
self.assertEquals(req.headers.get('Cookie'), 'galleta=salada')
184+
self.assertEquals(req.headers.get('Cookie'), b'galleta=salada')
109185

110186
headers = {'Set-Cookie': 'C1=value1; path=/'}
111187
res = Response('http://scrapytest.org/', headers=headers, request=req)
112188
assert self.mw.process_response(req, res, self.spider) is res
113189

114190
req2 = Request('http://scrapytest.org/', meta=res.meta)
115191
assert self.mw.process_request(req2, self.spider) is None
116-
self.assertCookieValEqual(req2.headers.get('Cookie'),'C1=value1; galleta=salada')
192+
self.assertCookieValEqual(req2.headers.get('Cookie'), b'C1=value1; galleta=salada')
117193

118194
req3 = Request('http://scrapytest.org/', cookies={'galleta': 'dulce'}, meta={'cookiejar': "store2"})
119195
assert self.mw.process_request(req3, self.spider) is None
120-
self.assertEquals(req3.headers.get('Cookie'), 'galleta=dulce')
196+
self.assertEquals(req3.headers.get('Cookie'), b'galleta=dulce')
121197

122198
headers = {'Set-Cookie': 'C2=value2; path=/'}
123199
res2 = Response('http://scrapytest.org/', headers=headers, request=req3)
124200
assert self.mw.process_response(req3, res2, self.spider) is res2
125201

126202
req4 = Request('http://scrapytest.org/', meta=res2.meta)
127203
assert self.mw.process_request(req4, self.spider) is None
128-
self.assertCookieValEqual(req4.headers.get('Cookie'), 'C2=value2; galleta=dulce')
204+
self.assertCookieValEqual(req4.headers.get('Cookie'), b'C2=value2; galleta=dulce')
129205

130206
#cookies from hosts with port
131207
req5_1 = Request('http://scrapytest.org:1104/')
@@ -137,11 +213,11 @@ def test_cookiejar_key(self):
137213

138214
req5_2 = Request('http://scrapytest.org:1104/some-redirected-path')
139215
assert self.mw.process_request(req5_2, self.spider) is None
140-
self.assertEquals(req5_2.headers.get('Cookie'), 'C1=value1')
216+
self.assertEquals(req5_2.headers.get('Cookie'), b'C1=value1')
141217

142218
req5_3 = Request('http://scrapytest.org/some-redirected-path')
143219
assert self.mw.process_request(req5_3, self.spider) is None
144-
self.assertEquals(req5_3.headers.get('Cookie'), 'C1=value1')
220+
self.assertEquals(req5_3.headers.get('Cookie'), b'C1=value1')
145221

146222
#skip cookie retrieval for not http request
147223
req6 = Request('file:///scrapy/sometempfile')
@@ -152,5 +228,4 @@ def test_local_domain(self):
152228
request = Request("http://example-host/", cookies={'currencyCookie': 'USD'})
153229
assert self.mw.process_request(request, self.spider) is None
154230
self.assertIn('Cookie', request.headers)
155-
self.assertIn('currencyCookie', request.headers['Cookie'])
156-
231+
self.assertEqual(b'currencyCookie=USD', request.headers['Cookie'])

0 commit comments

Comments
 (0)