Commit 1a20bba (parent 3b01bb8)

New HTTP client wraps connection losses in ResponseFailed exception. Fixes scrapy#373.

5 files changed: +34 -19 lines

scrapy/contrib/downloadermiddleware/retry.py

Lines changed: 5 additions & 4 deletions
@@ -18,15 +18,16 @@
     indicate server overload, which would be something we want to retry
 """
 
-from twisted.internet.error import TimeoutError as ServerTimeoutError, DNSLookupError, \
-        ConnectionRefusedError, ConnectionDone, ConnectError, \
-        ConnectionLost, TCPTimedOutError
 from twisted.internet.defer import TimeoutError as UserTimeoutError
-from scrapy.xlib.tx._newclient import ResponseFailed
+from twisted.internet.error import TimeoutError as ServerTimeoutError, \
+    DNSLookupError, ConnectionRefusedError, ConnectionDone, ConnectError, \
+    ConnectionLost, TCPTimedOutError
 
 from scrapy import log
 from scrapy.exceptions import NotConfigured
 from scrapy.utils.response import response_status_message
+from scrapy.xlib.tx import ResponseFailed
+
 
 class RetryMiddleware(object):
 

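The hunk above only shows the import shuffle; the point is that ResponseFailed joins the exceptions this middleware retries. A minimal sketch of how the pieces plausibly fit together (the EXCEPTIONS_TO_RETRY tuple and process_exception body are assumptions based on the surrounding Scrapy code, not shown in this hunk):

    from twisted.internet.defer import TimeoutError as UserTimeoutError
    from twisted.internet.error import TimeoutError as ServerTimeoutError, \
        DNSLookupError, ConnectionRefusedError, ConnectionDone, ConnectError, \
        ConnectionLost, TCPTimedOutError
    from scrapy.xlib.tx import ResponseFailed

    class RetryMiddleware(object):

        # Assumed tuple: only the ResponseFailed import is visible in the hunk.
        # ResponseFailed sits alongside the bare Twisted connection errors, so
        # losses wrapped by the new HTTP client are retried too.
        EXCEPTIONS_TO_RETRY = (ServerTimeoutError, UserTimeoutError,
                               DNSLookupError, ConnectionRefusedError,
                               ConnectionDone, ConnectError, ConnectionLost,
                               TCPTimedOutError, ResponseFailed)

        def process_exception(self, request, exception, spider):
            # _retry() is defined on the real middleware; it reschedules the
            # request until the retry limit is reached
            if isinstance(exception, self.EXCEPTIONS_TO_RETRY) \
                    and not request.meta.get('dont_retry', False):
                return self._retry(request, exception, spider)

Because the whitelist is a plain tuple of exception classes, a single isinstance() check covers both the bare and the wrapped form of a connection loss.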
scrapy/core/downloader/handlers/http11.py

Lines changed: 4 additions & 6 deletions
@@ -7,17 +7,16 @@
 from zope.interface import implements
 from twisted.internet import defer, reactor, protocol
 from twisted.web.http_headers import Headers as TxHeaders
-from twisted.web.http import PotentialDataLoss
 from twisted.web.iweb import IBodyProducer
+from twisted.web.http import PotentialDataLoss
 from twisted.internet.error import TimeoutError
 from scrapy.xlib.tx import Agent, ProxyAgent, ResponseDone, \
-    ResponseFailed, HTTPConnectionPool, TCP4ClientEndpoint
+    HTTPConnectionPool, TCP4ClientEndpoint, ResponseFailed
 
 from scrapy.http import Headers
 from scrapy.responsetypes import responsetypes
 from scrapy.core.downloader.webclient import _parse
 from scrapy.utils.misc import load_object
-from scrapy import log
 
 
 
@@ -55,7 +54,7 @@ def _get_agent(self, request, timeout):
         if proxy:
             scheme, _, host, port, _ = _parse(proxy)
             endpoint = TCP4ClientEndpoint(reactor, host, port, timeout=timeout,
-                bindAddress=bindaddress)
+                                          bindAddress=bindaddress)
             return self._ProxyAgent(endpoint)
 
         return self._Agent(reactor, contextFactory=self._contextFactory,
@@ -145,10 +144,9 @@ def dataReceived(self, bodyBytes):
     def connectionLost(self, reason):
         if self._finished.called:
             return
+
        body = self._bodybuf.getvalue()
         if reason.check(ResponseDone):
             self._finished.callback((self._txresponse, body, None))
-        elif reason.check(PotentialDataLoss, ResponseFailed):
-            self._finished.callback((self._txresponse, body, ['partial']))
         else:
             self._finished.errback(reason)

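The connectionLost change stops treating a wrapped failure as a usable partial body: anything other than ResponseDone now errbacks with the original reason, and the new Twisted client delivers connection losses wrapped in ResponseFailed. A minimal sketch of what that wrapping looks like to a downstream errback (the handler itself is illustrative, not part of this commit):

    from twisted.internet.error import ConnectionLost
    from twisted.python.failure import Failure
    from scrapy.xlib.tx import ResponseFailed

    def handle_download_failure(failure):
        # ResponseFailed.reasons is a list of Failures for the underlying
        # errors, so the original ConnectionLost is still inspectable
        if failure.check(ResponseFailed):
            for reason in failure.value.reasons:
                if reason.check(ConnectionLost):
                    return "retry"  # RetryMiddleware treats this as retryable
        return "give up"

    # a loss mid-response now arrives wrapped:
    wrapped = Failure(ResponseFailed([Failure(ConnectionLost("peer went away"))]))
    assert handle_download_failure(wrapped) == "retry"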
scrapy/tests/mockserver.py

Lines changed: 6 additions & 2 deletions
@@ -101,9 +101,13 @@ def _delayedRender(self, request):
 class Drop(Partial):
 
     def _delayedRender(self, request):
+        abort = getarg(request, "abort", 0, type=int)
         request.write("this connection will be dropped\n")
-        request.channel.transport.abortConnection()
-        request.finish()
+        if abort:
+            request.channel.transport.abortConnection()
+        else:
+            request.channel.transport.loseConnection()
+        request.finish()
 
 
 class Root(Resource):

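The two branches exercise different failure modes: abortConnection() tears the transport down immediately, so the client tends to see the drop before receiving data, while loseConnection() closes after pending writes are flushed, so the client gets the body and then a mid-response loss. The getarg helper is defined elsewhere in mockserver.py; a plausible sketch, assuming it reads twisted.web request arguments with optional type coercion:

    def getarg(request, name, default=None, type=None):
        # request.args maps an arg name to a list of values,
        # e.g. {"abort": ["1"]} for /drop?abort=1
        if name in request.args:
            value = request.args[name][0]
            if type is not None:
                value = type(value)  # e.g. type=int turns "1" into 1
            return value
        return default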
scrapy/tests/test_crawl.py

Lines changed: 10 additions & 2 deletions
@@ -91,8 +91,16 @@ def test_retry_dns_error(self):
         self._assert_retried()
 
     @defer.inlineCallbacks
-    def test_retry_dropped_connection(self):
-        spider = SimpleSpider("http://localhost:8998/drop")
+    def test_retry_conn_lost(self):
+        # connection lost after receiving data
+        spider = SimpleSpider("http://localhost:8998/drop?abort=0")
+        yield docrawl(spider)
+        self._assert_retried()
+
+    @defer.inlineCallbacks
+    def test_retry_conn_aborted(self):
+        # connection lost before receiving data
+        spider = SimpleSpider("http://localhost:8998/drop?abort=1")
         yield docrawl(spider)
         self._assert_retried()
 

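Both tests assert through _assert_retried, which is defined elsewhere in test_crawl.py. A plausible sketch of what it verifies, assuming the suite inspects Scrapy's captured log output (get_testlog is a hypothetical helper here, not part of this commit):

    def _assert_retried(self):
        log = get_testlog()  # hypothetical: returns captured log text
        # the retry middleware logs each attempt and the final give-up
        self.assertIn("Retrying", log)
        self.assertIn("Gave up retrying", log)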
scrapy/tests/test_downloadermiddleware_retry.py

Lines changed: 9 additions & 5 deletions
@@ -1,14 +1,16 @@
 import unittest
 
-from twisted.internet.error import TimeoutError as ServerTimeoutError, DNSLookupError, \
-        ConnectionRefusedError, ConnectionDone, ConnectError, \
-        ConnectionLost
+from twisted.internet.error import TimeoutError as ServerTimeoutError, \
+    DNSLookupError, ConnectionRefusedError, ConnectionDone, ConnectError, \
+    ConnectionLost
 
 from scrapy.contrib.downloadermiddleware.retry import RetryMiddleware
+from scrapy.xlib.tx import ResponseFailed
 from scrapy.spider import BaseSpider
 from scrapy.http import Request, Response
 from scrapy.utils.test import get_crawler
 
+
 class RetryTest(unittest.TestCase):
     def setUp(self):
         crawler = get_crawler()
@@ -62,9 +64,11 @@ def test_503(self):
         assert self.mw.process_response(req, rsp, self.spider) is rsp
 
     def test_twistederrors(self):
-        for exc in (ServerTimeoutError, DNSLookupError, ConnectionRefusedError, ConnectionDone, ConnectError, ConnectionLost):
+        for exc in (ServerTimeoutError, DNSLookupError, ConnectionRefusedError,
+                    ConnectionDone, ConnectError, ConnectionLost,
+                    ResponseFailed):
             req = Request('http://www.scrapytest.org/%s' % exc.__name__)
-            self._test_retry_exception(req, exc())
+            self._test_retry_exception(req, exc('foo'))
 
     def _test_retry_exception(self, req, exception):
         # first retry

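The switch from exc() to exc('foo') is needed because ResponseFailed takes a required positional reasons argument, which the other Twisted error classes simply absorb as an optional message. The hunk cuts off at the "# first retry" comment; a plausible sketch of how _test_retry_exception continues, assuming the default of two retries before giving up:

    def _test_retry_exception(self, req, exception):
        # first retry
        req = self.mw.process_exception(req, exception, self.spider)
        assert isinstance(req, Request)
        assert req.meta['retry_times'] == 1

        # second retry
        req = self.mw.process_exception(req, exception, self.spider)
        assert isinstance(req, Request)
        assert req.meta['retry_times'] == 2

        # retries exhausted: the middleware gives up and returns None
        req = self.mw.process_exception(req, exception, self.spider)
        self.assertEqual(req, None)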