Skip to content

Commit b15470d

Browse files
committed
test PotentialDataLoss errors on unbounded responses
1 parent c4bf324 commit b15470d

File tree

3 files changed

+48
-0
lines changed

3 files changed

+48
-0
lines changed

scrapy/core/downloader/handlers/http11.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from twisted.web.iweb import IBodyProducer
1111
from twisted.web.http import PotentialDataLoss
1212
from twisted.internet.error import TimeoutError
13+
from twisted.web.http import PotentialDataLoss
1314
from scrapy.xlib.tx import Agent, ProxyAgent, ResponseDone, \
1415
HTTPConnectionPool, TCP4ClientEndpoint, ResponseFailed
1516

scrapy/tests/mockserver.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ def _cancelrequest(_):
2020
# silence CancelledError
2121
d.addErrback(lambda _: None)
2222
d.cancel()
23+
2324
d = deferLater(reactor, delay, f, *a, **kw)
2425
request.notifyFinish().addErrback(_cancelrequest)
2526
return d
@@ -84,6 +85,23 @@ def render_GET(self, request):
8485
return ""
8586

8687

88+
class Raw(DeferMixin, Resource):
89+
90+
isLeaf = True
91+
92+
def render_GET(self, request):
93+
request.startedWriting = 1
94+
self.deferRequest(request, 0, self._delayedRender, request)
95+
return NOT_DONE_YET
96+
97+
def _delayedRender(self, request):
98+
raw = getarg(request, 'raw', 'HTTP 1.1 200 OK\n')
99+
request.startedWriting = 1
100+
request.write(raw)
101+
request.channel.transport.loseConnection()
102+
request.finish()
103+
104+
87105
class Partial(DeferMixin, Resource):
88106

89107
isLeaf = True
@@ -92,6 +110,7 @@ def render_GET(self, request):
92110
request.setHeader("Content-Length", "1024")
93111
self.deferRequest(request, 0, self._delayedRender, request)
94112
return NOT_DONE_YET
113+
render_POST = render_GET
95114

96115
def _delayedRender(self, request):
97116
request.write("partial content\n")
@@ -119,6 +138,7 @@ def __init__(self):
119138
self.putChild("delay", Delay())
120139
self.putChild("partial", Partial())
121140
self.putChild("drop", Drop())
141+
self.putChild("raw", Raw())
122142

123143
def getChild(self, name, request):
124144
return self

scrapy/tests/test_crawl.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,33 @@ def test_retry_dns_error(self):
9090
yield docrawl(spider)
9191
self._assert_retried()
9292

93+
@defer.inlineCallbacks
94+
def test_unbounded_response(self):
95+
# Completeness of responses without Content-Length or Transfer-Encoding
96+
# cannot be determined, so we treat them as valid but flag them as "partial"
97+
from urllib import urlencode
98+
query = urlencode({'raw': '''\
99+
HTTP/1.1 200 OK
100+
Server: Apache-Coyote/1.1
101+
X-Powered-By: Servlet 2.4; JBoss-4.2.3.GA (build: SVNTag=JBoss_4_2_3_GA date=200807181417)/JBossWeb-2.0
102+
Set-Cookie: JSESSIONID=08515F572832D0E659FD2B0D8031D75F; Path=/
103+
Pragma: no-cache
104+
Expires: Thu, 01 Jan 1970 00:00:00 GMT
105+
Cache-Control: no-cache
106+
Cache-Control: no-store
107+
Content-Type: text/html;charset=UTF-8
108+
Content-Language: en
109+
Date: Tue, 27 Aug 2013 13:05:05 GMT
110+
Connection: close
111+
112+
foo body
113+
with multiples lines
114+
'''})
115+
spider = SimpleSpider("http://localhost:8998/raw?{}".format(query))
116+
yield docrawl(spider)
117+
log = get_testlog()
118+
self.assertEqual(log.count("Got response 200"), 1)
119+
93120
@defer.inlineCallbacks
94121
def test_retry_conn_lost(self):
95122
# connection lost after receiving data

0 commit comments

Comments
 (0)