Skip to content

Commit 513ba7a

Browse files
committed
Merge pull request scrapy#1800 from redapple/http11-post-content-length
[MRG+1] Add "Content-Length: 0" for body-less HTTP/1.1 POST requests
2 parents 329a77a + ac8f97c commit 513ba7a

File tree

2 files changed

+47
-1
lines changed

2 files changed

+47
-1
lines changed

scrapy/core/downloader/handlers/http11.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,21 @@ def download_request(self, request):
211211
headers = TxHeaders(request.headers)
212212
if isinstance(agent, self._TunnelingAgent):
213213
headers.removeHeader(b'Proxy-Authorization')
214-
bodyproducer = _RequestBodyProducer(request.body) if request.body else None
214+
if request.body:
215+
bodyproducer = _RequestBodyProducer(request.body)
216+
else:
217+
bodyproducer = None
218+
# Setting Content-Length: 0 even for POST requests is not a
219+
# MUST per HTTP RFCs, but it's common behavior, and some
220+
# servers require this, otherwise returning HTTP 411 Length required
221+
#
222+
# RFC 7230#section-3.3.2:
223+
# "a Content-Length header field is normally sent in a POST
224+
# request even when the value is 0 (indicating an empty payload body)."
225+
#
226+
# Twisted Agent will not add "Content-Length: 0" by itself
227+
if method == b'POST':
228+
headers.addRawHeader(b'Content-Length', b'0')
215229

216230
start_time = time()
217231
d = agent.request(

tests/test_downloader_handlers.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,15 @@ def test_non_existent(self):
105105
return self.assertFailure(d, IOError)
106106

107107

108+
class ContentLengthHeaderResource(resource.Resource):
    """Testing resource that echoes back the request's Content-Length.

    The rendered response body is the first raw value of the
    ``Content-Length`` header found in the incoming request.
    """

    def render(self, request):
        # Look up every raw value sent for the header, then answer with
        # the first one.
        raw_values = request.requestHeaders.getRawHeaders(b"content-length")
        return raw_values[0]
115+
116+
108117
class HttpTestCase(unittest.TestCase):
109118

110119
scheme = 'http'
@@ -122,6 +131,7 @@ def setUp(self):
122131
r.putChild(b"host", HostHeaderResource())
123132
r.putChild(b"payload", PayloadResource())
124133
r.putChild(b"broken", BrokenDownloadResource())
134+
r.putChild(b"contentlength", ContentLengthHeaderResource())
125135
self.site = server.Site(r, timeout=None)
126136
self.wrapper = WrappingFactory(self.site)
127137
self.host = 'localhost'
@@ -209,6 +219,28 @@ def _test(response):
209219
d.addCallback(self.assertEquals, b'example.com')
210220
return d
211221

222+
def test_content_length_zero_bodyless_post_request_headers(self):
    """Check that "Content-Length: 0" is sent for body-less POST requests.

    The header is not strictly required by the HTTP RFCs, but omitting it
    can cause trouble with some web servers.

    The request targets the ``contentlength`` resource, which renders the
    value of the Content-Length header it received, so the response body
    is expected to be ``b'0'``.

    See:
    https://github.com/scrapy/scrapy/issues/823
    https://issues.apache.org/jira/browse/TS-2902
    https://github.com/kennethreitz/requests/issues/405
    https://bugs.python.org/issue14721

    Returns the deferred produced by ``self.download_request`` with the
    body assertion attached as a callback.
    """
    def _test(response):
        self.assertEqual(response.body, b'0')

    request = Request(
        self.getURL('contentlength'),
        method='POST',
        headers={'Host': 'example.com'},
    )
    # A single download is enough: the previous version carried a second,
    # unreachable download/assert sequence after this return statement.
    d = self.download_request(request, Spider('foo'))
    d.addCallback(_test)
    return d
243+
212244
def test_payload(self):
213245
body = b'1'*100 # PayloadResource requires body length to be 100
214246
request = Request(self.getURL('payload'), method='POST', body=body)

0 commit comments

Comments
 (0)