Skip to content

Commit b1d8919

Browse files
committed
Fix AlreadyCalledError replacing a request in shell command. closes scrapy#407
1 parent 89faf52 commit b1d8919

File tree

3 files changed

+45
-22
lines changed

3 files changed

+45
-22
lines changed

scrapy/shell.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
"""
66
import signal
77

8-
from twisted.internet import reactor, threads
8+
from twisted.internet import reactor, threads, defer
99
from twisted.python import threadable
1010
from w3lib.url import any_to_uri
1111

@@ -14,7 +14,6 @@
1414
from scrapy.selector import XPathSelector, XmlXPathSelector, HtmlXPathSelector
1515
from scrapy.utils.spider import create_spider_for_request
1616
from scrapy.utils.misc import load_object
17-
from scrapy.utils.request import request_deferred
1817
from scrapy.utils.response import open_in_browser
1918
from scrapy.utils.console import start_python_console
2019
from scrapy.settings import Settings
@@ -55,17 +54,20 @@ def start(self, url=None, request=None, response=None, spider=None):
5554

5655
def _schedule(self, request, spider):
5756
spider = self._open_spider(request, spider)
58-
d = request_deferred(request)
57+
d = _request_deferred(request)
5958
d.addCallback(lambda x: (x, spider))
6059
self.crawler.engine.crawl(request, spider)
6160
return d
6261

6362
def _open_spider(self, request, spider):
6463
if self.spider:
6564
return self.spider
65+
6666
if spider is None:
67-
spider = create_spider_for_request(self.crawler.spiders, request,
68-
BaseSpider('default'), log_multiple=True)
67+
spider = create_spider_for_request(self.crawler.spiders,
68+
request,
69+
BaseSpider('default'),
70+
log_multiple=True)
6971
spider.set_crawler(self.crawler)
7072
self.crawler.engine.open_spider(spider, close_if_idle=False)
7173
self.spider = spider
@@ -127,3 +129,30 @@ def inspect_response(response, spider=None):
127129
"""Open a shell to inspect the given response"""
128130
from scrapy.project import crawler
129131
Shell(crawler).start(response=response, spider=spider)
132+
133+
134+
def _request_deferred(request):
    """Wrap *request* in a Deferred.

    This function is harmful; do not use it unless you know what you
    are doing.

    The returned Deferred gets the request's own callback/errback
    installed as its first callback pair, and it fires when the request's
    callback/errback is invoked (i.e. when the request is downloaded).

    The request's original ``callback``/``errback`` attributes are put
    back as soon as the Deferred fires, so do not call
    ``request.replace()`` until after the Deferred is called.
    """
    original_callbacks = (request.callback, request.errback)

    def _restore(result):
        # Undo the monkey-patch below so that copies made via
        # request.replace() do not point at an already-fired Deferred.
        request.callback, request.errback = original_callbacks
        return result

    d = defer.Deferred()
    d.addBoth(_restore)
    if request.callback:
        # Keep the user's callbacks as the first pair in the chain.
        d.addCallbacks(request.callback, request.errback)
    request.callback, request.errback = d.callback, d.errback
    return d

scrapy/tests/test_command_shell.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,9 @@ def test_redirect(self):
4545
_, out, _ = yield self.execute([self.url('/redirect'), '-c', 'response.url'])
4646
assert out.strip().endswith('/redirected')
4747

48+
@defer.inlineCallbacks
49+
def test_request_replace(self):
50+
url = self.url('/text')
51+
code = "fetch('{0}') or fetch(response.request.replace(method='POST'))"
52+
errcode, out, _ = yield self.execute(['-c', code.format(url)])
53+
self.assertEqual(errcode, 0, out)

scrapy/utils/request.py

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,10 @@
1818
def request_fingerprint(request, include_headers=None):
1919
"""
2020
Return the request fingerprint.
21-
21+
2222
The request fingerprint is a hash that uniquely identifies the resource the
2323
request points to. For example, take the following two urls:
24-
24+
2525
http://www.example.com/query?id=111&cat=222
2626
http://www.example.com/query?cat=222&id=111
2727
@@ -30,13 +30,13 @@ def request_fingerprint(request, include_headers=None):
3030
3131
Another example are cookies used to store session ids. Suppose the
3232
following page is only accessible to authenticated users:
33-
33+
3434
http://www.example.com/members/offers.html
3535
3636
Lots of sites use a cookie to store the session id, which adds a random
3737
component to the HTTP Request and thus should be ignored when calculating
38-
the fingerprint.
39-
38+
the fingerprint.
39+
4040
For this reason, request headers are ignored by default when calculating
4141
the fingeprint. If you want to include specific headers use the
4242
include_headers argument, which is a list of Request headers to include.
@@ -81,15 +81,3 @@ def request_httprepr(request):
8181
s += request.body
8282
return s
8383

84-
def request_deferred(request):
85-
"""Wrap a request inside a Deferred.
86-
87-
This returns a Deferred whose first pair of callbacks are the request
88-
callback and errback. The Deferred also triggers when the request
89-
callback/errback is executed (ie. when the request is downloaded)
90-
"""
91-
d = Deferred()
92-
if request.callback:
93-
d.addCallbacks(request.callback, request.errback)
94-
request.callback, request.errback = d.callback, d.errback
95-
return d

0 commit comments

Comments (0)