|
5 | 5 | """
|
6 | 6 | import signal
|
7 | 7 |
|
8 |
| -from twisted.internet import reactor, threads |
| 8 | +from twisted.internet import reactor, threads, defer |
9 | 9 | from twisted.python import threadable
|
10 | 10 | from w3lib.url import any_to_uri
|
11 | 11 |
|
|
14 | 14 | from scrapy.selector import XPathSelector, XmlXPathSelector, HtmlXPathSelector
|
15 | 15 | from scrapy.utils.spider import create_spider_for_request
|
16 | 16 | from scrapy.utils.misc import load_object
|
17 |
| -from scrapy.utils.request import request_deferred |
18 | 17 | from scrapy.utils.response import open_in_browser
|
19 | 18 | from scrapy.utils.console import start_python_console
|
20 | 19 | from scrapy.settings import Settings
|
@@ -55,17 +54,20 @@ def start(self, url=None, request=None, response=None, spider=None):
|
55 | 54 |
|
def _schedule(self, request, spider):
    """Hand ``request`` to the engine and return a Deferred for its outcome.

    The spider is obtained via ``self._open_spider``. The returned Deferred
    comes from ``_request_deferred(request)`` and its result is turned into
    a ``(result, spider)`` tuple so the caller receives both at once.
    """
    active_spider = self._open_spider(request, spider)

    def _pair_with_spider(result):
        # Attach the spider that handled the request to the callback result.
        return (result, active_spider)

    # The request must be wrapped *before* it is passed to the engine, since
    # wrapping swaps the request's callback/errback for the Deferred's.
    deferred = _request_deferred(request)
    deferred.addCallback(_pair_with_spider)
    self.crawler.engine.crawl(request, active_spider)
    return deferred
|
62 | 61 |
|
63 | 62 | def _open_spider(self, request, spider):
|
64 | 63 | if self.spider:
|
65 | 64 | return self.spider
|
| 65 | + |
66 | 66 | if spider is None:
|
67 |
| - spider = create_spider_for_request(self.crawler.spiders, request, |
68 |
| - BaseSpider('default'), log_multiple=True) |
| 67 | + spider = create_spider_for_request(self.crawler.spiders, |
| 68 | + request, |
| 69 | + BaseSpider('default'), |
| 70 | + log_multiple=True) |
69 | 71 | spider.set_crawler(self.crawler)
|
70 | 72 | self.crawler.engine.open_spider(spider, close_if_idle=False)
|
71 | 73 | self.spider = spider
|
@@ -127,3 +129,30 @@ def inspect_response(response, spider=None):
|
127 | 129 | """Open a shell to inspect the given response"""
|
128 | 130 | from scrapy.project import crawler
|
129 | 131 | Shell(crawler).start(response=response, spider=spider)
|
| 132 | + |
| 133 | + |
def _request_deferred(request):
    """Wrap a request inside a Deferred.

    This function is harmful; do not use it unless you know what you are
    doing.

    The request's callback and errback are replaced by the ``callback`` and
    ``errback`` methods of the returned Deferred, so the Deferred triggers
    when the request callback/errback is executed (i.e. when the request is
    downloaded). When it fires, the Deferred first puts the original
    callback and errback back on the request; if the request had a callback,
    the original callback/errback pair runs next in the chain.

    WARNING: Do not call request.replace() until after the deferred is called.
    """
    original_callback, original_errback = request.callback, request.errback

    def _put_back(outcome):
        # Undo the swap performed below, then pass the result through
        # untouched so later callbacks/errbacks still see it.
        request.callback = original_callback
        request.errback = original_errback
        return outcome

    deferred = defer.Deferred()
    # Restoration is registered first so it runs on success and on failure
    # before anything else in the chain.
    deferred.addBoth(_put_back)
    if original_callback:
        deferred.addCallbacks(original_callback, original_errback)

    # From now on, whoever invokes the request's callback/errback actually
    # fires the Deferred.
    request.callback = deferred.callback
    request.errback = deferred.errback
    return deferred
0 commit comments