Skip to content

Commit 78a4052

Browse files
committed
scrapy shell: start the shell in the main thread and the crawler in a secondary thread, instead of the other way around. Fixes scrapy#100.
1 parent 7bbeadb commit 78a4052

File tree

2 files changed

+14
-18
lines changed

2 files changed

+14
-18
lines changed

scrapy/commands/shell.py

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44
See documentation in docs/topics/shell.rst
55
"""
66

7+
from threading import Thread
8+
79
from scrapy.command import ScrapyCommand
810
from scrapy.shell import Shell
911
from scrapy import log
@@ -35,12 +37,11 @@ def update_vars(self, vars):
3537

3638
def run(self, args, opts):
3739
url = args[0] if args else None
38-
shell = Shell(self.crawler, update_vars=self.update_vars, inthread=True, \
39-
code=opts.code)
40-
def err(f):
41-
log.err(f, "Shell error")
42-
self.exitcode = 1
43-
d = shell.start(url=url)
44-
d.addErrback(err)
45-
d.addBoth(lambda _: self.crawler.stop())
46-
self.crawler.start()
40+
shell = Shell(self.crawler, update_vars=self.update_vars, code=opts.code)
41+
self._start_crawler_thread()
42+
shell.start(url=url)
43+
44+
def _start_crawler_thread(self):
45+
t = Thread(target=self.crawler.start)
46+
t.daemon = True
47+
t.start()

scrapy/shell.py

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
import signal
88

99
from twisted.internet import reactor, threads
10+
from twisted.python import threadable
1011
from w3lib.url import any_to_uri
1112

1213
from scrapy.item import BaseItem
@@ -25,24 +26,18 @@ class Shell(object):
2526
relevant_classes = (BaseSpider, Request, Response, BaseItem, \
2627
XPathSelector, Settings)
2728

28-
def __init__(self, crawler, update_vars=None, inthread=False, code=None):
29+
def __init__(self, crawler, update_vars=None, code=None):
2930
self.crawler = crawler
3031
self.update_vars = update_vars or (lambda x: None)
3132
self.item_class = load_object(crawler.settings['DEFAULT_ITEM_CLASS'])
3233
self.spider = None
33-
self.inthread = inthread
34+
self.inthread = not threadable.isInIOThread()
3435
self.code = code
3536
self.vars = {}
3637

37-
def start(self, *a, **kw):
38+
def start(self, url=None, request=None, response=None, spider=None):
3839
# disable accidental Ctrl-C key press from shutting down the engine
3940
signal.signal(signal.SIGINT, signal.SIG_IGN)
40-
if self.inthread:
41-
return threads.deferToThread(self._start, *a, **kw)
42-
else:
43-
self._start(*a, **kw)
44-
45-
def _start(self, url=None, request=None, response=None, spider=None):
4641
if url:
4742
self.fetch(url, spider)
4843
elif request:

0 commit comments

Comments
 (0)