Commit a5ffdf9

alexcepoi authored and dangra committed
default to multi crawler commands
compatibility for old single crawler process
deprecated command's `crawler` property
logs about settings only shown once
1 parent 97d1972 commit a5ffdf9

5 files changed: 64 additions, 61 deletions
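
To see what the change amounts to from the outside: there is now a single
process class, CrawlerProcess, which hands out named Crawler objects and runs
them one after another. A minimal sketch, not part of this commit, assuming
get_project_settings() is available in this version of Scrapy; the spider is
made up for illustration:

from scrapy.spider import BaseSpider
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings   # assumed helper

class ExampleSpider(BaseSpider):
    name = 'example'
    start_urls = ['http://example.com/']

    def parse(self, response):
        pass

settings = get_project_settings()
process = CrawlerProcess(settings)            # the only process class now

crawler = process.create_crawler('default')  # crawler is configured here
crawler.crawl(ExampleSpider())
process.start()                               # logs settings once, runs, stops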

scrapy/cmdline.py

Lines changed: 2 additions & 9 deletions
@@ -5,7 +5,7 @@
 import pkg_resources
 
 import scrapy
-from scrapy.crawler import CrawlerProcess, MultiCrawlerProcess
+from scrapy.crawler import CrawlerProcess
 from scrapy.xlib import lsprofcalltree
 from scrapy.command import ScrapyCommand
 from scrapy.exceptions import UsageError
@@ -138,14 +138,7 @@ def execute(argv=None, settings=None):
     opts, args = parser.parse_args(args=argv[1:])
     _run_print_help(parser, cmd.process_options, args, opts)
 
-    if cmd.multi_crawlers:
-        process = MultiCrawlerProcess(settings)
-        cmd.process = process
-    else:
-        process = CrawlerProcess(settings)
-        process.install()
-        cmd.set_crawler(process)
-
+    cmd.crawler_process = CrawlerProcess(settings)
     _run_print_help(parser, _run_command, cmd, args, opts)
     sys.exit(cmd.exitcode)

scrapy/command.py

Lines changed: 21 additions & 6 deletions
@@ -3,17 +3,17 @@
 """
 
 import os
+import warnings
 from optparse import OptionGroup
 from twisted.python import failure
 
-from scrapy import log
 from scrapy.utils.conf import arglist_to_dict
-from scrapy.exceptions import UsageError
+from scrapy.exceptions import UsageError, ScrapyDeprecationWarning
 
 class ScrapyCommand(object):
 
     requires_project = False
-    multi_crawlers = False
+    crawler_process = None
 
     # default settings to be used for this command instead of global defaults
     default_settings = {}
@@ -29,9 +29,24 @@ def set_crawler(self, crawler):
 
     @property
     def crawler(self):
-        if not self.multi_crawlers and not self._crawler.configured:
-            log.start_from_crawler(self._crawler)
-            self._crawler.configure()
+        warnings.warn("Command's default `crawler` is deprecated and will be removed. "
+                      "Use `create_crawler` method to instantiate crawlers.",
+                      ScrapyDeprecationWarning)
+
+        if not hasattr(self, '_crawler'):
+            crawler = self.crawler_process.create_crawler('default')
+
+            old_start = crawler.start
+            self.crawler_process.started = False
+            def wrapped_start():
+                if self.crawler_process.started:
+                    old_start()
+                else:
+                    self.crawler_process.started = True
+                    self.crawler_process.start()
+            crawler.start = wrapped_start
+
+            self.set_crawler(crawler)
 
         return self._crawler
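
The deprecated `crawler` property above keeps old single-crawler commands
working: on first access it creates a 'default' crawler from crawler_process
and patches that crawler's start() so that starting it actually starts the
whole process, exactly once. New-style commands are meant to use
crawler_process directly, roughly like this sketch (the command class and the
spider lookup are illustrative, not part of the commit):

from scrapy.command import ScrapyCommand

class MyCommand(ScrapyCommand):
    """Illustrative command written against the new crawler_process."""

    def run(self, args, opts):
        # Old style, now deprecated (goes through the `crawler` property):
        #     self.crawler.crawl(spider)
        #     self.crawler.start()

        # New style: ask the process for a named crawler, queue work on it,
        # then start the process once.
        crawler = self.crawler_process.create_crawler('default')
        spider = crawler.spiders.create(args[0])   # spider name from the CLI
        crawler.crawl(spider)
        self.crawler_process.start()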

scrapy/commands/check.py

Lines changed: 2 additions & 3 deletions
@@ -20,7 +20,6 @@ def wrapper(response):
 
 class Command(ScrapyCommand):
     requires_project = True
-    multi_crawlers = True
     default_settings = {'LOG_ENABLED': False}
 
     def syntax(self):
@@ -59,7 +58,7 @@ def run(self, args, opts):
                 for req in requests:
                     contract_reqs[spider.name].append(req.callback.__name__)
             elif requests:
-                crawler = self.process.create_crawler(spider.name)
+                crawler = self.crawler_process.create_crawler(spider.name)
                 crawler.crawl(spider, requests)
 
         # start checks
@@ -69,7 +68,7 @@ def run(self, args, opts):
                 for method in sorted(methods):
                     print ' * %s' % method
         else:
            self.crawler_process.start()
        self.results.printErrors()
 
    def get_requests(self, spider):
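
The check command illustrates the pattern for commands that drive several
spiders: one named crawler per spider, work queued with crawl(), and a single
start() that runs them back to back. A hedged sketch of that pattern outside
the command (settings, SpiderA and SpiderB are assumed to exist already):

from scrapy.crawler import CrawlerProcess

process = CrawlerProcess(settings)

for name, spider in [('spider_a', SpiderA()), ('spider_b', SpiderB())]:
    crawler = process.create_crawler(name)   # one configured crawler per name
    crawler.crawl(spider)                    # queue the spider on its crawler

process.start()   # crawlers run sequentially, then the reactor stops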

scrapy/crawler.py

Lines changed: 20 additions & 32 deletions
@@ -8,7 +8,6 @@
 from scrapy.signalmanager import SignalManager
 from scrapy.utils.ossignal import install_shutdown_handlers, signal_names
 from scrapy.utils.misc import load_object
-from scrapy.settings import overridden_settings
 from scrapy import log, signals
 
 
@@ -35,9 +34,8 @@ def uninstall(self):
     def configure(self):
         if self.configured:
             return
+
         self.configured = True
-        d = dict(overridden_settings(self.settings))
-        log.msg(format="Overridden settings: %(settings)r", settings=d, level=log.DEBUG)
         lf_cls = load_object(self.settings['LOG_FORMATTER'])
         self.logformatter = lf_cls.from_crawler(self)
         self.extensions = ExtensionManager.from_crawler(self)
@@ -84,11 +82,15 @@ def __init__(self, *a, **kw):
         install_shutdown_handlers(self._signal_shutdown)
 
     def start(self):
+        self.start_crawling()
         if self.settings.getbool('DNSCACHE_ENABLED'):
             reactor.installResolver(CachingThreadedResolver(reactor))
         reactor.addSystemEventTrigger('before', 'shutdown', self.stop)
         reactor.run(installSignalHandlers=False)  # blocking call
 
+    def start_crawling(self):
+        raise NotImplementedError
+
     def stop(self):
         raise NotImplementedError
 
@@ -113,48 +115,34 @@ def _signal_kill(self, signum, _):
         reactor.callFromThread(self._stop_reactor)
 
 
-class CrawlerProcess(Crawler, ProcessMixin):
-    """ A class to run a single Scrapy crawler in a process
-    """
-
-    def __init__(self, *a, **kw):
-        Crawler.__init__(self, *a, **kw)
-        ProcessMixin.__init__(self, *a, **kw)
-        self.signals.connect(self.stop, signals.engine_stopped)
-
-    def start(self):
-        Crawler.start(self)
-        ProcessMixin.start(self)
-
-    def stop(self):
-        d = Crawler.stop(self)
-        d.addBoth(self._stop_reactor)
-        return d
-
-
-class MultiCrawlerProcess(ProcessMixin):
+class CrawlerProcess(ProcessMixin):
     """ A class to run multiple scrapy crawlers in a process sequentially
     """
 
     def __init__(self, settings):
-        super(MultiCrawlerProcess, self).__init__(settings)
+        super(CrawlerProcess, self).__init__(settings)
 
         self.settings = settings
         self.crawlers = {}
         self.stopping = False
 
     def create_crawler(self, name):
         if name not in self.crawlers:
-            self.crawlers[name] = Crawler(self.settings)
+            crawler = Crawler(self.settings)
+            crawler.configure()
+
+            self.crawlers[name] = crawler
 
         return self.crawlers[name]
 
-    def start_crawler(self):
+    def start_crawling(self):
         name, crawler = self.crawlers.popitem()
 
-        crawler.sflo = log.start_from_crawler(crawler)
-        if crawler.sflo:
-            crawler.signals.connect(crawler.sflo.stop, signals.engine_stopped)
+        sflo = log.start_from_crawler(crawler)
+        crawler.install()
+        crawler.signals.connect(crawler.uninstall, signals.engine_stopped)
+        if sflo:
+            crawler.signals.connect(sflo.stop, signals.engine_stopped)
 
         crawler.signals.connect(self.check_done, signals.engine_stopped)
         crawler.start()
@@ -163,13 +151,13 @@ def start_crawler(self):
 
     def check_done(self, **kwargs):
         if self.crawlers and not self.stopping:
-            self.start_crawler()
+            self.start_crawling()
         else:
             self._stop_reactor()
 
     def start(self):
-        self.start_crawler()
-        super(MultiCrawlerProcess, self).start()
+        log.scrapy_info(self.settings)
+        return super(CrawlerProcess, self).start()
 
     @defer.inlineCallbacks
     def stop(self):
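
The sequencing relies on Twisted signals: start_crawling() pops one crawler
and starts it, engine_stopped fires check_done(), and check_done() either
starts the next crawler or stops the reactor. A standalone, simplified
illustration of that chaining pattern (plain Twisted, not Scrapy code;
Python 2 syntax to match the codebase):

from twisted.internet import reactor

jobs = ['first', 'second', 'third']   # stand-ins for queued crawlers

def run_next():
    # Mirrors check_done(): start the next job, or stop the reactor when done.
    if not jobs:
        reactor.stop()
        return
    name = jobs.pop()
    print 'running %s' % name
    # A real crawler fires engine_stopped when it finishes; here we simply
    # schedule the completion callback a moment later.
    reactor.callLater(0.1, run_next)

reactor.callWhenRunning(run_next)
reactor.run()   # blocks until the last job stops the reactor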

scrapy/log.py

Lines changed: 19 additions & 11 deletions
@@ -11,6 +11,7 @@
 
 import scrapy
 from scrapy.utils.python import unicode_to_str
+from scrapy.settings import overridden_settings
 
 # Logging levels
 DEBUG = logging.DEBUG
@@ -133,17 +134,24 @@ def err(_stuff=None, _why=None, **kw):
     kw.setdefault('system', 'scrapy')
     log.err(_stuff, _why, **kw)
 
-def start_from_crawler(crawler):
-    settings = crawler.settings
-    if not settings.getbool('LOG_ENABLED'):
-        return
+def start_from_settings(settings, crawler=None):
+    if settings.getbool('LOG_ENABLED'):
+        return start(settings['LOG_FILE'], settings['LOG_LEVEL'], settings['LOG_STDOUT'],
+            settings['LOG_ENCODING'], crawler)
 
-    sflo = start(settings['LOG_FILE'], settings['LOG_LEVEL'], settings['LOG_STDOUT'],
-        settings['LOG_ENCODING'], crawler)
+def scrapy_info(settings):
+    sflo = start_from_settings(settings)
+    if sflo:
+        msg("Scrapy %s started (bot: %s)" % (scrapy.__version__, \
+            settings['BOT_NAME']))
 
-    msg("Scrapy %s started (bot: %s)" % (scrapy.__version__, \
-        settings['BOT_NAME']))
-    msg("Optional features available: %s" % ", ".join(scrapy.optional_features),
-        level=DEBUG)
+        msg("Optional features available: %s" % ", ".join(scrapy.optional_features),
+            level=DEBUG)
 
-    return sflo
+        d = dict(overridden_settings(settings))
+        msg(format="Overridden settings: %(settings)r", settings=d, level=DEBUG)
+
+        sflo.stop()
+
+def start_from_crawler(crawler, print_headers=False):
+    return start_from_settings(crawler.settings, crawler)
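
With this split, process-wide log output happens exactly once and per-crawler
logging is attached separately. A short sketch of how the two helpers divide
the work (assuming `settings` and an already-configured `crawler` exist):

from scrapy import log

# Called once per process, from CrawlerProcess.start(): prints the Scrapy
# version, optional features and overridden settings, then stops its
# temporary observer so these lines are never repeated per crawler.
log.scrapy_info(settings)

# Called once per crawler, from start_crawling(): returns the log observer
# (or None when LOG_ENABLED is false) so it can be stopped on engine_stopped.
sflo = log.start_from_crawler(crawler)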
