Skip to content

Commit d311fef

Browse files
alexcepoidangra
authored and committed
fix some missing logs with new crawlerprocess
1 parent f9b18f9 commit d311fef

File tree

2 files changed

+19
-12
lines changed

2 files changed

+19
-12
lines changed

scrapy/commands/settings.py

+1
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ def add_options(self, parser):
2626

2727
def run(self, args, opts):
2828
crawler = self.crawler_process.create_crawler()
29+
crawler.configure()
2930
settings = crawler.settings
3031
if opts.get:
3132
print settings.get(opts.get)

scrapy/crawler.py

+18-12
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,9 @@ def __init__(self, settings):
1919
self.signals = SignalManager(self)
2020
self.stats = load_object(settings['STATS_CLASS'])(self)
2121

22+
spman_cls = load_object(self.settings['SPIDER_MANAGER_CLASS'])
23+
self.spiders = spman_cls.from_crawler(self)
24+
2225
self.scheduled = {}
2326

2427
def install(self):
@@ -39,20 +42,25 @@ def configure(self):
3942
lf_cls = load_object(self.settings['LOG_FORMATTER'])
4043
self.logformatter = lf_cls.from_crawler(self)
4144
self.extensions = ExtensionManager.from_crawler(self)
42-
spman_cls = load_object(self.settings['SPIDER_MANAGER_CLASS'])
43-
self.spiders = spman_cls.from_crawler(self)
4445
self.engine = ExecutionEngine(self, self._spider_closed)
4546

4647
def crawl(self, spider, requests=None):
4748
spider.set_crawler(self)
48-
if requests is None:
49-
requests = spider.start_requests()
5049

5150
if self.configured and self.engine.running:
5251
assert not self.scheduled
53-
return self.engine.open_spider(spider, requests)
52+
return self.schedule(spider, requests)
5453
else:
55-
self.scheduled.setdefault(spider, []).extend(requests)
54+
self.scheduled.setdefault(spider, []).append(requests)
55+
56+
def schedule(self, spider, batches=[]):
57+
requests = []
58+
for batch in batches:
59+
if batch is None:
60+
batch = spider.start_requests()
61+
requests.extend(batch)
62+
63+
return self.engine.open_spider(spider, requests)
5664

5765
def _spider_closed(self, spider=None):
5866
if not self.engine.open_spiders:
@@ -62,8 +70,8 @@ def _spider_closed(self, spider=None):
6270
def start(self):
6371
yield defer.maybeDeferred(self.configure)
6472

65-
for spider, requests in self.scheduled.iteritems():
66-
yield self.engine.open_spider(spider, requests)
73+
for spider, batches in self.scheduled.iteritems():
74+
yield self.schedule(spider, batches)
6775

6876
yield defer.maybeDeferred(self.engine.start)
6977

@@ -128,17 +136,15 @@ def __init__(self, settings):
128136

129137
def create_crawler(self, name=None):
130138
if name not in self.crawlers:
131-
crawler = Crawler(self.settings)
132-
crawler.configure()
133-
134-
self.crawlers[name] = crawler
139+
self.crawlers[name] = Crawler(self.settings)
135140

136141
return self.crawlers[name]
137142

138143
def start_crawling(self):
139144
name, crawler = self.crawlers.popitem()
140145

141146
sflo = log.start_from_crawler(crawler)
147+
crawler.configure()
142148
crawler.install()
143149
crawler.signals.connect(crawler.uninstall, signals.engine_stopped)
144150
if sflo:

0 commit comments

Comments (0)