Commit f9b18f9

alexcepoi authored and dangra committed
port all scrapy commands to new CrawlerProcess
1 parent a5ffdf9 commit f9b18f9

12 files changed, with 47 additions and 31 deletions.
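
Every file in this commit makes the same move: a command no longer reaches for the implicit self.crawler attribute, but explicitly asks self.crawler_process for a crawler and then starts the process rather than the crawler. A minimal sketch of the resulting command shape, using only calls that appear in the diffs below (the command itself is illustrative, not one of the files changed here):

    from scrapy.command import ScrapyCommand

    class Command(ScrapyCommand):

        def short_desc(self):
            return "Illustrative command ported to the new CrawlerProcess API"

        def run(self, args, opts):
            # Ask the process for a crawler; the name argument is now optional.
            crawler = self.crawler_process.create_crawler()
            # Look up the requested spider through that crawler's spider manager.
            spider = crawler.spiders.create(args[0])
            # Schedule the spider on the crawler, then start the whole process.
            crawler.crawl(spider)
            self.crawler_process.start()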

scrapy/command.py (+1 -1)

@@ -34,7 +34,7 @@ def crawler(self):
             ScrapyDeprecationWarning)
 
         if not hasattr(self, '_crawler'):
-            crawler = self.crawler_process.create_crawler('default')
+            crawler = self.crawler_process.create_crawler()
 
             old_start = crawler.start
             self.crawler_process.started = False

scrapy/commands/bench.py (+3 -2)

@@ -16,5 +16,6 @@ def short_desc(self):
     def run(self, args, opts):
         with MockServer():
             spider = FollowAllSpider(total=100000)
-            self.crawler.crawl(spider)
-            self.crawler.start()
+            crawler = self.crawler_process.create_crawler()
+            crawler.crawl(spider)
+            self.crawler_process.start()

scrapy/commands/crawl.py (+5 -3)

@@ -43,6 +43,8 @@ def run(self, args, opts):
         elif len(args) > 1:
             raise UsageError("running 'scrapy crawl' with more than one spider is no longer supported")
         spname = args[0]
-        spider = self.crawler.spiders.create(spname, **opts.spargs)
-        self.crawler.crawl(spider)
-        self.crawler.start()
+
+        crawler = self.crawler_process.create_crawler()
+        spider = crawler.spiders.create(spname, **opts.spargs)
+        crawler.crawl(spider)
+        self.crawler_process.start()

scrapy/commands/edit.py (+4 -2)

@@ -24,9 +24,11 @@ def _err(self, msg):
     def run(self, args, opts):
         if len(args) != 1:
             raise UsageError()
-        editor = self.crawler.settings['EDITOR']
+
+        crawler = self.crawler_process.create_crawler()
+        editor = crawler.settings['EDITOR']
         try:
-            spider = self.crawler.spiders.create(args[0])
+            spider = crawler.spiders.create(args[0])
         except KeyError:
             return self._err("Spider not found: %s" % args[0])

scrapy/commands/fetch.py (+5 -5)

@@ -47,12 +47,12 @@ def run(self, args, opts):
         request = Request(args[0], callback=cb, dont_filter=True)
         request.meta['handle_httpstatus_all'] = True
 
+        crawler = self.crawler_process.create_crawler()
         spider = None
         if opts.spider:
-            spider = self.crawler.spiders.create(opts.spider)
+            spider = crawler.spiders.create(opts.spider)
         else:
-            spider = create_spider_for_request(self.crawler.spiders, request, \
+            spider = create_spider_for_request(crawler.spiders, request, \
                 default_spider=BaseSpider('default'))
-        self.crawler.crawl(spider, [request])
-        self.crawler.start()
-
+        crawler.crawl(spider, [request])
+        self.crawler_process.start()

scrapy/commands/genspider.py (+2 -1)

@@ -62,7 +62,8 @@ def run(self, args, opts):
             return
 
         try:
-            spider = self.crawler.spiders.create(name)
+            crawler = self.crawler_process.create_crawler()
+            spider = crawler.spiders.create(name)
         except KeyError:
             pass
         else:

scrapy/commands/list.py (+2 -1)

@@ -9,5 +9,6 @@ def short_desc(self):
         return "List available spiders"
 
     def run(self, args, opts):
-        for s in self.crawler.spiders.list():
+        crawler = self.crawler_process.create_crawler()
+        for s in crawler.spiders.list():
             print s

scrapy/commands/parse.py (+6 -5)

@@ -125,12 +125,12 @@ def get_callback_from_rules(self, response):
     def set_spider(self, url, opts):
         if opts.spider:
             try:
-                self.spider = self.crawler.spiders.create(opts.spider, **opts.spargs)
+                self.spider = self.pcrawler.spiders.create(opts.spider, **opts.spargs)
             except KeyError:
                 log.msg(format='Unable to find spider: %(spider)s',
                         level=log.ERROR, spider=opts.spider)
         else:
-            self.spider = create_spider_for_request(self.crawler.spiders, Request(url), **opts.spargs)
+            self.spider = create_spider_for_request(self.pcrawler.spiders, Request(url), **opts.spargs)
             if not self.spider:
                 log.msg(format='Unable to find spider for: %(url)s',
                         level=log.ERROR, url=url)
@@ -139,8 +139,8 @@ def start_parsing(self, url, opts):
         request = Request(url, opts.callback)
         request = self.prepare_request(request, opts)
 
-        self.crawler.crawl(self.spider, [request])
-        self.crawler.start()
+        self.pcrawler.crawl(self.spider, [request])
+        self.crawler_process.start()
 
         if not self.first_response:
             log.msg(format='No response downloaded for: %(request)s',
@@ -174,7 +174,7 @@ def callback(response):
 
             items, requests = self.run_callback(response, cb)
             if opts.pipelines:
-                itemproc = self.crawler.engine.scraper.itemproc
+                itemproc = self.pcrawler.engine.scraper.itemproc
                 for item in items:
                     itemproc.process_item(item, self.spider)
             self.add_items(depth, items)
@@ -207,6 +207,7 @@ def run(self, args, opts):
         url = args[0]
 
         # prepare spider
+        self.pcrawler = self.crawler_process.create_crawler()
         self.set_spider(url, opts)
 
         if self.spider and opts.depth > 0:

scrapy/commands/runspider.py (+3 -2)

@@ -74,5 +74,6 @@ def run(self, args, opts):
             raise UsageError("No spider found in file: %s\n" % filename)
         spider = spclasses.pop()(**opts.spargs)
 
-        self.crawler.crawl(spider)
-        self.crawler.start()
+        crawler = self.crawler_process.create_crawler()
+        crawler.crawl(spider)
+        self.crawler_process.start()

scrapy/commands/settings.py (+2 -1)

@@ -25,7 +25,8 @@ def add_options(self, parser):
             help="print setting value, intepreted as an float")
 
     def run(self, args, opts):
-        settings = self.crawler.settings
+        crawler = self.crawler_process.create_crawler()
+        settings = crawler.settings
         if opts.get:
             print settings.get(opts.get)
         elif opts.getbool:

scrapy/commands/shell.py (+8 -6)

@@ -8,7 +8,7 @@
 
 from scrapy.command import ScrapyCommand
 from scrapy.shell import Shell
-from scrapy import log
+
 
 class Command(ScrapyCommand):
 
@@ -38,15 +38,17 @@ def update_vars(self, vars):
         pass
 
     def run(self, args, opts):
+        crawler = self.crawler_process.create_crawler()
+
         url = args[0] if args else None
-        spider = None
-        if opts.spider:
-            spider = self.crawler.spiders.create(opts.spider)
-        shell = Shell(self.crawler, update_vars=self.update_vars, code=opts.code)
+        spider = crawler.spiders.create(opts.spider) if opts.spider else None
+
+        shell = Shell(crawler, update_vars=self.update_vars, code=opts.code)
         self._start_crawler_thread()
         shell.start(url=url, spider=spider)
 
     def _start_crawler_thread(self):
-        t = Thread(target=self.crawler.start)
+        self.crawler_process.print_headers()
+        t = Thread(target=self.crawler_process.start, kwargs={'headers': False})
         t.daemon = True
         t.start()
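
The shell is the one command that cannot block on crawler_process.start(): the interactive shell keeps the main thread, so the crawl runs in a daemon thread. That is why start() grows a headers flag and the banner moves into print_headers(): the banner is printed once from the main thread and then suppressed inside the thread. A rough sketch of that pattern (the helper name is made up for illustration):

    from threading import Thread

    def start_process_in_background(crawler_process):
        # Print the startup banner from the main thread...
        crawler_process.print_headers()
        # ...then run the process in a daemon thread without printing it again.
        t = Thread(target=crawler_process.start, kwargs={'headers': False})
        t.daemon = True
        t.start()
        return t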

scrapy/crawler.py (+6 -2)

@@ -126,7 +126,7 @@ def __init__(self, settings):
         self.crawlers = {}
         self.stopping = False
 
-    def create_crawler(self, name):
+    def create_crawler(self, name=None):
         if name not in self.crawlers:
             crawler = Crawler(self.settings)
             crawler.configure()
@@ -155,8 +155,12 @@ def check_done(self, **kwargs):
         else:
             self._stop_reactor()
 
-    def start(self):
+    def print_headers(self):
         log.scrapy_info(self.settings)
+
+    def start(self, headers=True):
+        if headers:
+            self.print_headers()
         return super(CrawlerProcess, self).start()
 
     @defer.inlineCallbacks
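
On the CrawlerProcess side, create_crawler() no longer requires a crawler name, and start() gains a headers flag, with the banner printing split out into print_headers(). A rough sketch of driving the ported API directly from a script, assuming a project settings object (the get_project_settings import is an assumption, not part of this diff):

    from scrapy.crawler import CrawlerProcess
    from scrapy.utils.project import get_project_settings  # assumed helper, not shown in this diff

    process = CrawlerProcess(get_project_settings())
    crawler = process.create_crawler()          # name now defaults to None
    spider = crawler.spiders.create('example')  # 'example' is a placeholder spider name
    crawler.crawl(spider)
    process.start()                             # headers=True by default: print the banner, run the reactor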