@@ -19,6 +19,9 @@ def __init__(self, settings):
         self.signals = SignalManager(self)
         self.stats = load_object(settings['STATS_CLASS'])(self)
 
+        spman_cls = load_object(self.settings['SPIDER_MANAGER_CLASS'])
+        self.spiders = spman_cls.from_crawler(self)
+
         self.scheduled = {}
 
     def install(self):
@@ -39,20 +42,25 @@ def configure(self):
         lf_cls = load_object(self.settings['LOG_FORMATTER'])
         self.logformatter = lf_cls.from_crawler(self)
         self.extensions = ExtensionManager.from_crawler(self)
-        spman_cls = load_object(self.settings['SPIDER_MANAGER_CLASS'])
-        self.spiders = spman_cls.from_crawler(self)
         self.engine = ExecutionEngine(self, self._spider_closed)
 
     def crawl(self, spider, requests=None):
         spider.set_crawler(self)
-        if requests is None:
-            requests = spider.start_requests()
 
         if self.configured and self.engine.running:
             assert not self.scheduled
-            return self.engine.open_spider(spider, requests)
+            return self.schedule(spider, requests)
         else:
-            self.scheduled.setdefault(spider, []).extend(requests)
+            self.scheduled.setdefault(spider, []).append(requests)
+
+    def schedule(self, spider, batches=[]):
+        requests = []
+        for batch in batches:
+            if batch is None:
+                batch = spider.start_requests()
+            requests.extend(batch)
+
+        return self.engine.open_spider(spider, requests)
 
     def _spider_closed(self, spider=None):
         if not self.engine.open_spiders:
@@ -62,8 +70,8 @@ def _spider_closed(self, spider=None):
     def start(self):
         yield defer.maybeDeferred(self.configure)
 
-        for spider, requests in self.scheduled.iteritems():
-            yield self.engine.open_spider(spider, requests)
+        for spider, batches in self.scheduled.iteritems():
+            yield self.schedule(spider, batches)
 
         yield defer.maybeDeferred(self.engine.start)
 
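The net effect of the hunks above is that crawl() no longer resolves requests eagerly: each call appends one batch (possibly None) per spider, and schedule() flattens the queued batches, expanding a None batch into spider.start_requests() only when the spider is opened. A minimal standalone sketch of that queueing behaviour, using a stub spider rather than Scrapy's real classes:

# Standalone sketch of the batching semantics above (stubs, not Scrapy API).

class StubSpider:
    """Stand-in for a spider; start_requests() supplies default requests."""
    def start_requests(self):
        return ['default-req-1', 'default-req-2']

scheduled = {}

def crawl(spider, requests=None):
    # Mirrors the new Crawler.crawl() queueing branch: store the batch
    # as-is, even when it is None.
    scheduled.setdefault(spider, []).append(requests)

def schedule(spider, batches=[]):
    # Mirrors the new Crawler.schedule(): flatten all batches into one
    # list, expanding None into the spider's default start requests.
    requests = []
    for batch in batches:
        if batch is None:
            batch = spider.start_requests()
        requests.extend(batch)
    return requests

spider = StubSpider()
crawl(spider)                  # no explicit requests -> None is queued
crawl(spider, ['extra-req'])   # an explicit batch
print(schedule(spider, scheduled[spider]))
# -> ['default-req-1', 'default-req-2', 'extra-req']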
@@ -128,17 +136,15 @@ def __init__(self, settings):
 
     def create_crawler(self, name=None):
         if name not in self.crawlers:
-            crawler = Crawler(self.settings)
-            crawler.configure()
-
-            self.crawlers[name] = crawler
+            self.crawlers[name] = Crawler(self.settings)
 
         return self.crawlers[name]
 
     def start_crawling(self):
         name, crawler = self.crawlers.popitem()
 
         sflo = log.start_from_crawler(crawler)
+        crawler.configure()
         crawler.install()
         crawler.signals.connect(crawler.uninstall, signals.engine_stopped)
         if sflo:
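In CrawlerProcess, configure() moves out of create_crawler() and into start_crawling(), after log.start_from_crawler(), so crawlers are constructed lazily and configuration runs once logging is attached. A rough illustration of the new ordering, with hypothetical stubs in place of the real Crawler and log objects:

# Rough sketch of the reordering (hypothetical stubs, not the real classes).

class StubCrawler:
    def __init__(self, settings):
        self.settings = settings
    def configure(self):
        print('configure: build extensions/engine (log already attached)')
    def install(self):
        print('install')

crawlers = {}

def create_crawler(name=None):
    # New behaviour: construction only; configuration is deferred.
    if name not in crawlers:
        crawlers[name] = StubCrawler({'BOT_NAME': 'demo'})
    return crawlers[name]

def start_crawling():
    name, crawler = crawlers.popitem()
    print('log started for crawler')  # stands in for log.start_from_crawler()
    crawler.configure()               # configure now runs here, after logging
    crawler.install()

create_crawler()
start_crawling()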