@@ -1,4 +1,5 @@
 import signal
+from itertools import chain
 
 from twisted.internet import reactor, defer
 
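The new import does the flattening work in `_schedule()` below: `chain.from_iterable` lazily flattens an iterable of batches, replacing the old accumulate-into-a-list loop. A minimal illustration (the request strings are placeholders):

    from itertools import chain

    # chain.from_iterable flattens an iterable of iterables lazily, so
    # queued batches are consumed one request at a time instead of being
    # copied into an intermediate list via requests.extend().
    batches = [["req-1", "req-2"], ["req-3"]]
    print(list(chain.from_iterable(batches)))  # ['req-1', 'req-2', 'req-3']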
@@ -21,8 +22,7 @@ def __init__(self, settings):
 
         spman_cls = load_object(self.settings['SPIDER_MANAGER_CLASS'])
         self.spiders = spman_cls.from_crawler(self)
-
-        self.scheduled = {}
+        self._scheduled = {}
 
     def install(self):
         import scrapy.project
@@ -46,20 +46,17 @@ def configure(self):
 
     def crawl(self, spider, requests=None):
         spider.set_crawler(self)
-
         if self.configured and self.engine.running:
-            assert not self.scheduled
-            return self.schedule(spider, requests)
+            assert not self._scheduled
+            return self._schedule(spider, requests)
+        elif requests is None:
+            self._scheduled[spider] = None
         else:
-            self.scheduled.setdefault(spider, []).append(requests)
-
-    def schedule(self, spider, batches=[]):
-        requests = []
-        for batch in batches:
-            if batch is None:
-                batch = spider.start_requests()
-            requests.extend(batch)
+            self._scheduled.setdefault(spider, []).append(requests)
 
+    def _schedule(self, spider, batches=()):
+        requests = chain.from_iterable(batches) \
+            if batches else spider.start_requests()
         return self.engine.open_spider(spider, requests)
 
     def _spider_closed(self, spider=None):
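Taken with the `start()` hunk below, `crawl()` now queues work per spider until the engine is running: `None` marks "fall back to the spider's `start_requests()`", while explicit request batches accumulate in a list. A compressed sketch of that queue-and-replay flow, using hypothetical stand-ins rather than the real Crawler/engine API:

    from itertools import chain

    class MiniSpider:
        # Stand-in for a Scrapy spider; requests are plain strings here.
        def start_requests(self):
            yield "GET /"

    _scheduled = {}

    def crawl(spider, requests=None):
        # Mirror of the queueing branches above (engine not yet running).
        if requests is None:
            _scheduled[spider] = None            # use start_requests() later
        else:
            _scheduled.setdefault(spider, []).append(requests)

    def schedule(spider, batches=()):
        # Mirror of _schedule(): flatten queued batches, or fall back.
        return list(chain.from_iterable(batches) if batches
                    else spider.start_requests())

    s1, s2 = MiniSpider(), MiniSpider()
    crawl(s1)                        # no requests -> start_requests() later
    crawl(s2, ["GET /a", "GET /b"])  # first batch
    crawl(s2, ["GET /c"])            # second batch, same spider

    for spider, batches in _scheduled.items():
        print(schedule(spider, batches))
    # ['GET /']
    # ['GET /a', 'GET /b', 'GET /c']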
@@ -70,8 +67,8 @@ def _spider_closed(self, spider=None):
     def start(self):
         yield defer.maybeDeferred(self.configure)
 
-        for spider, batches in self.scheduled.iteritems():
-            yield self.schedule(spider, batches)
+        for spider, batches in self._scheduled.iteritems():
+            yield self._schedule(spider, batches)
 
         yield defer.maybeDeferred(self.engine.start)
 
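`start()` yields Deferreds, which reads like a `defer.inlineCallbacks` generator; the decorator sits outside these hunks, so that is an assumption. Under it, each `yield` suspends until the previous Deferred fires, serialising configure, then per-spider scheduling, then engine start. (`iteritems()` also pins this code to Python 2; `items()` is the Python 3 spelling.) A hedged sketch of the assumed control flow, with `crawler` as a hypothetical configured instance:

    from twisted.internet import defer

    # Assumption: start() is decorated with defer.inlineCallbacks, so each
    # yield waits for the previous Deferred before continuing.
    @defer.inlineCallbacks
    def start(crawler):
        yield defer.maybeDeferred(crawler.configure)
        for spider, batches in crawler._scheduled.items():
            yield crawler._schedule(spider, batches)
        yield defer.maybeDeferred(crawler.engine.start)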