@@ -3,7 +3,6 @@
 import logging
 from os.path import join, exists
 
-from queuelib import PriorityQueue
 from scrapy.utils.reqser import request_to_dict, request_from_dict
 from scrapy.utils.misc import load_object
 from scrapy.utils.job import job_dir
@@ -13,9 +12,11 @@
 
 class Scheduler(object):
 
-    def __init__(self, dupefilter, jobdir=None, dqclass=None, mqclass=None, logunser=False, stats=None):
+    def __init__(self, dupefilter, jobdir=None, dqclass=None, mqclass=None,
+                 logunser=False, stats=None, pqclass=None):
         self.df = dupefilter
         self.dqdir = self._dqdir(jobdir)
+        self.pqclass = pqclass
         self.dqclass = dqclass
         self.mqclass = mqclass
         self.logunser = logunser
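For context, the class stored in self.pqclass is always instantiated with a per-priority queue factory (self._newmq for the memory queue, self._newdq for the disk queue), so that is the contract any substitute class must accept. A sketch of those factory helpers as they appear elsewhere in this file (unchanged by this commit):

    def _newmq(self, priority):
        # one in-memory backing queue per priority level
        return self.mqclass()

    def _newdq(self, priority):
        # one on-disk backing queue per priority level, under the job dir
        return self.dqclass(join(self.dqdir, 'p%s' % priority))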
@@ -26,17 +27,19 @@ def from_crawler(cls, crawler):
         settings = crawler.settings
         dupefilter_cls = load_object(settings['DUPEFILTER_CLASS'])
         dupefilter = dupefilter_cls.from_settings(settings)
+        pqclass = load_object(settings['SCHEDULER_PRIORITY_QUEUE'])
         dqclass = load_object(settings['SCHEDULER_DISK_QUEUE'])
         mqclass = load_object(settings['SCHEDULER_MEMORY_QUEUE'])
         logunser = settings.getbool('LOG_UNSERIALIZABLE_REQUESTS')
-        return cls(dupefilter, job_dir(settings), dqclass, mqclass, logunser, crawler.stats)
+        return cls(dupefilter, jobdir=job_dir(settings), logunser=logunser,
+                   stats=crawler.stats, pqclass=pqclass, dqclass=dqclass, mqclass=mqclass)
 
     def has_pending_requests(self):
         return len(self) > 0
 
     def open(self, spider):
         self.spider = spider
-        self.mqs = PriorityQueue(self._newmq)
+        self.mqs = self.pqclass(self._newmq)
         self.dqs = self._dq() if self.dqdir else None
         return self.df.open()
 
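With from_crawler resolving the queue class through load_object, the implementation is chosen by a plain setting rather than a hard import. A minimal sketch of overriding it from a project's settings.py; the myproject.queues path is hypothetical, and pointing the setting back at 'queuelib.PriorityQueue' reproduces the previously hard-coded behaviour:

    # settings.py -- swap in a custom priority queue (hypothetical path)
    SCHEDULER_PRIORITY_QUEUE = 'myproject.queues.SimplePriorityQueue'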
@@ -112,7 +115,7 @@ def _dq(self):
                 prios = json.load(f)
         else:
             prios = ()
-        q = PriorityQueue(self._newdq, startprios=prios)
+        q = self.pqclass(self._newdq, startprios=prios)
         if q:
             logger.info("Resuming crawl (%(queuesize)d requests scheduled)",
                         {'queuesize': len(q)}, extra={'spider': self.spider})
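A replacement class only has to honour the small interface exercised above: __init__(qfactory, startprios=()), push(obj, priority), pop(), close() and __len__(), with lower priority values popped first (queuelib's convention; the scheduler pushes requests with their priority negated). A minimal, memory-only sketch of such a class, which ignores the backing-queue factory for brevity and is therefore only suitable where no disk queue is used:

    import heapq

    class SimplePriorityQueue(object):
        """Illustrative stand-in for queuelib.PriorityQueue (memory-only)."""

        def __init__(self, qfactory, startprios=()):
            self.heap = []   # (priority, sequence, obj) triples
            self.seq = 0     # tie-breaker preserving FIFO order per priority

        def push(self, obj, priority=0):
            heapq.heappush(self.heap, (priority, self.seq, obj))
            self.seq += 1

        def pop(self):
            if not self.heap:
                return None
            return heapq.heappop(self.heap)[2]

        def close(self):
            # queuelib returns the priorities that still hold requests, so
            # the scheduler can persist them and pass them back as startprios
            return sorted({p for p, _, _ in self.heap})

        def __len__(self):
            return len(self.heap)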