Skip to content

Commit 9f4fe5d

Browse files
committed
Merge pull request scrapy#1822 from nyov/nyov/scheduler
[MRG+1] Allow core Scheduler priority queue customization
2 parents 6b2871d + 2a6524e commit 9f4fe5d

File tree

2 files changed: +9 −5 lines changed

scrapy/core/scheduler.py

Lines changed: 8 additions & 5 deletions

@@ -3,7 +3,6 @@
 import logging
 from os.path import join, exists
 
-from queuelib import PriorityQueue
 from scrapy.utils.reqser import request_to_dict, request_from_dict
 from scrapy.utils.misc import load_object
 from scrapy.utils.job import job_dir
@@ -13,9 +12,11 @@
 
 class Scheduler(object):
 
-    def __init__(self, dupefilter, jobdir=None, dqclass=None, mqclass=None, logunser=False, stats=None):
+    def __init__(self, dupefilter, jobdir=None, dqclass=None, mqclass=None,
+                 logunser=False, stats=None, pqclass=None):
         self.df = dupefilter
         self.dqdir = self._dqdir(jobdir)
+        self.pqclass = pqclass
         self.dqclass = dqclass
         self.mqclass = mqclass
         self.logunser = logunser
@@ -26,17 +27,19 @@ def from_crawler(cls, crawler):
         settings = crawler.settings
         dupefilter_cls = load_object(settings['DUPEFILTER_CLASS'])
         dupefilter = dupefilter_cls.from_settings(settings)
+        pqclass = load_object(settings['SCHEDULER_PRIORITY_QUEUE'])
         dqclass = load_object(settings['SCHEDULER_DISK_QUEUE'])
         mqclass = load_object(settings['SCHEDULER_MEMORY_QUEUE'])
         logunser = settings.getbool('LOG_UNSERIALIZABLE_REQUESTS')
-        return cls(dupefilter, job_dir(settings), dqclass, mqclass, logunser, crawler.stats)
+        return cls(dupefilter, jobdir=job_dir(settings), logunser=logunser,
+                   stats=crawler.stats, pqclass=pqclass, dqclass=dqclass, mqclass=mqclass)
 
     def has_pending_requests(self):
         return len(self) > 0
 
     def open(self, spider):
         self.spider = spider
-        self.mqs = PriorityQueue(self._newmq)
+        self.mqs = self.pqclass(self._newmq)
         self.dqs = self._dq() if self.dqdir else None
         return self.df.open()

@@ -112,7 +115,7 @@ def _dq(self):
             prios = json.load(f)
         else:
             prios = ()
-        q = PriorityQueue(self._newdq, startprios=prios)
+        q = self.pqclass(self._newdq, startprios=prios)
         if q:
             logger.info("Resuming crawl (%(queuesize)d requests scheduled)",
                         {'queuesize': len(q)}, extra={'spider': self.spider})

scrapy/settings/default_settings.py

Lines changed: 1 addition & 0 deletions

@@ -234,6 +234,7 @@
 SCHEDULER = 'scrapy.core.scheduler.Scheduler'
 SCHEDULER_DISK_QUEUE = 'scrapy.squeues.PickleLifoDiskQueue'
 SCHEDULER_MEMORY_QUEUE = 'scrapy.squeues.LifoMemoryQueue'
+SCHEDULER_PRIORITY_QUEUE = 'queuelib.PriorityQueue'
 
 SPIDER_LOADER_CLASS = 'scrapy.spiderloader.SpiderLoader'

0 commit comments

Comments (0)