Skip to content

Commit 8a140b6

Browse files
committed
Merge pull request scrapy#1315 from scrapy/downloader-slots-cleanup
Small downloader slots cleanup
2 parents: ea4b175 + cb0445e · commit 8a140b6

File tree

1 file changed

+22
-4
lines changed

1 file changed

+22
-4
lines changed

scrapy/core/downloader/__init__.py

Lines changed: 22 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,15 @@
1+
from __future__ import absolute_import
12
import random
23
import warnings
34
from time import time
5+
from datetime import datetime
46
from collections import deque
57

68
from twisted.internet import reactor, defer, task
79

810
from scrapy.utils.defer import mustbe_deferred
911
from scrapy.utils.httpobj import urlparse_cached
1012
from scrapy.resolver import dnscache
11-
from scrapy.exceptions import ScrapyDeprecationWarning
1213
from scrapy import signals
1314
from .middleware import DownloaderMiddlewareManager
1415
from .handlers import DownloadHandlers
@@ -17,10 +18,11 @@
1718
class Slot(object):
1819
"""Downloader slot"""
1920

20-
def __init__(self, concurrency, delay, settings):
21+
def __init__(self, concurrency, delay, randomize_delay):
2122
self.concurrency = concurrency
2223
self.delay = delay
23-
self.randomize_delay = settings.getbool('RANDOMIZE_DOWNLOAD_DELAY')
24+
self.randomize_delay = randomize_delay
25+
2426
self.active = set()
2527
self.queue = deque()
2628
self.transferring = set()
@@ -39,6 +41,21 @@ def close(self):
3941
if self.latercall and self.latercall.active():
4042
self.latercall.cancel()
4143

44+
def __repr__(self):
45+
cls_name = self.__class__.__name__
46+
return "%s(concurrency=%r, delay=%0.2f, randomize_delay=%r)" % (
47+
cls_name, self.concurrency, self.delay, self.randomize_delay)
48+
49+
def __str__(self):
50+
return (
51+
"<downloader.Slot concurrency=%r delay=%0.2f randomize_delay=%r "
52+
"len(active)=%d len(queue)=%d len(transferring)=%d lastseen=%s>" % (
53+
self.concurrency, self.delay, self.randomize_delay,
54+
len(self.active), len(self.queue), len(self.transferring),
55+
datetime.fromtimestamp(self.lastseen).isoformat()
56+
)
57+
)
58+
4259

4360
def _get_concurrency_delay(concurrency, spider, settings):
4461
delay = settings.getfloat('DOWNLOAD_DELAY')
@@ -66,6 +83,7 @@ def __init__(self, crawler):
6683
self.total_concurrency = self.settings.getint('CONCURRENT_REQUESTS')
6784
self.domain_concurrency = self.settings.getint('CONCURRENT_REQUESTS_PER_DOMAIN')
6885
self.ip_concurrency = self.settings.getint('CONCURRENT_REQUESTS_PER_IP')
86+
self.randomize_delay = self.settings.getbool('RANDOMIZE_DOWNLOAD_DELAY')
6987
self.middleware = DownloaderMiddlewareManager.from_crawler(crawler)
7088
self._slot_gc_loop = task.LoopingCall(self._slot_gc)
7189
self._slot_gc_loop.start(60)
@@ -87,7 +105,7 @@ def _get_slot(self, request, spider):
87105
if key not in self.slots:
88106
conc = self.ip_concurrency if self.ip_concurrency else self.domain_concurrency
89107
conc, delay = _get_concurrency_delay(conc, spider, self.settings)
90-
self.slots[key] = Slot(conc, delay, self.settings)
108+
self.slots[key] = Slot(conc, delay, self.randomize_delay)
91109

92110
return key, self.slots[key]
93111

0 commit comments

Comments
 (0)