Skip to content

Commit c9f3dec

Browse files
committed
Use psutil if available to measure RSS on memusage extension
1 parent 6fa3f24 commit c9f3dec

File tree

1 file changed

+24
-4
lines changed

1 file changed

+24
-4
lines changed

scrapy/extensions/memusage.py

Lines changed: 24 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,10 +24,9 @@ class MemoryUsage(object):
2424
def __init__(self, crawler):
2525
if not crawler.settings.getbool('MEMUSAGE_ENABLED'):
2626
raise NotConfigured
27-
try:
28-
# stdlib's resource module is only available on unix platforms.
29-
self.resource = import_module('resource')
30-
except ImportError:
27+
28+
self._get_rss = self._choose_rss_func()
29+
if self._get_rss is None:
3130
raise NotConfigured
3231

3332
self.crawler = crawler
@@ -45,12 +44,32 @@ def from_crawler(cls, crawler):
4544
return cls(crawler)
4645

4746
def get_virtual_size(self):
    """Return the current memory usage figure for this process.

    The value comes from whichever reader was stored in ``self._get_rss``;
    this method only delegates to it. Despite the method name, the readers
    in this class report resident set size (RSS).
    """
    rss_reader = self._get_rss
    return rss_reader()
48+
49+
def _get_rss_psutil(self):
    """Return the resident set size of the current process via psutil.

    Uses the module stored in ``self.psutil_module``. ``Process()`` called
    without a pid refers to the current process.

    Returns:
        The ``rss`` field of ``Process().memory_info()``.
    """
    # Read the named field rather than index 0 so the result does not
    # depend on the position of ``rss`` inside the returned tuple.
    return self.psutil_module.Process().memory_info().rss
51+
52+
def _get_rss_resource(self):
    """Return ``ru_maxrss`` for this process, expressed in bytes.

    Reads ``getrusage(RUSAGE_SELF)`` from the module stored in
    ``self.resource`` and normalises the unit of ``ru_maxrss``.
    """
    usage = self.resource.getrusage(self.resource.RUSAGE_SELF)
    # on Mac OS X ru_maxrss is in bytes, on Linux it is in KB
    if sys.platform == 'darwin':
        return usage.ru_maxrss
    return usage.ru_maxrss * 1024
5358

59+
def _choose_rss_func(self):
    """Select the method used to measure this process's memory usage.

    Tries ``psutil`` first and falls back to the stdlib ``resource``
    module. The module that imports successfully is stored on the instance
    (``self.psutil_module`` or ``self.resource``) for the matching reader.

    Returns:
        ``self._get_rss_psutil`` or ``self._get_rss_resource`` (bound
        methods), or ``None`` when neither module can be imported.
    """
    try:
        # psutil is recommended as it can measure current RSS usage
        self.psutil_module = import_module('psutil')
    except ImportError:
        pass
    else:
        return self._get_rss_psutil

    try:
        # stdlib's resource module is only available on unix platforms.
        self.resource = import_module('resource')
    except ImportError:
        pass
    else:
        return self._get_rss_resource

    # No backend is importable. Return None explicitly: __init__ tests the
    # result against None to decide whether to raise NotConfigured.
    return None
72+
5473
def engine_started(self):
5574
self.crawler.stats.set_value('memusage/startup', self.get_virtual_size())
5675
self.tasks = []
@@ -96,6 +115,7 @@ def _check_limit(self):
96115
def _check_warning(self):
97116
if self.warned: # warn only once
98117
return
118+
99119
if self.get_virtual_size() > self.warning:
100120
self.crawler.stats.set_value('memusage/warning_reached', 1)
101121
mem = self.warning/1024/1024

0 commit comments

Comments
 (0)