@@ -24,10 +24,9 @@ class MemoryUsage(object):
24
24
def __init__ (self , crawler ):
25
25
if not crawler .settings .getbool ('MEMUSAGE_ENABLED' ):
26
26
raise NotConfigured
27
- try :
28
- # stdlib's resource module is only available on unix platforms.
29
- self .resource = import_module ('resource' )
30
- except ImportError :
27
+
28
+ self ._get_rss = self ._choose_rss_func ()
29
+ if self ._get_rss is None :
31
30
raise NotConfigured
32
31
33
32
self .crawler = crawler
@@ -45,12 +44,32 @@ def from_crawler(cls, crawler):
45
44
return cls (crawler )
46
45
47
46
def get_virtual_size(self):
    """Return the memory figure reported by the selected measuring helper.

    Delegates to the callable stored on ``self._get_rss``; this method adds
    no processing of its own.
    """
    measure = self._get_rss
    return measure()
48
+
49
def _get_rss_psutil(self):
    """Return the resident set size reported by psutil for this process.

    Uses the module object stored on ``self.psutil_module`` and queries a
    ``Process()`` created without arguments.
    """
    # Read the named ``rss`` field rather than position 0 of the
    # memory_info() result, so the intent is explicit at the call site.
    return self.psutil_module.Process().memory_info().rss
51
+
52
def _get_rss_resource(self):
    """Return ``ru_maxrss`` for this process, scaled by platform.

    Reads ``getrusage(RUSAGE_SELF)`` from the module stored on
    ``self.resource``. The raw value is returned unchanged on ``darwin``
    and multiplied by 1024 on every other platform.
    """
    usage = self.resource.getrusage(self.resource.RUSAGE_SELF)
    # on Mac OS X ru_maxrss is in bytes, on Linux it is in KB
    scale = 1 if sys.platform == 'darwin' else 1024
    return usage.ru_maxrss * scale
53
58
59
def _choose_rss_func(self):
    """Select the memory-measuring method according to what can be imported.

    Tries ``psutil`` first, then the stdlib ``resource`` module. The first
    module that imports is stored on the instance (``self.psutil_module``
    or ``self.resource``) and the matching bound method is returned.
    Returns ``None`` when neither module can be imported.
    """
    try:
        # psutil is recommended as it can measure current RSS usage
        self.psutil_module = import_module('psutil')
    except ImportError:
        pass
    else:
        return self._get_rss_psutil

    try:
        # stdlib's resource module is only available on unix platforms.
        self.resource = import_module('resource')
    except ImportError:
        return None
    return self._get_rss_resource
72
+
54
73
def engine_started (self ):
55
74
self .crawler .stats .set_value ('memusage/startup' , self .get_virtual_size ())
56
75
self .tasks = []
@@ -96,6 +115,7 @@ def _check_limit(self):
96
115
def _check_warning (self ):
97
116
if self .warned : # warn only once
98
117
return
118
+
99
119
if self .get_virtual_size () > self .warning :
100
120
self .crawler .stats .set_value ('memusage/warning_reached' , 1 )
101
121
mem = self .warning / 1024 / 1024
0 commit comments