Skip to content

Commit 457b97c

Browse files
committed
Merge pull request scrapy#1388 from scrapy/dupefilter-persist
Dupefilter persistence
2 parents: e2f31f3 + 836cb84 — commit 457b97c

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

scrapy/dupefilters.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ def __init__(self, path=None, debug=False):
3636
self.logger = logging.getLogger(__name__)
3737
if path:
3838
self.file = open(os.path.join(path, 'requests.seen'), 'a+')
39+
self.file.seek(0)
3940
self.fingerprints.update(x.rstrip() for x in self.file)
4041

4142
@classmethod

tests/test_dupefilters.py

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
import hashlib
2+
import tempfile
23
import unittest
4+
import shutil
35

46
from scrapy.dupefilters import RFPDupeFilter
57
from scrapy.http import Request
@@ -24,6 +26,27 @@ def test_filter(self):
2426

2527
dupefilter.close('finished')
2628

29+
def test_dupefilter_path(self):
30+
r1 = Request('http://scrapytest.org/1')
31+
r2 = Request('http://scrapytest.org/2')
32+
33+
path = tempfile.mkdtemp()
34+
try:
35+
df = RFPDupeFilter(path)
36+
df.open()
37+
assert not df.request_seen(r1)
38+
assert df.request_seen(r1)
39+
df.close('finished')
40+
41+
df2 = RFPDupeFilter(path)
42+
df2.open()
43+
assert df2.request_seen(r1)
44+
assert not df2.request_seen(r2)
45+
assert df2.request_seen(r2)
46+
df2.close('finished')
47+
finally:
48+
shutil.rmtree(path)
49+
2750
def test_request_fingerprint(self):
2851
"""Test if customization of request_fingerprint method will change
2952
output of request_seen.

0 commit comments

Comments
 (0)