Skip to content

Commit c9d85fa

Browse files
authored
Merge pull request scrapy#6469 from Laerte/master
Add support for meta in Spider Contracts
2 parents 6ce0342 + ddbdfeb commit c9d85fa

File tree

4 files changed

+104
-2
lines changed

4 files changed

+104
-2
lines changed

docs/topics/contracts.rst

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@ following example:
2020
This function parses a sample response. Some contracts are mingled
2121
with this docstring.
2222
23-
@url http://www.amazon.com/s?field-keywords=selfish+gene
23+
@url http://www.example.com/s?field-keywords=selfish+gene
2424
@returns items 1 16
2525
@returns requests 0 0
2626
@scrapes Title Author Year Price
2727
"""
2828
29-
This callback is tested using three built-in contracts:
29+
You can use the following contracts:
3030

3131
.. module:: scrapy.contracts.default
3232

@@ -46,6 +46,14 @@ This callback is tested using three built-in contracts:
4646

4747
@cb_kwargs {"arg1": "value1", "arg2": "value2", ...}
4848

49+
.. class:: MetadataContract
50+
51+
This contract (``@meta``) sets the :attr:`meta <scrapy.Request.meta>`
52+
attribute for the sample request. The value must be a valid JSON dictionary.
53+
::
54+
55+
@meta {"arg1": "value1", "arg2": "value2", ...}
56+
4957
.. class:: ReturnsContract
5058

5159
This contract (``@returns``) sets lower and upper bounds for the items and

scrapy/contracts/default.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,20 @@ def adjust_request_args(self, args: Dict[str, Any]) -> Dict[str, Any]:
3535
return args
3636

3737

38+
class MetadataContract(Contract):
    """Contract that fills in the ``meta`` argument of the sample request.

    The contract value should be a JSON-encoded dictionary, e.g.:

    @meta {"arg1": "some value"}
    """

    name = "meta"

    def adjust_request_args(self, args: Dict[str, Any]) -> Dict[str, Any]:
        """Decode the contract arguments as JSON and store them as ``meta``.

        ``self.args`` is joined with single spaces to rebuild the JSON text,
        the decoded value is written to ``args["meta"]`` and the same ``args``
        mapping is returned.
        """
        raw_json = " ".join(self.args)
        args["meta"] = json.loads(raw_json)
        return args
50+
51+
3852
class ReturnsContract(Contract):
3953
"""Contract to check the output of a callback
4054

scrapy/settings/default_settings.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,7 @@
333333
SPIDER_CONTRACTS_BASE = {
334334
"scrapy.contracts.default.UrlContract": 1,
335335
"scrapy.contracts.default.CallbackKeywordArgumentsContract": 1,
336+
"scrapy.contracts.default.MetadataContract": 1,
336337
"scrapy.contracts.default.ReturnsContract": 2,
337338
"scrapy.contracts.default.ScrapesContract": 3,
338339
}

tests/test_contracts.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from scrapy.contracts import Contract, ContractsManager
99
from scrapy.contracts.default import (
1010
CallbackKeywordArgumentsContract,
11+
MetadataContract,
1112
ReturnsContract,
1213
ScrapesContract,
1314
UrlContract,
@@ -29,6 +30,10 @@ class ResponseMock:
2930
url = "http://scrapy.org"
3031

3132

33+
class ResponseMetaMock(ResponseMock):
    """Response mock that additionally exposes a ``meta`` attribute."""

    # No metadata by default; a test may assign a request's meta before
    # handing the mock to a callback.
    meta = None
35+
36+
3237
class CustomSuccessContract(Contract):
3338
name = "custom_success_contract"
3439

@@ -195,6 +200,33 @@ def invalid_regex_with_valid_contract(self, response):
195200
"""
196201
pass
197202

203+
def returns_request_meta(self, response):
    """method which returns request
    @url https://example.org
    @meta {"cookiejar": "session1"}
    @returns requests 1
    """
    # Forward the meta carried by the response to the follow-up request.
    follow_up = Request(
        "https://example.org",
        meta=response.meta,
        callback=self.returns_item_meta,
    )
    return follow_up
212+
213+
def returns_item_meta(self, response):
    """method which returns item
    @url http://scrapy.org
    @meta {"key": "example"}
    @returns items 1 1
    """
    # Build the single item; its url is taken from the response.
    item = TestItem(name="example", url=response.url)
    return item
220+
221+
def returns_error_missing_meta(self, response):
    """method which depends on metadata being defined

    @url http://scrapy.org
    @returns items 1
    """
    # No @meta contract is declared above, yet the "key" entry of
    # response.meta is looked up here and used as the item's only key.
    yield {response.meta["key"]: "value"}
229+
198230

199231
class CustomContractSuccessSpider(Spider):
200232
name = "custom_contract_success_spider"
@@ -224,6 +256,7 @@ class ContractsManagerTest(unittest.TestCase):
224256
contracts = [
225257
UrlContract,
226258
CallbackKeywordArgumentsContract,
259+
MetadataContract,
227260
ReturnsContract,
228261
ScrapesContract,
229262
CustomFormContract,
@@ -328,6 +361,52 @@ def test_cb_kwargs(self):
328361
request.callback(response, **request.cb_kwargs)
329362
self.should_error()
330363

364+
def test_meta(self):
    """Check that ``@meta`` contracts are extracted and applied to requests."""
    spider = TestSpider()
    expected_types = frozenset([UrlContract, MetadataContract, ReturnsContract])

    # extract contracts correctly
    for method in (spider.returns_request_meta, spider.returns_item_meta):
        contracts = self.conman.extract_contracts(method)
        self.assertEqual(len(contracts), 3)
        self.assertEqual(frozenset(type(x) for x in contracts), expected_types)

    # returns_request
    response = ResponseMetaMock()
    request = self.conman.from_method(spider.returns_request_meta, self.results)
    self.assertEqual(request.meta["cookiejar"], "session1")
    response.meta = request.meta
    request.callback(response)
    self.assertEqual(response.meta["cookiejar"], "session1")
    self.should_succeed()

    # returns_item
    response = ResponseMetaMock()
    request = self.conman.from_method(spider.returns_item_meta, self.results)
    self.assertEqual(request.meta["key"], "example")
    response.meta = request.meta
    # Pass the prepared instance, not the ResponseMetaMock class, so the
    # callback receives the response whose meta was just populated.
    request.callback(response)
    self.assertEqual(response.meta["key"], "example")
    self.should_succeed()

    # returns_error_missing_meta: the mock's meta is left as None, so the
    # callback's lookup of response.meta["key"] cannot succeed.
    response = ResponseMetaMock()
    request = self.conman.from_method(
        spider.returns_error_missing_meta, self.results
    )
    request.callback(response)
    self.should_error()
409+
331410
def test_returns(self):
332411
spider = TestSpider()
333412
response = ResponseMock()

0 commit comments

Comments
 (0)