@@ -117,6 +117,8 @@ Request objects
raised while processing the request. This includes pages that failed
with 404 HTTP errors and such. It receives a `Twisted Failure`_ instance
as first parameter.
+ For more information,
+ see :ref:`topics-request-response-ref-errbacks` below.
:type errback: callable
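A minimal sketch (the ``parse_page`` and ``handle_error`` method names below are hypothetical, chosen only for illustration) of passing ``errback`` together with ``callback`` when constructing a request::

    import scrapy

    class ExampleSpider(scrapy.Spider):
        name = "example"

        def start_requests(self):
            # handle_error is called with a Failure if an exception is
            # raised while this request is being processed
            yield scrapy.Request("http://www.example.com/",
                                 callback=self.parse_page,
                                 errback=self.handle_error)

        def parse_page(self, response):
            self.logger.info('Got successful response from %s', response.url)

        def handle_error(self, failure):
            # log the failure; see the errback section referenced above
            self.logger.error(repr(failure))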
.. attribute:: Request.url
@@ -212,6 +214,69 @@ different fields from different pages::
        item['other_url'] = response.url
        return item
+
+ .. _topics-request-response-ref-errbacks:
+
+ Using errbacks to catch exceptions in request processing
+ --------------------------------------------------------
+
+ The errback of a request is a function that will be called when an exception
+ is raised while processing it.
+
+ It receives a `Twisted Failure`_ instance as first parameter and can be
+ used to track connection establishment timeouts, DNS errors etc.
+
+ Here's an example spider logging all errors and catching some specific
+ errors if needed::
+
+     import scrapy
+
+     from scrapy.spidermiddlewares.httperror import HttpError
+     from twisted.internet.error import DNSLookupError
+     from twisted.internet.error import TimeoutError, TCPTimedOutError
+
+     class ErrbackSpider(scrapy.Spider):
+         name = "errback_example"
+         start_urls = [
+             "http://www.httpbin.org/",              # HTTP 200 expected
+             "http://www.httpbin.org/status/404",    # Not found error
+             "http://www.httpbin.org/status/500",    # server issue
+             "http://www.httpbin.org:12345/",        # non-responding host, timeout expected
+             "http://www.httphttpbinbin.org/",       # DNS error expected
+         ]
+
+         def start_requests(self):
+             for u in self.start_urls:
+                 yield scrapy.Request(u, callback=self.parse_httpbin,
+                                         errback=self.errback_httpbin,
+                                         dont_filter=True)
+
+         def parse_httpbin(self, response):
+             self.logger.info('Got successful response from {}'.format(response.url))
+             # do something useful here...
+
+         def errback_httpbin(self, failure):
+             # log all failures
+             self.logger.error(repr(failure))
+
+             # in case you want to do something special for some errors,
+             # you may need the failure's type:
+
+             if failure.check(HttpError):
+                 # these exceptions come from HttpError spider middleware
+                 # you can get the non-200 response
+                 response = failure.value.response
+                 self.logger.error('HttpError on %s', response.url)
+
+             elif failure.check(DNSLookupError):
+                 # this is the original request
+                 request = failure.request
+                 self.logger.error('DNSLookupError on %s', request.url)
+
+             elif failure.check(TimeoutError, TCPTimedOutError):
+                 request = failure.request
+                 self.logger.error('TimeoutError on %s', request.url)
+
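Building on the ``HttpError`` branch of the example above, a minimal sketch: ``failure.value.response`` is a regular response object, so its ``status`` attribute can be used inside the errback to tell, for instance, a 404 apart from a 500::

    def errback_httpbin(self, failure):
        if failure.check(HttpError):
            # failure.value.response is the non-200 response that was received
            response = failure.value.response
            if response.status == 404:
                self.logger.warning('Page not found: %s', response.url)
            else:
                self.logger.error('HttpError %s on %s', response.status, response.url)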
.. _topics-request-meta:
Request.meta special keys