|
1 | 1 | from OpenSSL import SSL
|
2 | 2 | from twisted.internet.ssl import ClientContextFactory
|
| 3 | + |
try:

    from zope.interface.declarations import implementer

    # These names should all be importable starting with Twisted 14.0.0;
    # if any import in this `try` block fails, the legacy factory defined
    # in the `except ImportError` branch below is used instead.
    from twisted.internet.ssl import (
        optionsForClientTLS,
        CertificateOptions,
        platformTrust,
    )
    from twisted.internet._sslverify import ClientTLSOptions
    from twisted.web.client import BrowserLikePolicyForHTTPS
    from twisted.web.iweb import IPolicyForHTTPS

    @implementer(IPolicyForHTTPS)
    class ScrapyClientContextFactory(BrowserLikePolicyForHTTPS):
        """
        HTTPS context factory that does NOT verify the peer certificate.

        The OpenSSL method defaults to TLS_METHOD (also known as
        SSLv23_METHOD), which allows TLS protocol negotiation:

        'A TLS/SSL connection established with [this method] may
        understand the SSLv3, TLSv1, TLSv1.1 and TLSv1.2 protocols.'
        """

        def __init__(self, method=SSL.SSLv23_METHOD, *args, **kwargs):
            """
            Initialise the parent policy and remember the SSL/TLS method.

            :param method: OpenSSL method constant used when building
                certificate options (defaults to ``SSL.SSLv23_METHOD``).
            :param args: positional arguments forwarded to the parent class.
            :param kwargs: keyword arguments forwarded to the parent class.
            """
            super(ScrapyClientContextFactory, self).__init__(*args, **kwargs)
            self._ssl_method = method

        def getCertificateOptions(self):
            """
            Build non-verifying ``CertificateOptions`` for this factory.

            :return: ``CertificateOptions(verify=False, method=...)``.
            """
            # verify=True would require supplying CAs to verify against;
            # in other words: it's not that simple.

            # Backward-compatible lookup of the SSL/TLS method:
            #
            # * `_ssl_method` is read through getattr() so that context
            #   factories which never call super(..., self).__init__
            #   fall back to None instead of failing;
            #
            # * a `method` attribute, as set by the often recommended
            #   `ScrapyClientContextFactory` subclass, takes precedence
            #   (https://github.com/scrapy/scrapy/issues/1429#issuecomment-131782133)
            fallback_method = getattr(self, '_ssl_method', None)
            chosen_method = getattr(self, 'method', fallback_method)
            return CertificateOptions(verify=False, method=chosen_method)

        # Kept for old-style HTTP/1.0 downloader context twisted calls,
        # e.g. connectSSL()
        def getContext(self, hostname=None, port=None):
            """
            Return the context built from :meth:`getCertificateOptions`.

            ``hostname`` and ``port`` are accepted but not used here.
            """
            certificate_options = self.getCertificateOptions()
            return certificate_options.getContext()

        def creatorForNetloc(self, hostname, port):
            """
            Return ``ClientTLSOptions`` for ``hostname``.

            :param hostname: host name; it is decoded as ASCII before use.
            :param port: accepted but not used here.
            """
            ascii_hostname = hostname.decode("ascii")
            return ClientTLSOptions(ascii_hostname, self.getContext())


    @implementer(IPolicyForHTTPS)
    class BrowserLikeContextFactory(ScrapyClientContextFactory):
        """
        Twisted-recommended context factory for web clients.

        Quoting http://twistedmatrix.com/documents/current/api/twisted.web.client.Agent.html:
        "The default is to use a BrowserLikePolicyForHTTPS,
        so unless you have special requirements you can leave this as-is."

        creatorForNetloc() is the same as BrowserLikePolicyForHTTPS
        except this context factory allows setting the TLS/SSL method to use.

        Default OpenSSL method is TLS_METHOD (also called SSLv23_METHOD)
        which allows TLS protocol negotiation.
        """

        def creatorForNetloc(self, hostname, port):
            """
            Return client TLS options that trust the platform's root CAs.

            :param hostname: host name; it is decoded as ASCII before use.
            :param port: accepted but not used here.
            """
            ascii_hostname = hostname.decode("ascii")

            # trustRoot set to platformTrust() will use the platform's
            # root CAs.
            #
            # This means that a website like https://www.cacert.org will be
            # rejected by default, since CAcert.org CA certificate is seldom
            # shipped.
            trust_root = platformTrust()
            extra_options = {
                'method': self._ssl_method,
            }
            return optionsForClientTLS(ascii_hostname,
                                       trustRoot=trust_root,
                                       extraCertificateOptions=extra_options)

except ImportError:

    class ScrapyClientContextFactory(ClientContextFactory):
        "SSL context factory that is more permissive against SSL bugs."
        # see https://github.com/scrapy/scrapy/issues/82
        # and https://github.com/scrapy/scrapy/issues/26
        # and https://github.com/scrapy/scrapy/issues/981

        def __init__(self, method=SSL.SSLv23_METHOD):
            # Stored as `method`, the attribute this legacy factory exposes
            # for the SSL/TLS method (defaults to SSL.SSLv23_METHOD).
            self.method = method

        def getContext(self, hostname=None, port=None):
            # `hostname` and `port` are accepted but not used here.
            context = ClientContextFactory.getContext(self)
            # Enable all workarounds to SSL bugs as documented by
            # http://www.openssl.org/docs/ssl/SSL_CTX_set_options.html
            context.set_options(SSL.OP_ALL)
            return context
0 commit comments