Skip to content

Commit f7a48b0

Browse files
committed
Merge pull request scrapy#1794 from redapple/twisted-tls
[MRG+1] Use best practices for TLS connections when using Twisted>=14.0
2 parents 4e93501 + 0336c25 commit f7a48b0

File tree

4 files changed

+127
-24
lines changed

4 files changed

+127
-24
lines changed
Lines changed: 93 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,98 @@
11
from OpenSSL import SSL
22
from twisted.internet.ssl import ClientContextFactory
3+
34
try:
4-
# available since twisted 14.0
5+
6+
from zope.interface.declarations import implementer
7+
8+
# the following should be available from Twisted 14.0.0
9+
from twisted.internet.ssl import optionsForClientTLS, CertificateOptions, platformTrust
510
from twisted.internet._sslverify import ClientTLSOptions
11+
from twisted.web.client import BrowserLikePolicyForHTTPS
12+
from twisted.web.iweb import IPolicyForHTTPS
13+
14+
@implementer(IPolicyForHTTPS)
class ScrapyClientContextFactory(BrowserLikePolicyForHTTPS):
    """
    HTTPS context factory that does not verify the peer certificate.

    The OpenSSL method defaults to TLS_METHOD (also called SSLv23_METHOD),
    which allows TLS protocol negotiation:

     'A TLS/SSL connection established with [this method] may
      understand the SSLv3, TLSv1, TLSv1.1 and TLSv1.2 protocols.'
    """

    def __init__(self, method=SSL.SSLv23_METHOD, *args, **kwargs):
        # Remaining positional/keyword arguments go to the parent policy
        # untouched; only `method` is consumed here.
        super(ScrapyClientContextFactory, self).__init__(*args, **kwargs)
        self._ssl_method = method

    def getCertificateOptions(self):
        """Build non-verifying ``CertificateOptions`` for the chosen method."""
        # verify=True would require supplying CAs to verify against,
        # so verification is left off here.
        #
        # Method resolution is backward compatible:
        #
        # * `_ssl_method` is read through getattr() because some context
        #   factories do not call super(..., self).__init__ and therefore
        #   never set it;
        #
        # * a `method` attribute, as set by the often recommended
        #   `ScrapyClientContextFactory` subclass, takes precedence
        #   (https://github.com/scrapy/scrapy/issues/1429#issuecomment-131782133)
        fallback_method = getattr(self, '_ssl_method', None)
        ssl_method = getattr(self, 'method', fallback_method)
        return CertificateOptions(verify=False, method=ssl_method)

    def getContext(self, hostname=None, port=None):
        """Return an OpenSSL context; `hostname` and `port` are not used.

        Kept for old-style HTTP/1.0 downloader context twisted calls,
        e.g. connectSSL().
        """
        certificate_options = self.getCertificateOptions()
        return certificate_options.getContext()

    def creatorForNetloc(self, hostname, port):
        """Return TLS client options for `hostname` (decoded as ASCII)."""
        ascii_hostname = hostname.decode("ascii")
        return ClientTLSOptions(ascii_hostname, self.getContext())
53+
54+
55+
@implementer(IPolicyForHTTPS)
class BrowserLikeContextFactory(ScrapyClientContextFactory):
    """
    Twisted-recommended context factory for web clients.

    Quoting http://twistedmatrix.com/documents/current/api/twisted.web.client.Agent.html:
    "The default is to use a BrowserLikePolicyForHTTPS,
    so unless you have special requirements you can leave this as-is."

    creatorForNetloc() is the same as BrowserLikePolicyForHTTPS
    except this context factory allows setting the TLS/SSL method to use.

    Default OpenSSL method is TLS_METHOD (also called SSLv23_METHOD)
    which allows TLS protocol negotiation.
    """
    def creatorForNetloc(self, hostname, port):
        """Return certificate-verifying TLS client options for `hostname`.

        `hostname` is decoded as ASCII before being handed to Twisted;
        `port` is not used.
        """
        # Resolve the SSL/TLS method the same way as
        # ScrapyClientContextFactory.getCertificateOptions():
        #
        # * a `method` attribute set by a subclass takes precedence;
        #
        # * `_ssl_method` is read through getattr() so that subclasses
        #   not calling super(..., self).__init__ do not fail with
        #   AttributeError; None is passed through in that case.
        ssl_method = getattr(self, 'method',
                             getattr(self, '_ssl_method', None))

        # trustRoot set to platformTrust() will use the platform's root CAs.
        #
        # This means that a website like https://www.cacert.org will be rejected
        # by default, since CAcert.org CA certificate is seldom shipped.
        return optionsForClientTLS(hostname.decode("ascii"),
                                   trustRoot=platformTrust(),
                                   extraCertificateOptions={
                                       'method': ssl_method,
                                   })
81+
682
except ImportError:
7-
ClientTLSOptions = None
8-
9-
10-
class ScrapyClientContextFactory(ClientContextFactory):
11-
"A SSL context factory which is more permissive against SSL bugs."
12-
# see https://github.com/scrapy/scrapy/issues/82
13-
# and https://github.com/scrapy/scrapy/issues/26
14-
# and https://github.com/scrapy/scrapy/issues/981
15-
16-
def __init__(self):
17-
# see this issue on why we use TLSv1_METHOD by default
18-
# https://github.com/scrapy/scrapy/issues/194
19-
self.method = SSL.TLSv1_METHOD
20-
21-
def getContext(self, hostname=None, port=None):
22-
ctx = ClientContextFactory.getContext(self)
23-
# Enable all workarounds to SSL bugs as documented by
24-
# http://www.openssl.org/docs/ssl/SSL_CTX_set_options.html
25-
ctx.set_options(SSL.OP_ALL)
26-
if hostname and ClientTLSOptions is not None: # workaround for TLS SNI
27-
ClientTLSOptions(hostname, ctx)
28-
return ctx
83+
84+
class ScrapyClientContextFactory(ClientContextFactory):
    """SSL context factory that is more permissive against SSL bugs.

    See https://github.com/scrapy/scrapy/issues/82,
    https://github.com/scrapy/scrapy/issues/26
    and https://github.com/scrapy/scrapy/issues/981
    """

    def __init__(self, method=SSL.SSLv23_METHOD):
        # Stored as `method`, the attribute name used by this
        # fallback factory for the OpenSSL method.
        self.method = method

    def getContext(self, hostname=None, port=None):
        """Return the parent's context with ``SSL.OP_ALL`` set.

        `hostname` and `port` are accepted but not used.
        """
        context = ClientContextFactory.getContext(self)
        # SSL.OP_ALL enables all workarounds to SSL bugs as documented by
        # http://www.openssl.org/docs/ssl/SSL_CTX_set_options.html
        context.set_options(SSL.OP_ALL)
        return context

scrapy/core/downloader/handlers/http11.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import logging
55
from io import BytesIO
66
from time import time
7+
import warnings
78
from six.moves.urllib.parse import urldefrag
89

910
from zope.interface import implementer
@@ -18,6 +19,7 @@
1819
from scrapy.http import Headers
1920
from scrapy.responsetypes import responsetypes
2021
from scrapy.core.downloader.webclient import _parse
22+
from scrapy.core.downloader.tls import openssl_methods
2123
from scrapy.utils.misc import load_object
2224
from scrapy.utils.python import to_bytes, to_unicode
2325
from scrapy import twisted_version
@@ -31,8 +33,21 @@ def __init__(self, settings):
3133
self._pool = HTTPConnectionPool(reactor, persistent=True)
3234
self._pool.maxPersistentPerHost = settings.getint('CONCURRENT_REQUESTS_PER_DOMAIN')
3335
self._pool._factory.noisy = False
36+
37+
self._sslMethod = openssl_methods[settings.get('DOWNLOADER_CLIENT_TLS_METHOD')]
3438
self._contextFactoryClass = load_object(settings['DOWNLOADER_CLIENTCONTEXTFACTORY'])
35-
self._contextFactory = self._contextFactoryClass()
39+
# try method-aware context factory
40+
try:
41+
self._contextFactory = self._contextFactoryClass(method=self._sslMethod)
42+
except TypeError:
43+
# use context factory defaults
44+
self._contextFactory = self._contextFactoryClass()
45+
msg = """
46+
'%s' does not accept `method` argument (type OpenSSL.SSL method,\
47+
e.g. OpenSSL.SSL.SSLv23_METHOD).\
48+
Please upgrade your context factory class to handle it or ignore it.""" % (
49+
settings['DOWNLOADER_CLIENTCONTEXTFACTORY'],)
50+
warnings.warn(msg)
3651
self._default_maxsize = settings.getint('DOWNLOAD_MAXSIZE')
3752
self._default_warnsize = settings.getint('DOWNLOAD_WARNSIZE')
3853
self._disconnect_timeout = 1

scrapy/core/downloader/tls.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from OpenSSL import SSL
2+
3+
4+
# Names accepted for selecting the client TLS/SSL method.
METHOD_SSLv3 = 'SSLv3'
METHOD_TLS = 'TLS'
METHOD_TLSv10 = 'TLSv1.0'
METHOD_TLSv11 = 'TLSv1.1'
METHOD_TLSv12 = 'TLSv1.2'

# Maps each method name above to the matching OpenSSL.SSL method constant.
openssl_methods = {}
# protocol negotiation (recommended)
openssl_methods[METHOD_TLS] = SSL.SSLv23_METHOD
# SSL 3 (NOT recommended)
openssl_methods[METHOD_SSLv3] = SSL.SSLv3_METHOD
# TLS 1.0 only
openssl_methods[METHOD_TLSv10] = SSL.TLSv1_METHOD
# TLS 1.1 only; falls back to 5 when the SSL module lacks the attribute
# (presumably the constant's numeric value -- TODO confirm)
openssl_methods[METHOD_TLSv11] = getattr(SSL, 'TLSv1_1_METHOD', 5)
# TLS 1.2 only; falls back to 6 when the SSL module lacks the attribute
# (presumably the constant's numeric value -- TODO confirm)
openssl_methods[METHOD_TLSv12] = getattr(SSL, 'TLSv1_2_METHOD', 6)

scrapy/settings/default_settings.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@
8383

8484
DOWNLOADER_HTTPCLIENTFACTORY = 'scrapy.core.downloader.webclient.ScrapyHTTPClientFactory'
8585
DOWNLOADER_CLIENTCONTEXTFACTORY = 'scrapy.core.downloader.contextfactory.ScrapyClientContextFactory'
86+
DOWNLOADER_CLIENT_TLS_METHOD = 'TLS' # Use highest TLS/SSL protocol version supported by the platform,
87+
# also allowing negotiation
8688

8789
DOWNLOADER_MIDDLEWARES = {}
8890

0 commit comments

Comments
 (0)