Skip to content

Commit 679a680

Browse files
committed
Merge pull request scrapy#1933 from scrapy/cert-verif-ignore
Ignore HTTPS certificate verification failures
2 parents 3735eb8 + cd979ac commit 679a680

8 files changed

+227
-7
lines changed

scrapy/core/downloader/contextfactory.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,16 @@
66
from zope.interface.declarations import implementer
77

88
# the following should be available from Twisted 14.0.0
9-
from twisted.internet.ssl import optionsForClientTLS, CertificateOptions, platformTrust
10-
from twisted.internet._sslverify import ClientTLSOptions
9+
from twisted.internet.ssl import (optionsForClientTLS,
10+
CertificateOptions,
11+
platformTrust)
12+
1113
from twisted.web.client import BrowserLikePolicyForHTTPS
1214
from twisted.web.iweb import IPolicyForHTTPS
1315

16+
from scrapy.core.downloader.tls import ScrapyClientTLSOptions
17+
18+
1419
@implementer(IPolicyForHTTPS)
1520
class ScrapyClientContextFactory(BrowserLikePolicyForHTTPS):
1621
"""
@@ -49,7 +54,7 @@ def getContext(self, hostname=None, port=None):
4954
return self.getCertificateOptions().getContext()
5055

5156
def creatorForNetloc(self, hostname, port):
52-
return ClientTLSOptions(hostname.decode("ascii"), self.getContext())
57+
return ScrapyClientTLSOptions(hostname.decode("ascii"), self.getContext())
5358

5459

5560
@implementer(IPolicyForHTTPS)

scrapy/core/downloader/tls.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
import logging
12
from OpenSSL import SSL
23

34

5+
logger = logging.getLogger(__name__)
6+
47
METHOD_SSLv3 = 'SSLv3'
58
METHOD_TLS = 'TLS'
69
METHOD_TLSv10 = 'TLSv1.0'
@@ -14,3 +17,36 @@
1417
METHOD_TLSv11: getattr(SSL, 'TLSv1_1_METHOD', 5), # TLS 1.1 only
1518
METHOD_TLSv12: getattr(SSL, 'TLSv1_2_METHOD', 6), # TLS 1.2 only
1619
}
20+
21+
# ClientTLSOptions requires a recent-enough version of Twisted
22+
try:
23+
24+
# taken from twisted/twisted/internet/_sslverify.py
25+
try:
26+
from OpenSSL.SSL import SSL_CB_HANDSHAKE_DONE, SSL_CB_HANDSHAKE_START
27+
except ImportError:
28+
SSL_CB_HANDSHAKE_START = 0x10
29+
SSL_CB_HANDSHAKE_DONE = 0x20
30+
31+
from twisted.internet._sslverify import (ClientTLSOptions,
32+
_maybeSetHostNameIndication,
33+
verifyHostname,
34+
VerificationError)
35+
36+
class ScrapyClientTLSOptions(ClientTLSOptions):
37+
# same as Twisted's ClientTLSOptions,
38+
# except that VerificationError is caught
39+
# and doesn't close the connection
40+
def _identityVerifyingInfoCallback(self, connection, where, ret):
41+
if where & SSL_CB_HANDSHAKE_START:
42+
_maybeSetHostNameIndication(connection, self._hostnameBytes)
43+
elif where & SSL_CB_HANDSHAKE_DONE:
44+
try:
45+
verifyHostname(connection, self._hostnameASCII)
46+
except VerificationError as e:
47+
logger.warning(e)
48+
49+
except ImportError:
50+
# ImportError should not matter for older Twisted versions
51+
# as the above is not used in the fallback ScrapyClientContextFactory
52+
pass

tests/keys/example-com.cert.pem

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
-----BEGIN CERTIFICATE-----
2+
MIIEVTCCAz2gAwIBAgIJANuZ/6fbAJNcMA0GCSqGSIb3DQEBCwUAMH0xCzAJBgNV
3+
BAYTAlhXMQswCQYDVQQIDAJYVzEVMBMGA1UEBwwMVGhlIEludGVybmV0MQ8wDQYD
4+
VQQKDAZTY3JhcHkxGDAWBgNVBAMMD3d3dy5leGFtcGxlLmNvbTEfMB0GCSqGSIb3
5+
DQEJARYQdGVzdEBleGFtcGxlLmNvbTAgFw0xNjA0MjAxMjExNTZaGA8yMTE2MDMy
6+
NzEyMTE1NlowfTELMAkGA1UEBhMCWFcxCzAJBgNVBAgMAlhXMRUwEwYDVQQHDAxU
7+
aGUgSW50ZXJuZXQxDzANBgNVBAoMBlNjcmFweTEYMBYGA1UEAwwPd3d3LmV4YW1w
8+
bGUuY29tMR8wHQYJKoZIhvcNAQkBFhB0ZXN0QGV4YW1wbGUuY29tMIIBIjANBgkq
9+
hkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA5r2BzxXivtQXvIwrTGug8l6vjuDhf0WD
10+
HBU5yIHWvX1rT2MQFuZpu120iFGOK7MBYBBdmAIGsR5cHZ03cqQkAXOGQ5ug5O/u
11+
d/GZUbcgyv8WCwW71MWLb5CNiDWj/vZq6CUqRg2QkUpkjr/DcQrKsm67yadKLgyd
12+
G85OyZO6NBuAukQcKrNhspk/Ms55X7RbgYPUbZ0bBee4b3GRnE7PLltIsHo/tloV
13+
ynC0Sd3T1taYyyG7IJd2LWJELzK0Ww+QUV2qoOdZjl8db1x5c99OR6xY0+Mjf14r
14+
6kkXOBpPkrJ990qU40+z406u2HPf2abR4D/DUoe9qw+fElCeiuXFFQIDAQABo4HV
15+
MIHSMB0GA1UdDgQWBBTY3DPInWZxrmQfPHA5w2R+AsbnOjAfBgNVHSMEGDAWgBTY
16+
3DPInWZxrmQfPHA5w2R+AsbnOjAJBgNVHRMEAjAAMAsGA1UdDwQEAwIFoDBKBgNV
17+
HREEQzBBggtleGFtcGxlLmNvbYIPd3d3LmV4YW1wbGUuY29tghBtYWlsLmV4YW1w
18+
bGUuY29tgg9mdHAuZXhhbXBsZS5jb20wLAYJYIZIAYb4QgENBB8WHU9wZW5TU0wg
19+
R2VuZXJhdGVkIENlcnRpZmljYXRlMA0GCSqGSIb3DQEBCwUAA4IBAQDmnzdIu9XV
20+
/Tnn5/mt9E98YEsF/eaXBSKG+f2oZc3n2errKwY5qYqULlu8mhajGWTd5EfFCjbD
21+
lH7nmBdRUAhjzKWntc1G84eaWwHyv+N/5WJrtUfa2A1Ps3Mu9Vz4k2M9HGi/s6KX
22+
IshezlHe3/TyhIT/WC+MZhpFTL73dpuIgHmp2NjjlJqtG25eC6zmonsc2RApJPa8
23+
6J0WY/ISH9OwhDSbI9+TIE8QwdC7draiCKK/oid3Jg9fzaEQW+Pr3/4AmYWH1j8s
24+
iaOVYIXYtt3urQ2Q+qfh34kfRfX5IqAdd1r/fnUjaOLhPNJxGP2KuFaYbdSC9p+n
25+
bfExzQHUvH2n
26+
-----END CERTIFICATE-----

tests/keys/example-com.conf

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# this is copied from http://stackoverflow.com/a/27931596
2+
[ req ]
3+
default_bits = 2048
4+
default_keyfile = server-key.pem
5+
distinguished_name = subject
6+
req_extensions = req_ext
7+
x509_extensions = x509_ext
8+
string_mask = utf8only
9+
10+
# The Subject DN can be formed using X.501 or RFC 4514 (see RFC 4519 for a description).
11+
# It's sort of a mashup. For example, RFC 4514 does not provide emailAddress.
12+
[ subject ]
13+
countryName = Country Name (2 letter code)
14+
countryName_default = US
15+
16+
stateOrProvinceName = State or Province Name (full name)
17+
stateOrProvinceName_default = NY
18+
19+
localityName = Locality Name (eg, city)
20+
localityName_default = New York
21+
22+
organizationName = Organization Name (eg, company)
23+
organizationName_default = Example, LLC
24+
25+
# Use a friendly name here because it's presented to the user. The server's DNS
26+
# names are placed in Subject Alternative Names. Plus, DNS names here are deprecated
27+
# by both IETF and CA/Browser Forums. If you place a DNS name here, then you
28+
# must include the DNS name in the SAN too (otherwise, Chrome and others that
29+
# strictly follow the CA/Browser Baseline Requirements will fail).
30+
commonName = Common Name (e.g. server FQDN or YOUR name)
31+
commonName_default = Example Company
32+
33+
emailAddress = Email Address
34+
emailAddress_default = test@example.com
35+
36+
# Section x509_ext is used when generating a self-signed certificate. I.e., openssl req -x509 ...
37+
[ x509_ext ]
38+
39+
subjectKeyIdentifier = hash
40+
authorityKeyIdentifier = keyid,issuer
41+
42+
# You only need digitalSignature below. *If* you don't allow
43+
# RSA Key transport (i.e., you use ephemeral cipher suites), then
44+
# omit keyEncipherment because that's key transport.
45+
basicConstraints = CA:FALSE
46+
keyUsage = digitalSignature, keyEncipherment
47+
subjectAltName = @alternate_names
48+
nsComment = "OpenSSL Generated Certificate"
49+
50+
# RFC 5280, Section 4.2.1.12 makes EKU optional
51+
# CA/Browser Baseline Requirements, Appendix (B)(3)(G) makes me confused
52+
# In either case, you probably only need serverAuth.
53+
# extendedKeyUsage = serverAuth, clientAuth
54+
55+
# Section req_ext is used when generating a certificate signing request. I.e., openssl req ...
56+
[ req_ext ]
57+
58+
subjectKeyIdentifier = hash
59+
60+
basicConstraints = CA:FALSE
61+
keyUsage = digitalSignature, keyEncipherment
62+
subjectAltName = @alternate_names
63+
nsComment = "OpenSSL Generated Certificate"
64+
65+
# RFC 5280, Section 4.2.1.12 makes EKU optional
66+
# CA/Browser Baseline Requirements, Appendix (B)(3)(G) makes me confused
67+
# In either case, you probably only need serverAuth.
68+
# extendedKeyUsage = serverAuth, clientAuth
69+
70+
[ alternate_names ]
71+
72+
DNS.1 = example.com
73+
DNS.2 = www.example.com
74+
DNS.3 = mail.example.com
75+
DNS.4 = ftp.example.com
76+
77+
# Add these if you need them. But usually you don't want them or
78+
# need them in production. You may need them for development.
79+
# DNS.5 = localhost
80+
# DNS.6 = localhost.localdomain
81+
# DNS.7 = 127.0.0.1
82+
83+
# IPv6 localhost
84+
# DNS.8 = ::1

tests/keys/example-com.gen.README

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
$ openssl req -config example-com.conf \
2+
-new -x509 -sha256 -newkey rsa:2048 -nodes \
3+
-keyout example-com.key.pem \
4+
-days 36500 \
5+
-out example-com.cert.pem
6+
Generating a 2048 bit RSA private key
7+
....+++
8+
.....................+++
9+
writing new private key to 'example-com.key.pem'
10+
-----
11+
You are about to be asked to enter information that will be incorporated
12+
into your certificate request.
13+
What you are about to enter is what is called a Distinguished Name or a DN.
14+
There are quite a few fields but you can leave some blank
15+
For some fields there will be a default value,
16+
If you enter '.', the field will be left blank.
17+
-----
18+
Country Name (2 letter code) [US]:XW
19+
State or Province Name (full name) [NY]:XW
20+
Locality Name (eg, city) [New York]:The Internet
21+
Organization Name (eg, company) [Example, LLC]:Scrapy
22+
Common Name (e.g. server FQDN or YOUR name) [Example Company]:www.example.com
23+
Email Address [test@example.com]:
24+

tests/keys/example-com.key.pem

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
-----BEGIN PRIVATE KEY-----
2+
MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDmvYHPFeK+1Be8
3+
jCtMa6DyXq+O4OF/RYMcFTnIgda9fWtPYxAW5mm7XbSIUY4rswFgEF2YAgaxHlwd
4+
nTdypCQBc4ZDm6Dk7+538ZlRtyDK/xYLBbvUxYtvkI2INaP+9mroJSpGDZCRSmSO
5+
v8NxCsqybrvJp0ouDJ0bzk7Jk7o0G4C6RBwqs2GymT8yznlftFuBg9RtnRsF57hv
6+
cZGcTs8uW0iwej+2WhXKcLRJ3dPW1pjLIbsgl3YtYkQvMrRbD5BRXaqg51mOXx1v
7+
XHlz305HrFjT4yN/XivqSRc4Gk+Ssn33SpTjT7PjTq7Yc9/ZptHgP8NSh72rD58S
8+
UJ6K5cUVAgMBAAECggEAEVxi3vTzmY4Vtx8Ixvg2JOZQ6TwsW0ocoklKjraONLWy
9+
FEgM1txBSlKzmaohO1J9oP+6Owyz+jGhlqzUljCZcO6DRKT6Bx+yXp3z/jz8H6nv
10+
u6aTyl+OrTdAHuaCT1W1F4BsXNb6cKQbSs5M4z1/oMtKH9MRdsOGMqhkLzCJSxA+
11+
E+rRomrP6E9XooLxqpSJooxmX772XPHE7+ZILzRF0viXJ6z0Jd1cOuAQqyIYvmHM
12+
4313kYJdAKYgJNxe8M8mYLeZcGwImAz/pNQ5R+uVyZlW3kXVzvS7B2m+KcW7Olu8
13+
r4Ocpdyh20GStpw1f+tk7PLl+SkwFslK+uI7Wl/ygQKBgQD+QsREu1sv507CnvYC
14+
FZnFryhHUzxMWIX5bvH3YoPbVptqwG5Nj05zIQPCZAEanW4HENCujO9oWbZqLANO
15+
Th5sNOkBJiC4X6+1NIzQIszaZs4nKIyWNLIOcP4p20k3cR0sS8wLSLwcuW2is27N
16+
ACKa8u93X1Gb27V0qUhmEqP3NQKBgQDoUY3HRtOVQnpoi5zbTiSKwlBKEkMAawaW
17+
Q8VSZmrNQZXpcwa2JYN0IeiHnVjctdLul1u9qj5goghTV4XMQ8LSZs0emhvgJxMa
18+
QpsDLTRr0mBtmduOwZW9a8EcbI2NCth/Irsdl892+y8UVoAO2G6Fgr0DhgXWOJEo
19+
RcUUkGHyYQKBgHCkT6NLhYhhZykdl0sxGqDTinqey3XfOetZVWUNhfDkG5JdkgBW
20+
XqDunWW/PCX4XMhQkMLjuSR3qjK7MPO99AhoIFnb9F76NTOIBmInKK4RIX+DnVTm
21+
H4P6Vv9gIL2pJQ18vva40G3BUGrmJ042ox4WRjSSS+tKmMcIQATIeU+JAoGBAIpr
22+
TRZW1Oox2c1Aogzo2kzyyfPYPaOaISqW5pLaAviZA0E9D9qnL1OagHmM/s1CaJNQ
23+
C5FORiw9XsiJdWbnWMUUC1MYb1N29KLI3KNf48P6bQngijjcjuN1uHG/G/fVZnkZ
24+
sHNJaItzzfFIOLSfr/pMk2HuELw6qAJez7YY8MMBAoGAf5sC2S7duDH2KliMSYhd
25+
yByHEVTbf7JdJVd/7kq5MPZauJtYztGTSwyyM+gBv+lxdY1jGu/iNu9xAD2DJlE3
26+
RTPgTIID/PaSmUVotHLq86hbazKPBorx6UWkbGsthEaSF/cTY2eFFFsK/Awoj0yU
27+
ZhraPWu8S27Pcr9HvZvh5tg=
28+
-----END PRIVATE KEY-----

tests/mockserver.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -204,10 +204,10 @@ def __exit__(self, exc_type, exc_value, traceback):
204204
time.sleep(0.2)
205205

206206

207-
def ssl_context_factory():
207+
def ssl_context_factory(keyfile='keys/cert.pem', certfile='keys/cert.pem'):
208208
return ssl.DefaultOpenSSLContextFactory(
209-
os.path.join(os.path.dirname(__file__), 'keys/cert.pem'),
210-
os.path.join(os.path.dirname(__file__), 'keys/cert.pem'),
209+
os.path.join(os.path.dirname(__file__), keyfile),
210+
os.path.join(os.path.dirname(__file__), certfile),
211211
)
212212

213213

tests/test_downloader_handlers.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,10 @@ class HttpTestCase(unittest.TestCase):
119119
scheme = 'http'
120120
download_handler_cls = HTTPDownloadHandler
121121

122+
# only used for HTTPS tests
123+
keyfile = 'keys/cert.pem'
124+
certfile = 'keys/cert.pem'
125+
122126
def setUp(self):
123127
name = self.mktemp()
124128
os.mkdir(name)
@@ -137,7 +141,8 @@ def setUp(self):
137141
self.host = 'localhost'
138142
if self.scheme == 'https':
139143
self.port = reactor.listenSSL(
140-
0, self.wrapper, ssl_context_factory(), interface=self.host)
144+
0, self.wrapper, ssl_context_factory(self.keyfile, self.certfile),
145+
interface=self.host)
141146
else:
142147
self.port = reactor.listenTCP(0, self.wrapper, interface=self.host)
143148
self.portno = self.port.getHost().port
@@ -318,6 +323,18 @@ class Https11TestCase(Http11TestCase):
318323
scheme = 'https'
319324

320325

326+
class Https11WrongHostnameTestCase(Http11TestCase):
327+
scheme = 'https'
328+
329+
# above tests use a server certificate for "localhost",
330+
# and the client connects to "localhost" too.
331+
# here we test that even if the server certificate is for another domain,
332+
# "www.example.com" in this case,
333+
# the tests still pass
334+
keyfile = 'keys/example-com.key.pem'
335+
certfile = 'keys/example-com.cert.pem'
336+
337+
321338
class Http11MockServerTestCase(unittest.TestCase):
322339
"""HTTP 1.1 test case with MockServer"""
323340
if twisted_version < (11, 1, 0):

0 commit comments

Comments
 (0)