Skip to content

Commit a97889f

Browse files
authored
Merge pull request ed-donner#224 from dlamotta/main
Disabling SSL cert validation, and suppressing warnings. Fixes issue …
2 parents b422fac + 9d01032 commit a97889f

File tree

1 file changed

+81
-0
lines changed

1 file changed

+81
-0
lines changed
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "a98030af-fcd1-4d63-a36e-38ba053498fa",
6+
"metadata": {},
7+
"source": [
8+
"# A Small Tweak to Week1-Day5\n",
9+
"\n",
10+
"If you have network restrictions (such as using a custom DNS provider, or firewall rules at work), you can disable SSL cert verification.\n",
11+
"Once you do that and start executing your code, the output will be riddled with warnings. Thankfully, you can suppress those warnings, too.\n",
12+
"\n",
13+
"See the 2 lines added to the init method, below."
14+
]
15+
},
16+
{
17+
"cell_type": "code",
18+
"execution_count": 22,
19+
"id": "106dd65e-90af-4ca8-86b6-23a41840645b",
20+
"metadata": {},
21+
"outputs": [],
22+
"source": [
23+
# Browser-like request headers: some sites refuse or alter responses for
# clients that don't present a realistic User-Agent string.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}
29+
"\n",
30+
class Website:
    """
    A utility class to represent a Website that we have scraped, now with links.

    Attributes set by __init__:
        url:   the URL that was fetched
        body:  raw response bytes from the HTTP request
        title: the page's <title> text, or "No title found" when absent
        text:  visible body text with script/style/img/input elements removed
               (empty string when the page has no <body>)
        links: every non-empty href value found in the page's anchor tags
    """

    def __init__(self, url, verify=False):
        """
        Fetch and parse the page at *url*.

        verify: pass True to enable SSL certificate validation. Defaults to
        False (matching the original behavior) for environments with
        DNS/firewall restrictions -- use with caution outside a dev
        environment, since disabling validation permits MITM attacks.
        """
        self.url = url

        # Only silence urllib3's InsecureRequestWarning spam when we are
        # actually skipping certificate validation; with verify=True the
        # warnings stay enabled, as they should.
        if not verify:
            requests.packages.urllib3.disable_warnings()
        response = requests.get(url, headers=headers, verify=verify)

        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            # Drop elements that contribute no readable text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]
57+
]
58+
}
59+
],
60+
"metadata": {
61+
"kernelspec": {
62+
"display_name": "Python 3 (ipykernel)",
63+
"language": "python",
64+
"name": "python3"
65+
},
66+
"language_info": {
67+
"codemirror_mode": {
68+
"name": "ipython",
69+
"version": 3
70+
},
71+
"file_extension": ".py",
72+
"mimetype": "text/x-python",
73+
"name": "python",
74+
"nbconvert_exporter": "python",
75+
"pygments_lexer": "ipython3",
76+
"version": "3.11.11"
77+
}
78+
},
79+
"nbformat": 4,
80+
"nbformat_minor": 5
81+
}

0 commit comments

Comments
 (0)