Skip to content

Commit 8a5e815

Browse files
authored
Merge pull request ed-donner#216 from filokostas/week1-day2-excerise-with-deepseek-r1
Add DeepSeek exercise notebook for website summarization
2 parents ce12af8 + c5bf054 commit 8a5e815

File tree

1 file changed

+213
-0
lines changed

1 file changed

+213
-0
lines changed
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "bc7d1de3-e2ac-46ff-a302-3b4ba38c4c90",
6+
"metadata": {},
7+
"source": [
8+
"## Also trying the amazing reasoning model DeepSeek\n",
9+
"\n",
10+
"Here we use the version of DeepSeek-reasoner that's been distilled to 1.5B. \n",
11+
"This is actually a 1.5B variant of Qwen that has been fine-tuned using synthetic data generated by DeepSeek R1.\n",
12+
"\n",
13+
"Other sizes of DeepSeek are [here](https://ollama.com/library/deepseek-r1) all the way up to the full 671B parameter version, which would use up 404GB of your drive and is far too large for most!"
14+
]
15+
},
16+
{
17+
"cell_type": "code",
18+
"execution_count": null,
19+
"id": "cf9eb44e-fe5b-47aa-b719-0bb63669ab3d",
20+
"metadata": {},
21+
"outputs": [],
22+
"source": [
23+
"!ollama pull deepseek-r1:1.5b"
24+
]
25+
},
26+
{
27+
"cell_type": "code",
28+
"execution_count": null,
29+
"id": "4bdcd35a",
30+
"metadata": {},
31+
"outputs": [],
32+
"source": [
33+
"!ollama pull deepseek-r1:8b"
34+
]
35+
},
36+
{
37+
"cell_type": "markdown",
38+
"id": "1622d9bb-5c68-4d4e-9ca4-b492c751f898",
39+
"metadata": {},
40+
"source": [
41+
"# NOW the exercise for you\n",
42+
"\n",
43+
"Take the code from day1 and incorporate it here, to build a website summarizer that uses Llama 3.2 running locally instead of OpenAI; use either of the above approaches. (This solution runs DeepSeek-R1 locally through Ollama in place of Llama 3.2.)"
44+
]
45+
},
46+
{
47+
"cell_type": "code",
48+
"execution_count": null,
49+
"id": "1c106420",
50+
"metadata": {},
51+
"outputs": [],
52+
"source": [
53+
"# imports\n",
54+
"\n",
55+
"import requests\n",
56+
"import ollama\n",
57+
"from bs4 import BeautifulSoup\n",
58+
"from IPython.display import Markdown, display"
59+
]
60+
},
61+
{
62+
"cell_type": "code",
63+
"execution_count": null,
64+
"id": "22d62f00",
65+
"metadata": {},
66+
"outputs": [],
67+
"source": [
68+
"# Constants\n",
69+
"\n",
70+
"OLLAMA_API = \"http://localhost:11434/api/chat\"\n",
71+
"HEADERS = {\"Content-Type\": \"application/json\"}\n",
72+
"MODEL = \"deepseek-r1:8b\""
73+
]
74+
},
75+
{
76+
"cell_type": "code",
77+
"execution_count": null,
78+
"id": "6de38216-6d1c-48c4-877b-86d403f4e0f8",
79+
"metadata": {},
80+
"outputs": [],
81+
"source": [
82+
"# A class to represent a Webpage\n",
83+
"# If you're not familiar with Classes, check out the \"Intermediate Python\" notebook\n",
84+
"\n",
85+
"# Some websites need you to use proper headers when fetching them:\n",
86+
"headers = {\n",
87+
" \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\"\n",
88+
"}\n",
89+
"\n",
90+
"class Website:\n",
91+
"\n",
92+
" def __init__(self, url):\n",
93+
" \"\"\"\n",
94+
" Create this Website object from the given url using the BeautifulSoup library\n",
95+
" \"\"\"\n",
96+
" self.url = url\n",
97+
" response = requests.get(url, headers=headers)\n",
98+
" soup = BeautifulSoup(response.content, 'html.parser')\n",
99+
" self.title = soup.title.string if soup.title else \"No title found\"\n",
100+
" for irrelevant in soup.body([\"script\", \"style\", \"img\", \"input\"]):\n",
101+
" irrelevant.decompose()\n",
102+
" self.text = soup.body.get_text(separator=\"\\n\", strip=True)"
103+
]
104+
},
105+
{
106+
"cell_type": "code",
107+
"execution_count": null,
108+
"id": "4449b7dc",
109+
"metadata": {},
110+
"outputs": [],
111+
"source": [
112+
"# Define our system prompt - you can experiment with this later, changing the last sentence to 'Respond in markdown in Spanish.'\n",
113+
"\n",
114+
"system_prompt = \"You are an assistant that analyzes the contents of a website \\\n",
115+
"and provides a short summary, ignoring text that might be navigation related. \\\n",
116+
"Respond in markdown.\""
117+
]
118+
},
119+
{
120+
"cell_type": "code",
121+
"execution_count": null,
122+
"id": "daca9448",
123+
"metadata": {},
124+
"outputs": [],
125+
"source": [
126+
"def user_prompt_for(website):\n",
127+
" user_prompt = f\"You are looking at a website titled {website.title}\"\n",
128+
" user_prompt += \"\\nThe contents of this website is as follows; \\\n",
129+
"please provide a short summary of this website in markdown. \\\n",
130+
"If it includes news or announcements, then summarize these too.\\n\\n\"\n",
131+
" user_prompt += website.text\n",
132+
" return user_prompt"
133+
]
134+
},
135+
{
136+
"cell_type": "code",
137+
"execution_count": null,
138+
"id": "0ec9d5d2",
139+
"metadata": {},
140+
"outputs": [],
141+
"source": [
142+
"# See how this function creates exactly the format above\n",
143+
"\n",
144+
"def messages_for(website):\n",
145+
" return [\n",
146+
" {\"role\": \"system\", \"content\": system_prompt},\n",
147+
" {\"role\": \"user\", \"content\": user_prompt_for(website)}\n",
148+
" ]"
149+
]
150+
},
151+
{
152+
"cell_type": "code",
153+
"execution_count": null,
154+
"id": "6e1ab04a",
155+
"metadata": {},
156+
"outputs": [],
157+
"source": [
158+
"# And now: call the local Ollama chat API (instead of the OpenAI API). You will get very familiar with this!\n",
159+
"\n",
160+
"def summarize(url):\n",
161+
" website = Website(url)\n",
162+
" response = ollama.chat(\n",
163+
" model = MODEL,\n",
164+
" messages = messages_for(website)\n",
165+
" )\n",
166+
" return response['message']['content']"
167+
]
168+
},
169+
{
170+
"cell_type": "code",
171+
"execution_count": null,
172+
"id": "0d3b5628",
173+
"metadata": {},
174+
"outputs": [],
175+
"source": [
176+
"def display_summary(url):\n",
177+
" summary = summarize(url)\n",
178+
" display(Markdown(summary))"
179+
]
180+
},
181+
{
182+
"cell_type": "code",
183+
"execution_count": null,
184+
"id": "938e5633",
185+
"metadata": {},
186+
"outputs": [],
187+
"source": [
188+
"display_summary(\"https://edwarddonner.com\")"
189+
]
190+
}
191+
],
192+
"metadata": {
193+
"kernelspec": {
194+
"display_name": "llms",
195+
"language": "python",
196+
"name": "python3"
197+
},
198+
"language_info": {
199+
"codemirror_mode": {
200+
"name": "ipython",
201+
"version": 3
202+
},
203+
"file_extension": ".py",
204+
"mimetype": "text/x-python",
205+
"name": "python",
206+
"nbconvert_exporter": "python",
207+
"pygments_lexer": "ipython3",
208+
"version": "3.11.11"
209+
}
210+
},
211+
"nbformat": 4,
212+
"nbformat_minor": 5
213+
}

0 commit comments

Comments
 (0)