Skip to content

Commit be1875f

Browse files
committed
Updated web scraping demo - wifi_finance.py
1 parent e9d6b6c commit be1875f

File tree

4 files changed

+139
-81
lines changed

4 files changed

+139
-81
lines changed

PyDOS.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def PyDOS():
7373
global envVars
7474
if "envVars" not in globals().keys():
7575
envVars = {}
76-
_VER = "1.46"
76+
_VER = "1.47"
7777
prmpVals = ['>','(',')','&','|','\x1b','\b','<','=',' ',_VER,'\n','$','']
7878

7979
print("Starting Py-DOS...")

getdate.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def getdate(passedIn=""):
2929

3030
try:
3131
print(" Using http worldtimeapi.org...",end="")
32+
Pydos_wifi.timeout = 1000
3233
response = Pydos_wifi.get("http://worldtimeapi.org/api/ip",None,True)
3334
time_data = Pydos_wifi.json()
3435

lib/pydos_wifi.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
PyDOS_wifi_VER = "1.40"
1+
PyDOS_wifi_VER = "1.47"
22

33
import os
44
import time
@@ -214,6 +214,10 @@ def get(self,text_url,headers=None,getJSON=False):
214214

215215
return self.response
216216

217+
def post(self,text_url,data):
218+
self.response = self._requests.post(text_url,data=data)
219+
return self.response
220+
217221
def json(self):
218222
retVal = None
219223
if implementation.name.upper() == 'CIRCUITPYTHON':

wifi_finance.py

Lines changed: 132 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from sys import implementation
55
from os import uname
66
from pydos_wifi import Pydos_wifi
7+
import time
78

89
def wifi_finance(symbol):
910
try:
@@ -12,20 +13,29 @@ def wifi_finance(symbol):
1213
_scrWidth = 80
1314

1415
if not symbol:
15-
symbol = "IXIC"
16+
symbol = ".IXIC:INDEXNASDAQ"
1617
else:
1718
symbol = symbol.upper()
18-
prt_sym = symbol
19-
srch_sym = symbol
19+
prt_sym = symbol[:symbol.find(':')]
20+
srch_sym = symbol[:symbol.find(':')]
21+
search_attempts = 5000
2022

2123
# Get wifi details and more from a .env file
2224
if Pydos_wifi.getenv('CIRCUITPY_WIFI_SSID') is None:
2325
raise Exception("WiFi secrets are kept in settings.toml, please add them there by using setenv.py!")
2426

2527
print("Connecting to %s" % Pydos_wifi.getenv('CIRCUITPY_WIFI_SSID'))
2628

27-
if not Pydos_wifi.connect(Pydos_wifi.getenv('CIRCUITPY_WIFI_SSID'), Pydos_wifi.getenv('CIRCUITPY_WIFI_PASSWORD')):
28-
raise Exception("Unable to connect to WiFi!")
29+
res = False
30+
for i in range(2):
31+
try:
32+
res = Pydos_wifi.connect(Pydos_wifi.getenv('CIRCUITPY_WIFI_SSID'), Pydos_wifi.getenv('CIRCUITPY_WIFI_PASSWORD'))
33+
break
34+
except:
35+
print('Retrying....')
36+
if not res:
37+
if not Pydos_wifi.connect(Pydos_wifi.getenv('CIRCUITPY_WIFI_SSID'), Pydos_wifi.getenv('CIRCUITPY_WIFI_PASSWORD')):
38+
raise Exception("Unable to connect to WiFi!")
2939

3040
print("My IP address is", Pydos_wifi.ipaddress)
3141

@@ -36,16 +46,24 @@ def wifi_finance(symbol):
3646
#search_string = 'data-symbol="^IXIC" data-field="regularMarketChangePercent"'
3747
#TEXT_URL = "https://www.moneycontrol.com/us-markets"
3848
#search_string = '<!-- -->Nasdaq<!-- -->'
49+
#TEXT_URL = f"https://www.google.com/search?q={symbol.replace('&','%26')}+stock+price"
50+
#search_string = symbol
51+
#Id_Symbol = True
52+
#price_ident = '%)'
53+
#window_depth=4
3954

40-
TEXT_URL = f"https://www.google.com/search?q={symbol.replace('&','%26')}+stock+price"
55+
TEXT_URL = f"https://www.google.com/finance/quote/{symbol.replace('&','%26')}"
4156
search_string = symbol
57+
Id_Symbol = False
58+
price_ident = 'data-last-price'
59+
window_depth = 5
4260

4361
#headers = {"user-agent": "RetiredWizard@"+implementation.name.lower()+uname()[2]}
4462

4563
print("Fetching text from %s" % TEXT_URL)
4664
response = Pydos_wifi.get(TEXT_URL)
4765
response_window = []
48-
for _ in range(4):
66+
for _ in range(window_depth):
4967
response_window.append(Pydos_wifi.next(256))
5068
if len(response_window[-1]) != 256:
5169
break
@@ -61,58 +79,65 @@ def wifi_finance(symbol):
6179
print(pline)
6280
print("-" * _scrWidth)
6381

64-
print("Identifying symbol",end="")
65-
name_loc = -1
66-
iKount = 0
67-
while name_loc == -1 and iKount<800:
68-
iKount +=1
69-
if iKount % 10 == 0:
70-
print(".",end="")
71-
72-
found_window = str(b''.join(response_window))
73-
74-
name_loc = found_window.find(' Inc. is')
75-
if name_loc == -1:
76-
name_loc = found_window.find(' Inc., commonly')
77-
if name_loc == -1:
78-
name_loc = found_window.find(' is a stock market ')
79-
if name_loc != -1:
80-
if found_window[:name_loc].rfind('or simply the ') != -1:
81-
srch_sym = found_window[found_window[:name_loc].rfind('or simply the ')+14:name_loc]
82-
elif found_window[:name_loc].rfind('>') != -1:
83-
srch_sym = found_window[found_window[:name_loc].rfind('>')+1:name_loc]
84-
srch_sym = srch_sym.replace(',','')
85-
prt_sym = srch_sym.replace('&amp;','&')
86-
prt_sym = prt_sym.replace('amp;','')
87-
if srch_sym[0:4].upper() == 'THE ':
88-
srch_sym = srch_sym[4:]
89-
print(f'* {search_string} * {srch_sym} * {prt_sym}',end="")
90-
91-
if iKount<800:
92-
for i in range(3):
93-
response_window[i] = response_window[i+1]
94-
try:
95-
response_window[3] = Pydos_wifi.next(256)
96-
if len(response_window[3]) != 256:
97-
print('X',end="")
98-
iKount=800
99-
except:
100-
iKount=800
101-
print()
102-
103-
print("Locating price data",end="")
104-
response.close()
105-
response = Pydos_wifi.get(TEXT_URL)
106-
response_window = []
10782
iKount = 0
108-
for _ in range(4):
109-
response_window.append(Pydos_wifi.next(256))
110-
if len(response_window[-1]) != 256:
111-
iKount = 799
112-
break
83+
if Id_Symbol:
84+
print("Identifying symbol",end="")
85+
name_loc = -1
86+
while name_loc == -1 and iKount<search_attempts:
87+
iKount +=1
88+
if iKount % 10 == 0:
89+
print(".",end="")
90+
91+
found_window = str(b''.join(response_window))
92+
93+
name_loc = found_window.find(' Inc. is')
94+
if name_loc == -1:
95+
name_loc = found_window.find(' Inc., commonly')
96+
if name_loc == -1:
97+
name_loc = found_window.find(' is a stock market ')
98+
if name_loc == -1:
99+
name_strt = found_window.find('Company Name')
100+
if name_strt != -1:
101+
print(f'{name_strt} {found_window[:name_strt]}')
102+
name_loc = found_window[:name_start].find('<')+name_start+1
103+
if name_loc != -1:
104+
if found_window[:name_loc].rfind('or simply the ') != -1:
105+
srch_sym = found_window[found_window[:name_loc].rfind('or simply the ')+14:name_loc]
106+
elif found_window[:name_loc].rfind('>') != -1:
107+
srch_sym = found_window[found_window[:name_loc].rfind('>')+1:name_loc]
108+
srch_sym = srch_sym.replace(',','')
109+
prt_sym = srch_sym.replace('&amp;','&')
110+
prt_sym = prt_sym.replace('amp;','')
111+
if srch_sym[0:4].upper() == 'THE ':
112+
srch_sym = srch_sym[4:]
113+
print(f'* {search_string} * {srch_sym} * {prt_sym}',end="")
114+
115+
if iKount<search_attempts:
116+
for i in range(window_depth-1):
117+
response_window[i] = response_window[i+1]
118+
try:
119+
response_window[window_depth-1] = Pydos_wifi.next(256)
120+
if len(response_window[window_depth-1]) != 256:
121+
print('X',end="")
122+
iKount=search_attempts
123+
except:
124+
iKount=search_attempts
125+
print()
126+
127+
response.close()
128+
response = Pydos_wifi.get(TEXT_URL)
129+
response_window = []
130+
iKount = 0
131+
for _ in range(window_depth):
132+
response_window.append(Pydos_wifi.next(256))
133+
if len(response_window[-1]) != 256:
134+
iKount = search_attempts-1
135+
break
136+
137+
print(f"Locating price data for {search_string} * {srch_sym} * {prt_sym}",end="")
113138

114139
nasdaq = -1
115-
while nasdaq == -1 and iKount<800:
140+
while nasdaq == -1 and iKount<search_attempts:
116141
iKount +=1
117142
if iKount % 10 == 0:
118143
print(".",end="")
@@ -122,32 +147,32 @@ def wifi_finance(symbol):
122147
if nasdaq == -1:
123148
nasdaq = found_window.upper().find(srch_sym.upper())
124149
if nasdaq == -1:
125-
for i in range(3):
150+
for i in range(window_depth-1):
126151
response_window[i] = response_window[i+1]
127152
try:
128-
response_window[3] = Pydos_wifi.next(256)
129-
if len(response_window[3]) != 256:
153+
response_window[window_depth-1] = Pydos_wifi.next(256)
154+
if len(response_window[window_depth-1]) != 256:
130155
print('X',end="")
131-
iKount=800
156+
iKount=search_attempts
132157
except:
133158
print('X',end="")
134-
iKount=800
159+
iKount=search_attempts
135160
else:
136-
if iKount < 800:
137-
for _ in range(2):
161+
if iKount < search_attempts:
162+
for _ in range(window_depth-2):
138163
response_window.append(Pydos_wifi.next(256))
139164
if len(response_window[-1]) != 256:
140165
print('X',end="")
141-
iKount = 800
166+
iKount = search_attempts
142167
break
143168

144169
found_window = str(b''.join(response_window))
145-
pct = found_window[nasdaq:].find('%)')
146-
if pct == -1 and iKount<800:
147-
response_window[0] = response_window[2]
148-
response_window[1] = response_window[3]
149-
response_window[3] = response_window.pop()
150-
response_window[2] = response_window.pop()
170+
pct = found_window[nasdaq:].find(price_ident)
171+
if pct == -1 and iKount<search_attempts:
172+
for i in range(2):
173+
response_window[i] = response_window[i+(window_depth-2)]
174+
for i in range(window_depth-2):
175+
response_window[(window_depth-1)-i] = response_window.pop()
151176
nasdaq = -1
152177

153178
print("*\n")
@@ -156,15 +181,28 @@ def wifi_finance(symbol):
156181
if nasdaq == -1:
157182
nasdaq = str(b''.join(response_window)).upper().find(srch_sym.upper())
158183

159-
pct = found_window.find('%)')
160-
pctst = found_window[:pct].rfind('>')+1
161-
pctend = pct + found_window[pct:].find('<')
162-
#print("Debug: %s\n" % found_window[nasdaq:pctend])
163-
pricest = found_window[:pctst-2].rfind('>')+1
164-
priceend = pricest + found_window[pricest:].find('<')
165-
166184
if nasdaq != -1:
167-
print(f'{prt_sym}: {found_window[pricest:priceend]} {found_window[pctst:pctend].replace("<","")}\n')
185+
# Final scrape logic
186+
# Google Search
187+
# pct = found_window.find(price_ident)
188+
# pctst = found_window[:pct].rfind('>')+1
189+
# pctend = pct + found_window[pct:].find('<')
190+
# print("Debug: %s\n" % found_window[nasdaq:pctend])
191+
# pricest = found_window[:pctst-2].rfind('>')+1
192+
# priceend = pricest + found_window[pricest:].find('<')
193+
194+
# Google finance
195+
pricest = found_window.find(price_ident)+len(price_ident)+2
196+
priceend = pricest + found_window[pricest:].find('"')
197+
pctst = -1
198+
199+
#print(f'Debug: start loc: {pricest} end loc: {priceend}\n{found_window[nasdaq:]}')
200+
201+
print(f'{prt_sym}: {found_window[pricest:priceend]}',end="")
202+
if pctst != -1:
203+
print(f' {found_window[pctst:pctend].replace("<","")}\n')
204+
else:
205+
print('\n')
168206
else:
169207
print(f"{prt_sym} symbol not found\n")
170208

@@ -176,4 +214,19 @@ def wifi_finance(symbol):
176214
wifi_finance(passedIn)
177215
else:
178216
print('Enter "wifi_finance.wifi_finance("symbol")" in the REPL or PEXEC command to run.')
179-
print(' A null symbol ("") will default to the Nasdaq Index')
217+
print(' A null symbol ("") will default to the Nasdaq Index')
218+
219+
print('\nDemonstration Web "scraping" program. The web sites being used in the')
220+
print('demonstration will often change and break the algorithm used to locate a')
221+
print('stock price. When that happens this program needs to be updated to work')
222+
print('with the new web site or find a new one.\n')
223+
print('The current web site being used is: https://www.google.com/finance\n')
224+
print('With this site the symbol passed to wifi_finance must be formatted as')
225+
print('follows: symbol:exchange. So for Apple Inc, you would enter AAPL:NASDAQ')
226+
print('or for AT&T enter T:NYSE. To retrieve the price of an index format the')
227+
print('symbol as follows: .indexsymbol:INDEXsymbol. For example Nasdaq:')
228+
print('.IXIC:INDEXNASDAQ, Dow Jones: .DJI:INDEXDJX, S&P 500: .INX:INDEXSP. The')
229+
print('index symbols can be retrieved by going to the www.google.com/finance page')
230+
print("and selecting the index you're interested in. The formatted symbol will be")
231+
print('updated at the end of the URL (not the symbol displayed in the search box).')
232+

0 commit comments

Comments
 (0)