Skip to content

Commit 3514985

Browse files
committed
Add stock crawler that writes results into a CSV file
1 parent 13e8e4c commit 3514985

File tree

8 files changed

+106
-8
lines changed

8 files changed

+106
-8
lines changed
File renamed without changes.

by-bs4/stock/601857.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
日期,开盘价,最高价,最低价,收盘价,涨跌额,涨跌幅,成交量,成交金额,振幅,换手率
File renamed without changes.

by-bs4/stock/stock.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# coding:utf-8
2+
import requests
3+
from bs4 import BeautifulSoup
4+
import os
5+
import time
6+
import csv
7+
8+
headers = {
9+
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.1708.400 QQBrowser/9.5.9635.400'
10+
}
11+
12+
url = 'http://quotes.money.163.com/trade/lsjysj_601857.html?year=2016&season=4'
13+
14+
15+
# parameter
# shareCode/year/month : num ,
def sharesCrawl(shareCode, year, month):
    """Crawl one season of daily trading data for *shareCode* from
    quotes.money.163.com and dump it to
    ./<code><stock name>/Y<year>S<season>.txt (tab-separated cells,
    one line per trading day).
    """
    shareCodeStr = str(shareCode)
    yearStr = str(year)
    monthStr = str(month)
    url = ('http://quotes.money.163.com/trade/lsjysj_' + shareCodeStr +
           '.html?year=' + yearStr + '&season=' + monthStr)

    data = requests.get(url, headers=headers)
    soup = BeautifulSoup(data.text, 'lxml')
    # stock display name (e.g. the company name) -- used in the folder name
    title = soup.select('h1.name > a')[0].get_text()

    cells = soup.select('div.inner_box > table > tr > td')

    folder = './' + shareCodeStr + title
    if not os.path.exists(folder):
        # create the share folder on first run
        os.mkdir(folder)

    f = open(folder + '/Y' + yearStr + 'S' + monthStr + '.txt', 'wb')
    try:
        for index, value in enumerate(cells):
            # the source table has 11 cells per row; the 11th ends the line
            sep = '\n' if index % 11 == 10 else '\t'
            # encode explicitly: cell text is unicode, the file is binary --
            # writing raw unicode would raise UnicodeEncodeError on non-ASCII
            f.write((value.get_text() + sep).encode('utf-8'))
    finally:
        # close even if a write fails, so the handle never leaks
        f.close()

# sharesCrawl(600019,'2016','2')
41+
42+
# sharesCrawl(600019,'2016','2')
43+
44+
45+
46+
def sharesCrawl2(shareCode, year, month):
    """Crawl one season of daily trading rows for *shareCode* and return
    them as one string: cells separated by tabs, trading days separated
    by newlines (the source table has 11 cells per row).
    """
    shareCodeStr = str(shareCode)
    yearStr = str(year)
    monthStr = str(month)
    url = ('http://quotes.money.163.com/trade/lsjysj_' + shareCodeStr +
           '.html?year=' + yearStr + '&season=' + monthStr)
    data = requests.get(url, headers=headers)
    soup = BeautifulSoup(data.text, 'lxml')
    cells = soup.select('div.inner_box > table > tr > td')
    # collect pieces and join once -- repeated += on a string is quadratic
    parts = []
    for index, value in enumerate(cells):
        # every 11th cell ends a table row -> newline, otherwise tab
        sep = '\n' if index % 11 == 10 else '\t'
        parts.append(value.get_text() + sep)
    return ''.join(parts)

# print sharesCrawl2(600019,2016,2)
61+
62+
# print sharesCrawl2(600019,2016,2)
63+
64+
65+
66+
def writeCSV(shareCode, beginYear, endYear):
    """Create ./<shareCode>.csv with a header row plus (for now)
    placeholder rows.

    NOTE(review): the year loop that would feed real data via
    sharesCrawl2 is still commented out upstream, so beginYear/endYear
    are currently unused and only the numbers 1-4 are written.
    """
    title = str(shareCode)

    # text mode with newline='' lets the csv module control line endings
    # itself (binary mode breaks csv.writer under Python 3)
    csvFile = open('./' + title + '.csv', 'w', newline='', encoding='utf-8')

    writer = csv.writer(csvFile)
    writer.writerow(('日期','开盘价','最高价','最低价','收盘价','涨跌额','涨跌幅','成交量','成交金额','振幅','换手率'))
    # for i in range(beginYear,endYear+1):
    #     print i
    #     # time.sleep(5)
    try:
        for j in range(1, 5):
            # wrap in a list so the number is ONE cell; passing the bare
            # string would split a multi-digit value into one cell per char.
            # (No per-row sleep: there is no network request to throttle
            # while the crawl loop above stays commented out.)
            writer.writerow([str(j)])
    except (IOError, OSError, csv.Error):
        # narrow except: a bare `except:` would also swallow
        # KeyboardInterrupt/SystemExit and hide real bugs
        print('chengxuchucuo')
    finally:
        csvFile.close()
84+
85+
if __name__ == '__main__':
    # guard the crawl so importing this module has no side effects
    writeCSV(601857, 2008, 2016)
86+
87+
88+
89+
90+
"""
91+
body > div.area > div.inner_box > table > tbody > tr:nth-child(1) > td:nth-child(1)
92+
body > div.area > div.header > div.stock_info > table > tbody > tr > td.col_1 > h1 > a
93+
"""

by-bs4/shares/zhongshiyou.py renamed to by-bs4/stock/zhongshiyou.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# coding:utf-8
12
import requests
23
from bs4 import BeautifulSoup
34
import os
@@ -63,7 +64,6 @@ def sharesCrawl2(shareCode,year,month):
6364

6465

6566
def createUrl(shareCode,beginYear,endYear):
66-
6767
title = str(shareCode)
6868

6969
if os.path.exists('./'+title) == False:
@@ -72,13 +72,17 @@ def createUrl(shareCode,beginYear,endYear):
7272

7373
f = open('./' + title + '.txt', 'wb')
7474

75-
for i in range(beginYear,endYear+1):
76-
print i
77-
# time.sleep(5)
78-
for j in range(1,5):
79-
f.write(sharesCrawl2(shareCode,i,j) + '\n ------- '+str(i)+'/'+str(j)+'----------------\n')
80-
time.sleep(5)
81-
f.close()
75+
try:
76+
for i in range(beginYear,endYear+1):
77+
print i
78+
# time.sleep(5)
79+
for j in range(1,5):
80+
f.write(sharesCrawl2(shareCode,i,j) + '\n ------- '+str(i)+'/'+str(j)+'----------------\n')
81+
time.sleep(5)
82+
except:
83+
print '没有进入循环'
84+
finally:
85+
f.close()
8286

8387
createUrl(601857,2008,2016)
8488

0 commit comments

Comments
 (0)