Skip to content

Commit cf78964

Browse files
committed
tc
1 parent f0eacf4 commit cf78964

File tree

2 files changed

+124
-0
lines changed

2 files changed

+124
-0
lines changed

58tongcheng/tc_itemInfo.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
# coding:utf-8
2+
from bs4 import BeautifulSoup
3+
import requests
4+
import time
5+
import pymongo
6+
7+
# MongoDB handles shared by the scraper functions in this module:
# a local server, the 'ceshi' database, and its two collections.
client = pymongo.MongoClient(host='localhost', port=27017)
ceshi = client.get_database('ceshi')
# Collection that receives one {'url', 'title'} document per listing link.
url_list = ceshi.get_collection('url_list4')
# Collection that receives one document per parsed item detail page.
item_info = ceshi.get_collection('item_info4')
11+
12+
# Collect the individual item URLs listed under each channel
13+
def get_links_from(channel, pages, who_sells=0):
    """Crawl one listing page of a channel and process every item on it.

    The listing URL is built as ``<channel><who_sells>/pn<pages>/``. Each
    ``td.t a.t`` anchor on that page is stored in the ``url_list``
    collection as ``{'url': ..., 'title': ...}`` and echoed to stdout.
    Every link except the ``jump.zhineng.58.com`` redirect is then handed
    to ``get_item_info``.

    Args:
        channel: Channel base URL. It is concatenated directly with the
            next path segment, so it is expected to end with ``/``.
        pages: Page number placed after ``pn`` in the URL.
        who_sells: Path segment inserted after the channel (default 0);
            presumably selects the seller type -- confirm against the site.

    Returns:
        None. Results are written to MongoDB and printed.
    """
    list_view = '{}{}/pn{}/'.format(channel, who_sells, pages)
    wb_data = requests.get(list_view)
    # Pause after every listing request, as the original code did.
    time.sleep(1)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    # A page without any <td class="t"> cell has no items to collect.
    if not soup.find('td', 't'):
        return

    for link in soup.select('td.t a.t'):
        href = link.get('href')
        if not href:
            # Anchor without an href: nothing to store or follow.
            continue
        # Drop the query string, e.g.
        # .../detail/774777200171630596z.shtml?fullCate=5%2C36&from=pc
        # becomes .../detail/774777200171630596z.shtml
        item_link = href.split('?')[0]
        item_title = link.get_text()
        url_list.insert_one({'url': item_link, 'title': item_title})
        # Single pre-formatted argument prints the same text on Python 2 and 3.
        print(u'{} {}'.format(item_link, item_title))
        # The jump redirect is recorded above but is not an item page.
        if item_link != 'http://jump.zhineng.58.com/jump':
            get_item_info(item_link)
30+
31+
# Collect the detailed information from each item page
32+
def get_item_info(url):
    """Download one item detail page and store its fields in ``item_info``.

    A page is treated as removed when the href of its first
    ``<link type="text/css">`` contains a ``404`` path segment; such pages
    are skipped. Otherwise the title, user, price and area are extracted,
    inserted into the ``item_info`` collection together with ``url``, and
    printed.

    Fields that cannot be found on the page are stored as ``None`` instead
    of raising, so one malformed page does not abort a crawl.

    Args:
        url: Address of the item detail page.

    Returns:
        None. Results are written to MongoDB and printed.
    """
    wb_data = requests.get(url)
    soup = BeautifulSoup(wb_data.text, 'lxml')

    stylesheet = soup.find('link', type="text/css")
    stylesheet_href = stylesheet.get('href') if stylesheet else None
    if stylesheet_href and '404' in stylesheet_href.split('/'):
        # Listing no longer exists.
        return

    # The page title is split as "<title>_<user>的闲置物品-...".
    title_soup = soup.title.text if soup.title else u''
    title_parts = title_soup.split(u'_')
    title = title_parts[0]
    if len(title_parts) > 1:
        # Unicode separators: on Python 2 a byte-string separator would be
        # implicitly ASCII-decoded and raise UnicodeDecodeError.
        user = title_parts[1].split(u'-')[0].split(u'的闲置物品')[0]
    else:
        user = None

    price_nodes = soup.select('span.price_now i')
    price = price_nodes[0].text if price_nodes else None
    # 'palce_li' is kept exactly as in the original selector; presumably it
    # matches the site's own class name -- confirm before "correcting" it.
    area_nodes = soup.select('.palce_li span i')
    area = area_nodes[0].text if area_nodes else None

    item_info.insert_one({'title': title, 'price': price, 'area': area, 'url': url, 'user': user})
    # Parenthesised single argument prints the dict on both Python 2 and 3.
    print({'title': title, 'price': price, 'area': area, 'user': user})

58tongcheng/tc_urlLists.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# coding:utf-8
2+
from bs4 import BeautifulSoup
3+
import requests
4+
5+
# Page passed to get_channel_urls() by the module-level call in this file.
start_url = 'http://bj.58.com/sale.shtml'
6+
# Site root that get_channel_urls() prepends to each menu link's href.
url_host = 'http://bj.58.com'
7+
8+
def get_channel_urls(url):
    """Return the absolute channel URLs found in the menu of ``url``.

    The page is downloaded and every anchor matching
    ``ul.ym-submnu > li > b > a`` is turned into an absolute URL by
    prefixing its href with the module-level ``url_host``.

    Args:
        url: Address of the page that contains the channel menu.

    Returns:
        A list of URL strings in document order. Anchors without an href
        are skipped. (Earlier versions built each URL and discarded it,
        returning None; callers that ignore the result are unaffected.)
    """
    wb_data = requests.get(url)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    page_urls = []
    for link in soup.select('ul.ym-submnu > li > b > a'):
        href = link.get('href')
        if href:
            page_urls.append(url_host + href)
    return page_urls
15+
16+
# Runs at module import: fetches start_url once; the call's result is not used here.
get_channel_urls(start_url)
17+
18+
# Channel listing URLs, one per line, as a whitespace-separated string
# (presumably consumed with channel_list.split() -- confirm against the
# caller). Repeated entries (chengren, tiaozao) were collapsed to their
# first occurrence so that no channel is processed more than once.
channel_list = '''
http://bj.58.com/shouji/
http://bj.58.com/tongxunyw/
http://bj.58.com/danche/
http://bj.58.com/fzixingche/
http://bj.58.com/diandongche/
http://bj.58.com/sanlunche/
http://bj.58.com/peijianzhuangbei/
http://bj.58.com/diannao/
http://bj.58.com/bijiben/
http://bj.58.com/pbdn/
http://bj.58.com/diannaopeijian/
http://bj.58.com/zhoubianshebei/
http://bj.58.com/shuma/
http://bj.58.com/shumaxiangji/
http://bj.58.com/mpsanmpsi/
http://bj.58.com/youxiji/
http://bj.58.com/jiadian/
http://bj.58.com/dianshiji/
http://bj.58.com/ershoukongtiao/
http://bj.58.com/xiyiji/
http://bj.58.com/bingxiang/
http://bj.58.com/binggui/
http://bj.58.com/chuang/
http://bj.58.com/ershoujiaju/
http://bj.58.com/bangongshebei/
http://bj.58.com/diannaohaocai/
http://bj.58.com/bangongjiaju/
http://bj.58.com/ershoushebei/
http://bj.58.com/yingyou/
http://bj.58.com/yingeryongpin/
http://bj.58.com/muyingweiyang/
http://bj.58.com/muyingtongchuang/
http://bj.58.com/yunfuyongpin/
http://bj.58.com/fushi/
http://bj.58.com/nanzhuang/
http://bj.58.com/fsxiemao/
http://bj.58.com/xiangbao/
http://bj.58.com/meirong/
http://bj.58.com/yishu/
http://bj.58.com/shufahuihua/
http://bj.58.com/zhubaoshipin/
http://bj.58.com/yuqi/
http://bj.58.com/tushu/
http://bj.58.com/tushubook/
http://bj.58.com/wenti/
http://bj.58.com/yundongfushi/
http://bj.58.com/jianshenqixie/
http://bj.58.com/huju/
http://bj.58.com/qiulei/
http://bj.58.com/yueqi/
http://bj.58.com/chengren/
http://bj.58.com/nvyongpin/
http://bj.58.com/qinglvqingqu/
http://bj.58.com/qingquneiyi/
http://bj.58.com/xiaoyuan/
http://bj.58.com/ershouqiugou/
http://bj.58.com/tiaozao/
'''

0 commit comments

Comments
 (0)