22import json
33import redis
44from pyquery import PyQuery as pq
5+ import hashlib
6+ import re
7+
# Shared HTTP session object; every request in this module goes through it.
session = requests.session()

# Base request headers (Referer and User-Agent) for the passport login pages.
HEADERS = {
    'Referer': 'https://passport.lagou.com/login/login.html',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:51.0) Gecko/20100101 Firefox/51.0',
}

# Module-level headers/cookies dictionaries, both initially empty.
headers = {}
cookies = {}
18+
def get_password(passwd):
    """Return the doubly MD5-hashed form of ``passwd``.

    The plaintext is MD5-hashed, the resulting hex digest is wrapped with the
    literal ``'veenike'`` on both sides, and that string is MD5-hashed again.
    According to the original author's note, the wrapping value was found in
    the file main.html_aio_f95e644.js.

    Args:
        passwd: The plaintext password as a ``str``.

    Returns:
        The 32-character lowercase hex digest of the second MD5 pass.
    """
    salt = 'veenike'
    inner_digest = hashlib.md5(passwd.encode('utf-8')).hexdigest()
    salted = ''.join((salt, inner_digest, salt))
    return hashlib.md5(salted.encode('utf-8')).hexdigest()
25+
def get_token():
    """Fetch the login page and extract the anti-forgery token and code.

    The login page is requested through the module-level ``session`` with
    ``HEADERS``, and the response text is matched against a pattern that
    captures the values assigned to ``X_Anti_Forge_Token`` and
    ``X_Anti_Forge_Code``.

    Returns:
        A ``(token, code)`` tuple of strings. Both are empty strings when
        the pattern does not match the page text.
    """
    login_page = 'https://passport.lagou.com/login/login.html'
    page = session.get(login_page, headers=HEADERS)
    # re.DOTALL lets '.' span newlines, so the two assignments may sit on
    # different lines of the page.
    found = re.match(
        r'.*X_Anti_Forge_Token = \'(.*?)\';.*X_Anti_Forge_Code = \'(\d+?)\'',
        page.text,
        re.DOTALL,
    )
    if not found:
        return "", ""
    return found.group(1), found.group(2)
36+
def login(username, passwd):
    """Log in through the passport JSON endpoint using the shared session.

    Obtains the anti-forgery token/code via ``get_token()``, sends them as
    extra request headers, and posts the username together with the hashed
    password (see ``get_password``) to the login endpoint.

    Args:
        username: Account name sent as the ``username`` form field.
        passwd: Plaintext password; it is hashed before being sent.

    Raises:
        SystemExit: With status ``-1`` when the ``state`` field of the JSON
            reply is not ``1`` (including when the field is absent).
    """
    forge_token, forge_code = get_token()

    login_headers = HEADERS.copy()
    # NOTE(review): the header names are spelled "Anit" in the original code;
    # they are kept unchanged here -- confirm against what the site expects.
    login_headers.update({
        'X-Requested-With': 'XMLHttpRequest',
        'X-Anit-Forge-Token': forge_token,
        'X-Anit-Forge-Code': forge_code,
    })

    post_data = {
        'username': username,
        'password': get_password(passwd),
        'request_form_verifyCode': '',
        'submit': '',
    }
    response = session.post(
        'https://passport.lagou.com/login/login.json',
        data=post_data,
        headers=login_headers,
    )
    json_data = response.json()

    # A reply without a 'state' key is treated as a failed login instead of
    # surfacing as a bare KeyError.
    if json_data.get('state') != 1:
        print("登录失败,退出")
        # The exit() helper is injected by the site module and is not
        # guaranteed to exist (e.g. under `python -S`); raising SystemExit
        # directly has the same effect without that dependency.
        raise SystemExit(-1)
52+
def get_cookies():
    """Request the service-ticket grant page and return the session cookies.

    The grant URL is fetched through the module-level ``session``; the
    session's cookie jar is then converted to a plain dictionary.

    Returns:
        A ``dict`` built from ``session.cookies``.
    """
    grant_url = "https://passport.lagou.com/grantServiceTicket/grant.html"
    session.get(grant_url)
    jar = session.cookies
    return requests.utils.dict_from_cookiejar(jar)
556
657"""
758获取职位列表
859"""
9- def fetch (url ,headers = {} ):
60+ def fetch (url ,headers , cookies ):
1061 #用户登录后的cookie
1162 try :
12- res = requests .get (url ,headers = headers )
63+ res = session .get (url ,headers = headers , cookies = cookies )
1364 except Exception as e :
1465 print (e )
1566 return False
@@ -43,15 +94,15 @@ def insert(data):
4394 resA = r .sadd ("company" ,id )
4495
4596 if resA == 1 :
46- res = fetchDetail (positionId ,headers = headers )
97+ res = fetch_detail (positionId ,headers , cookies )
4798 r .hmset ("postion:" + str (positionId ),res )
4899 print (str (positionId )+ "已经写入redis中" )
49100 r .hmset (key ,t )
50101
51102# 获取职位详情
52- def fetchDetail (id ,headers = {} ):
103+ def fetch_detail (id ,headers , cookies ):
53104 detailUrl = 'http://m.lagou.com/jobs/' + str (id )+ ".html"
54- res = requests .get (detailUrl ,headers = headers )
105+ res = session .get (detailUrl ,headers = headers , cookies = cookies )
55106 if res .status_code != 200 :
56107 print ("请求出错" + str (res .text ))
57108 return False
@@ -68,18 +119,7 @@ def fetchDetail(id,headers={}):
68119 return d
69120 # print(q(".content").text())
70121
71- if __name__ == '__main__' :
72-
73- #职位列表的json接口 需要登陆 带cookie
74- page = 1
75- pageSize = 100 #最大支持100
76- API_URL = 'http://m.lagou.com/listmore.json?pageNo=' + str (page )+ '&pageSize=' + str (pageSize )
77- #cookie替换成自己登录后的cookie
78- cookie = 'Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1531361076,1532319384; LGUID=20180723131853-e4343a35-8e37-11e8-9ee6-5254005c3644; LGSID=20180723135216-8dffe0ce-8e3c-11e8-a327-525400f775ce; PRE_UTM=; PRE_HOST=; PRE_SITE=; PRE_LAND=https%3A%2F%2Fpassport.lagou.com%2Flogin%2Flogin.html; LG_LOGIN_USER_ID=564f3b59e53c65db601e411a55fa01278ce7cf79cb588be8; _putrc=51AF6FA824D5BE21; login=true; unick=%E7%94%B0%E9%9B%B7; gate_login_token=a47b874276333f9f31e46da6185e63a0935573d160846c4d; _gat=1; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1532325155; LGRID=20180723135240-9c59f5b7-8e3c-11e8-a327-525400f775ce'
79- headers = {'user-agent' : 'my-app/0.0.1' ,'cookie' :cookie }
80122
81- data = fetch (API_URL ,headers = headers )
82- insert (data )
83123
84124
85125
0 commit comments