1.参考
https://www.cnblogs.com/blog411032/p/9718990.html
pom依赖
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.10.3</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.13</version>
</dependency>
// 模拟登录的代码
/**
 * Demonstrates a simulated form login with Apache HttpClient followed by
 * scraping the post-login page with jsoup.
 *
 * <p>Flow: POST the login form, expect a 302 redirect, collect the
 * {@code Location} and {@code Set-Cookie} headers from that response, then GET
 * the redirect target with those cookies and print the text of the element
 * matched by a fixed CSS selector.
 */
public class MoNiLoginSpider {

    /** Login page URL; it is also sent as the Referer header on both requests. */
    private static final String LOGIN_URL = "http://home.manmanbuy.com/login.aspx";

    /** Charset used both to encode the login form and to decode the result page. */
    private static final String CHARSET = "UTF-8";

    /**
     * Runs the login-and-scrape demo once and prints the scraped text to stdout.
     * Diagnostics for unexpected responses go to stderr.
     *
     * @param args unused
     * @throws Exception on any I/O or protocol failure raised by HttpClient
     */
    public static void main(String[] args) throws Exception {
        String redirectUrl;
        String cookieHeader;

        // Step 1: submit the login form. try-with-resources guarantees that the
        // client and the response are released on every path, including errors.
        try (CloseableHttpClient loginClient = HttpClients.createDefault()) {
            HttpPost httpPost = new HttpPost(LOGIN_URL);

            List<BasicNameValuePair> form = new ArrayList<BasicNameValuePair>();
            // NOTE(review): these two ASP.NET hidden-field values are hard-coded;
            // presumably they were captured from the login page and may go stale.
            form.add(new BasicNameValuePair("__VIEWSTATE", "/wEPDwULLTIwNjQ3Mzk2NDFkGAEFHl9fQ29udHJvbHNSZXF1aXJlUG9zdEJhY2tLZXlfXxYBBQlhdXRvTG9naW4drg0qAK3TuQPsrrrpt8Dba/J1mUk7mqWIKal3j0zBfw=="));
            form.add(new BasicNameValuePair("__EVENTVALIDATION", "/wEdAAVoMBVxRWA5FtL2rkHymyChDFTzKcXJqLg+OeJ6QAEa2kPTPkdPWl+8YN2NtDCtxieD/kBkxBNDyTLUoFtEiY50op4oRunf14dz2Zt2+QKDEIBKrHYpEdSpFyyjV4RcqOi2BVcG7EExQ32JOD1Pc5XJ"));
            form.add(new BasicNameValuePair("txtUser", "itcast"));
            form.add(new BasicNameValuePair("txtPass", "www.itcast.cn"));
            form.add(new BasicNameValuePair("btnLogin", "登陆"));

            // An explicit charset is required: the single-argument constructor
            // falls back to ISO-8859-1, which cannot represent the btnLogin value.
            // NOTE(review): UTF-8 is chosen to match how the result page is
            // decoded below; confirm it is what the server expects.
            httpPost.setEntity(new UrlEncodedFormEntity(form, CHARSET));
            // Referer header, sent to get past hotlink protection.
            httpPost.setHeader("Referer", LOGIN_URL);

            try (CloseableHttpResponse loginResponse = loginClient.execute(httpPost)) {
                int loginStatus = loginResponse.getStatusLine().getStatusCode();
                if (loginStatus != 302) {
                    System.err.println("Login did not redirect; HTTP status " + loginStatus);
                    return;
                }

                Header location = loginResponse.getFirstHeader("Location");
                if (location == null) {
                    System.err.println("Login returned 302 without a Location header");
                    return;
                }
                // Resolve against the login URL so that both relative and
                // absolute Location values produce a valid target.
                redirectUrl = java.net.URI.create(LOGIN_URL)
                        .resolve(location.getValue().trim())
                        .toString();

                cookieHeader = buildCookieHeader(loginResponse.getHeaders("Set-Cookie"));
            }
        }

        // Step 2: fetch the post-login page with a fresh client, passing the
        // session cookies by hand.
        try (CloseableHttpClient pageClient = HttpClients.createDefault()) {
            HttpGet httpGet = new HttpGet(redirectUrl);
            if (cookieHeader.length() > 0) {
                httpGet.setHeader("Cookie", cookieHeader);
            }
            httpGet.setHeader("Referer", LOGIN_URL);

            try (CloseableHttpResponse pageResponse = pageClient.execute(httpGet)) {
                int pageStatus = pageResponse.getStatusLine().getStatusCode();
                if (pageStatus != 200) {
                    System.err.println("Fetching " + redirectUrl + " failed; HTTP status " + pageStatus);
                    return;
                }

                String html = EntityUtils.toString(pageResponse.getEntity(), CHARSET);

                // Parse the page and print the text of the selected <font> element(s).
                Document document = Jsoup.parse(html);
                Elements fontEl = document.select("#aspnetForm > div.udivright > div:nth-child(2) > table > tbody > tr > td:nth-child(1) > table:nth-child(2) > tbody > tr > td:nth-child(2) > div:nth-child(1) > font");
                String jifen = fontEl.text();
                System.out.println(jifen);
            }
        }
    }

    /**
     * Builds a single {@code Cookie} request-header value from the
     * {@code Set-Cookie} headers of a response.
     *
     * <p>Only the leading {@code name=value} pair of each header is kept; the
     * attributes after the first {@code ';'} are dropped. Pairs are joined
     * with {@code "; "}.
     *
     * @param setCookieHeaders the response's {@code Set-Cookie} headers; may be empty
     * @return the combined cookie string, or an empty string when there are no cookies
     */
    private static String buildCookieHeader(Header[] setCookieHeaders) {
        StringBuilder cookies = new StringBuilder();
        for (Header setCookie : setCookieHeaders) {
            String value = setCookie.getValue();
            if (value == null) {
                continue;
            }
            int attributesStart = value.indexOf(';');
            String pair = (attributesStart >= 0 ? value.substring(0, attributesStart) : value).trim();
            if (pair.length() == 0) {
                continue;
            }
            if (cookies.length() > 0) {
                cookies.append("; ");
            }
            cookies.append(pair);
        }
        return cookies.toString();
    }
}
该博客介绍了如何使用Jsoup和HttpClient库进行网页模拟登录,并在登录成功后抓取特定页面的数据。首先,通过设置请求参数和头信息完成登录操作,然后根据响应获取重定向URL和Cookie。接着,利用Cookie访问成功登录后的页面,解析HTML内容以获取积分信息。整个过程展示了网络爬虫的基础实现。
4306

被折叠的 条评论
为什么被折叠?



