Skip to content

Commit bec2484

Browse files
committed
CrawlerWebClient 测试单线程的抓取成功数量
1 parent b525c63 commit bec2484

File tree

4 files changed

+14
-24
lines changed

4 files changed

+14
-24
lines changed

src/main/kotlin/com/light/saber/service/CrawKnowledgeService.kt

Lines changed: 7 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ import com.light.saber.dao.CrawSourceDao
44
import com.light.saber.dao.KnowledgeDao
55
import com.light.saber.model.Knowledge
66
import com.light.saber.webclient.CrawlerWebClient
7-
import kotlinx.coroutines.experimental.CommonPool
8-
import kotlinx.coroutines.experimental.launch
97
import org.jsoup.Jsoup
108
import org.jsoup.nodes.Document
119
import org.jsoup.nodes.Element
@@ -25,9 +23,7 @@ class CrawKnowledgeService {
2523
简书专题URLs.forEach {
2624
for (page in 1..100) {
2725
try {
28-
launch(CommonPool) {
29-
crawJianShuArticles(page, it.url)
30-
}
26+
crawJianShuArticles(page, it.url)
3127
} catch (e: Exception) {
3228

3329
}
@@ -40,9 +36,7 @@ class CrawKnowledgeService {
4036
fun doCrawSegmentFaultKnowledge() {
4137
for (page in 1..803) {
4238
try {
43-
launch(CommonPool) {
44-
crawSegmentFault(page)
45-
}
39+
crawSegmentFault(page)
4640
} catch (e: Exception) {
4741

4842
}
@@ -53,9 +47,7 @@ class CrawKnowledgeService {
5347
fun doCrawOSChinaKnowledge() {
5448
for (page in 1..560) {
5549
try {
56-
launch(CommonPool) {
57-
crawOSChina(page)
58-
}
50+
crawOSChina(page)
5951
} catch (e: Exception) {
6052

6153
}
@@ -65,9 +57,7 @@ class CrawKnowledgeService {
6557
fun doCrawImportNewKnowledge() {
6658
for (page in 1..135) {
6759
try {
68-
launch(CommonPool) {
69-
crawImportNew(page)
70-
}
60+
crawImportNew(page)
7161
} catch (e: Exception) {
7262

7363
}
@@ -77,9 +67,7 @@ class CrawKnowledgeService {
7767
fun doCrawITEyeKnowledge() {
7868
for (page in 1..10000) {
7969
try {
80-
launch(CommonPool) {
81-
crawITEye(page)
82-
}
70+
crawITEye(page)
8371
} catch (e: Exception) {
8472

8573
}
@@ -89,9 +77,7 @@ class CrawKnowledgeService {
8977
fun doCrawCNBlogKnowledge() {
9078
for (page in 1..200) {
9179
try {
92-
launch(CommonPool) {
93-
crawCNBlog(page)
94-
}
80+
crawCNBlog(page)
9581
} catch (e: Exception) {
9682

9783
}
@@ -101,9 +87,7 @@ class CrawKnowledgeService {
10187
fun doCrawInfoQKnowledge() {
10288
for (page in 0..40) {
10389
try {
104-
launch(CommonPool) {
105-
crawInfoQ(page)
106-
}
90+
crawInfoQ(page)
10791
} catch (e: Exception) {
10892

10993
}

src/main/kotlin/com/light/saber/webclient/CrawlerWebClient.kt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ object CrawlerWebClient {
2222
return webClient as WebClient
2323
}
2424

25+
@Synchronized
2526
fun getPageHtmlText(url: String): String? {
2627
webClient = instanceWebClient(3000)
2728
try {
131 KB
Loading

src/main/resources/templates/common/head.ftl

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@
22
<html>
33
<head>
44
<meta charset="utf-8">
5-
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
5+
<!-- 强制让文档与设备的宽度保持1:1 -->
6+
<meta name="viewport"
7+
content="width=device-width, initial-scale=1, minimum-scale=1, maximum-scale=1, user-scalable=no">
8+
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"/>
9+
<link href="/assets/logo.jpg" rel="shortcut icon" type="image/x-icon"/>
610
<title>MicroTek</title>
711
<link rel="stylesheet" href="/assets/layui/css/layui.css">
812
<script src="https://cdn.bootcss.com/jquery/3.3.1/jquery.js"></script>
@@ -11,6 +15,7 @@
1115
<div class="layui-layout layui-layout-admin">
1216
<div class="layui-header">
1317
<div class="layui-logo">
18+
<img src="/assets/logo.jpg" style="height: 2.5em ;width: 2.5em">
1419
<a href="/" style="color: #009688;font-size: 1.2em;">微技 GD</a>
1520
</div>
1621
<!-- 头部区域(可配合layui已有的水平导航) -->

0 commit comments

Comments
 (0)