Skip to content

Commit 0ba1d2f

Browse files
committed
OSChina文章
1 parent 1a40ec1 commit 0ba1d2f

File tree

9 files changed

+115
-31
lines changed

9 files changed

+115
-31
lines changed

app.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ INSERT INTO `saber`.`craw_source` (`type`, `url`) VALUES ('JIAN_SHU', 'http://ww
1313
INSERT INTO `saber`.`craw_source` (`type`, `url`) VALUES ('JIAN_SHU', 'http://www.jianshu.com/c/04cb7410c597');
1414

1515

16+
delete from knowledge where content is null or content ='' or title is null or title='';
17+
1618
id type url
1719
2 JIAN_SHU http://www.jianshu.com/c/498ebcfd27ad
1820
3 JIAN_SHU http://www.jianshu.com/c/c3fe8e7aeb09

src/main/kotlin/com/light/saber/controller/KnowledgeController.kt

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,10 +53,14 @@ class KnowledgeController {
5353
} else if (isTitleExist(title)) {
5454
return Result(title, "问题已经存在,请换一个问题", false)
5555
} else {
56-
knowledge.gmtCreate = Date()
57-
knowledge.gmtModified = Date()
58-
KnowledgeDao.save(knowledge)
59-
return Result(title, "保存成功", true)
56+
try {
57+
knowledge.gmtCreate = Date()
58+
knowledge.gmtModified = Date()
59+
KnowledgeDao.save(knowledge)
60+
return Result(title, "保存成功", true)
61+
} catch (e: Exception) {
62+
return Result(title, "系统出错啦:${e.message}", false)
63+
}
6064
}
6165

6266
}

src/main/kotlin/com/light/saber/controller/KnowledgeCrawController.kt

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,29 @@ class KnowledgeCrawController {
1414

1515
@GetMapping("/knowledge/doCrawJianShu")
1616
fun doCrawJianShu(): String {
17-
CrawKnowledgeService.doCrawJianShuKnowledge()
17+
Thread {
18+
CrawKnowledgeService.doCrawJianShuKnowledge()
19+
}.start()
20+
21+
1822
return "DONE"
1923
}
2024

2125
@GetMapping("/knowledge/doCrawSegmentFaultKnowledge")
2226
fun doCrawSegmentFaultKnowledge(): String {
23-
CrawKnowledgeService.doCrawSegmentFaultKnowledge()
27+
Thread {
28+
CrawKnowledgeService.doCrawSegmentFaultKnowledge()
29+
}.start()
30+
31+
return "DONE"
32+
}
33+
34+
@GetMapping("/knowledge/doCrawOSChinaKnowledge")
35+
fun doCrawOSChinaKnowledge(): String {
36+
Thread {
37+
CrawKnowledgeService.doCrawOSChinaKnowledge()
38+
}.start()
39+
2440
return "DONE"
2541
}
2642

src/main/kotlin/com/light/saber/model/Knowledge.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ class Knowledge {
1313
@Column(length = 200, unique = true, nullable = false)
1414
var title = ""
1515

16-
@Column(length = 200, unique = true, nullable = false)
16+
@Column(length = 200)
1717
var url = ""
1818

1919
@Lob

src/main/kotlin/com/light/saber/service/CrawKnowledgeService.kt

Lines changed: 61 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,12 @@ import com.light.saber.dao.CrawSourceDao
44
import com.light.saber.dao.KnowledgeDao
55
import com.light.saber.model.Knowledge
66
import com.light.saber.webclient.CrawlerWebClient
7-
import kotlinx.coroutines.experimental.CommonPool
8-
import kotlinx.coroutines.experimental.launch
97
import org.jsoup.Jsoup
108
import org.jsoup.nodes.Document
119
import org.jsoup.nodes.Element
1210
import org.springframework.beans.factory.annotation.Autowired
1311
import org.springframework.stereotype.Service
12+
import org.springframework.util.StringUtils
1413

1514
@Service
1615
class CrawKnowledgeService {
@@ -20,27 +19,74 @@ class CrawKnowledgeService {
2019
lateinit var CrawSourceDao: CrawSourceDao
2120

2221
fun doCrawJianShuKnowledge() {
23-
2422
val 简书专题URLs = CrawSourceDao.findJianShu()
25-
2623
简书专题URLs.forEach {
27-
launch(CommonPool) {
28-
for (page in 1..100) {
29-
crawJianShuArticles(page, it.url)
30-
}
24+
for (page in 1..100) {
25+
crawJianShuArticles(page, it.url)
3126
}
3227
}
3328
}
3429

3530

3631
fun doCrawSegmentFaultKnowledge() {
3732
for (page in 1..803) {
38-
launch(CommonPool) {
39-
crawSegmentFault(page)
33+
crawSegmentFault(page)
34+
}
35+
}
36+
37+
fun doCrawOSChinaKnowledge() {
38+
for (page in 1..560) {
39+
crawOSChina(page)
40+
}
41+
}
42+
43+
private fun crawOSChina(page: Int) {
44+
val pageUrl = "https://www.oschina.net/action/ajax/get_more_recommend_blog?classification=0&p=$page"
45+
val 文章列表HTML = CrawlerWebClient.getPageHtmlText(pageUrl)
46+
val document = Jsoup.parse(文章列表HTML)
47+
48+
// document.getElementsByClassName("blog-name")[0]
49+
50+
val titles = arrayListOf<String>()
51+
52+
document.getElementsByClass("blog-name").forEach {
53+
titles.add(it.html())
54+
}
55+
56+
// document.getElementsByClassName("blog-title-link")[0]
57+
//<a href="https://my.oschina.net/u/3115385/blog/1819321" class="sc overh blog-title-link" target="_blank" title="JVM调优-堆大小设置、回收器选择">…</a>
58+
59+
val links = document.getElementsByClass("blog-title-link")
60+
61+
if (titles.size != links.size) {
62+
return
63+
}
64+
65+
links.forEachIndexed { index, it ->
66+
val url = it.attr("href")
67+
if (KnowledgeDao.countByUrl(url) == 0) {
68+
val OSChina文章HTML = CrawlerWebClient.getPageHtmlText(url)
69+
val OSChina文章Document = Jsoup.parse(OSChina文章HTML)
70+
val content = 获取OSChina文章内容(OSChina文章Document)
71+
println(url)
72+
println(content)
73+
74+
doSaveKnowledge(
75+
url = url,
76+
title = titles[index],
77+
content = content
78+
)
79+
4080
}
4181
}
4282
}
4383

84+
private fun 获取OSChina文章内容(osChina文章Document: Document?): String? {
85+
// document.getElementById("blogBody")
86+
return osChina文章Document?.getElementById("blogBody")?.html()
87+
}
88+
89+
4490
private fun crawSegmentFault(page: Int) {
4591
val SegmentFault文章列表的HTML = CrawlerWebClient.getPageHtmlText("https://segmentfault.com/blogs?page=$page")
4692
val document = Jsoup.parse(SegmentFault文章列表的HTML)
@@ -125,10 +171,15 @@ class CrawKnowledgeService {
125171
}
126172

127173
private fun doSaveKnowledge(url: String, title: String?, content: String?) {
174+
if (StringUtils.isEmpty(url) || StringUtils.isEmpty(title) || StringUtils.isEmpty(content)) {
175+
return
176+
}
177+
128178
val Knowledge = Knowledge()
129179
Knowledge.url = url
130180
Knowledge.title = title ?: ""
131181
Knowledge.content = content ?: ""
182+
132183
try {
133184
KnowledgeDao.save(Knowledge)
134185
} catch (e: Exception) {

src/main/resources/templates/add.ftl

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,16 +77,22 @@
7777
7878
var title = $('#title').val();
7979
var content = layedit.getContent(layeditIndex);
80+
var data = {title: title, content: content};
81+
console.log(data)
8082
8183
$.ajax({
8284
url: '/addKnowledge',
83-
data: {title: title, content: content},
85+
data: data,
8486
method: 'POST',
8587
async: false,
8688
success: (result) => {
87-
layer.msg(result.msg)
88-
//alert(JSON.stringify(result.msg))
89-
location.reload()
89+
if (result.success === true) {
90+
layer.msg(result.msg)
91+
location.reload()
92+
} else {
93+
layer.msg(result.msg)
94+
}
95+
9096
},
9197
error: (err) => {
9298
layer.msg(err)

src/main/resources/templates/common/head.ftl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
<dl class="layui-nav-child">
4949
<dd><a href="/knowledge/doCrawJianShu" target="_blank">抓取简书</a></dd>
5050
<dd><a href="/knowledge/doCrawSegmentFaultKnowledge" target="_blank">抓取SegmentFault</a></dd>
51-
<dd><a href="javascript:;">列表三</a></dd>
51+
<dd><a href="/knowledge/doCrawOSChinaKnowledge" target="_blank">抓取OSChina</a></dd>
5252
<dd><a href="">超链接</a></dd>
5353
</dl>
5454
</li>

src/main/resources/templates/detail.ftl

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
<h5 class="layui-timeline-title">
1717
${Knowledge.gmtCreate?string("yyyy-MM-dd HH:mm:ss")}
1818
</h5>
19-
<textarea class="content" id="content-${Knowledge.id}">
19+
<textarea class="content" id="content-${Knowledge.id?string("#")}">
2020
${Knowledge.content}
2121
</textarea>
2222
</div>
@@ -34,7 +34,12 @@
3434
$(function () {
3535
layui.use('layedit', function () {
3636
var layedit = layui.layedit;
37-
layedit.build('content-${Knowledge.id}', {height: 1000});
37+
layedit.build('content-${Knowledge.id?string("#")}',
38+
{
39+
height: 1000,
40+
tool: []
41+
}
42+
);
3843
});
3944
})
4045
</script>

src/main/resources/templates/index.ftl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,12 @@
2929
<i class="layui-icon layui-timeline-axis"></i>
3030
<div class="layui-timeline-content layui-text">
3131
<h3 class="layui-timeline-title">
32-
<a href="/knowledge/${e.id}" target="_blank" style="color: #469689">${e.title}</a>
32+
<a href="/knowledge/${e.id?string("#")}" target="_blank" style="color: #469689">${e.title}</a>
3333
</h3>
3434
<h5 class="layui-timeline-title">
3535
${e.gmtCreate?string("yyyy-MM-dd HH:mm:ss")}
3636
</h5>
37-
<textarea class="content" id="content-${e.id}">
37+
<textarea class="content" id="content-${e.id?string("#")}">
3838
${e.content}
3939
</textarea>
4040
</div>
@@ -65,9 +65,9 @@
6565
var layedit = layui.layedit;
6666
<#if feeds?exists>
6767
<#list feeds as e>
68-
layedit.build('content-${e.id}',
68+
layedit.build('content-${e.id?string("#")}',
6969
{
70-
height: 300,
70+
height: 520,
7171
tool: []
7272
}
7373
);
@@ -79,9 +79,9 @@
7979
//完整功能
8080
laypage.render({
8181
elem: 'pagination'
82-
, count: <#if total?exists>${total}<#else>0</#if> //数据总数,从服务端得到
83-
, limit: ${pageSize!10}
84-
, curr: ${pageNum!1}
82+
, count: <#if total?exists>${total?string("#")}<#else>0</#if> //数据总数,从服务端得到
83+
, limit: ${pageSize!10?string("#")}
84+
, curr: ${pageNum!1?string("#")}
8585
, layout: ['count', 'prev', 'page', 'next', 'limit', 'refresh', 'skip']
8686
, jump: function (obj, first) {
8787
//obj包含了当前分页的所有参数,比如:

0 commit comments

Comments
 (0)