Skip to content

Commit a69caea

Browse files
committed
tag
1 parent 0692a4f commit a69caea

File tree

2 files changed

+62
-46
lines changed

2 files changed

+62
-46
lines changed

src/main/java/DataClean/ArticleClean.java

Lines changed: 60 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
import java.sql.*;
88
import java.util.ArrayList;
9+
import java.util.HashSet;
910
import java.util.List;
1011

1112
import static java.lang.Integer.min;
@@ -108,27 +109,28 @@ public static List<String> getTag(List<Tag> tagList,ArticleBean articleBean)
108109
{
109110
for(Tag tag :tagList)
110111
{
111-
String default_tag = Word2PinYin(tag.getName());
112-
String desc [] = tag.getDescription().split(" ");
113-
String now_tag = Word2PinYin(temp_tag[i]);
114-
if(StringHasChinese(tag.getName()) || StringHasChinese(temp_tag[i]))
115-
{
116-
if(editDistance(default_tag,now_tag) <3)
112+
113+
String default_tag = Word2PinYin(tag.getName());
114+
String desc [] = tag.getDescription().split(" ");
115+
String now_tag = Word2PinYin(temp_tag[i]);
116+
if(StringHasChinese(tag.getName()) || StringHasChinese(temp_tag[i]))
117117
{
118+
if(editDistance(default_tag,now_tag) <3)
119+
{
118120

119-
resultTag.add(tag.getName());
120-
continue;
121+
resultTag.add(tag.getName());
122+
continue;
123+
}
121124
}
122-
}
123-
else
124-
{
125-
if(editDistance(default_tag,now_tag) ==0)
125+
else
126126
{
127+
if(editDistance(default_tag,now_tag) ==0)
128+
{
127129

128-
resultTag.add(tag.getName());
129-
continue;
130+
resultTag.add(tag.getName());
131+
continue;
132+
}
130133
}
131-
}
132134

133135

134136
for(int k = 0 ; k<desc.length ; k++)
@@ -153,52 +155,56 @@ public static List<String> getTag(List<Tag> tagList,ArticleBean articleBean)
153155
}
154156

155157

158+
159+
160+
156161
}
157162
}
158163
}
159164

160165
for(Tag tag :tagList)
161166
{
162-
String default_tag = Word2PinYin(tag.getName());
163-
String desc [] = tag.getDescription().split(" ");
164-
if(clean_content.toLowerCase().contains(default_tag.toLowerCase()))
165-
{
166-
resultTag.add(tag.getName());
167-
continue;
168-
}
169-
else if(title.toLowerCase().contains(default_tag.toLowerCase()))
170-
{
171-
resultTag.add(default_tag);
172-
continue;
173-
}else
174-
{
175-
for(int k = 0 ; k<desc.length ; k++)
167+
168+
String default_tag = Word2PinYin(tag.getName());
169+
String desc [] = tag.getDescription().split(" ");
170+
if(clean_content.toLowerCase().contains(default_tag.toLowerCase()))
176171
{
177-
if(clean_content.toLowerCase().contains(desc[k].toLowerCase()) && !desc[k].equals(""))
178-
{
179-
System.out.println(clean_content.toLowerCase());
180-
System.out.println(desc[k].toLowerCase());
181-
resultTag.add(tag.getName());
182-
continue;
183-
}
172+
resultTag.add(tag.getName());
173+
continue;
174+
}
175+
else if(title.toLowerCase().contains(default_tag.toLowerCase()))
176+
{
177+
resultTag.add(tag.getName());
178+
continue;
179+
}else
180+
{
181+
for(int k = 0 ; k<desc.length ; k++)
182+
{
183+
if(clean_content.toLowerCase().contains(desc[k].toLowerCase()) && !desc[k].equals(""))
184+
{
185+
186+
resultTag.add(tag.getName());
187+
continue;
188+
}
189+
}
184190
}
185-
}
191+
192+
186193

187194
}
188195

189196
}
190-
return resultTag;
197+
return new ArrayList<String>(new HashSet<String>(resultTag));
191198

192199
}
193-
//求编辑距离
200+
201+
//求编辑距离 利用动态规划
194202
public static int editDistance(String str1, String str2) {
195203
Preconditions.checkNotNull(str1);
196204
Preconditions.checkNotNull(str2);
197205

198206
int len1 = str1.length();
199207
int len2 = str2.length();
200-
201-
// len1+1, len2+1, because finally return dp[len1][len2]
202208
int[][] dp = new int[len1 + 1][len2 + 1];
203209

204210
for (int i = 0; i <= len1; i++) {
@@ -218,7 +224,7 @@ public static int editDistance(String str1, String str2) {
218224
//update dp value for +1 length
219225
dp[i + 1][j + 1] = dp[i][j];
220226
} else {
221-
dp[i + 1][j + 1] = 1 + min(dp[i+1][j], min(dp[i][j+1], dp[i][j])); // 这里不需要递归实现了
227+
dp[i + 1][j + 1] = 1 + min(dp[i+1][j], min(dp[i][j+1], dp[i][j]));
222228
}
223229
}
224230
}
@@ -297,8 +303,18 @@ public static void main(String [] args)
297303
{
298304
str += s+",";
299305
}
300-
System.out.println(id+" " +tags);
301-
getMysqlData.UpdateArticle(id,str,clean_content,keyword,summary);
306+
if (str.length() != 0)
307+
{
308+
System.out.println(id+" " +tags);
309+
System.out.println(str.substring(0,str.length()-1));
310+
getMysqlData.UpdateArticle(id,str.substring(0,str.length()-1),clean_content,keyword,summary);
311+
}else
312+
{
313+
System.out.println(id+" " +tags);
314+
System.out.println(str);
315+
getMysqlData.UpdateArticle(id,str,clean_content,keyword,summary);
316+
}
317+
302318

303319
}
304320
}

src/main/java/DataClean/GetMysqlData.java

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,7 @@ public List<ArticleBean> getArticle()
4444
List<ArticleBean> list = new ArrayList<>();
4545
try{
4646
String sql;
47-
sql = "SELECT id,title, content,tags FROM article";
47+
sql = "SELECT id,title, content,tags FROM article_copy1";
4848
ResultSet rs = stmt.executeQuery(sql);
4949

5050
// 展开结果集数据库
@@ -102,7 +102,7 @@ public void UpdateArticle(int id,String tags,String clean_content,String keyword
102102
{
103103
try {
104104

105-
String sql = "Update article set tags=?,clean_content=?,keyword=?,summary = ? where id=?";
105+
String sql = "Update article_copy1 set tags=?,clean_content=?,keyword=?,summary = ? where id=?";
106106
// 预处理sql语句
107107
PreparedStatement presta = conn.prepareStatement(sql);
108108
// 设置sql语句中的values值

0 commit comments

Comments
 (0)