Skip to content

Commit 5170532

Browse files
committed
refactor: spam filtering logic in PostService
1 parent bc93ca1 commit 5170532

File tree

1 file changed

+49
-23
lines changed
  • packages/velog-cron/src/services/PostService

1 file changed

+49
-23
lines changed

packages/velog-cron/src/services/PostService/index.ts

Lines changed: 49 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,11 @@ export class PostService implements Service {
109109
.join('')
110110
.concat(user.profile?.short_bio ?? '', user.profile?.display_name ?? '')
111111

112-
const isSpam = await this.checkIsSpam(
113-
post.title ?? '',
114-
post.body ?? '',
115-
user.username,
116-
extraText,
117-
country,
118-
)
112+
const {
113+
isSpam,
114+
reason,
115+
targetType = '',
116+
} = await this.checkIsSpam(post.title ?? '', post.body ?? '', user.username, extraText, country)
119117

120118
if (!isSpam) return
121119

@@ -130,9 +128,13 @@ export class PostService implements Service {
130128

131129
setTimeout(() => {
132130
const message = {
133-
text: `[Captured By Bot], *userId*: ${user_id}\ntitle: ${post.title}, ip: ${ip}, country: ${country} type: spam`,
131+
text: `[Captured By Bot], *userId*: ${user_id}\ntitle: ${post.title}, ip: ${ip}, country: ${country} type: spam,
132+
reason: ${reason}`,
134133
}
135134

135+
if (targetType) {
136+
message.text = message.text.concat(`, targetType: ${targetType}`)
137+
}
136138
this.discord.sendMessage('spam', JSON.stringify(message))
137139
}, 0)
138140
}
@@ -143,38 +145,47 @@ export class PostService implements Service {
143145
username: string,
144146
extraText: string,
145147
country: string,
146-
): Promise<boolean> {
148+
): Promise<{ isSpam: boolean; reason: string; targetType?: string }> {
147149
const allowList = ['KR', 'GB', '']
148150
const blockList = ['IN', 'PK', 'CN', 'VN', 'TH', 'PH']
149151
const isForeign = !allowList.includes(country)
150152

151153
if (blockList.includes(country)) {
152-
return true
154+
return { isSpam: true, reason: 'blocked country' }
153155
}
154156

155-
const isTitleSpam = await this.spamFilter(title!, username, isForeign, true)
157+
const { isSpam: isTitleSpam, reason: titleSpamReason } = await this.spamFilter(
158+
title!,
159+
username,
160+
isForeign,
161+
true,
162+
)
156163
if (isTitleSpam) {
157-
return true
164+
return { isSpam: isTitleSpam, reason: titleSpamReason, targetType: 'title' }
158165
}
159166

160-
const isBodySpam = await this.spamFilter(body!.concat(extraText), username, isForeign)
167+
const { isSpam: isBodySpam, reason: bodySpamReason } = await this.spamFilter(
168+
body!.concat(extraText),
169+
username,
170+
isForeign,
171+
)
161172
if (isBodySpam) {
162-
return true
173+
return { isSpam: isBodySpam, reason: bodySpamReason, targetType: 'body' }
163174
}
164175

165-
return false
176+
return { isSpam: false, reason: '' }
166177
}
167178
private async spamFilter(
168179
text: string,
169180
username: string,
170181
isForeign: boolean,
171182
isTitle = false,
172-
): Promise<boolean> {
183+
): Promise<{ isSpam: boolean; reason: string }> {
173184
const includesCN = /[\u4e00-\u9fa5]/.test(text)
174185
const includesKR = /[ㄱ-ㅎ|ㅏ-ㅣ|가-힣]/.test(text)
175186

176187
if (includesCN && !includesKR) {
177-
return true
188+
return { isSpam: true, reason: 'includesCN' }
178189
}
179190

180191
let replaced = text.replace(/```([\s\S]*?)```/g, '') // remove code blocks
@@ -192,14 +203,14 @@ export class PostService implements Service {
192203
const containsPhoneNumber = phoneRegex.some((regex) => regex.test(replaced))
193204

194205
if (containsPhoneNumber) {
195-
return true
206+
return { isSpam: true, reason: 'containsPhoneNumber' }
196207
}
197208

198209
if (!isTitle && isForeign && hasLink) {
199210
const lines = replaced.split('\n').filter((line) => line.trim().length > 1)
200211
const koreanLinesCount = lines.filter((line) => this.hasKorean(line)).length
201212
const confidence = koreanLinesCount / lines.length
202-
return confidence < 0.3
213+
return { isSpam: confidence < 0.3, reason: 'foreignWithLink' }
203214
}
204215

205216
const removeDuplicatedWords = Array.from(
@@ -227,10 +238,12 @@ export class PostService implements Service {
227238
],
228239
})
229240

241+
const usedBannedKeywords: string[] = []
230242
const checkKeyword = bannedKeywords
231243
.map((keyword) => keyword.value)
232244
.some((keyword) => {
233245
if (removeDuplicatedWords.includes(keyword)) {
246+
usedBannedKeywords.push(keyword)
234247
this.updateDynmicConfigItem(keyword)
235248
return true
236249
} else {
@@ -239,7 +252,7 @@ export class PostService implements Service {
239252
})
240253

241254
if (checkKeyword) {
242-
return true
255+
return { isSpam: true, reason: `bannedKeyword: `.concat(...usedBannedKeywords) }
243256
}
244257

245258
const bannedAltKeywords = await this.db.dynamicConfigItem.findMany({
@@ -264,22 +277,35 @@ export class PostService implements Service {
264277
score++
265278
}
266279

280+
const initScore = score
281+
const usedBannedAltKeywords: string[] = []
267282
for (const { value: keyword } of bannedAltKeywords) {
268283
if (removeDuplicatedWords.includes(keyword)) {
284+
usedBannedAltKeywords.push(keyword)
269285
this.updateDynmicConfigItem(keyword)
270286
score++
271287
}
272288

273289
if (score >= 2 && isForeign) {
274-
return true
290+
return {
291+
isSpam: true,
292+
reason: `initScore: ${initScore}, foreign, ${'BannedAltKeywords: '.concat(
293+
usedBannedAltKeywords.join(','),
294+
)}`,
295+
}
275296
}
276297

277298
if (score >= 3) {
278-
return true
299+
return {
300+
isSpam: true,
301+
reason: `initScore: ${initScore}, foreign, ${'BannedAltKeywords: '.concat(
302+
usedBannedAltKeywords.join(','),
303+
)}`,
304+
}
279305
}
280306
}
281307

282-
return false
308+
return { isSpam: false, reason: '' }
283309
}
284310
private hasKorean(text: string) {
285311
return /[ㄱ-힣]/g.test(text)

0 commit comments

Comments
 (0)