Skip to content

Commit 59059a9

Browse files
committed
Add: spam filter test runner to PostController
1 parent 9549cec commit 59059a9

File tree

6 files changed

+193
-80
lines changed

6 files changed

+193
-80
lines changed

packages/velog-cron/.gitignore

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
env/*
22
!env/.env.example
33

4-
54
prisma
65

7-
dist
6+
dist
7+
8+
# spam test data
9+
spam_post.json

packages/velog-cron/src/common/plugins/globals/errorHandlerPlugin.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@ import { FastifyPluginCallback } from 'fastify'
44
import fp from 'fastify-plugin'
55

66
const pluginFn: FastifyPluginCallback = (fastify, _, done) => {
7-
fastify.addHook('onError', (request, reply, error) => {
8-
console.log('fastify hook error:', error)
7+
fastify.addHook('onError', (request, reply, error, done) => {
8+
request.log.error(error, 'fastify onError')
9+
done()
910
})
1011
fastify.setErrorHandler((error, _, reply) => {
1112
if (isHttpError(error)) {

packages/velog-cron/src/routes/posts/v1/PostController.ts

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@ import { BadRequestError } from '@errors/BadRequestErrors.js'
22
import { NotFoundError } from '@errors/NotfoundError.js'
33
import { DbService } from '@lib/db/DbService.js'
44
import { PostService } from '@services/PostService/index.js'
5-
import { injectable, singleton } from 'tsyringe'
5+
import { container, injectable, singleton } from 'tsyringe'
66
import { utcToZonedTime } from 'date-fns-tz'
77
import { startOfDay, subMonths } from 'date-fns'
88
import { ENV } from '@env'
9+
import fs from 'fs'
10+
import path from 'path'
11+
import { UtilsService } from '@lib/utils/UtilsService.js'
912

1013
interface Controller {
1114
updatePostScore(postId: string): Promise<void>
@@ -62,4 +65,68 @@ export class PostController implements Controller {
6265

6366
return posts.length
6467
}
68+
/**
 * Development-only harness that replays the posts stored in
 * `src/routes/posts/v1/spam_post.json` through `PostService.checkIsSpam`
 * and logs how many of them were flagged.
 *
 * Does nothing when `ENV.appEnv` is not `'development'`, when the fixture
 * file is missing, or when the fixture does not contain an array of posts
 * under its first key.
 *
 * @returns Resolves once the summary has been logged.
 * @throws Propagates any error raised while reading/parsing the fixture or
 *         while running the spam check.
 */
async spamFilterTestRunner(): Promise<void> {
  // Guard first so no services are resolved outside local development.
  if (ENV.appEnv !== 'development') return

  const utils = container.resolve(UtilsService)
  const postService = container.resolve(PostService)

  const filePath = path.resolve(utils.resolveDir('./src/routes/posts/v1/spam_post.json'))
  if (!fs.existsSync(filePath)) return

  const parsed: unknown = JSON.parse(fs.readFileSync(filePath, { encoding: 'utf-8' }))
  if (typeof parsed !== 'object' || parsed === null) return

  // The posts live under the first property of the fixture object.
  const rawPosts: unknown = Object.values(parsed)[0]
  if (!Array.isArray(rawPosts)) return

  const posts: PostData[] = rawPosts
    .filter((v) => !!v?.title)
    .map((v, index) => ({ id: index, ...v }))

  // Upper bound on how many fixture posts are examined in one run.
  const maxPosts = 5000
  const examined = posts.slice(0, maxPosts)
  const spamIds = new Set<number>()

  // Fill in locally to force-flag every post of specific users.
  const bannedUsernames: string[] = []

  for (const { id, title, body, username } of examined) {
    if (bannedUsernames.includes(username)) {
      spamIds.add(id)
      // Already flagged; skip the (database-backed) spam checks.
      continue
    }

    // Flag the post if it is detected with country 'US' or, failing that, 'KR'.
    const isSpam =
      (await postService.checkIsSpam(title, body, username, '', 'US')) ||
      (await postService.checkIsSpam(title, body, username, '', 'KR'))

    if (isSpam) {
      spamIds.add(id)
    }
  }

  const isSpamCount = spamIds.size
  console.log('isSpamCount: ', isSpamCount)
  // Divide by the number of posts actually examined; the fixture may hold
  // fewer than `maxPosts` entries (or none at all).
  console.log('ratio: ', examined.length > 0 ? isSpamCount / examined.length : 0)

  // Fill in locally with ids of known-legitimate posts to hide them from the
  // "not filtered" sample below.
  const allowIds: number[] = []
  const excludedIds = new Set<number>([...spamIds, ...allowIds])

  // Only posts that were examined can meaningfully be reported as missed.
  const notFilteredPosts = examined.filter((post) => !excludedIds.has(post.id))
  console.log('notFilteredPosts', notFilteredPosts[0])
}
124+
}
125+
126+
/**
 * One entry of the spam fixture consumed by `spamFilterTestRunner`.
 */
type PostData = {
  /** Identifier used to track flagged posts; defaults to the entry's index in the fixture. */
  id: number;
  /** Post title passed to the spam check. */
  title: string;
  /** Post body passed to the spam check. */
  body: string;
  /** Tags as stored in the fixture (not read by the test runner). */
  tags: string;
  /** Author username passed to the spam check. */
  username: string;
}

packages/velog-cron/src/routes/posts/v1/index.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ const v1: FastifyPluginCallback = (fastify, opts, done) => {
2424
reply.status(HttpStatus.OK).send({ processedPostsCount })
2525
})
2626

27+
fastify.post('/test/spam-filter', async (_, reply) => {
28+
await postController.spamFilterTestRunner()
29+
reply.status(HttpStatus.OK).send(HttpStatusMessage.Ok)
30+
})
31+
2732
done()
2833
}
2934

packages/velog-cron/src/services/PostService/index.ts

Lines changed: 76 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -111,11 +111,17 @@ export class PostService implements Service {
111111
.join('')
112112
.concat(user.profile?.short_bio ?? '', user.profile?.display_name ?? '')
113113

114-
const isSpam = await this.checkIsSpam(post.title ?? '', post.body ?? '', extraText, country)
114+
const isSpam = await this.checkIsSpam(
115+
post.title ?? '',
116+
post.body ?? '',
117+
user.username,
118+
extraText,
119+
country,
120+
)
115121

116122
if (!isSpam) return
117123

118-
this.db.post.update({
124+
await this.db.post.update({
119125
where: {
120126
id: post.id,
121127
},
@@ -130,9 +136,10 @@ export class PostService implements Service {
130136

131137
this.discord.sendMessage('spam', JSON.stringify(message))
132138
}
133-
private async checkIsSpam(
139+
public async checkIsSpam(
134140
title: string,
135141
body: string,
142+
username: string,
136143
extraText: string,
137144
country: string,
138145
): Promise<boolean> {
@@ -144,22 +151,25 @@ export class PostService implements Service {
144151
return true
145152
}
146153

147-
const checkTitle = await this.spamFilter(title!, isForeign, true)
148-
154+
const checkTitle = await this.spamFilter(title!, username, isForeign, true)
149155
if (checkTitle) {
150156
return true
151157
}
152158

153-
const checkBody = await this.spamFilter(body!.concat(extraText), isForeign)
154-
159+
const checkBody = await this.spamFilter(body!.concat(extraText), username, isForeign)
155160
if (checkBody) {
156161
return true
157162
}
158163

159164
return false
160165
}
161166

162-
private async spamFilter(text: string, isForeign: boolean, isTitle = false): Promise<boolean> {
167+
private async spamFilter(
168+
text: string,
169+
username: string,
170+
isForeign: boolean,
171+
isTitle = false,
172+
): Promise<boolean> {
163173
const includesCN = /[\u4e00-\u9fa5]/.test(text)
164174
const includesKR = /[ㄱ-ㅎ|ㅏ-ㅣ|가-힣]/.test(text)
165175

@@ -171,20 +181,30 @@ export class PostService implements Service {
171181
// replace image markdown
172182
replaced = replaced.replace(/!\[([\s\S]*?)\]\(([\s\S]*?)\)/g, '')
173183

174-
const alphanumericKorean = replaced
175-
.replace(/[^a-zA-Zㄱ-힣0-9 \n]/g, '') // remove non-korean
176-
.toLowerCase()
184+
if (isTitle) {
185+
replaced = replaced.replace(/\s/g, '')
186+
}
177187

178188
const hasLink = /http/.test(replaced)
179189

190+
const phoneRegex = [/\+\d{13}/, /\+\d{11}/]
191+
192+
const containsPhoneNumber = phoneRegex.some((regex) => regex.test(replaced))
193+
194+
if (containsPhoneNumber) {
195+
return true
196+
}
197+
180198
if (!isTitle && isForeign && hasLink) {
181199
const lines = replaced.split('\n').filter((line) => line.trim().length > 1)
182200
const koreanLinesCount = lines.filter((line) => this.hasKorean(line)).length
183201
const confidence = koreanLinesCount / lines.length
184202
return confidence < 0.3
185203
}
186204

187-
const spaceReplaced = alphanumericKorean.replace(/\s/g, '')
205+
const removeDuplicatedWords = Array.from(
206+
new Set(replaced.toLocaleLowerCase().replace(/\s/g, '').split(/\n| /)),
207+
).join(' ')
188208

189209
const oneMonthAgo = subMonths(new Date(), 1)
190210
const bannedKeywords = await this.db.dynamicConfigItem.findMany({
@@ -196,38 +216,17 @@ export class PostService implements Service {
196216
},
197217
})
198218

199-
const removeDuplicated = Array.from(
200-
new Set([text, alphanumericKorean, spaceReplaced].join(',').split(' ')),
201-
)
202-
const keywordsToUpdate: string[] = []
203-
204219
const checkKeyword = bannedKeywords
205220
.map((keyword) => keyword.value)
206221
.some((keyword) => {
207-
if (removeDuplicated.includes(keyword)) {
208-
keywordsToUpdate.push(keyword)
222+
if (removeDuplicatedWords.includes(keyword)) {
223+
this.updateDynmicConfigItem(keyword)
209224
return true
210225
} else {
211226
return false
212227
}
213228
})
214229

215-
if (keywordsToUpdate.length > 0) {
216-
this.db.dynamicConfigItem.updateMany({
217-
where: {
218-
value: {
219-
in: keywordsToUpdate,
220-
},
221-
},
222-
data: {
223-
last_used_at: new Date(),
224-
usage_count: {
225-
increment: 1,
226-
},
227-
},
228-
})
229-
}
230-
231230
if (checkKeyword) {
232231
return true
233232
}
@@ -238,23 +237,55 @@ export class PostService implements Service {
238237
},
239238
})
240239

241-
const score = bannedAltKeywords
242-
.map(({ value }) => value)
243-
.reduce((acc, current) => {
244-
if (alphanumericKorean.includes(current)) {
245-
return acc + 1
246-
}
247-
return acc
248-
}, 0)
240+
let score = 0
249241

250-
if (score >= 2 && isForeign) {
251-
return true
242+
if (hasLink) {
243+
score++
244+
}
245+
246+
const isOnlyNumbers = /^\d+$/.test(username)
247+
if (isOnlyNumbers) {
248+
score++
252249
}
250+
251+
const notAlphanumbericKorean = replaced.replace(/[a-zA-Zㄱ-힣0-9]/g, '') // remove korean
252+
if (notAlphanumbericKorean.length / replaced.length > 0.35) {
253+
score++
254+
}
255+
256+
for (const { value: keyword } of bannedAltKeywords) {
257+
if (removeDuplicatedWords.includes(keyword)) {
258+
this.updateDynmicConfigItem(keyword)
259+
score++
260+
}
261+
262+
if (score >= 2 && isForeign) {
263+
return true
264+
}
265+
266+
if (score >= 3) {
267+
return true
268+
}
269+
}
270+
253271
return false
254272
}
255273
/**
 * Tells whether `text` contains at least one Hangul character.
 *
 * The character class is written with Unicode escapes (Hangul compatibility
 * jamo U+3131–U+3163 and Hangul syllables U+AC00–U+D7A3) so that it survives
 * tooling that strips or mangles non-ASCII source characters.
 *
 * @param text Text to inspect.
 * @returns `true` when any Hangul jamo or syllable is present.
 */
private hasKorean(text: string): boolean {
  // No `g` flag: `test` only needs a yes/no answer and must stay stateless.
  return /[\u3131-\u3163\uac00-\ud7a3]/.test(text)
}
276+
/**
 * Records a hit for a keyword: for every `dynamicConfigItem` row whose
 * `value` equals the keyword, sets `last_used_at` to now and increments
 * `usage_count` by one.
 *
 * Callers invoke this without awaiting it, so a failure is logged here
 * instead of being surfaced as an unhandled promise rejection.
 *
 * @param value Keyword whose usage statistics should be bumped.
 * @returns Resolves when the update finished or its failure was logged.
 */
private async updateDynmicConfigItem(value: string): Promise<void> {
  try {
    await this.db.dynamicConfigItem.updateMany({
      where: { value },
      data: {
        last_used_at: new Date(),
        usage_count: { increment: 1 },
      },
    })
  } catch (error) {
    // Usage statistics are best-effort; never let them break spam detection.
    console.error('failed to update dynamicConfigItem usage:', error)
  }
}
258289
}
259290

260291
type FindByUserIdParams = {

0 commit comments

Comments
 (0)