Skip to content

Commit c3e7edc

Browse files
committed
Feat: implements spam filter in cron package
1 parent ba38536 commit c3e7edc

File tree

14 files changed

+239
-147
lines changed

14 files changed

+239
-147
lines changed

packages/velog-cron/env/.env.example

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ DATABASE_URL=
33
CRON_API_KEY=
44
REDIS_HOST=
55
DISCORD_BOT_TOKEN=
6-
DISCORD_STATS_CHANNEL=
6+
DISCORD_STATS_CHANNEL=
7+
DISCORD_SPAM_CHANNEL=

packages/velog-cron/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
"fastify": "^4.18.0",
4040
"fastify-cron": "^1.3.1",
4141
"fastify-plugin": "^4.5.1",
42+
"geoip-country": "^4.2.68",
4243
"inquirer": "^9.2.7",
4344
"ioredis": "^5.3.2",
4445
"pino-pretty": "^10.0.0",
@@ -50,6 +51,7 @@
5051
"zod": "^3.21.4"
5152
},
5253
"devDependencies": {
54+
"@types/geoip-country": "^4.0.2",
5355
"@types/inquirer": "^9.0.3",
5456
"@types/jest": "^29.5.2",
5557
"@types/node": "^20.5.0",

packages/velog-cron/src/env.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ const env = z.object({
4141
redisHost: z.string(),
4242
discordBotToken: z.string(),
4343
discordStatsChannel: z.string(),
44+
discordSpamChannel: z.string(),
4445
})
4546

4647
export const ENV = env.parse({
@@ -52,4 +53,5 @@ export const ENV = env.parse({
5253
redisHost: process.env.REDIS_HOST,
5354
discordBotToken: process.env.DISCORD_BOT_TOKEN,
5455
discordStatsChannel: process.env.DISCORD_STATS_CHANNEL,
56+
discordSpamChannel: process.env.DISCORD_SPAM_CHANNEL,
5557
})

packages/velog-cron/src/lib/discord/DiscordService.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ export class DiscordService {
4343

4444
const channelMapper: Record<MessageType, string> = {
4545
stats: ENV.discordStatsChannel,
46+
spam: ENV.discordSpamChannel,
4647
}
4748

4849
const channelId = channelMapper[type]
@@ -70,4 +71,4 @@ export class DiscordService {
7071
}
7172
}
7273

73-
type MessageType = 'stats'
74+
type MessageType = 'stats' | 'spam'

packages/velog-cron/src/lib/redis/RedisService.ts

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@ export class RedisService extends Redis implements Service {
2828
}
2929
}
3030

31-
get queueName() {
31+
get queueName(): Record<QueueName, string> {
3232
return {
3333
createFeed: 'queue:feed',
34+
checkPostSpam: 'queue:checkPostSpam',
3435
}
3536
}
3637
}
@@ -39,4 +40,10 @@ type GenerateRedisKey = {
3940
trendingWriters: () => string
4041
}
4142

42-
type QueueName = 'createFeed'
43+
type QueueName = 'createFeed' | 'checkPostSpam'
44+
45+
export type CheckPostSpamArgs = {
46+
post_id: string
47+
user_id: string
48+
ip: string
49+
}

packages/velog-cron/src/services/PostService/index.ts

Lines changed: 196 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,25 @@
11
import { DbService } from '@lib/db/DbService.js'
2+
import { CheckPostSpamArgs } from '@lib/redis/RedisService'
23
import { Post, Prisma } from '@prisma/client'
34
import { injectable, singleton } from 'tsyringe'
5+
import geoip from 'geoip-country'
6+
import { subMonths } from 'date-fns'
7+
import { DiscordService } from '@lib/discord/DiscordService'
48

59
interface Service {
610
findById(postId: string): Promise<Post | null>
711
scoreCalculator(postId: string): Promise<void>
12+
checkSpam(args: CheckPostSpamArgs): Promise<void>
813
}
914

1015
@singleton()
1116
@injectable()
1217
export class PostService implements Service {
13-
constructor(private readonly db: DbService) {}
18+
constructor(
19+
private readonly db: DbService,
20+
private readonly discord: DiscordService,
21+
) {}
22+
1423
public async findById(postId: string): Promise<Post | null> {
1524
const post = await this.db.post.findUnique({
1625
where: {
@@ -19,6 +28,7 @@ export class PostService implements Service {
1928
})
2029
return post
2130
}
31+
2232
public async findByUserId({ userId, ...queries }: FindByUserIdParams): Promise<Post[]> {
2333
const { where, ...query } = queries
2434
const posts = await this.db.post.findMany({
@@ -30,6 +40,7 @@ export class PostService implements Service {
3040
})
3141
return posts
3242
}
43+
3344
public async scoreCalculator(postId: string): Promise<void> {
3445
const post = await this.findById(postId)
3546

@@ -59,6 +70,190 @@ export class PostService implements Service {
5970
},
6071
})
6172
}
73+
74+
/**
 * Inspects a post for spam. When the post is judged to be spam it is made
 * private and a report is sent to the Discord `spam` channel.
 *
 * @param args - `post_id` and `user_id` select the post and user records to
 *   load; `ip` is passed to the GeoIP lookup to resolve a country code.
 * @throws Error `Not found Post` / `Not found User` when either record is missing.
 */
public async checkSpam({ post_id, user_id, ip }: CheckPostSpamArgs): Promise<void> {
  const post = await this.db.post.findUnique({
    where: {
      id: post_id,
    },
    include: {
      postTags: {
        include: {
          tag: true,
        },
      },
    },
  })

  if (!post) {
    throw new Error('Not found Post')
  }

  const user = await this.db.user.findUnique({
    where: {
      id: user_id,
    },
    include: {
      profile: true,
    },
  })

  if (!user) {
    throw new Error('Not found User')
  }

  // An IP that cannot be resolved falls back to '' (no country).
  const country = geoip.lookup(ip)?.country ?? ''

  // Tag names, the profile bio and the display name are scanned together with the body.
  const extraText = post.postTags
    .flatMap((postTag) => postTag.tag)
    .map((tag) => tag?.name ?? '')
    .join('')
    .concat(user.profile?.short_bio ?? '', user.profile?.display_name ?? '')

  const isSpam = await this.checkIsSpam(post.title ?? '', post.body ?? '', extraText, country)

  if (!isSpam) return

  // Prisma queries are lazy: without `await` the update is never sent to the
  // database and the spam post would stay public.
  await this.db.post.update({
    where: {
      id: post.id,
    },
    data: {
      is_private: true,
    },
  })

  const message = {
    text: `*userId*: ${user_id}\ntitle: ${post.title}, ip: ${ip}, country: ${country} type: isSpam`,
  }

  // Awaited so a failed report surfaces to the caller instead of becoming an
  // unhandled rejection.
  await this.discord.sendMessage('spam', JSON.stringify(message))
}
132+
/**
 * Combines the country rules with the text filter to decide whether a post is spam.
 *
 * A country on the block list is spam outright. Otherwise the title is
 * filtered first, and the body (with `extraText` appended) only when the
 * title passes. A country outside the allow list marks the text as foreign;
 * the empty string (no country resolved) is on the allow list.
 *
 * @param title - post title
 * @param body - post body
 * @param extraText - additional text appended to the body before filtering
 * @param country - country code, or '' when unknown
 * @returns true when the post is considered spam
 */
private async checkIsSpam(
  title: string,
  body: string,
  extraText: string,
  country: string,
): Promise<boolean> {
  const blockedCountries = ['IN', 'PK', 'CN', 'VN', 'TH', 'PH']
  if (blockedCountries.includes(country)) return true

  const trustedCountries = ['KR', 'GB', '']
  const isForeign = !trustedCountries.includes(country)

  const titleIsSpam = await this.spamFilter(title, isForeign, true)
  if (titleIsSpam) return true

  const bodyIsSpam = await this.spamFilter(body.concat(extraText), isForeign)
  return bodyIsSpam
}
160+
161+
/**
 * Decides whether a piece of text looks like spam.
 *
 * Checks, in order:
 * 1. Chinese characters with no Hangul at all -> spam.
 * 2. Non-title foreign text containing a link -> spam when fewer than 30% of
 *    its non-trivial lines contain Hangul (this check is final for such text).
 * 3. A `bannedKeyword` used within the last month that equals one of the
 *    text's space-separated tokens -> spam; the keyword's usage stats are updated.
 * 4. Foreign text containing two or more `bannedAltKeyword` substrings -> spam.
 *
 * @param text - raw text to inspect
 * @param isForeign - whether the text should be treated as foreign
 * @param isTitle - true when `text` is a title (skips the link heuristic)
 * @returns true when the text is considered spam
 */
private async spamFilter(text: string, isForeign: boolean, isTitle = false): Promise<boolean> {
  const includesCN = /[\u4e00-\u9fa5]/.test(text)
  // Hangul compatibility jamo (U+3131-U+3163) and Hangul syllables (U+AC00-U+D7A3).
  const includesKR = /[\u3131-\u3163\uac00-\ud7a3]/.test(text)

  if (includesCN && !includesKR) {
    return true
  }

  const withoutCodeBlocks = text.replace(/```([\s\S]*?)```/g, '') // remove code blocks
  // remove image markdown
  const replaced = withoutCodeBlocks.replace(/!\[([\s\S]*?)\]\(([\s\S]*?)\)/g, '')

  // Keep only latin letters, digits, Hangul, spaces and newlines.
  const alphanumericKorean = replaced
    .replace(/[^a-zA-Z0-9\u3131-\u3163\uac00-\ud7a3 \n]/g, '')
    .toLowerCase()

  const hasLink = /http/.test(replaced)

  if (!isTitle && isForeign && hasLink) {
    // `lines` cannot be empty here: the line holding "http" is longer than one character.
    const lines = replaced.split('\n').filter((line) => line.trim().length > 1)
    const koreanLinesCount = lines.filter((line) => this.hasKorean(line)).length
    const confidence = koreanLinesCount / lines.length
    return confidence < 0.3
  }

  const spaceReplaced = alphanumericKorean.replace(/\s/g, '')

  const oneMonthAgo = subMonths(new Date(), 1)
  const bannedKeywords = await this.db.dynamicConfigItem.findMany({
    where: {
      type: 'bannedKeyword',
      last_used_at: {
        gte: oneMonthAgo,
      },
    },
  })

  // Tokenise each variant on its own. Joining the variants before splitting
  // glued neighbouring words together ("word,word"), so a single-word text
  // equal to a banned keyword could never match.
  const tokens = new Set(
    [text, alphanumericKorean, spaceReplaced].flatMap((variant) => variant.split(' ')),
  )

  // Only the first matching keyword is recorded.
  const matchedKeyword = bannedKeywords
    .map((keyword) => keyword.value)
    .find((keyword) => tokens.has(keyword))

  if (matchedKeyword !== undefined) {
    // Prisma queries are lazy: without `await` the usage stats are never written.
    await this.db.dynamicConfigItem.updateMany({
      where: {
        value: {
          in: [matchedKeyword],
        },
      },
      data: {
        last_used_at: new Date(),
        usage_count: {
          increment: 1,
        },
      },
    })
    return true
  }

  // Alt keywords only count against foreign text, so skip the query otherwise.
  if (!isForeign) {
    return false
  }

  const bannedAltKeywords = await this.db.dynamicConfigItem.findMany({
    where: {
      type: 'bannedAltKeyword',
    },
  })

  const score = bannedAltKeywords.filter(({ value }) => alphanumericKorean.includes(value)).length

  return score >= 2
}
254+
/**
 * Reports whether the text contains at least one Hangul character
 * (compatibility jamo U+3131-U+3163 or syllables U+AC00-U+D7A3).
 *
 * @param text - text to inspect
 * @returns true when a Hangul character is present
 */
private hasKorean(text: string): boolean {
  // No `g` flag: a single stateless existence test is all that is needed.
  return /[\u3131-\u3163\uac00-\ud7a3]/.test(text)
}
62257
}
63258

64259
type FindByUserIdParams = {

packages/velog-scripts/lib/blockList/BlockListService.mts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,14 @@ export class BlockListService implements Service {
1414
await this.db.dynamicConfigItem.create({
1515
data: {
1616
value: username,
17-
type: 'username',
17+
type: 'blockUsername',
1818
},
1919
})
2020
}
2121
public async readBlockList() {
2222
const blockList = await this.db.dynamicConfigItem.findMany({
2323
where: {
24-
type: 'username',
24+
type: 'blockUsername',
2525
},
2626
})
2727
return blockList.map((item) => item.value)

packages/velog-scripts/scripts/addBannedKeywords.mts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ class Runner {
1010
public async run(bannedKeywords: string[]) {
1111
const data = bannedKeywords.map((value) => ({
1212
value,
13-
type: 'banned',
13+
type: 'bannedKeyword',
1414
}))
1515

1616
try {

packages/velog-server/src/env.ts

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@ import { existsSync } from 'fs'
33
import { z } from 'zod'
44
import { fileURLToPath } from 'url'
55
import { dirname, join } from 'path'
6-
import { container } from 'tsyringe'
7-
import { DbService } from '@lib/db/DbService.js'
86

97
type DockerEnv = 'development' | 'stage' | 'production'
108
type AppEnvironment = 'development' | 'production'
@@ -85,13 +83,9 @@ const env = z.object({
8583
discordErrorChannel: z.string(),
8684
discordSpamChannel: z.string(),
8785
turnstileSecretKey: z.string(),
88-
bannedKeywords: z.array(z.string()),
89-
bannedAltKeywords: z.array(z.string()),
9086
graphcdnToken: z.string(),
9187
})
9288

93-
const { bannedKeywords, bannedAltKeywords } = await readEnvFromDatabase()
94-
9589
export const ENV = env.parse({
9690
dockerEnv,
9791
appEnv,
@@ -131,17 +125,5 @@ export const ENV = env.parse({
131125
discordErrorChannel: process.env.DISCORD_ERROR_CHANNEL,
132126
discordSpamChannel: process.env.DISCORD_SPAM_CHANNEL,
133127
turnstileSecretKey: process.env.TURNSTILE_SECRET_KEY,
134-
bannedKeywords: bannedKeywords,
135-
bannedAltKeywords: bannedAltKeywords,
136128
graphcdnToken: process.env.GRAPHCDN_TOKEN,
137129
})
138-
139-
async function readEnvFromDatabase() {
140-
const db = container.resolve(DbService)
141-
const items = await db.dynamicConfigItem.findMany()
142-
143-
return {
144-
bannedKeywords: items.filter((item) => item.type === 'banned').map((item) => item.value),
145-
bannedAltKeywords: items.filter((item) => item.type === 'bannedAlt').map((item) => item.value),
146-
}
147-
}

0 commit comments

Comments
 (0)