Flume 启动后读取中文出现乱码。解决办法:在 Flume 的 Java 启动参数中加上 `-Dfile.encoding=UTF-8`(注意前面的 `-`,且等号两边不能有空格),然后自定义拦截器,用 GBK 对事件内容解码。
拦截器代码如下
package com.decode;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.List;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.interceptor.Interceptor;
import org.mortbay.jetty.HttpHeaderValues;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Flume interceptor that re-encodes each event body from a configurable source
 * charset (GBK by default) to UTF-8.
 *
 * <p>The source charset name is read from the {@code inputCharset} property of
 * the interceptor's {@link Context} by {@link Builder#configure(Context)}.
 */
public class CustomInceptors implements Interceptor {
    private static final Logger logger = LoggerFactory.getLogger(CustomInceptors.class);

    /** Charset name used when the {@code inputCharset} property is not configured. */
    private static final String DEFAULT_INPUT_CHARSET = "gbk";

    /**
     * Name of the charset used to decode incoming event bodies.
     *
     * <p>This field is static, so the value is shared by every instance of this
     * interceptor in the JVM; it is assigned by {@link Builder#configure(Context)}.
     */
    public static String INPUT_CHARSET = DEFAULT_INPUT_CHARSET;

    /** Creates the interceptor and logs a start-up banner. */
    public CustomInceptors() {
        logger.info("==========拦截器开始工作==============");
    }

    /** No initialization is required. */
    @Override
    public void initialize() {
    }

    /**
     * Decodes the event body with {@link #INPUT_CHARSET} and replaces it with the
     * UTF-8 encoding of the decoded text.
     *
     * <p>The body is left untouched when it is {@code null} or empty, or when the
     * configured charset name is not supported by the JVM (an error is logged in
     * the latter case).
     *
     * @param event the event whose body is re-encoded in place
     * @return the same {@code event} instance
     */
    @Override
    public Event intercept(Event event) {
        byte[] body = event.getBody();
        if (body == null || body.length == 0) {
            // Nothing to decode; also avoids a NullPointerException in new String(...).
            return event;
        }
        try {
            String decoded = new String(body, INPUT_CHARSET);
            // Encode explicitly as UTF-8 instead of relying on the JVM default
            // charset (file.encoding), so the result is the same on every host.
            event.setBody(decoded.getBytes(StandardCharsets.UTF_8));
        } catch (UnsupportedEncodingException e) {
            logger.error("Unsupported input charset '{}'; event body left unchanged",
                    INPUT_CHARSET, e);
        }
        return event;
    }

    /**
     * Applies {@link #intercept(Event)} to every event in the list.
     *
     * @param events the events to re-encode in place
     * @return the same {@code events} list
     */
    @Override
    public List<Event> intercept(List<Event> events) {
        for (Event event : events) {
            intercept(event);
        }
        return events;
    }

    /** No resources to release. */
    @Override
    public void close() {
    }

    /** Builder through which Flume configures and instantiates {@link CustomInceptors}. */
    public static class Builder implements Interceptor.Builder {

        /**
         * Creates a new interceptor instance.
         *
         * @return a new {@link CustomInceptors}
         */
        @Override
        public Interceptor build() {
            return new CustomInceptors();
        }

        /**
         * Reads the {@code inputCharset} property into {@link CustomInceptors#INPUT_CHARSET}.
         *
         * <p>Falls back to the default charset when the property is absent, so an
         * unconfigured interceptor no longer ends up with a {@code null} charset name.
         *
         * @param context the interceptor configuration
         */
        @Override
        public void configure(Context context) {
            CustomInceptors.INPUT_CHARSET =
                    context.getString("inputCharset", DEFAULT_INPUT_CHARSET);
        }
    }
}
打包成 jar 后,放入 plugins.d/decode/lib 目录下;也可以直接放入 Flume 安装目录的 lib 目录下。
该博客主要介绍了在使用 Flume 时遇到的中文乱码问题及解决方案。通过设置 Flume 启动参数 `-Dfile.encoding=UTF-8` 并自定义 GBK 解码的拦截器,成功解决了数据读取过程中的乱码问题。拦截器代码中包含了从 GBK 到 UTF-8 的转换逻辑,并提供了如何将拦截器打包和放置到 Flume 相应目录的步骤。
5608

被折叠的 条评论
为什么被折叠?



