package application;
import java.time.Duration;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;
/**
 * Multi-threaded consumption demo: a single thread polls Kafka and the
 * message-processing stage is fanned out to a worker pool.
 *
 * <p>Each {@link RecordsHandler} records the next offset to commit for every
 * partition it processed in the shared {@link #offsets} map; the polling thread
 * drains that map and commits it synchronously after every poll.
 *
 * <p>Known limitation: batches run concurrently and may finish out of order. If a
 * batch with lower offsets fails after a later batch of the same partition has
 * already had its offsets committed, the failed records are not consumed again.
 */
public class ThirdMultiConsumerThreadDemo {
    public static final String brokers = "localhost:9092";
    public static final String topic = "topic-spark";
    public static final String groupId = "group-spark";

    /**
     * Offsets waiting to be committed, keyed by partition. Every
     * {@link RecordsHandler} stores its progress here after processing a batch.
     * The map is also the monitor guarding itself, so all access must happen
     * inside {@code synchronized (offsets)}; it is final because a lock object
     * must never be swapped out.
     */
    public static final Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();

    /**
     * Builds the consumer configuration used by this demo.
     *
     * @return properties with String key/value deserializers, the broker list,
     *         the group id, and auto-commit disabled
     */
    public static Properties initConfig() {
        Properties props = new Properties();
        props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
        props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        // Offsets are committed manually once a batch has been processed; auto-commit
        // would also commit records that were polled but not yet handled.
        props.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
        return props;
    }

    /**
     * Starts one polling thread whose worker pool has one thread per available
     * processor.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Properties props = initConfig();
        KafkaConsumerThread consumerThread = new KafkaConsumerThread(
                props,
                topic,
                Runtime.getRuntime().availableProcessors());
        consumerThread.start();
    }

    /**
     * Polling thread: owns the {@link KafkaConsumer}, submits every non-empty
     * batch to the worker pool, and commits the offsets reported by finished
     * handlers.
     */
    public static class KafkaConsumerThread extends Thread {
        /** Short poll timeout so pending offsets are committed promptly on an idle topic. */
        private static final Duration POLL_TIMEOUT = Duration.ofMillis(100);
        /** How long to wait for in-flight batches when the polling loop ends. */
        private static final long SHUTDOWN_WAIT_SECONDS = 10L;

        private final KafkaConsumer<String, String> kafkaConsumer;
        private final ExecutorService executorService;
        private final int threadNumber;

        /**
         * Creates the consumer, subscribes it to {@code topic}, and builds a
         * fixed-size worker pool.
         *
         * @param props        consumer configuration
         * @param topic        topic to subscribe to
         * @param threadNumber number of worker threads (core size equals max size)
         */
        public KafkaConsumerThread(Properties props, String topic, int threadNumber) {
            kafkaConsumer = new KafkaConsumer<>(props);
            kafkaConsumer.subscribe(Collections.singletonList(topic));
            this.threadNumber = threadNumber;
            executorService = new ThreadPoolExecutor(
                    threadNumber,
                    threadNumber,
                    0L,
                    TimeUnit.MILLISECONDS,
                    new ArrayBlockingQueue<>(100),
                    // CallerRunsPolicy: when the bounded queue is full the polling thread
                    // runs the batch itself, which throttles poll() instead of rejecting
                    // work when the pool cannot keep up.
                    new ThreadPoolExecutor.CallerRunsPolicy());
        }

        /**
         * Poll loop: fetch, dispatch, commit. Any exception ends the loop; the
         * worker pool and the consumer are always released afterwards.
         */
        @Override
        public void run() {
            try {
                while (true) {
                    ConsumerRecords<String, String> records = kafkaConsumer.poll(POLL_TIMEOUT);
                    if (!records.isEmpty()) {
                        executorService.submit(new RecordsHandler(records));
                    }
                    commitPendingOffsets();
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                releaseResources();
            }
        }

        /**
         * Commits whatever the handlers have reported so far. The shared map is
         * copied and cleared under the lock, and the blocking commit happens
         * outside it so workers are not stalled by network I/O.
         */
        private void commitPendingOffsets() {
            Map<TopicPartition, OffsetAndMetadata> snapshot;
            synchronized (offsets) {
                if (offsets.isEmpty()) {
                    return;
                }
                snapshot = new HashMap<>(offsets);
                offsets.clear();
            }
            kafkaConsumer.commitSync(snapshot);
        }

        /**
         * Stops the worker pool (its non-daemon threads would otherwise keep the
         * JVM alive), makes a best-effort final commit, and closes the consumer.
         */
        private void releaseResources() {
            executorService.shutdown();
            try {
                if (!executorService.awaitTermination(SHUTDOWN_WAIT_SECONDS, TimeUnit.SECONDS)) {
                    executorService.shutdownNow();
                }
                commitPendingOffsets();
            } catch (InterruptedException e) {
                executorService.shutdownNow();
                Thread.currentThread().interrupt();
            } catch (RuntimeException e) {
                // Best effort only: the loop may have ended because committing failed.
                e.printStackTrace();
            } finally {
                kafkaConsumer.close();
            }
        }
    }

    /**
     * Processes one polled batch and records, per partition, the next offset to
     * commit in the shared {@link #offsets} map.
     *
     * <p>NOTE(review): this type extends {@link Thread} but is only ever handed to
     * an executor as a {@link Runnable}; the superclass is kept so existing
     * callers keep compiling.
     */
    public static class RecordsHandler extends Thread {
        public final ConsumerRecords<String, String> records;

        /**
         * @param records the batch returned by a single poll
         */
        public RecordsHandler(ConsumerRecords<String, String> records) {
            this.records = records;
        }

        /**
         * Handles the batch partition by partition, then publishes
         * {@code lastOffset + 1} for each partition unless a larger offset is
         * already pending.
         */
        @Override
        public void run() {
            for (TopicPartition tp : records.partitions()) {
                List<ConsumerRecord<String, String>> tpRecords = records.records(tp);
                if (tpRecords.isEmpty()) {
                    continue;
                }
                // Business processing of tpRecords goes here.
                long nextOffset = tpRecords.get(tpRecords.size() - 1).offset() + 1;
                // Lock the shared map: several handlers may report concurrently.
                synchronized (offsets) {
                    OffsetAndMetadata pending = offsets.get(tp);
                    // Only move forward; never replace a pending offset with a smaller one.
                    if (pending == null || pending.offset() < nextOffset) {
                        offsets.put(tp, new OffsetAndMetadata(nextOffset));
                    }
                }
            }
        }
    }
}
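注意:该代码未解决异常情况下的位移覆盖问题。例如,某个线程在处理位移 0~99 的消息时发生异常,而另一个线程已经处理完同一分区位移 100~199 的消息并提交了位移 200,那么位移 0~99 的这批消息不会再被重新消费,从而造成消息丢失。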
本文介绍了一种使用多线程处理Kafka消息的消费者实现方式,通过自定义KafkaConsumerThread类和RecordsHandler类,实现了消息的并行处理和消费位移的同步更新。代码示例展示了如何配置消费者属性,创建线程池,以及处理消费记录。
2152

被折叠的 条评论
为什么被折叠?



