package flinkdemo.sinkDemo;
import flinkdemo.sourceDemo.deserialization.ConsumerRecordKafkaDeserializationSchema;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011;
import java.nio.charset.StandardCharsets;
import java.util.*;
import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition;
import org.apache.kafka.clients.consumer.ConsumerRecord;
/**
 * Demo job showing the start-position options of the Flink Kafka 0.11 consumer.
 *
 * <p>The job consumes topic {@code otter_sms_0} and maps every record to a string of the
 * form {@code partition_timestamp_key_offset}.
 *
 * <p>NOTE(review): despite the class name, no sink is attached to the mapped stream in this
 * class; the mapped records are discarded. Add a sink if output is expected.
 *
 * @author zhangkai
 * @create 2019/12/11
 */
public class SinkToKafka {

    /** Topic consumed by this demo; also used as the key topic for the specific offsets. */
    private static final String TOPIC = "otter_sms_0";

    /** Start timestamp in epoch milliseconds used by the "start from timestamp" example. */
    private static final long START_TIMESTAMP_MILLIS = 1577433868000L;

    /**
     * Builds the Kafka source, walks through the five start-position setters, and runs the job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the Flink job fails to execute
     */
    public static void main(String[] args) throws Exception {
        String topicList = TOPIC;
        System.out.println(topicList);
        StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment();

        // Checkpoint configuration (disabled in this demo, kept for reference)
        // see.setStateBackend((StateBackend)new RocksDBStateBackend("hdfs://localhost:9000/user/zhangkai/flink-checkpoints"));
        // see.setStateBackend((StateBackend)new RocksDBStateBackend("hdfs:///user/zhangkai/flink-checkpoints"));//
        // CheckpointConfig checkpointConfig = see.getCheckpointConfig();
        // checkpointConfig.enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        // checkpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        // checkpointConfig.setCheckpointInterval(30000);
        // checkpointConfig.setMaxConcurrentCheckpoints(3);
        // checkpointConfig.setCheckpointTimeout(60000);

        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "10.2.40.10:9092");
        // NOTE(review): the map step below casts key and value to byte[], so these String
        // deserializer settings do not appear to take effect -- confirm and remove if so.
        props.setProperty("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.setProperty("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        // props.put("auto.offset.reset", "latest");
        props.setProperty("group.id", "GROUP_ID");

        FlinkKafkaConsumer011<ConsumerRecord> topicData = new FlinkKafkaConsumer011<>(
                Arrays.asList(topicList.split(",")),
                new ConsumerRecordKafkaDeserializationSchema(),
                props);

        // The five calls below demonstrate the available start-position modes. Each call
        // replaces the mode chosen by the previous one, so only the LAST call
        // (setStartFromGroupOffsets) is effective when the job runs. Keep exactly one of
        // them in a real job.

        // 1. Start from records whose timestamp is >= the given epoch-millisecond timestamp.
        topicData.setStartFromTimestamp(START_TIMESTAMP_MILLIS);

        // 2. Start from explicit per-partition offsets.
        //    KafkaTopicPartition takes the topic name and the partition index; the Long value
        //    is the offset to start from. Offsets can be looked up with Kafka's
        //    kafka-consumer-groups.sh script.
        Map<KafkaTopicPartition, Long> specificStartupOffsets = new HashMap<>();
        specificStartupOffsets.put(new KafkaTopicPartition(TOPIC, 0), 3387036L);
        specificStartupOffsets.put(new KafkaTopicPartition(TOPIC, 1), 3182960L);
        specificStartupOffsets.put(new KafkaTopicPartition(TOPIC, 2), 2815761L);
        specificStartupOffsets.put(new KafkaTopicPartition(TOPIC, 3), 3591033L);
        specificStartupOffsets.put(new KafkaTopicPartition(TOPIC, 4), 3346657L);
        topicData.setStartFromSpecificOffsets(specificStartupOffsets);

        // 3. Start from the earliest record available in the topic.
        topicData.setStartFromEarliest();

        // 4. Start from the latest record in the topic.
        topicData.setStartFromLatest();

        // 5. Start from the offsets last committed by the consumer group; this is why
        //    group.id must be configured above.
        topicData.setStartFromGroupOffsets();

        DataStreamSource<ConsumerRecord> consumerRecordDataStreamSource = see.addSource(topicData);
        consumerRecordDataStreamSource
                .map(new RecordSummaryFunction())
                .setParallelism(1);

        see.execute("kafka 2 kafka");
    }

    /**
     * Decodes a raw Kafka key as UTF-8 text.
     *
     * @param raw the key as delivered by the consumer (a {@code byte[]}), or {@code null}
     *            when the record has no key
     * @return the decoded text, or the literal {@code "null"} for a missing key
     */
    private static String decodeUtf8(Object raw) {
        if (raw == null) {
            // Records without a key are legal in Kafka; do not fail the job on them.
            return "null";
        }
        return new String((byte[]) raw, StandardCharsets.UTF_8);
    }

    /** Maps a consumed record to {@code partition_timestamp_key_offset}. */
    private static final class RecordSummaryFunction implements MapFunction<ConsumerRecord, String> {

        private static final long serialVersionUID = 1L;

        /**
         * @param consumerRecord the record read from Kafka; its key is expected to be a
         *                       {@code byte[]} or {@code null}
         * @return the record's partition, timestamp, UTF-8 decoded key and offset joined by
         *         underscores
         */
        @Override
        public String map(ConsumerRecord consumerRecord) {
            return consumerRecord.partition()
                    + "_" + consumerRecord.timestamp()
                    + "_" + decodeUtf8(consumerRecord.key())
                    + "_" + consumerRecord.offset();
        }
    }
}
Flink从Kafka topic指定offset消费的5种方式
最新推荐文章于 2026-06-23 09:04:45 发布
本文详细探讨了Flink从Kafka特定offset开始消费数据的五种方法,包括配置初始offset、使用Checkpoint、利用Savepoint、手动设置Consumer Group以及通过连接器属性设置,帮助读者理解如何在不同场景下灵活控制数据处理起点。
909

被折叠的 条评论
为什么被折叠?



