Scala Code for Connecting Flink to Kafka

  • Maven dependencies
<dependencies>
        <!-- Flink core modules -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_2.11</artifactId>
            <!-- 2.11 is the Scala version -->
            <version>1.11.0</version>
            <!-- 1.11.0 is the Flink version -->
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_2.11</artifactId>
            <version>1.11.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>1.11.0</version>
        </dependency>
        <!-- Flink Kafka connector -->
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-kafka -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.11</artifactId>
            <version>1.11.0</version>
        </dependency>
    </dependencies>
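
  • sbt alternative (optional)

If the project is built with sbt rather than Maven, the same dependencies can be declared roughly as follows. This is a sketch, not part of the original post; the %% operator appends the _2.11 suffix automatically once scalaVersion is set to a 2.11.x release.

//build.sbt: hypothetical sbt equivalent of the Maven dependencies above
scalaVersion := "2.11.12"

val flinkVersion = "1.11.0"

libraryDependencies ++= Seq(
  "org.apache.flink" %% "flink-scala"           % flinkVersion,
  "org.apache.flink" %% "flink-streaming-scala" % flinkVersion,
  "org.apache.flink" %% "flink-clients"         % flinkVersion,
  "org.apache.flink" %% "flink-connector-kafka" % flinkVersion
)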
  • Scala code
package flink

import java.util.Properties
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.api.common.serialization.SimpleStringSchema
//This wildcard import is required; without it the Scala API's implicit TypeInformation conversions are not in scope
import org.apache.flink.streaming.api.scala._

object flink_kafka {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    
    //Kafka consumer configuration
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "10.21.13.181:9092")
    properties.setProperty("zookeeper.connect", "10.21.13.181:2181")
    properties.setProperty("group.id", "consume_id")
    //Create the Kafka consumer: topic "test", plain-string deserialization
    val kafkaSource = new FlinkKafkaConsumer[String]("test", new SimpleStringSchema(), properties)
    //Create a DataStream from the Kafka source
    val testDS: DataStream[String] = env.addSource(kafkaSource)
    //Word count: split each line on spaces, key by word, sum the counts, and print
    testDS.flatMap(_.split(" ")).map((_, 1)).keyBy(_._1).sum(1).print()
    env.execute("job")
  }
}
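
  • Writing the results back to Kafka (optional)

The job above only prints the counts. The same connector also provides FlinkKafkaProducer for the sink side; below is a minimal sketch under the same Flink 1.11 / Scala 2.11 setup. The output topic test_out, the job name, and reusing the broker address are assumptions for illustration, not part of the original post. The three-argument FlinkKafkaProducer constructor used here gives at-least-once delivery; exactly-once requires the Semantic.EXACTLY_ONCE variant plus checkpointing.

package flink

import java.util.Properties
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer, FlinkKafkaProducer}
import org.apache.flink.streaming.api.scala._

object flink_kafka_sink {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "10.21.13.181:9092")
    properties.setProperty("group.id", "consume_id")

    val kafkaSource = new FlinkKafkaConsumer[String]("test", new SimpleStringSchema(), properties)
    //Optional: read the topic from the beginning instead of the committed group offsets
    kafkaSource.setStartFromEarliest()

    //Same word count as above, then serialize each (word, count) tuple to a plain string
    val counts = env.addSource(kafkaSource)
      .flatMap(_.split(" "))
      .map((_, 1))
      .keyBy(_._1)
      .sum(1)
      .map(t => s"${t._1},${t._2}")

    //"test_out" is a hypothetical output topic; create it in Kafka before running.
    //The consumer Properties are reused for brevity; the producer ignores consumer-only keys like group.id.
    val kafkaSink = new FlinkKafkaProducer[String]("test_out", new SimpleStringSchema(), properties)
    counts.addSink(kafkaSink)

    env.execute("kafka_wordcount_sink")
  }
}

With this job running, lines produced to topic test are word-counted and the running totals appear on test_out as "word,count" strings.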