前言

  • 技术栈
Windows 10
JDK     1.8
Flink   1.18.1

WordCount 批处理

package qbit.example;

import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.connector.file.src.FileSource;
import org.apache.flink.connector.file.src.reader.TextLineInputFormat;
import org.apache.flink.core.fs.Path;
import org.apache.flink.util.Collector;

public class WordCountBatch {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setRuntimeMode(RuntimeExecutionMode.BATCH);

        final FileSource<String> source = FileSource.forRecordStreamFormat(new TextLineInputFormat(), new Path("K:/tmp/")).build();
        final DataStream<String> stream = env.fromSource(source, WatermarkStrategy.noWatermarks(), "file-source");

        DataStream<Tuple2<String, Integer>> dataStream = stream
                .flatMap(new Splitter())
                .keyBy(value -> value.f0)
                .sum(1);
        dataStream.print();
        env.execute("WordCountBatch");
    }

    public static class Splitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
        public void flatMap(String sentence, Collector<Tuple2<String, Integer>> out) throws Exception {
            for (String word : sentence.split(" ")) {
                out.collect(new Tuple2<String, Integer>(word, 1));
            }
        }
    }

}
  • 运行结果
(flink,4)
(hello,8)
(spark,3)
(python,1)

WordCount 流处理

package qbit.example;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.SlidingProcessingTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;

public class WordCountStream {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStream<Tuple2<String, Integer>> dataStream = env
                .socketTextStream("127.0.0.1", 9999)
                .flatMap(new Splitter())
                .keyBy(value -> value.f0)            
                .window(SlidingProcessingTimeWindows.of(Time.seconds(60), Time.seconds(1)))
                .sum(1);

        dataStream.print();
        env.execute("WordCountStream");
    }

    public static class Splitter implements FlatMapFunction<String, Tuple2<String, Integer>> {
        public void flatMap(String sentence, Collector<Tuple2<String, Integer>> out) throws Exception {
            for (String word: sentence.split(" ")) {
                out.collect(new Tuple2<String, Integer>(word, 1));
            }
        }
    }

}
  • 用 nc 创建服务端,并输入数据
$ nc -lk 9999
a
a
a
b
c
b
  • 运行结果
3> (a,2)
3> (a,3)
3> (a,3)
1> (b,1)
2> (c,1)
3> (a,3)
2> (c,1)
1> (b,2)

pom.xml

<!-- Maven build descriptor for the two WordCount examples (batch and streaming). -->
<project>
    <name>test_flink</name>
    <modelVersion>4.0.0</modelVersion>
    <version>0.1</version>
    <groupId>cn.qbit</groupId>
    <artifactId>test_flink</artifactId>
    <packaging>jar</packaging>
    <description>test flink</description>

    <!-- Shared versions and encodings; java.version feeds both the compiler
         properties below and the maven-compiler-plugin configuration. -->
    <properties>
        <java.version>1.8</java.version>
        <flink.version>1.18.1</flink.version>
        <maven.compiler.source>${java.version}</maven.compiler.source>
        <maven.compiler.target>${java.version}</maven.compiler.target>
        <maven.compiler.encoding>UTF-8</maven.compiler.encoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-java -->
        <!-- DataStream API. Declared with "provided" scope, so it is available at compile
             time but is not packaged with the artifact. -->
        <!-- NOTE(review): when launching from an IDE, provided-scope dependencies may have
             to be added to the run classpath explicitly; confirm for your setup. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
            <exclusions>
                <exclusion>
                    <artifactId>chill-java</artifactId>
                    <groupId>com.twitter</groupId>
                </exclusion>
            </exclusions>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-clients -->
        <!-- Since Flink 1.11, flink-streaming-java no longer depends directly on flink-clients -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-connector-files -->
        <!-- Provides FileSource / TextLineInputFormat used by the batch example. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-files</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Compile with the Java level defined by java.version above. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>${java.version}</source>
                    <target>${java.version}</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
本文出自 qbit snap

qbit
271 声望279 粉丝