转载--Flink SQL 按时间分区写入到本地文件、HDFS 文件
详情请看:https://www.aboutyun.com/forum.php?mod=viewthread&tid=29104
直接上代码:
已经验证过 JSON、Parquet 等数据格式的写入,在本地磁盘上生成了文件。需要注意的是要导入相应的依赖:
package flinksql;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.sql.Timestamp;

/**
 * Writes a stream of generated user records to a partitioned filesystem table using Flink SQL.
 *
 * <p>The job registers a filesystem-backed table partitioned by day, hour and minute, exposes
 * the generated stream as the temporary view {@code users}, and continuously inserts the view's
 * rows into the table.
 *
 * @author zhangjun (original note: follow the author's WeChat official account
 *     "Big Data Technology and Practical Applications" for more hands-on content)
 */
public class StreamingWriteFile {

    /**
     * Builds and submits the streaming insert job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the environment cannot be set up or a statement fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment bsEnv = StreamExecutionEnvironment.getExecutionEnvironment();
        EnvironmentSettings bsSettings =
                EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
        // Checkpoint every 10 seconds.
        // NOTE(review): the filesystem sink is expected to finalize files on checkpoints —
        // confirm against the connector documentation for the Flink version in use.
        bsEnv.enableCheckpointing(10000);
        StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv, bsSettings);

        DataStream<UserInfo> dataStream = bsEnv.addSource(new MySource());

        // Sink table: JSON files under the configured path, partitioned by dt/h/m.
        String sql = "CREATE TABLE fs_table (\n"
                + " user_id STRING,\n"
                + " order_amount DOUBLE,\n"
                + " dt STRING,"
                + " h string,"
                + " m string \n"
                + ") PARTITIONED BY (dt,h,m) WITH (\n"
                + " 'connector'='filesystem',\n"
                + " 'path'='file:///G:\\汪小剑的文件夹\\flink_test\\',\n"
                + " 'format'='json'\n"
                + ")";
        bsTableEnv.executeSql(sql);

        bsTableEnv.createTemporaryView("users", dataStream);

        // Derive the three partition columns from the record timestamp.
        String insertSql = "insert into fs_table SELECT userId, amount, "
                + " DATE_FORMAT(ts, 'yyyy-MM-dd'), DATE_FORMAT(ts, 'HH'), DATE_FORMAT(ts, 'mm') FROM users";
        bsTableEnv.executeSql(insertSql);
    }

    /**
     * Test source that emits one random {@link UserInfo} roughly every 100 ms until cancelled.
     */
    public static class MySource implements SourceFunction<UserInfo> {

        /** Pool of user ids the source picks from. */
        String[] userids = {
            "4760858d-2bec-483c-a535-291de04b2247",
            "67088699-d4f4-43f2-913c-481bff8a2dc5",
            "72f7b6a8-e1a9-49b4-9a0b-770c41e01bfb",
            "dfa27cb6-bd94-4bc0-a90b-f7beeb9faa8b",
            "aabbaa50-72f4-495c-b3a1-70383ee9d6a4",
            "3218bbb9-5874-4d37-a82d-3e35e52d1702",
            "3ebfb9602ac07779||3ebfe9612a007979",
            "aec20d52-c2eb-4436-b121-c29ad4097f6c",
            "e7e896cd939685d7||e7e8e6c1930689d7",
            "a4b1e1db-55ef-4d9d-b9d2-18393c5f59ee"
        };

        /** Cleared by {@link #cancel()}; volatile because cancel() may run on another thread. */
        private volatile boolean running = true;

        /**
         * Emits records until {@link #cancel()} is called.
         *
         * @param sourceContext context used to emit records
         * @throws Exception if the emitting thread is interrupted while sleeping
         */
        @Override
        public void run(SourceContext<UserInfo> sourceContext) throws Exception {
            while (running) {
                // Math.random() is in [0, 1), so scaling by the full length covers every index
                // including the last one (the previous "length - 1" never picked it).
                String userid = userids[(int) (Math.random() * userids.length)];

                UserInfo userInfo = new UserInfo();
                userInfo.setUserId(userid);
                userInfo.setAmount(Math.random() * 100);
                userInfo.setTs(new Timestamp(System.currentTimeMillis()));
                sourceContext.collect(userInfo);

                Thread.sleep(100);
            }
        }

        /** Signals {@link #run} to leave its emit loop. */
        @Override
        public void cancel() {
            running = false;
        }
    }

    /**
     * Mutable bean describing one order event; exposed to SQL as the columns
     * {@code userId}, {@code amount} and {@code ts}.
     */
    public static class UserInfo implements java.io.Serializable {
        private String userId;
        private Double amount;
        private Timestamp ts;

        /** @return the user id */
        public String getUserId() {
            return userId;
        }

        /** @param userId the user id */
        public void setUserId(String userId) {
            this.userId = userId;
        }

        /** @return the order amount */
        public Double getAmount() {
            return amount;
        }

        /** @param amount the order amount */
        public void setAmount(Double amount) {
            this.amount = amount;
        }

        /** @return the event timestamp */
        public Timestamp getTs() {
            return ts;
        }

        /** @param ts the event timestamp */
        public void setTs(Timestamp ts) {
            this.ts = ts;
        }
    }
}
FlinkSQL 在hive中创建空表
// TODO: 2020/7/13 创建表 String hiveSql = "CREATE external TABLE test_table (\n" + " aaa STRING,\n" + " bbb DOUBLE" + ") partitioned by (dt string,h string,m string) " + "stored as ORC " + "TBLPROPERTIES (\n" + " 'partition.time-extractor.timestamp-pattern'='$dt $h:$m:00',\n" + " 'sink.partition-commit.delay'='0s',\n" + " 'sink.partition-commit.trigger'='partition-time',\n" + " 'sink.partition-commit.policy.kind'='metastore'" + ")"; tEnv.executeSql(hiveSql);