cd $SPARK_3_0_1_HOME /bin . /beeline !connect jdbc:hive2: //cdh-datanode1 :10005 /default # 出现弹框提示输入账号密码信息,由于没有添加鉴权,直接按enter即可 -- 创建测试表 student_info create table default.student_info( id bigint comment '学生id' , name string comment '学生姓名' , birthday string comment '出生日期 yyyy-MM-dd' , class string comment '班级名称 xxyy xx年级 yy班' ) comment '学生信息表' stored as parquet; -- 初始化测算表数据 insert into default.student_info values(1, '黄小明' , '2002-01-09' , '0302' ) ,(2, '王小刚' , '2001-06-03' , '0305' ) ,(3, '董小梅' , '2002-07-03' , '0302' ) ,(4, '杨小宝' , '2002-05-03' , '0301' ) ,(5, '周小龙' , '2002-04-23' , '0302' ) ,(6, '周小龙' , '2002-06-28' , '0304' ) ,(7, '方小玉' , '2000-09-28' , '0204' ); -- 查询插入进去的数据 select id , name, birthday, class from default.student_info; 0: jdbc:hive2: //cdh-datanode1 :10005 /default > select id , name, birthday, class from default.student_info; +-----+-------+-------------+--------+ | id | name | birthday | class | +-----+-------+-------------+--------+ | 4 | 杨小宝 | 2002-05-03 | 0301 | | 5 | 周小龙 | 2002-04-23 | 0302 | | 6 | 周小龙 | 2002-06-28 | 0304 | | 7 | 方小玉 | 2000-09-28 | 0204 | | 1 | 黄小明 | 2002-01-09 | 0302 | | 2 | 王小刚 | 2001-06-03 | 0305 | | 3 | 董小梅 | 2002-07-03 | 0302 | +-----+-------+-------------+--------+ 7 rows selected (0.275 seconds) -- 对测试表 student_info的一些操作 -- a.查看各个班级的人数 select class, count(1) from default.student_info group by class; 0: jdbc:hive2: //cdh-datanode1 :10005 /default > select class, count(1) from default.student_info group by class; +--------+-----------+ | class | count(1) | +--------+-----------+ | 0204 | 1 | | 0305 | 1 | | 0304 | 1 | | 0301 | 1 | | 0302 | 3 | +--------+-----------+ 5 rows selected (0.816 seconds) -- b.使用explain查看 a 的执行计划 explain select class, count(1) from default.student_info group by class; 0: jdbc:hive2: //cdh-datanode1 :10005 /default > explain select class, count(1) from default.student_info group by class; +----------------------------------------------------+ | plan | +----------------------------------------------------+ | == Physical Plan == *(2) HashAggregate(keys=[class #9977], functions=[count(1)]) +- Exchange hashpartitioning(class #9977, 200), true, [id=#15603] +- *(1) HashAggregate(keys=[class #9977], functions=[partial_count(1)]) +- *(1) ColumnarToRow +- FileScan parquet default.student_info[class #9977] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex[hdfs://cdh-namenode1:8020/user/hive/warehouse/student_info], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<class:string> | +----------------------------------------------------+ 1 row selected (0.087 seconds) -- c.查询班级人数最多的班级名称 select class from default.student_info group by class order by count(1) desc limit 1; 0: jdbc:hive2: //cdh-datanode1 :10005 /default > select . . . . . . . . . . . . . . . . . . . . . .> class . . . . . . . . . . . . . . . . . . . . . .> from default.student_info . . . . . . . . . . . . . . . . . . . . . .> group by class . . . . . . . . . . . . . . . . . . . . . .> order by count(1) desc . . . . . . . . . . . . . . . . . . . . . .> limit 1; +--------+ | class | +--------+ | 0302 | +--------+ 1 row selected (0.757 seconds) -- d.查看年龄最小的学生所在的班级 select class from default.student_info order by birthday limit 1; 0: jdbc:hive2: //cdh-datanode1 :10005 /default > select . . . . . . . . . . . . . . . . . . . . . .> class . . . . . . . . . . . . . . . . . . . . . .> from default.student_info . . . . . . . . . . . . . . . . . . . . . .> order by birthday . . . . . . . . . . . . . . . . . . . . . .> limit 1; +--------+ | class | +--------+ | 0204 | +--------+ 1 row selected (0.202 seconds) -- e.查看各个年级的总人数 select count(1) as cnt , substr(class,1,2) as grade from default.student_info group by substr(class,1,2); 0: jdbc:hive2: //cdh-datanode1 :10005 /default > select . . . . . . . . . . . . . . . . . . . . . .> count(1) as cnt . . . . . . . . . . . . . . . . . . . . . .> , substr(class,1,2) as grade . . . . . . . . . . . . . . . . . . . . . .> from default.student_info . . . . . . . . . . . . . . . . . . . . . .> group by substr(class,1,2); +------+--------+ | cnt | grade | +------+--------+ | 6 | 03 | | 1 | 02 | +------+--------+ 2 rows selected (0.752 seconds) -- f.在同一年级中,给每个学生根据出生年月从年纪小到大编号 select substr(class,1,2) as grade , name , birthday , row_number() over(partition by substr(class,1,2) order by birthday desc) as rn from default.student_info; 0: jdbc:hive2: //cdh-datanode1 :10005 /default > select . . . . . . . . . . . . . . . . . . . . . .> substr(class,1,2) as grade . . . . . . . . . . . . . . . . . . . . . .> , name . . . . . . . . . . . . . . . . . . . . . .> , birthday . . . . . . . . . . . . . . . . . . . . . .> , row_number() over(partition by substr(class,1,2) order by birthday desc) as rn . . . . . . . . . . . . . . . . . . . . . .> from default.student_info; +--------+-------+-------------+-----+ | grade | name | birthday | rn | +--------+-------+-------------+-----+ | 03 | 董小梅 | 2002-07-03 | 1 | | 03 | 周小龙 | 2002-06-28 | 2 | | 03 | 杨小宝 | 2002-05-03 | 3 | | 03 | 周小龙 | 2002-04-23 | 4 | | 03 | 黄小明 | 2002-01-09 | 5 | | 03 | 王小刚 | 2001-06-03 | 6 | | 02 | 方小玉 | 2000-09-28 | 1 | +--------+-------+-------------+-----+ 7 rows selected (1.166 seconds) -- g.查询同一姓名学生的信息 select id , name , birthday , class from ( select id , name , birthday , class , count(1) over(partition by name) as cnt from default.student_info ) t where t.cnt > 1 ; 0: jdbc:hive2: //cdh-datanode1 :10005 /default > select . . . . . . . . . . . . . . . . . . . . . .> id . . . . . . . . . . . . . . . . . . . . . .> , name . . . . . . . . . . . . . . . . . . . . . .> , birthday . . . . . . . . . . . . . . . . . . . . . .> , class . . . . . . . . . . . . . . . . . . . . . .> from ( . . . . . . . . . . . . . . . . . . . . . .> select . . . . . . . . . . . . . . . . . . . . . .> id . . . . . . . . . . . . . . . . . . . . . .> , name . . . . . . . . . . . . . . . . . . . . . .> , birthday . . . . . . . . . . . . . . . . . . . . . .> , class . . . . . . . . . . . . . . . . . . . . . .> , count(1) over(partition by name) as cnt . . . . . . . . . . . . . . . . . . . . . .> from default.student_info . . . . . . . . . . . . . . . . . . . . . .> ) t . . . . . . . . . . . . . . . . . . . . . .> where t.cnt > 1 . . . . . . . . . . . . . . . . . . . . . .> ; +-----+-------+-------------+--------+ | id | name | birthday | class | +-----+-------+-------------+--------+ | 5 | 周小龙 | 2002-04-23 | 0302 | | 6 | 周小龙 | 2002-06-28 | 0304 | +-----+-------+-------------+--------+ 2 rows selected (3.426 seconds) -- 退出beeline !quit |