Spark 操作 JSON 格式数据
scala> val json=spark.read.json("/opt/data/emp.json")
json: org.apache.spark.sql.DataFrame = [comm: string, deptno: bigint ... 6 more fields]
scala> json.show
+----+------+-----+------+----------+---------+----+----+
|comm|deptno|empno| ename|  hiredate|      job| mgr| sal|
+----+------+-----+------+----------+---------+----+----+
|    |    20| 7369| SMITH|1980/12/17|    CLERK|7902| 800|
| 300|    30| 7499| ALLEN| 1981/2/20| SALESMAN|7698|1600|
| 500|    30| 7521|  WARD| 1981/2/22| SALESMAN|7698|1250|
|    |    20| 7566| JONES|  1981/4/2|  MANAGER|7839|2975|
|1400|    30| 7654|MARTIN| 1981/9/28| SALESMAN|7698|1250|
|    |    30| 7698| BLAKE|  1981/5/1|  MANAGER|7839|2850|
|    |    10| 7782| CLARK|  1981/6/9|  MANAGER|7839|2450|
|    |    20| 7788| SCOTT| 1987/4/19|  ANALYST|7566|3000|
|    |    10| 7839|  KING|1981/11/17|PRESIDENT|    |5000|
|   0|    30| 7844|TURNER|  1981/9/8| SALESMAN|7698|1500|
|    |    20| 7876| ADAMS| 1987/5/23|    CLERK|7788|1100|
|    |    30| 7900| JAMES| 1981/12/3|    CLERK|7698| 950|
|    |    20| 7902|  FORD| 1981/12/3|  ANALYST|7566|3000|
|    |    10| 7934|MILLER| 1982/1/23|    CLERK|7782|1300|
+----+------+-----+------+----------+---------+----+----+
scala> json.printSchema
root
|-- comm: string (nullable = true)
|-- deptno: long (nullable = true)
|-- empno: long (nullable = true)
|-- ename: string (nullable = true)
|-- hiredate: string (nullable = true)
|-- job: string (nullable = true)
|-- mgr: string (nullable = true)
|-- sal: long (nullable = true)
scala> json.createOrReplaceTempView("emp")
scala> spark.sql("select deptno,count(1) from emp group by deptno").show
+------+--------+
|deptno|count(1)|
+------+--------+
|    10|       3|
|    30|       6|
|    20|       5|
+------+--------+
scala>