scala> sqlContext
res0: org.apache.spark.sql.SQLContext = org.apache.spark.sql.hive.HiveContext@330305d3
scala> val df = sqlContext.load("org.apache.spark.sql.json", Map("path" -> "file:///employee.json"))
warning: there were 1 deprecation warning(s); re-run with -deprecation for details
df: org.apache.spark.sql.DataFrame = [birth_date: string, department_id: bigint, education_level: string, employee_id: bigint, end_date: string, first_name: string, full_name: string, gender: string, hire_date: string, last_name: string, management_role: string, marital_status: string, position_id: bigint, position_title: string, salary: double, store_id: bigint, supervisor_id: bigint]
scala> df.printSchema()
root
|-- birth_date: string (nullable = true)
|-- department_id: long (nullable = true)
|-- education_level: string (nullable = true)
|-- employee_id: long (nullable = true)
|-- end_date: string (nullable = true)
|-- first_name: string (nullable = true)
|-- full_name: string (nullable = true)
|-- gender: string (nullable = true)
|-- hire_date: string (nullable = true)
|-- last_name: string (nullable = true)
|-- management_role: string (nullable = true)
|-- marital_status: string (nullable = true)
|-- position_id: long (nullable = true)
|-- position_title: string (nullable = true)
|-- salary: double (nullable = true)
|-- store_id: long (nullable = true)
|-- supervisor_id: long (nullable = true)
scala> df.registerTempTable("employee")
scala> val names = sqlContext.sql("select first_name from employee limit 5")
names: org.apache.spark.sql.DataFrame = [first_name: string]
scala> names.foreach(println)
[Sheri]
[Derrick]
[Michael]
[Maya]
[Roberta]
Thursday, October 22, 2015
Spark-SQL: How to Query JSON Files
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment