scala> sqlContext
res0: org.apache.spark.sql.SQLContext = org.apache.spark.sql.hive.HiveContext@330305d3

scala> val df = sqlContext.load("org.apache.spark.sql.json", Map("path" -> "file:///employee.json"))
warning: there were 1 deprecation warning(s); re-run with -deprecation for details
df: org.apache.spark.sql.DataFrame = [birth_date: string, department_id: bigint, education_level: string, employee_id: bigint, end_date: string, first_name: string, full_name: string, gender: string, hire_date: string, last_name: string, management_role: string, marital_status: string, position_id: bigint, position_title: string, salary: double, store_id: bigint, supervisor_id: bigint]

scala> df.printSchema()
root
 |-- birth_date: string (nullable = true)
 |-- department_id: long (nullable = true)
 |-- education_level: string (nullable = true)
 |-- employee_id: long (nullable = true)
 |-- end_date: string (nullable = true)
 |-- first_name: string (nullable = true)
 |-- full_name: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- hire_date: string (nullable = true)
 |-- last_name: string (nullable = true)
 |-- management_role: string (nullable = true)
 |-- marital_status: string (nullable = true)
 |-- position_id: long (nullable = true)
 |-- position_title: string (nullable = true)
 |-- salary: double (nullable = true)
 |-- store_id: long (nullable = true)
 |-- supervisor_id: long (nullable = true)

scala> df.registerTempTable("employee")

scala> val names = sqlContext.sql("select first_name from employee limit 5")
names: org.apache.spark.sql.DataFrame = [first_name: string]

scala> names.foreach(println)
[Sheri]
[Derrick]
[Michael]
[Maya]
[Roberta]
Thursday, October 22, 2015
Spark SQL: how to query JSON files
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment