Monday, May 8, 2017

Oozie shell action example: submitting a Spark job

Workflow directory on HDFS

 hadoop fs -ls /tmp/sparkOozieShellAction/
Found 4 items
-rw-r--r--   3 oozie hdfs        178 2017-05-08 07:00 /tmp/sparkOozieShellAction/job.properties
drwxr-xr-x   - oozie hdfs          0 2017-05-08 07:01 /tmp/sparkOozieShellAction/lib
-rw-r--r--   3 oozie hdfs        279 2017-05-08 07:12 /tmp/sparkOozieShellAction/spark-pi-job.sh
-rw-r--r--   3 oozie hdfs        712 2017-05-08 07:34 /tmp/sparkOozieShellAction/workflow.xml

Oozie Spark sharelib on HDFS

[oozie@rk253 ~]$ hadoop fs -ls /user/oozie/share/lib/lib_20170508043956/spark
Found 8 items
-rw-r--r--   3 oozie hdfs     339666 2017-05-08 04:42 /user/oozie/share/lib/lib_20170508043956/spark/datanucleus-api-jdo-3.2.6.jar
-rw-r--r--   3 oozie hdfs    1890075 2017-05-08 04:42 /user/oozie/share/lib/lib_20170508043956/spark/datanucleus-core-3.2.10.jar
-rw-r--r--   3 oozie hdfs    1809447 2017-05-08 04:42 /user/oozie/share/lib/lib_20170508043956/spark/datanucleus-rdbms-3.2.9.jar
-rw-r--r--   3 oozie hdfs        167 2017-05-08 04:42 /user/oozie/share/lib/lib_20170508043956/spark/hive-site.xml
-rw-r--r--   3 oozie hdfs      22440 2017-05-08 04:42 /user/oozie/share/lib/lib_20170508043956/spark/oozie-sharelib-spark-4.2.0.2.5.3.0-37.jar
-rw-r--r--   3 oozie hdfs      44846 2017-05-08 04:42 /user/oozie/share/lib/lib_20170508043956/spark/py4j-0.9-src.zip
-rw-r--r--   3 oozie hdfs     357563 2017-05-08 04:42 /user/oozie/share/lib/lib_20170508043956/spark/pyspark.zip
-rw-r--r--   3 oozie hdfs  188897932 2017-05-08 04:42 /user/oozie/share/lib/lib_20170508043956/spark/spark-assembly-1.6.2.2.5.3.0-37-hadoop2.7.3.2.5.3.0-37.jar

job.properties

[oozie@rk253 ~]$ cat job.properties 
# Keep values free of surrounding whitespace: java.util.Properties preserves
# trailing whitespace as part of the value.
# NameNode URI and JobTracker/ResourceManager address; workflow.xml reads them
# as ${nameNode} and ${jobTracker}.
nameNode=hdfs://rk253.openstack:8020
jobTracker=rk253.openstack:8050
# HDFS directory that holds workflow.xml and spark-pi-job.sh.
oozie.wf.application.path=/tmp/sparkOozieShellAction/
# Make the Oozie system sharelib available to the job.
oozie.use.system.libpath=true

job.properties (again, now with the `master` property) and workflow.xml

[oozie@rk253 ~]$ cat job.properties 
# Keep values free of surrounding whitespace: java.util.Properties preserves
# trailing whitespace as part of the value.
# NameNode URI and JobTracker/ResourceManager address; workflow.xml reads them
# as ${nameNode} and ${jobTracker}.
nameNode=hdfs://rk253.openstack:8020
jobTracker=rk253.openstack:8050
# HDFS directory that holds workflow.xml and spark-pi-job.sh.
oozie.wf.application.path=/tmp/sparkOozieShellAction/
# Make the Oozie system sharelib available to the job.
oozie.use.system.libpath=true
# NOTE(review): not referenced by the workflow.xml or spark-pi-job.sh shown
# here; the script hard-codes --master yarn-client.
master=yarn-client
[oozie@rk253 ~]$ cat workflow.xml 
<workflow-app name="WorkFlowForShellAction" xmlns="uri:oozie:workflow:0.4">
    <!-- Single-step workflow: run spark-pi-job.sh through a shell action.
         ${jobTracker} and ${nameNode} are supplied by job.properties. -->
    <start to="shellAction"/>
    <action name="shellAction">
        <shell xmlns="uri:oozie:shell-action:0.2">
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <!-- Must match the symlink name after '#' in <file> below. -->
            <exec>spark-pi-job.sh</exec>
            <!-- Hadoop clients started by the script identify as the user who
                 submitted the workflow. -->
            <env-var>HADOOP_USER_NAME=${wf:user()}</env-var>
            <!-- Ship the script from HDFS into the action's working directory. -->
            <file>/tmp/sparkOozieShellAction/spark-pi-job.sh#spark-pi-job.sh</file>
            <!-- NOTE(review): nothing in this workflow reads the captured
                 output; confirm it is still needed. -->
            <capture-output/>
        </shell>
        <ok to="end"/>
        <error to="killAction"/>
    </action>
    <kill name="killAction">
        <!-- Report the failing node's error instead of a fixed string. -->
        <message>Shell action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>

spark-pi-job.sh

[oozie@rk253 ~]$ cat spark-pi-job.sh 
#!/usr/bin/env bash
#
# Submits the SparkPi example to YARN in client mode.
# Run by the Oozie shell action as spark-pi-job.sh.
set -euo pipefail

# HDP stack version; it appears in the install path and twice in the jar name.
readonly hdp_version='2.5.3.0-37'
readonly spark_home="/usr/hdp/${hdp_version}/spark"
readonly examples_jar="${spark_home}/lib/spark-examples-1.6.2.${hdp_version}-hadoop2.7.3.${hdp_version}.jar"

# exec replaces this shell, so spark-submit's exit status becomes the
# script's exit status. The trailing 10 is the argument passed to SparkPi.
exec "${spark_home}/bin/spark-submit" \
  --class org.apache.spark.examples.SparkPi \
  --master yarn-client \
  --num-executors 3 \
  --driver-memory 512m \
  --executor-memory 512m \
  --executor-cores 1 \
  "${examples_jar}" \
  10

Run the Oozie job

oozie job -oozie http://rk253:11000/oozie/ -config job.properties -run 

No comments: