azkaban-aplcache

Move jobtype module to the main AZ repo (#1717)

4/6/2018 3:47:36 PM

Changes

build.gradle 4(+3 -1)

settings.gradle 2(+1 -1)

Details

diff --git a/az-hadoop-jobtype-plugin/build.gradle b/az-hadoop-jobtype-plugin/build.gradle
new file mode 100644
index 0000000..b716b42
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/build.gradle
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2018 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+apply plugin: 'distribution'
+
+dependencies {
+    compile project(':az-core')
+    compile project(":azkaban-common")
+    compile project(":azkaban-hadoop-security-plugin")
+
+    compile deps.hadoopCommon
+    compile deps.hadoopMRClientCommon
+    compile deps.hadoopMRClientCore
+    compile(deps.hiveCli) {
+        transitive = false
+    }
+    compile("org.apache.hive:hive-metastore:0.12.0") {
+        force = true
+    }
+    compile("org.apache.hive:hive-shims:0.12.0") {
+        force = true
+    }
+    compile(deps.hiveExecCore) {
+        exclude group: 'org.pentaho', module: 'pentaho-aggdesigner-algorithm'
+        exclude group: 'eigenbase', module: 'eigenbase-properties'
+    }
+    compile(deps.pig) {
+        transitive = false
+    }
+    compile deps.sparkCore
+    compile deps.commonsCli
+}
+
+/**
+ * TODO spyne: remove after fixing internal build.
+ *
+ * Package only the jar.
+ * The remaining dependencies are just Hadoop and Hive, so they are not packaged inside the plugin.
+ * It is assumed that the classpaths of Hadoop, Hive, Pig, etc. will be fed into the application externally.
+ */
+distributions {
+    main {
+        contents {
+            from(jar) {
+                into 'lib'
+            }
+        }
+    }
+}
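
The distribution above deliberately ships only the plugin jar; the Hadoop, Hive, and Pig classpaths are expected to come from the host environment at runtime. As a rough sketch of how an installation might supply them, a jobtype private.properties could point at the locally installed stacks (the property names and paths below are illustrative assumptions, not part of this change):

    hadoop.home=/opt/hadoop
    hive.home=/opt/hive
    pig.home=/opt/pig
    jobtype.global.classpath=${hadoop.home}/etc/hadoop,${hadoop.home}/share/hadoop/common/*,${hive.home}/lib/*,${pig.home}/lib/*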
diff --git a/az-hadoop-jobtype-plugin/src/examples/command-hello/command.job b/az-hadoop-jobtype-plugin/src/examples/command-hello/command.job
new file mode 100644
index 0000000..fe4d76a
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/examples/command-hello/command.job
@@ -0,0 +1,5 @@
+type=command
+command=echo "hello"
+command.1=echo "This is how one runs a command job"
+command.2=whoami
+
diff --git a/az-hadoop-jobtype-plugin/src/examples/hive-wc/hive-demo.job b/az-hadoop-jobtype-plugin/src/examples/hive-wc/hive-demo.job
new file mode 100644
index 0000000..a0e1e46
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/examples/hive-wc/hive-demo.job
@@ -0,0 +1,7 @@
+type=hive
+user.to.proxy=azkaban
+
+hive.script=scripts/hive-wc.hql
+
+
+
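
The hive-demo job refers to scripts/hive-wc.hql by a path relative to the example directory, so the script and its sample input are expected to sit next to the .job file. The layout added by this commit is:

    hive-wc/
      hive-demo.job
      scripts/hive-wc.hql
      res/input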
diff --git a/az-hadoop-jobtype-plugin/src/examples/hive-wc/res/input b/az-hadoop-jobtype-plugin/src/examples/hive-wc/res/input
new file mode 100644
index 0000000..748d6c2
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/examples/hive-wc/res/input
@@ -0,0 +1,224 @@
+11	and
+10	the
+6	to
+6	in
+6	of
+6	is
+5	CLAUDIUS
+5	KING
+5	this
+5	we
+5	what
+4	us
+4	GUILDENSTERN
+4	And
+4	d
+4	ROSENCRANTZ
+4	a
+4	his
+4	QUEEN
+4	he
+4	O
+4	GERTRUDE
+3	To
+3	s
+3	him
+3	Exeunt
+3	Gertrude
+3	fit
+3	deed
+3	out
+3	both
+3	I
+3	our
+3	hath
+3	with
+3	you
+2	understand
+2	them
+2	these
+2	was
+2	will
+2	some
+2	But
+2	Hamlet
+2	It
+2	The
+2	We
+2	Where
+2	all
+2	away
+2	be
+2	been
+2	body
+2	come
+2	done
+2	A
+2	from
+2	full
+2	good
+2	had
+2	have
+2	it
+2	let
+2	like
+2	madness
+2	man
+2	must
+2	on
+2	rat
+2	shall
+2	so
+1	pith
+1	base
+1	place
+1	pray
+1	bloody
+1	profound
+1	Ho
+1	brainish
+1	bring
+1	but
+1	call
+1	castle
+1	chapel
+1	closet
+1	providence
+1	contend
+1	countenance
+1	cries
+1	His
+1	Guildenstern
+1	discord
+1	disease
+1	dismay
+1	divulging
+1	do
+1	does
+1	pure
+1	dragg
+1	draw
+1	enter
+1	er
+1	every
+1	excuse
+1	rapier
+1	feed
+1	Go
+1	for
+1	foul
+1	friends
+1	slain
+1	restrain
+1	further
+1	go
+1	gone
+1	room
+1	Ah
+1	haste
+1	Friends
+1	haunt
+1	sea
+1	Even
+1	hearing
+1	heaves
+1	heavy
+1	hence
+1	Enter
+1	Come
+1	how
+1	Both
+1	Bestow
+1	seek
+1	itself
+1	join
+1	keep
+1	kept
+1	kill
+1	kills
+1	know
+1	laid
+1	lawless
+1	seen
+1	liberty
+1	fair
+1	little
+1	ll
+1	lord
+1	love
+1	mad
+1	ship
+1	majesty
+1	short
+1	matter
+1	mean
+1	metals
+1	mightier
+1	mineral
+1	most
+1	mother
+1	mountains
+1	much
+1	sighs
+1	my
+1	night
+1	no
+1	not
+1	Behind
+1	old
+1	skill
+1	one
+1	Polonius
+1	My
+1	Mad
+1	Re
+1	SCENE
+1	Should
+1	Shows
+1	ore
+1	There
+1	This
+1	Life
+1	Among
+1	What
+1	Alas
+1	Which
+1	Whips
+1	You
+1	Into
+1	aid
+1	owner
+1	How
+1	answer
+1	apart
+1	apprehension
+1	arras
+1	as
+1	something
+1	son
+1	sooner
+1	soul
+1	speak
+1	stir
+1	sun
+1	there
+1	threats
+1	tis
+1	touch
+1	translate
+1	unseen
+1	untimely
+1	up
+1	very
+1	vile
+1	weeps
+1	when
+1	while
+1	whom
+1	whose
+1	wind
+1	wisest
+1	would
+1	young
+1	your
+1	yourself
diff --git a/az-hadoop-jobtype-plugin/src/examples/hive-wc/scripts/hive-wc.hql b/az-hadoop-jobtype-plugin/src/examples/hive-wc/scripts/hive-wc.hql
new file mode 100644
index 0000000..fb9c956
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/examples/hive-wc/scripts/hive-wc.hql
@@ -0,0 +1,8 @@
+drop table words;
+create table words (freq int, word string) row format delimited fields terminated by '\t' stored as textfile;
+describe words;
+load data local inpath "res/input" into table words;
+select * from words limit 10;
+select freq, count(1) as f2 from words group by freq sort by f2 desc limit 10;
+
+
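
The script itself is plain HiveQL, so it can be smoke-tested outside Azkaban with the Hive CLI before wiring it into a flow; a minimal sketch, assuming a local Hive installation and that the relative path res/input resolves from the example directory:

    cd az-hadoop-jobtype-plugin/src/examples/hive-wc
    hive -f scripts/hive-wc.hql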
diff --git a/az-hadoop-jobtype-plugin/src/examples/java-wc/lib/wordcountjava.jar b/az-hadoop-jobtype-plugin/src/examples/java-wc/lib/wordcountjava.jar
new file mode 100644
index 0000000..d173516
Binary files /dev/null and b/az-hadoop-jobtype-plugin/src/examples/java-wc/lib/wordcountjava.jar differ
diff --git a/az-hadoop-jobtype-plugin/src/examples/java-wc/pig-upload.job b/az-hadoop-jobtype-plugin/src/examples/java-wc/pig-upload.job
new file mode 100644
index 0000000..cc7bf55
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/examples/java-wc/pig-upload.job
@@ -0,0 +1,4 @@
+type=pig
+pig.script=src/upload.pig
+
+
diff --git a/az-hadoop-jobtype-plugin/src/examples/java-wc/res/rpfarewell b/az-hadoop-jobtype-plugin/src/examples/java-wc/res/rpfarewell
new file mode 100644
index 0000000..84df4be
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/examples/java-wc/res/rpfarewell
@@ -0,0 +1,340 @@
+Farewell to Congress
+
+This may well be the last time I speak on the House Floor.  At the end of the year I’ll leave Congress after 23 years in office over a 36 year period.  My goals in 1976 were the same as they are today:  promote peace and prosperity by a strict adherence to the principles of individual liberty.
+
+It was my opinion, that the course the U.S. embarked on in the latter part of the 20th Century would bring us a major financial crisis and engulf us in a foreign policy that would overextend us and undermine our national security.
+
+To achieve the goals I sought, government would have had to shrink in size and scope, reduce spending, change the monetary system, and reject the unsustainable costs of policing the world and expanding the American Empire.
+
+The problems seemed to be overwhelming and impossible to solve, yet from my view point, just following the constraints placed on the federal government by the Constitution would have been a good place to start.
+
+ 
+
+How Much Did I Accomplish?
+
+In many ways, according to conventional wisdom, my off-and-on career in Congress, from 1976 to 2012, accomplished very little.  No named legislation, no named federal buildings or highways—thank goodness.  In spite of my efforts, the government has grown exponentially, taxes remain excessive, and the prolific increase of incomprehensible regulations continues.  Wars are constant and pursued without Congressional declaration, deficits rise to the sky, poverty is rampant and dependency on the federal government is now worse than any time in our history.
+
+All this with minimal concerns for the deficits and unfunded liabilities that common sense tells us cannot go on much longer.  A grand, but never mentioned, bipartisan agreement allows for the well-kept secret that keeps the spending going.  One side doesn’t give up one penny on military spending, the other side doesn’t give up one penny on welfare spending, while both sides support the bailouts and subsidies for the banking and  corporate elite.  And the spending continues as the economy weakens and the downward spiral continues.   As the government continues fiddling around, our liberties and our wealth burn in the flames of a foreign policy that makes us less safe.
+
+The major stumbling block to real change in Washington is the total resistance to admitting that the country is broke. This has made compromising, just to agree to increase spending, inevitable since neither side has any intention of cutting spending.
+
+The country and the Congress will remain divisive since there’s no “loot left to divvy up.”
+
+Without this recognition the spenders in Washington will continue the march toward a fiscal cliff much bigger than the one anticipated this coming January.
+
+I have thought a lot about why those of us who believe in liberty, as a solution, have done so poorly in convincing others of its benefits.  If liberty is what we claim it is- the principle that protects all personal, social and economic decisions necessary for maximum prosperity and the best chance for peace- it should be an easy sell.  Yet, history has shown that the masses have been quite receptive to the promises of authoritarians which are rarely if ever fulfilled.
+
+ 
+
+Authoritarianism vs. Liberty
+
+If authoritarianism leads to poverty and war and less freedom for all individuals and is controlled by rich special interests, the people should be begging for liberty.  There certainly was a strong enough sentiment for more freedom at the time of our founding that motivated those who were willing to fight in the revolution against the powerful British government.
+
+During my time in Congress the appetite for liberty has been quite weak; the understanding of its significance negligible.  Yet the good news is that compared to 1976 when I first came to Congress, the desire for more freedom and less government in 2012 is much greater and growing, especially in grassroots America. Tens of thousands of teenagers and college age students are, with great enthusiasm, welcoming the message of liberty.
+
+I have a few thoughts as to why the people of a country like ours, once the freest and most prosperous, allowed the conditions to deteriorate to the degree that they have.
+
+Freedom, private property, and enforceable voluntary contracts, generate wealth.  In our early history we were very much aware of this.  But in the early part of the 20th century our politicians promoted the notion that the tax and monetary systems had to change if we were to involve ourselves in excessive domestic and military spending. That is why Congress gave us the Federal Reserve and the income tax.  The majority of Americans and many government officials agreed that sacrificing some liberty was necessary to carry out what some claimed to be “progressive” ideas. Pure democracy became acceptable.
+
+They failed to recognized that what they were doing was exactly opposite of what the colonists were seeking when they broke away from the British.
+
+Some complain that my arguments makes no sense, since great wealth and the standard of living improved  for many Americans over the last 100 years, even with these new policies.
+
+But the damage to the market economy, and the currency, has been insidious and steady.  It took a long time to consume our wealth, destroy the currency and undermine productivity and get our financial obligations to a point of no return. Confidence sometimes lasts longer than deserved. Most of our wealth today depends on debt.
+
+The wealth that we enjoyed and seemed to be endless, allowed concern for the principle of a free society to be neglected.  As long as most people believed the material abundance would last forever, worrying about protecting a competitive productive economy and individual liberty seemed unnecessary.
+
+ 
+
+The Age of Redistribution
+
+This neglect ushered in an age of redistribution of wealth by government kowtowing to any and all special interests, except for those who just wanted to left alone.  That is why today money in politics far surpasses money currently going into research and development and productive entrepreneurial efforts.
+
+The material benefits became more important than the understanding and promoting the principles of liberty and a free market.  It is good that material abundance is a result of liberty but if materialism is all that we care about, problems are guaranteed.
+
+The crisis arrived because the illusion that wealth and prosperity would last forever has ended. Since it was based on debt and a pretense that debt can be papered over by an out-of-control fiat monetary system, it was doomed to fail.  We have ended up with a system that doesn’t produce enough even to finance the debt and no fundamental understanding of why a free society is crucial to reversing these trends.
+
+If this is not recognized, the recovery will linger for a long time.  Bigger government, more spending, more debt, more poverty for the middle class, and a more intense scramble by the elite special interests will continue.
+
+ 
+
+We Need an Intellectual Awakening
+
+Without an intellectual awakening, the turning point will be driven by economic law.  A dollar crisis will bring the current out-of-control system to its knees.
+
+If it’s not accepted that big government, fiat money, ignoring liberty, central economic planning, welfarism, and warfarism caused our crisis we can expect a continuous and dangerous march toward corporatism and even fascism with even more loss of our liberties.  Prosperity for a large middle class though will become an abstract dream.
+
+This continuous move is no different than what we have seen in how our financial crisis of 2008 was handled.  Congress first directed, with bipartisan support, bailouts for the wealthy.  Then it was the Federal Reserve with its endless quantitative easing. If at first it doesn’t succeed try again; QE1, QE2, and QE3 and with no results we try QE indefinitely—that is until it too fails.  There’s a cost to all of this and let me assure you delaying the payment is no longer an option.  The rules of the market will extract its pound of flesh and it won’t be pretty.
+
+The current crisis elicits a lot of pessimism.  And the pessimism adds to less confidence in the future.  The two feed on themselves, making our situation worse.
+
+If the underlying cause of the crisis is not understood we cannot solve our problems. The issues of warfare, welfare, deficits, inflationism, corporatism, bailouts and authoritarianism cannot be ignored.  By only expanding these policies we cannot expect good results.
+
+Everyone claims support for freedom.  But too often it’s for one’s own freedom and not for others.  Too many believe that there must be limits on freedom. They argue that freedom must be directed and managed to achieve fairness and equality thus making it acceptable to curtail, through force, certain liberties.
+
+Some decide what and whose freedoms are to be limited.  These are the politicians whose goal in life is power. Their success depends on gaining support from special interests.
+
+ 
+
+No More ‘isms’
+
+The great news is the answer is not to be found in more “isms.”  The answers are to be found in more liberty which cost so much less.  Under these circumstances spending goes down, wealth production goes up, and the quality of life improves.
+
+Just this recognition—especially if we move in this direction—increases optimism which in itself is beneficial.  The follow through with sound policies are required which must be understood and supported by the people.
+
+But there is good evidence that the generation coming of age at the present time is supportive of moving in the direction of more liberty and self-reliance. The more this change in direction and the solutions become known, the quicker will be the return of optimism.
+
+Our job, for those of us who believe that a different system than the  one that we have  had for the  last 100 years, has driven us to this unsustainable crisis, is to be more convincing that there is a wonderful, uncomplicated, and moral system that provides the answers.  We had a taste of it in our early history. We need not give up on the notion of advancing this cause.
+
+It worked, but we allowed our leaders to concentrate on the material abundance that freedom generates, while ignoring freedom itself.  Now we have neither, but the door is open, out of necessity, for an answer.  The answer available is based on the Constitution, individual liberty and prohibiting the use of government force to provide privileges and benefits to all special interests.
+
+After over 100 years we face a society quite different from the one that was intended by the Founders.  In many ways their efforts to protect future generations with the Constitution from this danger has failed.  Skeptics, at the time the Constitution was written in 1787, warned us of today’s possible outcome.  The insidious nature of the erosion of our liberties and the reassurance our great abundance gave us, allowed the process to evolve into the dangerous period in which we now live.
+
+ 
+
+Dependency on Government Largesse
+
+Today we face a dependency on government largesse for almost every need.  Our liberties are restricted and government operates outside the rule of law, protecting and rewarding those who buy or coerce government into satisfying their demands. Here are a few examples:
+
+Undeclared wars are commonplace.
+Welfare for the rich and poor is considered an entitlement.
+The economy is overregulated, overtaxed and grossly distorted by a deeply flawed monetary system.
+Debt is growing exponentially.
+The Patriot Act and FISA legislation passed without much debate have resulted in a steady erosion of our 4th Amendment rights.
+Tragically our government engages in preemptive war, otherwise known as aggression, with no complaints from the American people.
+The drone warfare we are pursuing worldwide is destined to end badly for us as the hatred builds for innocent lives lost and the international laws flaunted. Once we are financially weakened and militarily challenged, there will be a lot resentment thrown our way.
+It’s now the law of the land that the military can arrest American citizens, hold them indefinitely, without charges or a trial.
+Rampant hostility toward free trade is supported by a large number in Washington.
+Supporters of sanctions, currency manipulation and WTO trade retaliation, call the true free traders “isolationists.”
+Sanctions are used to punish countries that don’t follow our orders.
+Bailouts and guarantees for all kinds of misbehavior are routine.
+Central economic planning through monetary policy, regulations and legislative mandates has been an acceptable policy.
+ 
+
+Questions
+
+Excessive government has created such a mess it prompts many questions:
+
+Why are sick people who use medical marijuana put in prison?
+Why does the federal government restrict the drinking of raw milk?
+Why can’t Americans manufacture rope and other products from hemp?
+Why are Americans not allowed to use gold and silver as legal tender as mandated by the Constitution?
+Why is Germany concerned enough to consider repatriating their gold held by the FED for her in New York?  Is it that the trust in the U.S. and dollar supremacy beginning to wane?
+Why do our political leaders believe it’s unnecessary to thoroughly audit our own gold?
+Why can’t Americans decide which type of light bulbs they can buy?
+Why is the TSA permitted to abuse the rights of any American traveling by air?
+Why should there be mandatory sentences—even up to life for crimes without victims—as our drug laws require?
+Why have we allowed the federal government to regulate commodes in our homes?
+Why is it political suicide for anyone to criticize AIPAC ?
+Why haven’t we given up on the drug war since it’s an obvious failure and violates the people’s rights? Has nobody noticed that the authorities can’t even keep drugs out of the prisons? How can making our entire society a prison solve the problem?
+Why do we sacrifice so much getting needlessly involved in border disputes and civil strife around the world and ignore the root cause of the most deadly border in the world-the one between Mexico and the US?
+Why does Congress willingly give up its prerogatives to the Executive Branch?
+Why does changing the party in power never change policy? Could it be that the views of both parties are essentially the same?
+Why did the big banks, the large corporations, and foreign banks and foreign central banks get bailed out in 2008 and the middle class lost their jobs and their homes?
+Why do so many in the government and the federal officials believe that creating money out of thin air creates wealth?
+Why do so many accept the deeply flawed principle that government bureaucrats and politicians can protect us from ourselves without totally destroying the principle of liberty?
+Why can’t people understand that war always destroys wealth and liberty?
+Why is there so little concern for the Executive Order that gives the President authority to establish a “kill list,” including American citizens, of those targeted for assassination?
+Why is patriotism thought to be blind loyalty to the government and the politicians who run it, rather than loyalty to the principles of liberty and support for the people? Real patriotism is a willingness to challenge the government when it’s wrong.
+Why is it is claimed that if people won’t  or can’t take care of their own needs, that people in government can do it for them?
+Why did we ever give the government a safe haven for initiating violence against the people?
+Why do some members defend free markets, but not civil liberties?
+Why do some members defend civil liberties but not free markets? Aren’t they the same?
+Why don’t more defend both economic liberty and personal liberty?
+Why are there not more individuals who seek to intellectually influence others to bring about positive changes than those who seek power to force others to obey their commands?
+Why does the use of religion to support a social gospel and preemptive wars, both of which requires authoritarians to use violence, or the threat of violence, go unchallenged? Aggression and forced redistribution of wealth has nothing to do with the teachings of the world great religions.
+Why do we allow the government and the Federal Reserve to disseminate false information dealing with both economic and  foreign policy?
+Why is democracy held in such high esteem when it’s the enemy of the minority and makes all rights relative to the dictates of the majority?
+Why should anyone be surprised that Congress has no credibility, since there’s such a disconnect between what politicians say and what they do?
+Is there any explanation for all the deception, the unhappiness, the fear of the future, the loss of confidence in our leaders, the distrust, the anger and frustration?   Yes there is, and there’s a way to reverse these attitudes.  The negative perceptions are logical and a consequence of bad policies bringing about our problems.  Identification of the problems and recognizing the cause allow the proper changes to come easy.
+ 
+
+Trust Yourself, Not the Government
+
+Too many people have for too long placed too much confidence and trust in government and not enough in themselves.  Fortunately, many are now becoming aware of the seriousness of the gross mistakes of the past several decades.  The blame is shared by both political parties.  Many Americans now are demanding to hear the plain truth of things and want the demagoguing to stop.  Without this first step, solutions are impossible.
+
+Seeking the truth and finding the answers in liberty and self-reliance promotes the optimism necessary for restoring prosperity.  The task is not that difficult if politics doesn’t get in the way.
+
+We have allowed ourselves to get into such a mess for various reasons.
+
+Politicians deceive themselves as to how wealth is produced.  Excessive confidence is placed in the judgment of politicians and bureaucrats.  This replaces the confidence in a free society.  Too many in high places of authority became convinced that only they,   armed with arbitrary government power, can bring about fairness, while facilitating wealth production.  This always proves to be a utopian dream and destroys wealth and liberty.  It impoverishes the people and rewards the special interests who end up controlling both political parties.
+
+It’s no surprise then that much of what goes on in Washington is driven by aggressive partisanship and power seeking, with philosophic differences being minor.
+
+ 
+
+Economic Ignorance
+
+Economic ignorance is commonplace.  Keynesianism continues to thrive, although today it is facing healthy and enthusiastic rebuttals.  Believers in military Keynesianism and domestic Keynesianism continue to desperately promote their failed policies, as the economy languishes in a deep slumber.
+
+Supporters of all government edicts use humanitarian arguments to justify them.
+
+Humanitarian arguments are always used to justify government mandates related to the economy, monetary policy, foreign policy, and personal liberty.  This is on purpose to make it more difficult to challenge.  But, initiating violence for humanitarian reasons is still violence.  Good intentions are no excuse and are just as harmful as when people use force with bad intentions.  The results are always negative.
+
+The immoral use of force is the source of man’s political problems.  Sadly, many religious groups, secular organizations, and psychopathic authoritarians endorse government initiated force to change the world.  Even when the desired goals are well-intentioned—or especially when well-intentioned—the results are dismal.  The good results sought never materialize.  The new problems created require even more government force as a solution.  The net result is institutionalizing government initiated violence and morally justifying it on humanitarian grounds.
+
+This is the same fundamental reason our government  uses force  for invading other countries at will, central economic planning at home, and the regulation of personal liberty and habits of our citizens.
+
+It is rather strange, that unless one has a criminal mind and no respect for other people and their property, no one claims it’s permissible to go into one’s neighbor’s house and tell them how to behave, what they can eat, smoke and drink or how to spend their money.
+
+Yet, rarely is it asked why it is morally acceptable that a stranger with a badge and a gun can do the same thing in the name of law and order.  Any resistance is met with brute force, fines, taxes, arrests, and even imprisonment. This is done more frequently every day without a proper search warrant.
+
+ 
+
+No Government Monopoly over Initiating Violence
+
+Restraining aggressive behavior is one thing, but legalizing a government monopoly for initiating aggression can only lead to exhausting liberty associated with chaos, anger and the breakdown of civil society.  Permitting such authority and expecting saintly behavior from the bureaucrats and the politicians is a pipe dream.  We now have a standing army of armed bureaucrats in the TSA, CIA, FBI, Fish and Wildlife, FEMA, IRS, Corp of Engineers, etc. numbering over 100,000.  Citizens are guilty until proven innocent in the unconstitutional administrative courts.
+
+Government in a free society should have no authority to meddle in social activities or the economic transactions of individuals. Nor should government meddle in the affairs of other nations. All things peaceful, even when controversial, should be permitted.
+
+We must reject the notion of prior restraint in economic activity just we do in the area of free speech and religious liberty. But even in these areas government is starting to use a backdoor approach of political correctness to regulate speech-a dangerous trend. Since 9/11 monitoring speech on the internet is now a problem since warrants are no longer required.
+
+ 
+
+The Proliferation of Federal Crimes
+
+The Constitution established four federal crimes.  Today the experts can’t even agree on how many federal crimes are now on the books—they number into the thousands.  No one person can comprehend the enormity of the legal system—especially the tax code.  Due to the ill-advised drug war and the endless federal expansion of the criminal code we have over 6 million people under correctional suspension, more than the Soviets ever had, and more than any other nation today, including China.  I don’t understand the complacency of the Congress and the willingness to continue their obsession with passing more Federal laws.  Mandatory sentencing laws associated with drug laws have compounded our prison problems.
+
+The federal register is now 75,000 pages long and the tax code has 72,000 pages, and expands every year.  When will the people start shouting, “enough is enough,” and demand Congress cease and desist.
+
+ 
+
+Achieving Liberty
+
+Liberty can only be achieved when government is denied the aggressive use of force.  If one seeks liberty, a precise type of government is needed.  To achieve it, more than lip service is required.
+
+Two choices are available.
+
+A government designed to protect liberty—a natural right—as its sole objective.  The people are expected to care for themselves and reject the use of any force for interfering with another person’s liberty.  Government is given a strictly limited authority to enforce contracts, property ownership, settle disputes, and defend against foreign aggression.
+A government that pretends to protect liberty but is granted power to arbitrarily use force over the people and foreign nations.  Though the grant of power many times is meant to be small and limited, it inevitably metastasizes into an omnipotent political cancer.  This is the problem for which the world has suffered throughout the ages.  Though meant to be limited it nevertheless is a 100% sacrifice of a principle that would-be-tyrants find irresistible.  It is used vigorously—though incrementally and insidiously.  Granting power to government officials always proves the adage that:  “power corrupts.”
+Once government gets a limited concession for the use of force to mold people habits and plan the economy, it causes a steady move toward tyrannical government.  Only a revolutionary spirit can reverse the process and deny to the government this arbitrary use of aggression.  There’s no in-between.  Sacrificing a little liberty for imaginary safety always ends badly.
+
+Today’s mess is a result of Americans accepting option #2, even though the Founders attempted to give us Option #1.
+
+The results are not good.  As our liberties have been eroded our wealth has been consumed.  The wealth we see today is based on debt and a foolish willingness on the part of foreigners to take our dollars for goods and services. They then loan them back to us to perpetuate our debt system.  It’s amazing that it has worked for this long but the impasse in Washington, in solving our problems indicate that many are starting to understand the seriousness of the world -wide debt crisis and the dangers we face. The longer this process continues the harsher the outcome will be.
+
+ 
+
+The Financial Crisis Is a Moral Crisis
+
+Many are now acknowledging that a financial crisis looms but few understand it’s, in reality, a moral crisis.  It’s the moral crisis that has allowed our liberties to be undermined and permits the exponential growth of illegal government power.  Without a clear understanding of the nature of the crisis it will be difficult to prevent a steady march toward tyranny and the poverty that will accompany it.
+
+Ultimately, the people have to decide which form of government they want; option #1 or option #2.  There is no other choice.  Claiming there is a choice of a “little” tyranny is like describing pregnancy as a “touch of pregnancy.”  It is a myth to believe that a mixture of free markets and government central economic planning is a worthy compromise.  What we see today is a result of that type of thinking.  And the results speak for themselves.
+
+ 
+
+A Culture of Violence
+
+American now suffers from a culture of violence.  It’s easy to reject the initiation of violence against one’s neighbor but it’s ironic that the people arbitrarily and freely anoint government officials with monopoly power to initiate violence against the American people—practically at will.
+
+Because it’s the government that initiates force, most people accept it as being legitimate.  Those who exert the force have no sense of guilt.  It is believed by too many that governments are morally justified in initiating force supposedly to “do good.”  They incorrectly believe that this authority has come from the “consent of the people.”  The minority, or victims of government violence never consented to suffer the abuse of government mandates, even when dictated by the majority.  Victims of TSA excesses never consented to this abuse.
+
+This attitude has given us a policy of initiating war to “do good,” as well. It is claimed that war, to prevent war for noble purposes, is justified.  This is similar to what we were once told that:  “destroying a village to save a village” was justified.  It was said by a US Secretary of State that the loss of 500,000 Iraqis, mostly children, in the 1990s, as a result of American bombs and sanctions, was “worth it” to achieve the “good” we brought to the Iraqi people.  And look at the mess that Iraq is in today.
+
+Government use of force to mold social and economic behavior at home and abroad has justified individuals using force on their own terms.  The fact that violence by government is seen as morally justified, is the reason why violence will increase when the big financial crisis hits and becomes a political crisis as well.
+
+First, we recognize that individuals shouldn’t initiate violence, then we give the authority to government.   Eventually, the immoral use of government violence, when things goes badly, will be used to justify an individual’s “right” to do the same thing. Neither the government nor individuals have the moral right to initiate violence against another yet we are moving toward the day when both will claim this authority.  If this cycle is not reversed society will break down.
+
+When needs are pressing, conditions deteriorate and rights become relative to the demands and the whims of the majority.  It’s then not a great leap for individuals to take it upon themselves to use violence to get what they claim is theirs.  As the economy deteriorates and the wealth discrepancies increase—as are already occurring— violence increases as those in need take it in their own hands to get what they believe is theirs.  They will not wait for a government rescue program.
+
+When government officials wield power over others to bail out the special interests, even with disastrous results to the average citizen, they feel no guilt for the harm they do. Those who take us into undeclared wars with many casualties resulting, never lose sleep over the death and destruction their bad decisions caused. They are convinced that what they do is morally justified, and the fact that many suffer   just can’t be helped.
+
+When the street criminals do the same thing, they too have no remorse, believing they are only taking what is rightfully theirs.  All moral standards become relative.  Whether it’s bailouts, privileges, government subsidies or benefits for some from inflating a currency, it’s all part of a process justified by a philosophy of forced redistribution of wealth.  Violence, or a threat of such, is the instrument required and unfortunately is of little concern of most members of Congress.
+
+Some argue it’s only a matter of “fairness” that those in need are cared for. There are two problems with this. First, the principle is used to provide a greater amount of benefits to the rich than the poor. Second, no one seems to be concerned about whether or not it’s fair to those who end up paying for the benefits. The costs are usually placed on the backs of the middle class and are hidden from the public eye. Too many people believe government handouts are free, like printing money out of thin air, and there is no cost. That deception is coming to an end. The bills are coming due and that’s what the economic slowdown is all about.
+
+Sadly, we have become accustomed to living with the illegitimate use of force by government.  It is the tool for telling the people how to live, what to eat and drink, what to read and how to spend their money.
+
+To develop a truly free society, the issue of initiating force must be understood and rejected.  Granting to government even a small amount of force is a dangerous concession.
+
+ 
+
+Limiting Government Excesses vs. a Virtuous Moral People
+
+Our Constitution, which was intended to limit government power and abuse, has failed.  The Founders warned that a free society depends on a virtuous and moral people.  The current crisis reflects that their concerns were justified.
+
+Most politicians and pundits are aware of the problems we face but spend all their time in trying to reform government.  The sad part is that the suggested reforms almost always lead to less freedom and the importance of a virtuous and moral people is either ignored, or not understood. The new reforms serve only to further undermine liberty.  The compounding effect has given us this steady erosion of liberty and the massive expansion of debt.  The real question is: if it is liberty we seek, should most of the emphasis be placed on government reform or trying to understand what “a virtuous and moral people” means and how to promote it. The Constitution has not prevented the people from demanding handouts for both rich and poor in their efforts to reform the government, while ignoring the principles of a free society. All branches of our government today are controlled by individuals who use their power to undermine liberty and enhance the welfare/warfare state-and frequently their own wealth and power.
+
+If the people are unhappy with the government performance it must be recognized that government is merely a reflection of an immoral society that rejected a moral government of constitutional limitations of power and love of freedom.
+
+If this is the problem all the tinkering with thousands of pages of new laws and regulations will do nothing to solve the problem.
+
+It is self-evident that our freedoms have been severely limited and the apparent prosperity we still have, is nothing more than leftover wealth from a previous time.  This fictitious wealth based on debt and benefits from a false trust in our currency and credit, will play havoc with our society when the bills come due.  This means that the full consequence of our lost liberties is yet to be felt.
+
+But that illusion is now ending.  Reversing a downward spiral depends on accepting a new approach.
+
+Expect the rapidly expanding homeschooling movement to play a significant role in the revolutionary reforms needed to build a free society with Constitutional protections. We cannot expect a Federal government controlled school system to provide the intellectual ammunition to combat the dangerous growth of government that threatens our liberties.
+
+The internet will provide the alternative to the government/media complex that controls the news and most political propaganda. This is why it’s essential that the internet remains free of government regulation.
+
+Many of our religious institutions and secular organizations support greater dependency on the state by supporting war, welfare and corporatism and ignore the need for a virtuous people.
+
+I never believed that the world or our country could be made more free by politicians, if the people had no desire for freedom.
+
+Under the current circumstances the most we can hope to achieve in the political process is to use it as a podium to reach the people to alert them of the nature of the crisis and the importance of their need to assume responsibility for themselves, if it is liberty that they truly seek.  Without this, a constitutionally protected free society is impossible.
+
+If this is true, our individual goal in life ought to be for us to seek virtue and excellence and recognize that self-esteem and happiness only comes from using one’s natural ability, in the most productive manner possible, according to one’s own talents.
+
+Productivity and creativity are the true source of personal satisfaction. Freedom, and not dependency, provides the environment needed to achieve these goals. Government cannot do this for us; it only gets in the way. When the government gets involved, the goal becomes a bailout or a subsidy and these cannot provide a sense of  personal achievement.
+
+Achieving legislative power and political influence should not be our goal. Most of the change, if it is to come, will not come from the politicians, but rather from individuals, family, friends, intellectual leaders and our religious institutions.  The solution can only come from rejecting the use of coercion, compulsion, government commands, and aggressive force, to mold social and economic behavior.  Without accepting these restraints, inevitably the consensus will be to allow the government to mandate economic equality and obedience to the politicians who gain power and promote an environment that smothers the freedoms of everyone. It is then that the responsible individuals who seek excellence and self-esteem by being self-reliance and productive, become the true victims.
+
+ 
+
+Conclusion                                                                                                                                                    
+
+What are the greatest dangers that the American people face today and impede the goal of a free society? There are five.
+
+1. The continuous attack on our civil liberties which threatens the rule of law and our ability to resist the onrush of tyranny.               
+
+2. Violent anti-Americanism that has engulfed the world. Because the phenomenon of “blow-back” is not understood or denied, our foreign policy is destined to keep us involved in many wars that we have no business being in. National bankruptcy and a greater threat to our national security will result.                                                         
+
+3. The ease in which we go to war, without a declaration by Congress, but accepting international authority from the UN or NATO even for preemptive wars, otherwise known as aggression.                                        
+
+4. A financial political crisis as a consequence of excessive debt, unfunded liabilities, spending, bailouts, and gross discrepancy in wealth distribution going from the middle class to the rich. The danger of central economic planning, by the Federal Reserve must be understood.                                               
+
+ 5. World government taking over  local and US sovereignty by getting involved in the issues of war, welfare, trade, banking,  a world currency, taxes, property ownership, and private ownership of guns.
+
+Happily, there is an answer for these very dangerous trends.                                                     
+
+What a wonderful world it would be if everyone accepted the simple moral premise of rejecting all acts of aggression.  The retort to such a suggestion is always:  it’s too simplistic, too idealistic, impractical, naïve, utopian, dangerous, and unrealistic to strive for such an ideal.
+
+The answer to that is that for thousands of years the acceptance of government force, to rule over the people, at the sacrifice of liberty, was considered moral and the only available option for achieving peace and prosperity.
+
+What could be more utopian than that myth—considering the results especially looking at the state sponsored killing, by nearly every government during the 20th Century, estimated to be in the hundreds of millions.  It’s time to reconsider this grant of authority to the state.
+
+No good has ever come from granting monopoly power to the state to use aggression against the people to arbitrarily mold human behavior.  Such power, when left unchecked, becomes the seed of an ugly tyranny.  This method of governance has been adequately tested, and the results are in: reality dictates we try liberty.
+
+The idealism of non-aggression and rejecting all offensive use of force should be tried.  The idealism of government sanctioned violence has been abused throughout history and is the primary source of poverty and war.  The theory of a society being based on individual freedom has been around for a long time.  It’s time to take a bold step and actually permit it by advancing this cause, rather than taking a step backwards as some would like us to do.
+
+Today the principle of habeas corpus, established when King John signed the Magna Carta in 1215, is under attack. There’s every reason to believe that a renewed effort with the use of the internet that we can instead advance the cause of liberty by spreading an uncensored message that will serve to rein in government authority and challenge the obsession with war and welfare.
+
+What I’m talking about is a system of government guided by the moral principles of peace and tolerance.
+
+The Founders were convinced that a free society could not exist without a moral people.  Just writing rules won’t work if the people choose to ignore them.  Today the rule of law written in the Constitution has little meaning for most Americans, especially those who work in Washington DC.
+
+Benjamin Franklin claimed “only a virtuous people are capable of freedom.”  John Adams concurred:  “Our Constitution was made for a moral and religious people.  It is wholly inadequate to the government of any other.”
+
+A moral people must reject all violence in an effort to mold people’s beliefs or habits.
+
+A society that boos or ridicules the Golden Rule is not a moral society.  All great religions endorse the Golden Rule.  The same moral standards that individuals are required to follow should apply to all government officials.  They cannot be exempt.
+
+The ultimate solution is not in the hands of the government.
+
+The solution falls on each and every individual, with guidance from family, friends and community.
+
+The #1 responsibility for each of us is to change ourselves with hope that others will follow.  This is of greater importance than working on changing the government; that is secondary to promoting a virtuous society.  If we can achieve this, then the government will change.
+
+It doesn’t mean that political action or holding office has no value. At times it does nudge policy in the right direction. But what is true is that when seeking office is done for personal aggrandizement, money or power, it becomes useless if not harmful. When political action is taken for the right reasons it’s easy to understand why compromise should be avoided. It also becomes clear why progress is best achieved by working with coalitions, which bring people together, without anyone sacrificing his principles.
+
+Political action, to be truly beneficial, must be directed toward changing the hearts and minds of the people, recognizing that it’s the virtue and morality of the people that allow liberty to flourish.
+
+The Constitution or more laws per se, have no value if the people’s attitudes aren’t changed.
+
+To achieve liberty and peace, two powerful human emotions have to be overcome.  Number one is “envy” which leads to hate and class warfare.  Number two is “intolerance” which leads to bigoted and judgmental policies.  These emotions must be replaced with a much better understanding of love, compassion, tolerance and free market economics. Freedom, when understood, brings people together. When tried, freedom is popular.
+
+The problem we have faced over the years has been that economic interventionists are swayed by envy, whereas social interventionists are swayed by intolerance of habits and lifestyles. The misunderstanding that tolerance is an endorsement of certain activities, motivates many to legislate moral standards which should only be set by individuals making their own choices. Both sides use force to deal with these misplaced emotions. Both are authoritarians. Neither endorses voluntarism.  Both views ought to be rejected.
+
+I have come to one firm conviction after these many years of trying to figure out “the plain truth of things.”  The best chance for achieving peace and prosperity, for the maximum number of people world-wide, is to pursue the cause of LIBERTY.
+
+If you find this to be a worthwhile message, spread it throughout the land.
diff --git a/az-hadoop-jobtype-plugin/src/examples/java-wc/src/upload.pig b/az-hadoop-jobtype-plugin/src/examples/java-wc/src/upload.pig
new file mode 100644
index 0000000..c77d6df
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/examples/java-wc/src/upload.pig
@@ -0,0 +1,4 @@
+RMF -skipTrash $inData;
+RMF -skipTrash $outData;
+copyFromLocal $inDataLocal $inData;
+
diff --git a/az-hadoop-jobtype-plugin/src/examples/java-wc/wc.properties b/az-hadoop-jobtype-plugin/src/examples/java-wc/wc.properties
new file mode 100644
index 0000000..502ed55
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/examples/java-wc/wc.properties
@@ -0,0 +1,8 @@
+user.to.proxy=azkaban
+
+HDFSRoot=/tmp
+
+param.inDataLocal=res/rpfarewell
+param.inData=${HDFSRoot}/${user.to.proxy}/wordcountjavain
+param.outData=${HDFSRoot}/${user.to.proxy}/wordcountjavaout
+
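
The param.* entries are shared by the example's Pig and Java steps. Assuming the pig jobtype follows the usual Azkaban convention of stripping the param. prefix and passing the remainder as Pig parameters, upload.pig effectively expands to the following (values substituted from this file):

    RMF -skipTrash /tmp/azkaban/wordcountjavain;
    RMF -skipTrash /tmp/azkaban/wordcountjavaout;
    copyFromLocal res/rpfarewell /tmp/azkaban/wordcountjavain;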
diff --git a/az-hadoop-jobtype-plugin/src/examples/java-wc/wordcount-java.job b/az-hadoop-jobtype-plugin/src/examples/java-wc/wordcount-java.job
new file mode 100644
index 0000000..2bacaa5
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/examples/java-wc/wordcount-java.job
@@ -0,0 +1,13 @@
+type=hadoopJava
+job.class=azkaban.jobtype.examples.java.WordCount
+
+classpath=./lib/*,${hadoop.home}/lib/*
+
+main.args=${param.inData} ${param.outData}
+
+force.output.overwrite=true
+
+input.path=${param.inData}
+output.path=${param.outData}
+
+dependencies=pig-upload
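
With the same substitution, the hadoopJava step runs azkaban.jobtype.examples.java.WordCount over the data staged by pig-upload; the resolved properties would read as follows (shown for illustration only, derived from wc.properties above):

    main.args=/tmp/azkaban/wordcountjavain /tmp/azkaban/wordcountjavaout
    input.path=/tmp/azkaban/wordcountjavain
    output.path=/tmp/azkaban/wordcountjavaout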
diff --git a/az-hadoop-jobtype-plugin/src/examples/pig-wc/README b/az-hadoop-jobtype-plugin/src/examples/pig-wc/README
new file mode 100644
index 0000000..5642e45
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/examples/pig-wc/README
@@ -0,0 +1 @@
+Fill in the user.to.proxy property in the .job file with an account you have access to.
diff --git a/az-hadoop-jobtype-plugin/src/examples/pig-wc/res/rpfarewell b/az-hadoop-jobtype-plugin/src/examples/pig-wc/res/rpfarewell
new file mode 100644
index 0000000..84df4be
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/examples/pig-wc/res/rpfarewell
@@ -0,0 +1,340 @@
+Farewell to Congress
+
+This may well be the last time I speak on the House Floor.  At the end of the year I’ll leave Congress after 23 years in office over a 36 year period.  My goals in 1976 were the same as they are today:  promote peace and prosperity by a strict adherence to the principles of individual liberty.
+
+It was my opinion, that the course the U.S. embarked on in the latter part of the 20th Century would bring us a major financial crisis and engulf us in a foreign policy that would overextend us and undermine our national security.
+
+To achieve the goals I sought, government would have had to shrink in size and scope, reduce spending, change the monetary system, and reject the unsustainable costs of policing the world and expanding the American Empire.
+
+The problems seemed to be overwhelming and impossible to solve, yet from my view point, just following the constraints placed on the federal government by the Constitution would have been a good place to start.
+
+ 
+
+How Much Did I Accomplish?
+
+In many ways, according to conventional wisdom, my off-and-on career in Congress, from 1976 to 2012, accomplished very little.  No named legislation, no named federal buildings or highways—thank goodness.  In spite of my efforts, the government has grown exponentially, taxes remain excessive, and the prolific increase of incomprehensible regulations continues.  Wars are constant and pursued without Congressional declaration, deficits rise to the sky, poverty is rampant and dependency on the federal government is now worse than any time in our history.
+
+All this with minimal concerns for the deficits and unfunded liabilities that common sense tells us cannot go on much longer.  A grand, but never mentioned, bipartisan agreement allows for the well-kept secret that keeps the spending going.  One side doesn’t give up one penny on military spending, the other side doesn’t give up one penny on welfare spending, while both sides support the bailouts and subsidies for the banking and  corporate elite.  And the spending continues as the economy weakens and the downward spiral continues.   As the government continues fiddling around, our liberties and our wealth burn in the flames of a foreign policy that makes us less safe.
+
+The major stumbling block to real change in Washington is the total resistance to admitting that the country is broke. This has made compromising, just to agree to increase spending, inevitable since neither side has any intention of cutting spending.
+
+The country and the Congress will remain divisive since there’s no “loot left to divvy up.”
+
+Without this recognition the spenders in Washington will continue the march toward a fiscal cliff much bigger than the one anticipated this coming January.
+
+I have thought a lot about why those of us who believe in liberty, as a solution, have done so poorly in convincing others of its benefits.  If liberty is what we claim it is- the principle that protects all personal, social and economic decisions necessary for maximum prosperity and the best chance for peace- it should be an easy sell.  Yet, history has shown that the masses have been quite receptive to the promises of authoritarians which are rarely if ever fulfilled.
+
+ 
+
+Authoritarianism vs. Liberty
+
+If authoritarianism leads to poverty and war and less freedom for all individuals and is controlled by rich special interests, the people should be begging for liberty.  There certainly was a strong enough sentiment for more freedom at the time of our founding that motivated those who were willing to fight in the revolution against the powerful British government.
+
+During my time in Congress the appetite for liberty has been quite weak; the understanding of its significance negligible.  Yet the good news is that compared to 1976 when I first came to Congress, the desire for more freedom and less government in 2012 is much greater and growing, especially in grassroots America. Tens of thousands of teenagers and college age students are, with great enthusiasm, welcoming the message of liberty.
+
+I have a few thoughts as to why the people of a country like ours, once the freest and most prosperous, allowed the conditions to deteriorate to the degree that they have.
+
+Freedom, private property, and enforceable voluntary contracts, generate wealth.  In our early history we were very much aware of this.  But in the early part of the 20th century our politicians promoted the notion that the tax and monetary systems had to change if we were to involve ourselves in excessive domestic and military spending. That is why Congress gave us the Federal Reserve and the income tax.  The majority of Americans and many government officials agreed that sacrificing some liberty was necessary to carry out what some claimed to be “progressive” ideas. Pure democracy became acceptable.
+
+They failed to recognized that what they were doing was exactly opposite of what the colonists were seeking when they broke away from the British.
+
+Some complain that my arguments makes no sense, since great wealth and the standard of living improved  for many Americans over the last 100 years, even with these new policies.
+
+But the damage to the market economy, and the currency, has been insidious and steady.  It took a long time to consume our wealth, destroy the currency and undermine productivity and get our financial obligations to a point of no return. Confidence sometimes lasts longer than deserved. Most of our wealth today depends on debt.
+
+The wealth that we enjoyed, which seemed to be endless, allowed concern for the principle of a free society to be neglected.  As long as most people believed the material abundance would last forever, worrying about protecting a competitive, productive economy and individual liberty seemed unnecessary.
+
+ 
+
+The Age of Redistribution
+
+This neglect ushered in an age of redistribution of wealth by government kowtowing to any and all special interests, except for those who just wanted to be left alone.  That is why today money in politics far surpasses money currently going into research and development and productive entrepreneurial efforts.
+
+The material benefits became more important than understanding and promoting the principles of liberty and a free market.  It is good that material abundance is a result of liberty, but if materialism is all that we care about, problems are guaranteed.
+
+The crisis arrived because the illusion that wealth and prosperity would last forever has ended. Since it was based on debt and a pretense that debt can be papered over by an out-of-control fiat monetary system, it was doomed to fail.  We have ended up with a system that doesn’t produce enough even to finance the debt and no fundamental understanding of why a free society is crucial to reversing these trends.
+
+If this is not recognized, the recovery will linger for a long time.  Bigger government, more spending, more debt, more poverty for the middle class, and a more intense scramble by the elite special interests will continue.
+
+ 
+
+We Need an Intellectual Awakening
+
+Without an intellectual awakening, the turning point will be driven by economic law.  A dollar crisis will bring the current out-of-control system to its knees.
+
+If it’s not accepted that big government, fiat money, ignoring liberty, central economic planning, welfarism, and warfarism caused our crisis, we can expect a continuous and dangerous march toward corporatism and even fascism, with even more loss of our liberties.  Prosperity for a large middle class, though, will become an abstract dream.
+
+This continuous move is no different from what we have seen in how our financial crisis of 2008 was handled.  Congress first directed, with bipartisan support, bailouts for the wealthy.  Then it was the Federal Reserve with its endless quantitative easing. If at first it doesn’t succeed, try again: QE1, QE2, and QE3, and with no results we try QE indefinitely—that is, until it too fails.  There’s a cost to all of this and, let me assure you, delaying the payment is no longer an option.  The rules of the market will exact their pound of flesh and it won’t be pretty.
+
+The current crisis elicits a lot of pessimism, and the pessimism further erodes confidence in the future.  The two feed on each other, making our situation worse.
+
+If the underlying cause of the crisis is not understood, we cannot solve our problems. The issues of warfare, welfare, deficits, inflationism, corporatism, bailouts and authoritarianism cannot be ignored.  If we merely expand these policies, we cannot expect good results.
+
+Everyone claims support for freedom.  But too often it’s for one’s own freedom and not for others.  Too many believe that there must be limits on freedom. They argue that freedom must be directed and managed to achieve fairness and equality thus making it acceptable to curtail, through force, certain liberties.
+
+Some decide what and whose freedoms are to be limited.  These are the politicians whose goal in life is power. Their success depends on gaining support from special interests.
+
+ 
+
+No More ‘isms’
+
+The great news is that the answer is not to be found in more “isms.”  The answers are to be found in more liberty, which costs so much less.  Under these circumstances spending goes down, wealth production goes up, and the quality of life improves.
+
+Just this recognition—especially if we move in this direction—increases optimism, which in itself is beneficial.  Follow-through with sound policies, understood and supported by the people, is required.
+
+But there is good evidence that the generation coming of age at the present time is supportive of moving in the direction of more liberty and self-reliance. The more this change in direction and the solutions become known, the quicker will be the return of optimism.
+
+Our job, for those of us who believe that the system we have had for the last 100 years has driven us to this unsustainable crisis, is to be more convincing that there is a different system, a wonderful, uncomplicated, and moral one, that provides the answers.  We had a taste of it in our early history. We need not give up on the notion of advancing this cause.
+
+It worked, but we allowed our leaders to concentrate on the material abundance that freedom generates, while ignoring freedom itself.  Now we have neither, but the door is open, out of necessity, for an answer.  The answer available is based on the Constitution, individual liberty and prohibiting the use of government force to provide privileges and benefits to all special interests.
+
+After over 100 years we face a society quite different from the one that was intended by the Founders.  In many ways their efforts to protect future generations from this danger with the Constitution have failed.  Skeptics, at the time the Constitution was written in 1787, warned us of today’s possible outcome.  The insidious nature of the erosion of our liberties, and the reassurance our great abundance gave us, allowed the process to evolve into the dangerous period in which we now live.
+
+ 
+
+Dependency on Government Largesse
+
+Today we face a dependency on government largesse for almost every need.  Our liberties are restricted and government operates outside the rule of law, protecting and rewarding those who buy or coerce government into satisfying their demands. Here are a few examples:
+
+Undeclared wars are commonplace.
+Welfare for the rich and poor is considered an entitlement.
+The economy is overregulated, overtaxed and grossly distorted by a deeply flawed monetary system.
+Debt is growing exponentially.
+The Patriot Act and FISA legislation passed without much debate have resulted in a steady erosion of our 4th Amendment rights.
+Tragically our government engages in preemptive war, otherwise known as aggression, with no complaints from the American people.
+The drone warfare we are pursuing worldwide is destined to end badly for us as the hatred builds for innocent lives lost and international laws flouted. Once we are financially weakened and militarily challenged, there will be a lot of resentment thrown our way.
+It’s now the law of the land that the military can arrest American citizens and hold them indefinitely, without charges or a trial.
+Rampant hostility toward free trade is supported by a large number in Washington.
+Supporters of sanctions, currency manipulation and WTO trade retaliation, call the true free traders “isolationists.”
+Sanctions are used to punish countries that don’t follow our orders.
+Bailouts and guarantees for all kinds of misbehavior are routine.
+Central economic planning through monetary policy, regulations and legislative mandates has been an acceptable policy.
+ 
+
+Questions
+
+Excessive government has created such a mess it prompts many questions:
+
+Why are sick people who use medical marijuana put in prison?
+Why does the federal government restrict the drinking of raw milk?
+Why can’t Americans manufacture rope and other products from hemp?
+Why are Americans not allowed to use gold and silver as legal tender as mandated by the Constitution?
+Why is Germany concerned enough to consider repatriating its gold held for it by the Fed in New York?  Is it that trust in the U.S. and dollar supremacy is beginning to wane?
+Why do our political leaders believe it’s unnecessary to thoroughly audit our own gold?
+Why can’t Americans decide which type of light bulbs they can buy?
+Why is the TSA permitted to abuse the rights of any American traveling by air?
+Why should there be mandatory sentences—even up to life for crimes without victims—as our drug laws require?
+Why have we allowed the federal government to regulate commodes in our homes?
+Why is it political suicide for anyone to criticize AIPAC ?
+Why haven’t we given up on the drug war since it’s an obvious failure and violates the people’s rights? Has nobody noticed that the authorities can’t even keep drugs out of the prisons? How can making our entire society a prison solve the problem?
+Why do we sacrifice so much getting needlessly involved in border disputes and civil strife around the world and ignore the root cause of the most deadly border in the world-the one between Mexico and the US?
+Why does Congress willingly give up its prerogatives to the Executive Branch?
+Why does changing the party in power never change policy? Could it be that the views of both parties are essentially the same?
+Why did the big banks, the large corporations, and foreign banks and foreign central banks get bailed out in 2008, while the middle class lost their jobs and their homes?
+Why do so many in government, and so many federal officials, believe that creating money out of thin air creates wealth?
+Why do so many accept the deeply flawed principle that government bureaucrats and politicians can protect us from ourselves without totally destroying the principle of liberty?
+Why can’t people understand that war always destroys wealth and liberty?
+Why is there so little concern for the Executive Order that gives the President authority to establish a “kill list,” including American citizens, of those targeted for assassination?
+Why is patriotism thought to be blind loyalty to the government and the politicians who run it, rather than loyalty to the principles of liberty and support for the people? Real patriotism is a willingness to challenge the government when it’s wrong.
+Why is it claimed that if people won’t or can’t take care of their own needs, people in government can do it for them?
+Why did we ever give the government a safe haven for initiating violence against the people?
+Why do some members defend free markets, but not civil liberties?
+Why do some members defend civil liberties but not free markets? Aren’t they the same?
+Why don’t more defend both economic liberty and personal liberty?
+Why are there not more individuals who seek to intellectually influence others to bring about positive changes than those who seek power to force others to obey their commands?
+Why does the use of religion to support a social gospel and preemptive wars, both of which require authoritarians to use violence, or the threat of violence, go unchallenged? Aggression and forced redistribution of wealth have nothing to do with the teachings of the world’s great religions.
+Why do we allow the government and the Federal Reserve to disseminate false information dealing with both economic and  foreign policy?
+Why is democracy held in such high esteem when it’s the enemy of the minority and makes all rights relative to the dictates of the majority?
+Why should anyone be surprised that Congress has no credibility, since there’s such a disconnect between what politicians say and what they do?
+Is there any explanation for all the deception, the unhappiness, the fear of the future, the loss of confidence in our leaders, the distrust, the anger and frustration?  Yes there is, and there’s a way to reverse these attitudes.  The negative perceptions are logical and a consequence of the bad policies bringing about our problems.  Identifying the problems and recognizing their cause allow the proper changes to come easily.
+ 
+
+Trust Yourself, Not the Government
+
+Too many people have for too long placed too much confidence and trust in government and not enough in themselves.  Fortunately, many are now becoming aware of the seriousness of the gross mistakes of the past several decades.  The blame is shared by both political parties.  Many Americans now are demanding to hear the plain truth of things and want the demagoguing to stop.  Without this first step, solutions are impossible.
+
+Seeking the truth and finding the answers in liberty and self-reliance promotes the optimism necessary for restoring prosperity.  The task is not that difficult if politics doesn’t get in the way.
+
+We have allowed ourselves to get into such a mess for various reasons.
+
+Politicians deceive themselves as to how wealth is produced.  Excessive confidence is placed in the judgment of politicians and bureaucrats.  This replaces confidence in a free society.  Too many in high places of authority became convinced that only they, armed with arbitrary government power, can bring about fairness, while facilitating wealth production.  This always proves to be a utopian dream and destroys wealth and liberty.  It impoverishes the people and rewards the special interests who end up controlling both political parties.
+
+It’s no surprise then that much of what goes on in Washington is driven by aggressive partisanship and power seeking, with philosophic differences being minor.
+
+ 
+
+Economic Ignorance
+
+Economic ignorance is commonplace.  Keynesianism continues to thrive, although today it is facing healthy and enthusiastic rebuttals.  Believers in military Keynesianism and domestic Keynesianism continue to desperately promote their failed policies, as the economy languishes in a deep slumber.
+
+Supporters of all government edicts use humanitarian arguments to justify them.
+
+Humanitarian arguments are always used to justify government mandates related to the economy, monetary policy, foreign policy, and personal liberty.  This is done on purpose, to make the mandates more difficult to challenge.  But initiating violence for humanitarian reasons is still violence.  Good intentions are no excuse and are just as harmful as when people use force with bad intentions.  The results are always negative.
+
+The immoral use of force is the source of man’s political problems.  Sadly, many religious groups, secular organizations, and psychopathic authoritarians endorse government initiated force to change the world.  Even when the desired goals are well-intentioned—or especially when well-intentioned—the results are dismal.  The good results sought never materialize.  The new problems created require even more government force as a solution.  The net result is institutionalizing government initiated violence and morally justifying it on humanitarian grounds.
+
+This is the same fundamental reason our government uses force to invade other countries at will, to centrally plan the economy at home, and to regulate the personal liberty and habits of our citizens.
+
+It is rather strange that, unless one has a criminal mind and no respect for other people and their property, no one claims it’s permissible to go into a neighbor’s house and tell them how to behave, what they can eat, smoke and drink, or how to spend their money.
+
+Yet, rarely is it asked why it is morally acceptable that a stranger with a badge and a gun can do the same thing in the name of law and order.  Any resistance is met with brute force, fines, taxes, arrests, and even imprisonment. This is done more frequently every day without a proper search warrant.
+
+ 
+
+No Government Monopoly over Initiating Violence
+
+Restraining aggressive behavior is one thing, but legalizing a government monopoly on initiating aggression can only lead to the exhaustion of liberty, accompanied by chaos, anger and the breakdown of civil society.  Permitting such authority and expecting saintly behavior from the bureaucrats and the politicians is a pipe dream.  We now have a standing army of armed bureaucrats in the TSA, CIA, FBI, Fish and Wildlife, FEMA, IRS, Corps of Engineers, etc. numbering over 100,000.  Citizens are guilty until proven innocent in the unconstitutional administrative courts.
+
+Government in a free society should have no authority to meddle in social activities or the economic transactions of individuals. Nor should government meddle in the affairs of other nations. All things peaceful, even when controversial, should be permitted.
+
+We must reject the notion of prior restraint in economic activity just as we do in the areas of free speech and religious liberty. But even in these areas government is starting to use a backdoor approach of political correctness to regulate speech, a dangerous trend. Since 9/11, monitoring of speech on the internet has become a problem because warrants are no longer required.
+
+ 
+
+The Proliferation of Federal Crimes
+
+The Constitution established four federal crimes.  Today the experts can’t even agree on how many federal crimes are now on the books—they number into the thousands.  No one person can comprehend the enormity of the legal system—especially the tax code.  Due to the ill-advised drug war and the endless federal expansion of the criminal code we have over 6 million people under correctional suspension, more than the Soviets ever had, and more than any other nation today, including China.  I don’t understand the complacency of the Congress and the willingness to continue their obsession with passing more Federal laws.  Mandatory sentencing laws associated with drug laws have compounded our prison problems.
+
+The Federal Register is now 75,000 pages long, the tax code has 72,000 pages, and both expand every year.  When will the people start shouting, “enough is enough,” and demand that Congress cease and desist?
+
+ 
+
+Achieving Liberty
+
+Liberty can only be achieved when government is denied the aggressive use of force.  If one seeks liberty, a precise type of government is needed.  To achieve it, more than lip service is required.
+
+Two choices are available.
+
+A government designed to protect liberty—a natural right—as its sole objective.  The people are expected to care for themselves and to reject the use of any force to interfere with another person’s liberty.  Government is given a strictly limited authority to enforce contracts, protect property ownership, settle disputes, and defend against foreign aggression.
+A government that pretends to protect liberty but is granted power to use force arbitrarily over the people and foreign nations.  Though the grant of power many times is meant to be small and limited, it inevitably metastasizes into an omnipotent political cancer.  This is the problem from which the world has suffered throughout the ages.  Though meant to be limited, it nevertheless is a 100% sacrifice of a principle that would-be tyrants find irresistible.  It is used vigorously—though incrementally and insidiously.  Granting power to government officials always proves the adage that “power corrupts.”
+Once government gets a limited concession for the use of force to mold people’s habits and plan the economy, it causes a steady move toward tyrannical government.  Only a revolutionary spirit can reverse the process and deny the government this arbitrary use of aggression.  There’s no in-between.  Sacrificing a little liberty for imaginary safety always ends badly.
+
+Today’s mess is a result of Americans accepting option #2, even though the Founders attempted to give us Option #1.
+
+The results are not good.  As our liberties have been eroded, our wealth has been consumed.  The wealth we see today is based on debt and a foolish willingness on the part of foreigners to take our dollars for goods and services. They then loan them back to us to perpetuate our debt system.  It’s amazing that it has worked for this long, but the impasse in Washington in solving our problems indicates that many are starting to understand the seriousness of the worldwide debt crisis and the dangers we face. The longer this process continues, the harsher the outcome will be.
+
+ 
+
+The Financial Crisis Is a Moral Crisis
+
+Many are now acknowledging that a financial crisis looms, but few understand that it is, in reality, a moral crisis.  It’s the moral crisis that has allowed our liberties to be undermined and permits the exponential growth of illegal government power.  Without a clear understanding of the nature of the crisis, it will be difficult to prevent a steady march toward tyranny and the poverty that will accompany it.
+
+Ultimately, the people have to decide which form of government they want; option #1 or option #2.  There is no other choice.  Claiming there is a choice of a “little” tyranny is like describing pregnancy as a “touch of pregnancy.”  It is a myth to believe that a mixture of free markets and government central economic planning is a worthy compromise.  What we see today is a result of that type of thinking.  And the results speak for themselves.
+
+ 
+
+A Culture of Violence
+
+America now suffers from a culture of violence.  It’s easy to reject the initiation of violence against one’s neighbor, but it’s ironic that the people arbitrarily and freely anoint government officials with monopoly power to initiate violence against the American people—practically at will.
+
+Because it’s the government that initiates force, most people accept it as being legitimate.  Those who exert the force have no sense of guilt.  It is believed by too many that governments are morally justified in initiating force supposedly to “do good.”  They incorrectly believe that this authority has come from the “consent of the people.”  The minority, or victims of government violence, never consented to suffer the abuse of government mandates, even when dictated by the majority.  Victims of TSA excesses never consented to this abuse.
+
+This attitude has given us a policy of initiating war to “do good” as well. It is claimed that war to prevent war, for noble purposes, is justified.  This is similar to what we were once told: that “destroying a village to save a village” was justified.  It was said by a US Secretary of State that the loss of 500,000 Iraqis, mostly children, in the 1990s, as a result of American bombs and sanctions, was “worth it” to achieve the “good” we brought to the Iraqi people.  And look at the mess that Iraq is in today.
+
+Government use of force to mold social and economic behavior at home and abroad has justified individuals using force on their own terms.  The fact that violence by government is seen as morally justified is the reason why violence will increase when the big financial crisis hits and becomes a political crisis as well.
+
+First, we recognize that individuals shouldn’t initiate violence; then we give the authority to government.  Eventually, the immoral use of government violence, when things go badly, will be used to justify an individual’s “right” to do the same thing. Neither the government nor individuals have the moral right to initiate violence against another, yet we are moving toward the day when both will claim this authority.  If this cycle is not reversed, society will break down.
+
+When needs are pressing, conditions deteriorate and rights become relative to the demands and the whims of the majority.  It’s then not a great leap for individuals to take it upon themselves to use violence to get what they claim is theirs.  As the economy deteriorates and the wealth discrepancies increase—as is already occurring—violence increases as those in need take it into their own hands to get what they believe is theirs.  They will not wait for a government rescue program.
+
+When government officials wield power over others to bail out the special interests, even with disastrous results for the average citizen, they feel no guilt for the harm they do. Those who take us into undeclared wars, with many casualties resulting, never lose sleep over the death and destruction their bad decisions caused. They are convinced that what they do is morally justified, and the fact that many suffer just can’t be helped.
+
+When the street criminals do the same thing, they too have no remorse, believing they are only taking what is rightfully theirs.  All moral standards become relative.  Whether it’s bailouts, privileges, government subsidies or benefits for some from inflating a currency, it’s all part of a process justified by a philosophy of forced redistribution of wealth.  Violence, or the threat of it, is the instrument required and unfortunately is of little concern to most members of Congress.
+
+Some argue it’s only a matter of “fairness” that those in need are cared for. There are two problems with this. First, the principle is used to provide a greater amount of benefits to the rich than the poor. Second, no one seems to be concerned about whether or not it’s fair to those who end up paying for the benefits. The costs are usually placed on the backs of the middle class and are hidden from the public eye. Too many people believe government handouts are free, like printing money out of thin air, and there is no cost. That deception is coming to an end. The bills are coming due and that’s what the economic slowdown is all about.
+
+Sadly, we have become accustomed to living with the illegitimate use of force by government.  It is the tool for telling the people how to live, what to eat and drink, what to read and how to spend their money.
+
+To develop a truly free society, the issue of initiating force must be understood and rejected.  Granting to government even a small amount of force is a dangerous concession.
+
+ 
+
+Limiting Government Excesses vs. a Virtuous Moral People
+
+Our Constitution, which was intended to limit government power and abuse, has failed.  The Founders warned that a free society depends on a virtuous and moral people.  The current crisis reflects that their concerns were justified.
+
+Most politicians and pundits are aware of the problems we face but spend all their time trying to reform government.  The sad part is that the suggested reforms almost always lead to less freedom, and the importance of a virtuous and moral people is either ignored or not understood. The new reforms serve only to further undermine liberty.  The compounding effect has given us this steady erosion of liberty and the massive expansion of debt.  The real question is: if it is liberty we seek, should most of the emphasis be placed on government reform, or on trying to understand what “a virtuous and moral people” means and how to promote it?  The Constitution has not prevented the people from demanding handouts for both rich and poor in their efforts to reform the government, while ignoring the principles of a free society. All branches of our government today are controlled by individuals who use their power to undermine liberty and enhance the welfare/warfare state-and frequently their own wealth and power.
+
+If the people are unhappy with the government performance it must be recognized that government is merely a reflection of an immoral society that rejected a moral government of constitutional limitations of power and love of freedom.
+
+If this is the problem all the tinkering with thousands of pages of new laws and regulations will do nothing to solve the problem.
+
+It is self-evident that our freedoms have been severely limited and that the apparent prosperity we still have is nothing more than leftover wealth from a previous time.  This fictitious wealth, based on debt and benefiting from a false trust in our currency and credit, will play havoc with our society when the bills come due.  This means that the full consequence of our lost liberties is yet to be felt.
+
+But that illusion is now ending.  Reversing a downward spiral depends on accepting a new approach.
+
+Expect the rapidly expanding homeschooling movement to play a significant role in the revolutionary reforms needed to build a free society with Constitutional protections. We cannot expect a Federal government controlled school system to provide the intellectual ammunition to combat the dangerous growth of government that threatens our liberties.
+
+The internet will provide the alternative to the government/media complex that controls the news and most political propaganda. This is why it’s essential that the internet remains free of government regulation.
+
+Many of our religious institutions and secular organizations support greater dependency on the state by supporting war, welfare and corporatism and ignore the need for a virtuous people.
+
+I never believed that the world or our country could be made more free by politicians, if the people had no desire for freedom.
+
+Under the current circumstances, the most we can hope to achieve in the political process is to use it as a podium to reach the people, to alert them to the nature of the crisis and the importance of their need to assume responsibility for themselves, if it is liberty that they truly seek.  Without this, a constitutionally protected free society is impossible.
+
+If this is true, our individual goal in life ought to be to seek virtue and excellence and to recognize that self-esteem and happiness come only from using one’s natural ability, in the most productive manner possible, according to one’s own talents.
+
+Productivity and creativity are the true source of personal satisfaction. Freedom, and not dependency, provides the environment needed to achieve these goals. Government cannot do this for us; it only gets in the way. When the government gets involved, the goal becomes a bailout or a subsidy and these cannot provide a sense of  personal achievement.
+
+Achieving legislative power and political influence should not be our goal. Most of the change, if it is to come, will not come from the politicians, but rather from individuals, family, friends, intellectual leaders and our religious institutions.  The solution can only come from rejecting the use of coercion, compulsion, government commands, and aggressive force, to mold social and economic behavior.  Without accepting these restraints, inevitably the consensus will be to allow the government to mandate economic equality and obedience to the politicians who gain power and promote an environment that smothers the freedoms of everyone. It is then that the responsible individuals who seek excellence and self-esteem by being self-reliant and productive become the true victims.
+
+ 
+
+Conclusion
+
+What are the greatest dangers that the American people face today and impede the goal of a free society? There are five.
+
+1. The continuous attack on our civil liberties which threatens the rule of law and our ability to resist the onrush of tyranny.               
+
+2. Violent anti-Americanism that has engulfed the world. Because the phenomenon of “blow-back” is either not understood or is denied, our foreign policy is destined to keep us involved in many wars that we have no business being in. National bankruptcy and a greater threat to our national security will result.
+
+3. The ease with which we go to war without a declaration by Congress, accepting instead international authority from the UN or NATO, even for preemptive wars, otherwise known as aggression.
+
+4. A financial and political crisis as a consequence of excessive debt, unfunded liabilities, spending, bailouts, and the gross discrepancy in wealth distribution going from the middle class to the rich. The danger of central economic planning by the Federal Reserve must be understood.
+
+5. World government taking over local and US sovereignty by getting involved in the issues of war, welfare, trade, banking, a world currency, taxes, property ownership, and private ownership of guns.
+
+Happily, there is an answer for these very dangerous trends.                                                     
+
+What a wonderful world it would be if everyone accepted the simple moral premise of rejecting all acts of aggression.  The retort to such a suggestion is always:  it’s too simplistic, too idealistic, impractical, naïve, utopian, dangerous, and unrealistic to strive for such an ideal.
+
+The answer to that is that for thousands of years the acceptance of government force, to rule over the people, at the sacrifice of liberty, was considered moral and the only available option for achieving peace and prosperity.
+
+What could be more utopian than that myth—considering the results, especially the state-sponsored killing by nearly every government during the 20th century, estimated to be in the hundreds of millions?  It’s time to reconsider this grant of authority to the state.
+
+No good has ever come from granting monopoly power to the state to use aggression against the people to arbitrarily mold human behavior.  Such power, when left unchecked, becomes the seed of an ugly tyranny.  This method of governance has been adequately tested, and the results are in: reality dictates we try liberty.
+
+The idealism of non-aggression and rejecting all offensive use of force should be tried.  The idealism of government sanctioned violence has been abused throughout history and is the primary source of poverty and war.  The theory of a society being based on individual freedom has been around for a long time.  It’s time to take a bold step and actually permit it by advancing this cause, rather than taking a step backwards as some would like us to do.
+
+Today the principle of habeas corpus, established when King John signed the Magna Carta in 1215, is under attack. There’s every reason to believe that, with a renewed effort and the use of the internet, we can instead advance the cause of liberty by spreading an uncensored message that will serve to rein in government authority and challenge the obsession with war and welfare.
+
+What I’m talking about is a system of government guided by the moral principles of peace and tolerance.
+
+The Founders were convinced that a free society could not exist without a moral people.  Just writing rules won’t work if the people choose to ignore them.  Today the rule of law written in the Constitution has little meaning for most Americans, especially those who work in Washington DC.
+
+Benjamin Franklin claimed “only a virtuous people are capable of freedom.”  John Adams concurred:  “Our Constitution was made for a moral and religious people.  It is wholly inadequate to the government of any other.”
+
+A moral people must reject all violence in an effort to mold people’s beliefs or habits.
+
+A society that boos or ridicules the Golden Rule is not a moral society.  All great religions endorse the Golden Rule.  The same moral standards that individuals are required to follow should apply to all government officials.  They cannot be exempt.
+
+The ultimate solution is not in the hands of the government.
+
+The solution falls on each and every individual, with guidance from family, friends and community.
+
+The #1 responsibility for each of us is to change ourselves with hope that others will follow.  This is of greater importance than working on changing the government; that is secondary to promoting a virtuous society.  If we can achieve this, then the government will change.
+
+It doesn’t mean that political action or holding office has no value. At times it does nudge policy in the right direction. But what is true is that when seeking office is done for personal aggrandizement, money or power, it becomes useless if not harmful. When political action is taken for the right reasons it’s easy to understand why compromise should be avoided. It also becomes clear why progress is best achieved by working with coalitions, which bring people together, without anyone sacrificing his principles.
+
+Political action, to be truly beneficial, must be directed toward changing the hearts and minds of the people, recognizing that it’s the virtue and morality of the people that allow liberty to flourish.
+
+The Constitution, or more laws per se, have no value if the people’s attitudes aren’t changed.
+
+To achieve liberty and peace, two powerful human emotions have to be overcome.  Number one is “envy” which leads to hate and class warfare.  Number two is “intolerance” which leads to bigoted and judgmental policies.  These emotions must be replaced with a much better understanding of love, compassion, tolerance and free market economics. Freedom, when understood, brings people together. When tried, freedom is popular.
+
+The problem we have faced over the years has been that economic interventionists are swayed by envy, whereas social interventionists are swayed by intolerance of habits and lifestyles. The misunderstanding that tolerance is an endorsement of certain activities motivates many to legislate moral standards which should only be set by individuals making their own choices. Both sides use force to deal with these misplaced emotions. Both are authoritarians. Neither endorses voluntarism.  Both views ought to be rejected.
+
+I have come to one firm conviction after these many years of trying to figure out “the plain truth of things.”  The best chance for achieving peace and prosperity, for the maximum number of people world-wide, is to pursue the cause of LIBERTY.
+
+If you find this to be a worthwhile message, spread it throughout the land.
diff --git a/az-hadoop-jobtype-plugin/src/examples/pig-wc/wordcountpig.job b/az-hadoop-jobtype-plugin/src/examples/pig-wc/wordcountpig.job
new file mode 100644
index 0000000..3d0ddbf
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/examples/pig-wc/wordcountpig.job
@@ -0,0 +1,12 @@
+type=pig
+pig.script=src/wordcountpig.pig
+
+user.to.proxy=azkaban
+
+HDFSRoot=/tmp
+
+param.inDataLocal=res/rpfarewell
+param.inData=${HDFSRoot}/${user.to.proxy}/wordcountpigin
+param.outData=${HDFSRoot}/${user.to.proxy}/wordcountpigout
+
+
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/common.properties b/az-hadoop-jobtype-plugin/src/jobtypes/common.properties
new file mode 100644
index 0000000..988d02f
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/common.properties
@@ -0,0 +1,8 @@
+## everything that the user job can know
+
+hadoop.home=
+#hive.home=
+#pig.home=
+#spark.home=
+
+#azkaban.should.proxy=
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/commonprivate.properties b/az-hadoop-jobtype-plugin/src/jobtypes/commonprivate.properties
new file mode 100644
index 0000000..6f0f616
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/commonprivate.properties
@@ -0,0 +1,28 @@
+## hadoop security manager setting common to all hadoop jobs
+hadoop.security.manager.class=azkaban.security.HadoopSecurityManager_H_1_0
+
+## hadoop security related settings
+
+# proxy.keytab.location=
+# proxy.user=
+
+# azkaban.should.proxy=true
+# obtain.binary.token=true
+# obtain.namenode.token=true
+# obtain.jobtracker.token=true
+
+# global classpath items for all jobs. e.g. hadoop-core jar, hadoop conf
+#jobtype.global.classpath=${hadoop.home}/*,${hadoop.home}/conf
+
+# global jvm args for all jobs. e.g. java.io.temp.dir, java.library.path
+#jobtype.global.jvm.args=
+
+# hadoop
+hadoop.home=
+#pig.home=
+#hive.home=
+#spark.home=
+
+# configs for jobtype security settings
+execute.as.user=true
+azkaban.native.lib=
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/gobblin/plugin.properties b/az-hadoop-jobtype-plugin/src/jobtypes/gobblin/plugin.properties
new file mode 100644
index 0000000..4dd1c41
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/gobblin/plugin.properties
@@ -0,0 +1,4 @@
+mr.listener.visualizer=false
+
+mapreduce.job.user.classpath.first=true
+mapreduce.user.classpath.first=true
\ No newline at end of file
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/gobblin/presets/common.properties b/az-hadoop-jobtype-plugin/src/jobtypes/gobblin/presets/common.properties
new file mode 100644
index 0000000..5ca1e28
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/gobblin/presets/common.properties
@@ -0,0 +1,25 @@
+#####################################
+## Azkaban common for Gobblin
+#####################################
+mr.job.root.dir=${gobblin.work_dir}
+
+state.store.fs.uri=${fs.uri}
+state.store.dir=${gobblin.work_dir}/state-store
+
+source.filebased.fs.uri=${fs.uri}
+
+## Writer related configuration properties
+writer.fs.uri=${fs.uri}
+writer.staging.dir=${gobblin.work_dir}/task-staging
+writer.output.dir=${gobblin.work_dir}/task-output
+
+## Directory where error files from the quality checkers are stored
+qualitychecker.row.err.file=${gobblin.work_dir}/err
+
+## Job lock is not needed as Azkaban takes care of it
+job.lock.enabled=false
+
+# Directory where metrics log files are stored
+metrics.log.dir=${gobblin.work_dir}/metrics
+
+job.commit.policy=full
\ No newline at end of file
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/gobblin/presets/hdfsToMysql.properties b/az-hadoop-jobtype-plugin/src/jobtypes/gobblin/presets/hdfsToMysql.properties
new file mode 100644
index 0000000..3722451
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/gobblin/presets/hdfsToMysql.properties
@@ -0,0 +1,26 @@
+####################################
+# HDFS -> MySQL connector common
+####################################
+source.max.number.of.partitions=1
+mr.job.max.mappers=1
+
+taskexecutor.threadpool.size=1
+taskretry.threadpool.coresize=1
+taskretry.threadpool.maxsize=1
+
+# Source properties
+source.class=gobblin.source.extractor.hadoop.AvroFileSource
+
+# Converter properties - Record from source will be processed by the below series of converters
+converter.classes=gobblin.converter.jdbc.AvroToJdbcEntryConverter
+
+qualitychecker.task.policies=gobblin.policies.count.RowCountPolicy
+#AvroFileSource does not provide expected row count.
+qualitychecker.task.policy.types=OPTIONAL
+
+writer.builder.class=gobblin.writer.JdbcWriterBuilder
+writer.destination.type=MYSQL
+writer.jdbc.batch_size=1000
+
+data.publisher.type=gobblin.publisher.JdbcPublisher
+jdbc.publisher.driver=com.mysql.jdbc.Driver
\ No newline at end of file
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/gobblin/presets/mysqlToHdfs.properties b/az-hadoop-jobtype-plugin/src/jobtypes/gobblin/presets/mysqlToHdfs.properties
new file mode 100644
index 0000000..215d39a
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/gobblin/presets/mysqlToHdfs.properties
@@ -0,0 +1,46 @@
+####################################
+# MySQL -> HDFS connector common
+####################################
+
+mr.job.max.mappers=1
+taskexecutor.threadpool.size=1
+taskretry.threadpool.coresize=1
+taskretry.threadpool.maxsize=1
+
+# Source properties
+source.class=gobblin.source.extractor.extract.jdbc.MysqlSource
+source.max.number.of.partitions=1
+source.conn.driver=com.mysql.jdbc.Driver
+source.conn.timeout=500000
+source.conn.port=3306
+
+#Timezone table http://joda-time.sourceforge.net/timezones.html
+source.timezone=America/Los_Angeles
+
+source.querybased.is.compression.enabled=true
+
+#Only applicable for source.querybased.extract.type=snapshot
+source.querybased.low.watermark.backup.secs=0
+# Start value is required for more than one partition
+source.querybased.start.value=19700101000000
+source.querybased.watermark.type=timestamp
+source.max.number.of.partitions=1
+source.querybased.partition.interval=1
+
+# file name of output data
+extract.namespace=${source.querybased.schema}
+
+# Converter properties - Record from source will be processed by the below series of converters
+converter.classes=gobblin.converter.avro.JsonIntermediateToAvroConverter
+converter.avro.timestamp.format=yyyy-MM-dd HH:mm:ss'.0'
+converter.avro.date.format=yyyy-MM-dd
+converter.avro.time.format=HH:mm:ss
+
+qualitychecker.task.policies=gobblin.policies.count.RowCountPolicy
+qualitychecker.task.policy.types=FAIL
+
+writer.destination.type=HDFS
+writer.output.format=AVRO
+writer.file.path=${source.entity}
+
+data.publisher.type=gobblin.publisher.BaseDataPublisher
\ No newline at end of file
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/gobblin/private.properties b/az-hadoop-jobtype-plugin/src/jobtypes/gobblin/private.properties
new file mode 100644
index 0000000..998a6b4
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/gobblin/private.properties
@@ -0,0 +1,9 @@
+jobtype.class=azkaban.jobtype.connectors.gobblin.GobblinHadoopJob
+
+jobtype.classpath=${plugin.dir}/lib/*
+gobblin.config.preset.dir=${plugin.dir}/presets
+
+job.hdfs.jars=/resources/gobblin
+
+#Gobblin file system URI TODO Can we make this cluster independent?
+fs.uri=hdfs://localhost:9000
\ No newline at end of file
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/hadoopJava/plugin.properties b/az-hadoop-jobtype-plugin/src/jobtypes/hadoopJava/plugin.properties
new file mode 100644
index 0000000..443322b
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/hadoopJava/plugin.properties
@@ -0,0 +1 @@
+mr.listener.visualizer=false
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/hadoopJava/private.properties b/az-hadoop-jobtype-plugin/src/jobtypes/hadoopJava/private.properties
new file mode 100644
index 0000000..610fe75
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/hadoopJava/private.properties
@@ -0,0 +1 @@
+jobtype.class=azkaban.jobtype.HadoopJavaJob
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/hadoopShell/plugin.properties b/az-hadoop-jobtype-plugin/src/jobtypes/hadoopShell/plugin.properties
new file mode 100644
index 0000000..7105354
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/hadoopShell/plugin.properties
@@ -0,0 +1 @@
+hadoop.global.opts=-Dazkaban.link.attempt.url=${azkaban.link.attempt.url} -Dazkaban.link.job.url=${azkaban.link.job.url} -Dazkaban.link.execution.url=${azkaban.link.execution.url}
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/hadoopShell/private.properties b/az-hadoop-jobtype-plugin/src/jobtypes/hadoopShell/private.properties
new file mode 100644
index 0000000..d4dfba6
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/hadoopShell/private.properties
@@ -0,0 +1,3 @@
+jobtype.class=azkaban.jobtype.HadoopShell
+
+command.blacklist.regex=(?i).*kinit.*
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/hive/plugin.properties b/az-hadoop-jobtype-plugin/src/jobtypes/hive/plugin.properties
new file mode 100755
index 0000000..edefbc9
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/hive/plugin.properties
@@ -0,0 +1,6 @@
+
+hive.aux.jars.path=${hive.home}/aux/lib
+
+hive.jvm.args=-Dhive.querylog.location=. -Dhive.exec.scratchdir=/tmp/hive-${user.to.proxy} -Dhive.aux.jars.path=${hive.aux.jars.path}
+jobtype.jvm.args=${hive.jvm.args}
+
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/hive/private.properties b/az-hadoop-jobtype-plugin/src/jobtypes/hive/private.properties
new file mode 100755
index 0000000..101dc5c
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/hive/private.properties
@@ -0,0 +1,10 @@
+jobtype.classpath=${hadoop.home}/conf,${hadoop.home}/lib/*,${hive.home}/lib/*,${hive.home}/conf,${hive.aux.jar.path}
+jobtype.class=azkaban.jobtype.HadoopHiveJob
+
+hive.aux.jar.path=${hive.home}/aux/lib
+
+## definitely set hive home otherwise we dont have conf
+
+#jobtype.global.classpath=${hive.classpath.items}
+#hive.classpath.items=${hive.home}/lib/*,${hive.home}/conf/,${hive.aux.jars.path}/*,${hadoop.home}/*,${hadoop.home}/lib/*,${hadoop.home}/conf/
+
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/java/private.properties b/az-hadoop-jobtype-plugin/src/jobtypes/java/private.properties
new file mode 100644
index 0000000..68e22e7
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/java/private.properties
@@ -0,0 +1,2 @@
+jobtype.class=azkaban.jobtype.JavaJob
+
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/pig-0.11.0/plugin.properties b/az-hadoop-jobtype-plugin/src/jobtypes/pig-0.11.0/plugin.properties
new file mode 100644
index 0000000..b886d4b
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/pig-0.11.0/plugin.properties
@@ -0,0 +1,4 @@
+pig.listener.visualizer=false
+
+jobtype.classpath=${pig.home}/lib/*,${pig.home}/*
+pig.home=
\ No newline at end of file
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/pig-0.11.0/private.properties b/az-hadoop-jobtype-plugin/src/jobtypes/pig-0.11.0/private.properties
new file mode 100644
index 0000000..80ec7a7
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/pig-0.11.0/private.properties
@@ -0,0 +1,10 @@
+#assume lib/* has everything pig needs
+
+jobtype.classpath=${hadoop.home}/conf,${hadoop.home}/lib/*,lib/*
+jobtype.class=azkaban.jobtype.HadoopPigJob
+
+
+## preregister and imports
+#udf.import.list=
+#pig.additional.jars=
+
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/pig-0.12.0/plugin.properties b/az-hadoop-jobtype-plugin/src/jobtypes/pig-0.12.0/plugin.properties
new file mode 100644
index 0000000..b886d4b
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/pig-0.12.0/plugin.properties
@@ -0,0 +1,4 @@
+pig.listener.visualizer=false
+
+jobtype.classpath=${pig.home}/lib/*,${pig.home}/*
+pig.home=
\ No newline at end of file
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/pig-0.12.0/private.properties b/az-hadoop-jobtype-plugin/src/jobtypes/pig-0.12.0/private.properties
new file mode 100644
index 0000000..913d733
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/pig-0.12.0/private.properties
@@ -0,0 +1,7 @@
+#assume lib/* has everything pig needs
+jobtype.classpath=${hadoop.home}/conf,${hadoop.home}/lib/*,lib/*
+jobtype.class=azkaban.jobtype.HadoopPigJob
+
+## preregister and imports
+#udf.import.list=
+#pig.additional.jars=
\ No newline at end of file
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/spark/plugin.properties b/az-hadoop-jobtype-plugin/src/jobtypes/spark/plugin.properties
new file mode 100644
index 0000000..370bbf3
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/spark/plugin.properties
@@ -0,0 +1 @@
+queue=default
diff --git a/az-hadoop-jobtype-plugin/src/jobtypes/spark/private.properties b/az-hadoop-jobtype-plugin/src/jobtypes/spark/private.properties
new file mode 100644
index 0000000..5348348
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/jobtypes/spark/private.properties
@@ -0,0 +1,3 @@
+jobtype.class=azkaban.jobtype.HadoopSparkJob
+
+jobtype.classpath=${hadoop.classpath}:${spark.home}/conf:${spark.home}/lib/*
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/AzkabanPigListener.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/AzkabanPigListener.java
new file mode 100644
index 0000000..196872b
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/AzkabanPigListener.java
@@ -0,0 +1,323 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import java.io.IOException;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Queue;
+import java.util.Set;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobID;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.mapred.TaskReport;
+import org.apache.log4j.Logger;
+import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceOper;
+import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan;
+import org.apache.pig.impl.plan.OperatorKey;
+import org.apache.pig.tools.pigstats.JobStats;
+import org.apache.pig.tools.pigstats.OutputStats;
+import org.apache.pig.tools.pigstats.PigProgressNotificationListener;
+import org.apache.pig.tools.pigstats.PigStats;
+import org.apache.pig.tools.pigstats.ScriptState;
+
+import azkaban.jobtype.pig.PigJobDagNode;
+import azkaban.jobtype.pig.PigJobStats;
+import azkaban.utils.JSONUtils;
+import azkaban.utils.Props;
+
+public class AzkabanPigListener implements PigProgressNotificationListener {
+  private static Logger logger = Logger.getLogger(AzkabanPigListener.class);
+  private String statsFile;
+
+  private Map<String, PigJobDagNode> dagNodeNameMap =
+      new HashMap<String, PigJobDagNode>();
+  private Map<String, PigJobDagNode> dagNodeJobIdMap =
+      new HashMap<String, PigJobDagNode>();
+  private Set<String> completedJobIds = new HashSet<String>();
+
+  public AzkabanPigListener(Props props) {
+    statsFile = props.getString("azkaban.job.attachment.file");
+  }
+
+  @Override
+  public void initialPlanNotification(String scriptId, MROperPlan plan) {
+    logger.info("**********initialPlanNotification!**********");
+
+    // First pass: generate dagNodeNameMap.
+    Map<OperatorKey, MapReduceOper> planKeys = plan.getKeys();
+    for (Map.Entry<OperatorKey, MapReduceOper> entry : planKeys.entrySet()) {
+      String nodeName = entry.getKey().toString();
+      String[] aliases =
+          toArray(ScriptState.get().getAlias(entry.getValue()).trim());
+      String[] features =
+          toArray(ScriptState.get().getPigFeature(entry.getValue()).trim());
+
+      PigJobDagNode node = new PigJobDagNode(nodeName, aliases, features);
+      this.dagNodeNameMap.put(node.getName(), node);
+
+      // This shows how we can get the basic info about all nameless jobs
+      // before any execute. We can traverse the plan to build a DAG of this
+      // info.
+      logger.info("initialPlanNotification: aliases: "
+          + StringUtils.join(aliases, ",") + ", name: " + node.getName()
+          + ", features: " + StringUtils.join(features, ","));
+    }
+
+    // Second pass: connect the edges
+    for (Map.Entry<OperatorKey, MapReduceOper> entry : planKeys.entrySet()) {
+      PigJobDagNode node = this.dagNodeNameMap.get(entry.getKey().toString());
+      List<String> successorNodeList = new ArrayList<String>();
+      List<MapReduceOper> successors = plan.getSuccessors(entry.getValue());
+      if (successors != null) {
+        for (MapReduceOper successor : successors) {
+          PigJobDagNode successorNode =
+              this.dagNodeNameMap.get(successor.getOperatorKey().toString());
+          successorNodeList.add(successorNode.getName());
+          successorNode.addParent(node);
+        }
+      }
+      node.setSuccessors(successorNodeList);
+    }
+
+    // Third pass: find roots.
+    Queue<PigJobDagNode> parentQueue = new LinkedList<PigJobDagNode>();
+    Queue<PigJobDagNode> childQueue = new LinkedList<PigJobDagNode>();
+    for (Map.Entry<String, PigJobDagNode> entry : this.dagNodeNameMap
+        .entrySet()) {
+      PigJobDagNode node = entry.getValue();
+      if (node.getParents().isEmpty()) {
+        node.setLevel(0);
+        parentQueue.add(node);
+      }
+    }
+
+    // Final pass: BFS to set levels.
+    int level = 0;
+    Set<PigJobDagNode> visited = new HashSet<PigJobDagNode>();
+    while (parentQueue.peek() != null) {
+      PigJobDagNode node = null;
+      while ((node = parentQueue.poll()) != null) {
+        if (visited.contains(node)) {
+          continue;
+        }
+        node.setLevel(level);
+        for (String jobName : node.getSuccessors()) {
+          PigJobDagNode successorNode = this.dagNodeNameMap.get(jobName);
+          childQueue.add(successorNode);
+        }
+      }
+
+      Queue<PigJobDagNode> tmp = childQueue;
+      childQueue = parentQueue;
+      parentQueue = tmp;
+      ++level;
+    }
+
+    updateJsonFile();
+  }
+
+  private Object buildJobStatsJson() {
+    List<Object> jsonObj = new ArrayList<Object>();
+    for (Map.Entry<String, PigJobDagNode> entry : dagNodeJobIdMap.entrySet()) {
+      Map<String, Object> jobJsonObj = new HashMap<String, Object>();
+      PigJobDagNode node = entry.getValue();
+      jobJsonObj.put("name", node.getName());
+      jobJsonObj.put("jobId", node.getJobId());
+      jobJsonObj.put("parents", node.getParents());
+      jobJsonObj.put("successors", node.getSuccessors());
+      jobJsonObj.put("level", Integer.toString(node.getLevel()));
+      jobJsonObj.put("aliases", node.getAliases());
+      jobJsonObj.put("features", node.getFeatures());
+      PigJobStats pigStats = node.getJobStats();
+      if (pigStats != null) {
+        jobJsonObj.put("pigStats", pigStats.toJson());
+      }
+      jobJsonObj.put("state", node.getMapReduceJobState().toJson());
+      jobJsonObj.put("conf",
+          StatsUtils.propertiesToJson(node.getJobConfiguration()));
+      jsonObj.add(jobJsonObj);
+    }
+    return jsonObj;
+  }
+
+  private void updateJsonFile() {
+    File file = null;
+    try {
+      file = new File(statsFile);
+      JSONUtils.toJSON(buildJobStatsJson(), file);
+    } catch (Exception e) {
+      logger.error("Couldn't write stats file", e);
+    }
+  }
+
+  @Override
+  public void jobFailedNotification(String scriptId, JobStats stats) {
+    if (stats.getJobId() == null) {
+      logger.warn("jobId for failed job not found. This should only happen "
+          + "in local mode");
+      return;
+    }
+
+    PigJobDagNode node = dagNodeJobIdMap.get(stats.getJobId());
+    if (node == null) {
+      logger.warn("Unrecognized jobId reported for failed job: "
+          + stats.getJobId());
+      return;
+    }
+
+    addCompletedJobStats(node, stats);
+    updateJsonFile();
+  }
+
+  @Override
+  public void jobFinishedNotification(String scriptId, JobStats stats) {
+    PigJobDagNode node = dagNodeJobIdMap.get(stats.getJobId());
+    if (node == null) {
+      logger.warn("Unrecognized jobId reported for succeeded job: "
+          + stats.getJobId());
+      return;
+    }
+    addCompletedJobStats(node, stats);
+    updateJsonFile();
+  }
+
+  @Override
+  public void jobStartedNotification(String scriptId, String assignedJobId) {
+    logger.info("**********jobStartedNotification**********");
+    PigStats.JobGraph jobGraph = PigStats.get().getJobGraph();
+    logger.info("jobStartedNotification - jobId " + assignedJobId
+        + ", jobGraph:\n" + jobGraph);
+
+    // For each job in the graph, check whether stats for a job with this name
+    // are found. If so, look up its scope and bind the jobId to the
+    // PigJobDagNode with the same scope.
+    for (JobStats jobStats : jobGraph) {
+      if (assignedJobId.equals(jobStats.getJobId())) {
+        logger.info("jobStartedNotification - scope " + jobStats.getName()
+            + " is jobId " + assignedJobId);
+        PigJobDagNode node = this.dagNodeNameMap.get(jobStats.getName());
+
+        if (node == null) {
+          logger
+              .warn("jobStartedNotification - unrecognized operator name "
+                  + "found (" + jobStats.getName() + ") for jobId "
+                  + assignedJobId);
+        } else {
+          node.setJobId(assignedJobId);
+          addMapReduceJobState(node);
+          dagNodeJobIdMap.put(node.getJobId(), node);
+          updateJsonFile();
+        }
+      }
+    }
+  }
+
+  @Override
+  public void jobsSubmittedNotification(String arg0, int arg1) {
+    logger.info("jobSubmittedNotification");
+    logger.info("The script id is " + arg0);
+    logger.info(arg1 + " jobs submitted.");
+  }
+
+  @Override
+  public void launchCompletedNotification(String arg0, int arg1) {
+    logger.info("launchCompletedNotification");
+    logger.info("The script id is " + arg0);
+    logger.info("Finished " + arg1 + " jobs successfully");
+  }
+
+  @Override
+  public void launchStartedNotification(String arg0, int arg1) {
+    logger.info("lanchStartedNotification");
+    logger.info("launching script " + arg0);
+    logger.info("launching " + arg1 + " mr jobs");
+  }
+
+  @Override
+  public void outputCompletedNotification(String arg0, OutputStats arg1) {
+    logger.info("outputCompletedNotification");
+    logger.info("The script id is " + arg0);
+    logger.info("The output stat name is " + arg1.getName());
+    logger.info("You can get a lot more useful information here.");
+  }
+
+  @Override
+  public void progressUpdatedNotification(String scriptId, int progress) {
+    // For each running job, refresh its MapReduce job state and report progress.
+    for (PigJobDagNode node : dagNodeNameMap.values()) {
+      // Don't send progress events for unstarted jobs.
+      if (node.getJobId() == null) {
+        continue;
+      }
+      addMapReduceJobState(node);
+      // Only push job progress events for a completed job once.
+      if (node.getMapReduceJobState() != null
+          && !completedJobIds.contains(node.getJobId())) {
+        if (node.getMapReduceJobState().isComplete()) {
+          completedJobIds.add(node.getJobId());
+        }
+      }
+    }
+    updateJsonFile();
+  }
+
+  private static String[] toArray(String string) {
+    return string == null ? new String[0] : string.trim().split(",");
+  }
+
+  @SuppressWarnings("deprecation")
+  private void addMapReduceJobState(PigJobDagNode node) {
+    JobClient jobClient = PigStats.get().getJobClient();
+
+    try {
+      RunningJob runningJob = jobClient.getJob(node.getJobId());
+      if (runningJob == null) {
+        logger.warn("Couldn't find job status for jobId=" + node.getJobId());
+        return;
+      }
+
+      JobID jobID = runningJob.getID();
+      TaskReport[] mapTaskReport = jobClient.getMapTaskReports(jobID);
+      TaskReport[] reduceTaskReport = jobClient.getReduceTaskReports(jobID);
+      node.setMapReduceJobState(new MapReduceJobState(runningJob,
+          mapTaskReport, reduceTaskReport));
+
+      if (node.getJobConfiguration() == null) {
+        Properties jobConfProperties = StatsUtils.getJobConf(runningJob);
+        if (jobConfProperties != null && jobConfProperties.size() > 0) {
+          node.setJobConfiguration(jobConfProperties);
+        }
+      }
+    } catch (IOException e) {
+      logger.error("Error getting job info.", e);
+    }
+  }
+
+  private void addCompletedJobStats(PigJobDagNode node, JobStats stats) {
+    node.setJobStats(stats);
+  }
+}
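
Note: the level assignment above drains a parent queue, collects successors into a child queue, and swaps the two queues each round. Below is a minimal standalone sketch of the same two-queue idea, using a simplified node type instead of PigJobDagNode (the sketch marks nodes visited when they are dequeued); it is an illustration, not part of the PR.

    import java.util.*;

    class LevelAssigner {
      static final class Node {
        final String name;
        final List<Node> successors = new ArrayList<>();
        int level = -1;
        Node(String name) { this.name = name; }
      }

      // Assigns BFS levels starting from the given roots, swapping the two queues per round.
      static void assignLevels(Collection<Node> roots) {
        Queue<Node> parentQueue = new LinkedList<>(roots);
        Queue<Node> childQueue = new LinkedList<>();
        Set<Node> visited = new HashSet<>();
        int level = 0;
        while (!parentQueue.isEmpty()) {
          Node node;
          while ((node = parentQueue.poll()) != null) {
            if (!visited.add(node)) {
              continue;                       // already assigned a level
            }
            node.level = level;
            childQueue.addAll(node.successors);
          }
          Queue<Node> tmp = childQueue;       // swap queues for the next level
          childQueue = parentQueue;
          parentQueue = tmp;
          ++level;
        }
      }

      public static void main(String[] args) {
        Node a = new Node("scope-1"), b = new Node("scope-2"), c = new Node("scope-3");
        a.successors.add(b);
        a.successors.add(c);
        assignLevels(Collections.singletonList(a));
        System.out.println(a.name + "=" + a.level + ", " + b.name + "=" + b.level
            + ", " + c.name + "=" + c.level);
      }
    }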
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/GobblinConstants.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/GobblinConstants.java
new file mode 100644
index 0000000..cfc1bbd
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/GobblinConstants.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2014-2016 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.connectors.gobblin;
+
+/**
+ * Set of Gobblin constants
+ */
+public interface GobblinConstants {
+
+  public static final String GOBBLIN_PRESET_DIR_KEY = "gobblin.config.preset.dir"; //Directory where the preset files reside.
+  public static final String GOBBLIN_PRESET_KEY = "gobblin.config_preset"; //Name of the Gobblin preset
+  public static final String GOBBLIN_WORK_DIRECTORY_KEY = "gobblin.work_dir"; //Gobblin needs a working directory; this will be an HDFS directory.
+  public static final String GOBBLIN_PROPERTIES_HELPER_ENABLED_KEY = "gobblin.properties_helper_enabled"; //Validates Gobblin job properties if enabled.
+  public static final String GOBBLIN_HDFS_JOB_JARS_KEY = "job.hdfs.jars";
+  public static final String GOBBLIN_JOB_JARS_KEY = "job.jars";
+}
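
Note: a hedged sketch of how a Gobblin job's properties might reference these keys; the values below are hypothetical and only illustrate the constants, they are not taken from this PR.

    import azkaban.jobtype.connectors.gobblin.GobblinConstants;
    import azkaban.utils.Props;

    public class GobblinJobPropsExample {
      public static Props exampleJobProps() {
        Props jobProps = new Props();
        // Pick a preset; its properties are merged in by GobblinHadoopJob.loadPreset().
        jobProps.put(GobblinConstants.GOBBLIN_PRESET_KEY, "mysqlToHdfs");
        // Gobblin's working directory on HDFS (hypothetical path).
        jobProps.put(GobblinConstants.GOBBLIN_WORK_DIRECTORY_KEY, "/tmp/gobblin-work");
        // Property validation is on by default; set to "false" to skip it.
        jobProps.put(GobblinConstants.GOBBLIN_PROPERTIES_HELPER_ENABLED_KEY, "false");
        return jobProps;
      }
    }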
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/GobblinHadoopJob.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/GobblinHadoopJob.java
new file mode 100644
index 0000000..105e7b7
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/GobblinHadoopJob.java
@@ -0,0 +1,218 @@
+/*
+ * Copyright 2014-2016 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.connectors.gobblin;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Map;
+import java.util.Objects;
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.log4j.Logger;
+
+import com.google.common.collect.Maps;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Predicate;
+
+import azkaban.flow.CommonJobProperties;
+import azkaban.jobtype.HadoopJavaJob;
+import azkaban.jobtype.connectors.gobblin.helper.HdfsToMySqlValidator;
+import azkaban.jobtype.connectors.gobblin.helper.IPropertiesValidator;
+import azkaban.jobtype.connectors.gobblin.helper.MySqlToHdfsValidator;
+import azkaban.utils.Props;
+
+/**
+ * Integrates Azkaban with Gobblin. It prepares the job properties for Gobblin and uses HadoopJavaJob to kick off the job.
+ */
+public class GobblinHadoopJob extends HadoopJavaJob {
+  private static final String GOBBLIN_PRESET_COMMON_PROPERTIES_FILE_NAME = "common.properties";
+  private static final String GOBBLIN_QUERY_KEY = "source.querybased.query";
+  private static volatile Map<GobblinPresets, Properties> gobblinPresets;
+
+
+  public GobblinHadoopJob(String jobid, Props sysProps, Props jobProps, Logger log) {
+    super(jobid, sysProps, jobProps, log);
+    initializePresets();
+
+    jobProps.put(HadoopJavaJob.JOB_CLASS, "gobblin.azkaban.AzkabanJobLauncher");
+    jobProps.put("job.name", jobProps.get(CommonJobProperties.JOB_ID));
+    jobProps.put("launcher.type", "MAPREDUCE"); //Azkaban only supports MR mode
+    jobProps.put("fs.uri", sysProps.get("fs.uri")); //Azkaban should only support HDFS
+
+    //If the Gobblin jars are in HDFS, pass the HDFS path to Gobblin; otherwise pass the local file system path.
+    if (sysProps.containsKey(GobblinConstants.GOBBLIN_HDFS_JOB_JARS_KEY)) {
+      jobProps.put(GobblinConstants.GOBBLIN_HDFS_JOB_JARS_KEY, sysProps.getString(GobblinConstants.GOBBLIN_HDFS_JOB_JARS_KEY));
+    } else {
+      jobProps.put(GobblinConstants.GOBBLIN_JOB_JARS_KEY, sysProps.get("jobtype.classpath"));
+    }
+
+    loadPreset();
+    transformProperties();
+    getLog().info("Job properties for Gobblin: " + printableJobProperties(jobProps));
+  }
+
+  /**
+   * Factory method that provides an IPropertiesValidator for the given preset at runtime.
+   * Uses the factory method pattern because the set of presets is expected to grow.
+   * @param preset
+   * @return IPropertiesValidator
+   */
+  private static IPropertiesValidator getValidator(GobblinPresets preset) {
+    Objects.requireNonNull(preset);
+    switch (preset) {
+      case MYSQL_TO_HDFS:
+        return new MySqlToHdfsValidator();
+      case HDFS_TO_MYSQL:
+        return new HdfsToMySqlValidator();
+      default:
+        throw new UnsupportedOperationException("Preset " + preset + " is not supported");
+    }
+  }
+
+  /**
+   * Returns the job properties, excluding any property whose key contains both "pass" and "word".
+   * @param jobProps
+   */
+  @VisibleForTesting
+  Map<String, String> printableJobProperties(Props jobProps) {
+    Predicate<String> keyPredicate = new Predicate<String>() {
+
+      @Override
+      public boolean apply(String key) {
+        if (StringUtils.isEmpty(key)) {
+          return true;
+        }
+        key = key.toLowerCase();
+        return !(key.contains("pass") && key.contains("word"));
+      }
+
+    };
+    return Maps.filterKeys(jobProps.getFlattened(), keyPredicate);
+  }
+
+  /**
+   * Initializes presets and caches them in the preset map. As presets do not change while the server is up,
+   * this initialization happens only once per JVM.
+   */
+  private void initializePresets() {
+    if (gobblinPresets == null) {
+      synchronized (GobblinHadoopJob.class) {
+        if (gobblinPresets == null) {
+          gobblinPresets = Maps.newHashMap();
+          String gobblinPresetDirName = sysProps.getString(GobblinConstants.GOBBLIN_PRESET_DIR_KEY);
+          File gobblinPresetDir = new File(gobblinPresetDirName);
+          File[] presetFiles = gobblinPresetDir.listFiles();
+          if (presetFiles == null) {
+            return;
+          }
+
+          File commonPropertiesFile = new File(gobblinPresetDir, GOBBLIN_PRESET_COMMON_PROPERTIES_FILE_NAME);
+          if (!commonPropertiesFile.exists()) {
+            throw new IllegalStateException("Gobbline preset common properties file is missing "
+                + commonPropertiesFile.getAbsolutePath());
+          }
+
+          for (File f : presetFiles) {
+            if (GOBBLIN_PRESET_COMMON_PROPERTIES_FILE_NAME.equals(f.getName())) { //Don't load common one itself.
+              continue;
+            }
+
+            if (f.isFile()) {
+              Properties prop = new Properties();
+              try (InputStream commonIs = new BufferedInputStream(new FileInputStream(commonPropertiesFile));
+                  InputStream presetIs = new BufferedInputStream(new FileInputStream(f))) {
+                prop.load(commonIs);
+                prop.load(presetIs);
+
+                String presetName = f.getName().substring(0, f.getName().lastIndexOf('.')); //remove extension from the file name
+                gobblinPresets.put(GobblinPresets.fromName(presetName), prop);
+              } catch (IOException e) {
+                throw new RuntimeException(e);
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * If the job properties specify a preset, loads that preset's set of properties into the job properties for the Gobblin job.
+   * Also, if the user wants to validate the job properties (enabled by default), they are validated based on the preset,
+   * where the preset is essentially used as a proxy for the use case.
+   */
+  private void loadPreset() {
+    String presetName = jobProps.get(GobblinConstants.GOBBLIN_PRESET_KEY);
+    if (presetName == null) {
+      return;
+    }
+
+    GobblinPresets preset = GobblinPresets.fromName(presetName);
+    Properties presetProperties = gobblinPresets.get(preset);
+    if (presetProperties == null) {
+      throw new IllegalArgumentException("Preset " + presetName + " is not supported. Supported presets: "
+          + gobblinPresets.keySet());
+    }
+
+    getLog().info("Loading preset " + presetName + " : " + presetProperties);
+    Map<String, String> skipped = Maps.newHashMap();
+    for (String key : presetProperties.stringPropertyNames()) {
+      if (jobProps.containsKey(key)) {
+        skipped.put(key, presetProperties.getProperty(key));
+        continue;
+      }
+      jobProps.put(key, presetProperties.getProperty(key));
+    }
+    getLog().info("Loaded preset " + presetName);
+    if (!skipped.isEmpty()) {
+      getLog().info("Skipped some properties from preset as already exists in job properties. Skipped: " + skipped);
+    }
+
+    if (jobProps.getBoolean(GobblinConstants.GOBBLIN_PROPERTIES_HELPER_ENABLED_KEY, true)) {
+      getValidator(preset).validate(jobProps);
+    }
+  }
+
+  /**
+   * Transforms properties to make them work with Gobblin.
+   *
+   * e.g.: Gobblin fails when there is a semicolon in the SQL query, because it just appends " and 1=1;" to the query,
+   * making the syntax incorrect. Since a trailing semicolon is valid syntax, instead of expecting the user to remove it,
+   * Azkaban removes it for the user so the query works with Gobblin.
+   */
+  private void transformProperties() {
+    //Gobblin does not accept a SQL query that ends with a semicolon
+    String query = jobProps.getString(GOBBLIN_QUERY_KEY, null);
+    if(query == null) {
+      return;
+    }
+
+    query = query.trim();
+    int idx = -1;
+    if ((idx = query.indexOf(';')) >= 0) {
+      if(idx < query.length() - 1) {
+        //The query string has already been trimmed, so if the semicolon is not at the end of the
+        //query string, there is more than one statement.
+        throw new IllegalArgumentException(GOBBLIN_QUERY_KEY + " should consist of one SELECT statement. " + query);
+      }
+      query = query.substring(0, idx);
+      jobProps.put(GOBBLIN_QUERY_KEY, query);
+    }
+  }
+}
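
Note: the semicolon handling in transformProperties() can be illustrated with a small standalone sketch (no Azkaban dependencies; it mirrors the behavior of the method above and is only for illustration).

    public class TrailingSemicolonExample {
      // Returns the query with a single trailing semicolon removed;
      // throws if a semicolon appears before the end (i.e. more than one statement).
      static String stripTrailingSemicolon(String query) {
        query = query.trim();
        int idx = query.indexOf(';');
        if (idx < 0) {
          return query;                       // no semicolon: leave as-is
        }
        if (idx < query.length() - 1) {
          throw new IllegalArgumentException("Expected a single SELECT statement: " + query);
        }
        return query.substring(0, idx);       // drop the trailing semicolon
      }

      public static void main(String[] args) {
        System.out.println(stripTrailingSemicolon("SELECT * FROM member;"));  // SELECT * FROM member
        System.out.println(stripTrailingSemicolon("SELECT 1"));               // SELECT 1
      }
    }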
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/GobblinPresets.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/GobblinPresets.java
new file mode 100644
index 0000000..57758bc
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/GobblinPresets.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2014-2016 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.connectors.gobblin;
+
+import java.util.Map;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Maps;
+
+
+/**
+ * An enum of Gobblin presets. Gobblin has more than a hundred properties, and a GobblinPresets value represents a set of default properties.
+ * Using GobblinPresets, a user can reduce the number of input parameters, which consequently increases usability.
+ */
+public enum GobblinPresets {
+  MYSQL_TO_HDFS("mysqlToHdfs"),
+  HDFS_TO_MYSQL("hdfsToMysql");
+
+  private static final Map<String, GobblinPresets> NAME_TO_PRESET;
+  static {
+    Map<String, GobblinPresets> tmp = Maps.newHashMap();
+    for (GobblinPresets preset : GobblinPresets.values()) {
+      tmp.put(preset.name, preset);
+    }
+    NAME_TO_PRESET = ImmutableMap.copyOf(tmp);
+  }
+
+  private final String name;
+
+  private GobblinPresets(String name) {
+    this.name = name;
+  }
+
+  public static GobblinPresets fromName(String name) {
+    GobblinPresets preset = NAME_TO_PRESET.get(name);
+    if (preset == null) {
+      throw new IllegalArgumentException(name + " is unrecognized. Known presets: " + NAME_TO_PRESET.keySet());
+    }
+    return preset;
+  }
+}
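
Note: a quick usage sketch of the name-to-enum lookup, assuming the plugin classes are on the classpath; the exact wording of the error message is taken from fromName() above.

    import azkaban.jobtype.connectors.gobblin.GobblinPresets;

    public class PresetLookupExample {
      public static void main(String[] args) {
        // Valid names are the lower-camel strings declared on the enum constants.
        GobblinPresets preset = GobblinPresets.fromName("mysqlToHdfs");
        System.out.println(preset);          // MYSQL_TO_HDFS

        try {
          GobblinPresets.fromName("unknown");
        } catch (IllegalArgumentException e) {
          // e.g. "unknown is unrecognized. Known presets: [mysqlToHdfs, hdfsToMysql]"
          System.out.println(e.getMessage());
        }
      }
    }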
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/helper/HdfsToMySqlValidator.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/helper/HdfsToMySqlValidator.java
new file mode 100644
index 0000000..0e694e2
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/helper/HdfsToMySqlValidator.java
@@ -0,0 +1,21 @@
+package azkaban.jobtype.connectors.gobblin.helper;
+
+import azkaban.jobtype.connectors.gobblin.GobblinConstants;
+import azkaban.jobtype.javautils.ValidationUtils;
+import azkaban.utils.Props;
+
+public class HdfsToMySqlValidator implements IPropertiesValidator {
+
+  @Override
+  public void validate(Props props) {
+    ValidationUtils.validateAllNotEmpty(props
+        , GobblinConstants.GOBBLIN_WORK_DIRECTORY_KEY
+        , "jdbc.publisher.database_name" //Database
+        , "jdbc.publisher.table_name"    //Table
+        , "jdbc.publisher.username"
+        , "jdbc.publisher.password"
+        , "jdbc.publisher.url"
+        , "extract.table.type" //snapshot_only, append_only, snapshot_append
+        , "source.filebased.data.directory");
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/helper/IPropertiesValidator.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/helper/IPropertiesValidator.java
new file mode 100644
index 0000000..a6aee51
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/helper/IPropertiesValidator.java
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2014-2016 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.connectors.gobblin.helper;
+
+import azkaban.utils.Props;
+
+
+/**
+ * Interface for Gobblin job properties validators
+ */
+public interface IPropertiesValidator {
+
+  /**
+   * Validates props.
+   * @param props
+   * @throws UndefinedPropertyException if a required property is missing
+   * @throws IllegalArgumentException if a property is set incorrectly
+   */
+  public void validate(Props props);
+}
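
Note: new presets are expected to come with their own validator. A hedged sketch of what an additional implementation could look like is below; the class, the preset it implies, and the property keys are hypothetical and not part of this PR, and it assumes ValidationUtils exposes the same varargs helper used by the validators in this change.

    package azkaban.jobtype.connectors.gobblin.helper;

    import azkaban.jobtype.connectors.gobblin.GobblinConstants;
    import azkaban.jobtype.javautils.ValidationUtils;
    import azkaban.utils.Props;

    // Hypothetical validator for an HDFS-to-HDFS copy preset; illustration only.
    public class HdfsToHdfsValidator implements IPropertiesValidator {

      @Override
      public void validate(Props props) {
        ValidationUtils.validateAllNotEmpty(props
            , GobblinConstants.GOBBLIN_WORK_DIRECTORY_KEY
            , "source.filebased.data.directory"   // hypothetical: input directory
            , "data.publisher.final.dir");        // hypothetical: output directory
      }
    }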
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/helper/MySqlToHdfsValidator.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/helper/MySqlToHdfsValidator.java
new file mode 100644
index 0000000..c030351
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/connectors/gobblin/helper/MySqlToHdfsValidator.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2014-2016 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.connectors.gobblin.helper;
+
+import azkaban.jobtype.connectors.gobblin.GobblinConstants;
+import azkaban.jobtype.javautils.ValidationUtils;
+import azkaban.utils.Props;
+
+/**
+ * Property validator for preset mySqlToHdfs
+ */
+public class MySqlToHdfsValidator implements IPropertiesValidator {
+
+  @Override
+  public void validate(Props props) {
+
+    ValidationUtils.validateAllNotEmpty(props, GobblinConstants.GOBBLIN_WORK_DIRECTORY_KEY
+                                             , "source.querybased.schema" //Database
+                                             , "source.entity"            //Table
+                                             , "source.conn.host"
+                                             , "source.conn.port"
+                                             , "source.conn.username"
+                                             , "source.conn.password"
+                                             , "source.timezone"
+                                             , "extract.table.type" //snapshot_only, append_only, snapshot_append
+                                             , "data.publisher.final.dir"); //Output directory
+
+    //Validate parameters for watermark
+    ValidationUtils.validateAllOrNone(props, "source.querybased.extract.type"
+                                           , "extract.delta.fields");
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/examples/java/WordCount.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/examples/java/WordCount.java
new file mode 100644
index 0000000..b617765
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/examples/java/WordCount.java
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.examples.java;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
+import org.apache.log4j.Logger;
+
+import azkaban.jobtype.javautils.AbstractHadoopJob;
+import azkaban.utils.Props;
+
+public class WordCount extends AbstractHadoopJob {
+
+  private static final Logger logger = Logger.getLogger(WordCount.class);
+
+  private final String inputPath;
+  private final String outputPath;
+  private boolean forceOutputOverwrite;
+
+  public WordCount(String name, Props props) {
+    super(name, props);
+    this.inputPath = props.getString("input.path");
+    this.outputPath = props.getString("output.path");
+    this.forceOutputOverwrite =
+        props.getBoolean("force.output.overwrite", false);
+  }
+
+  public static class Map extends MapReduceBase implements
+      Mapper<LongWritable, Text, Text, IntWritable> {
+
+    static enum Counters {
+      INPUT_WORDS
+    };
+
+    private final static IntWritable one = new IntWritable(1);
+    private Text word = new Text();
+
+    private long numRecords = 0;
+
+    @Override
+    public void map(LongWritable key, Text value,
+        OutputCollector<Text, IntWritable> output, Reporter reporter)
+        throws IOException {
+      String line = value.toString();
+      StringTokenizer tokenizer = new StringTokenizer(line);
+      while (tokenizer.hasMoreTokens()) {
+        word.set(tokenizer.nextToken());
+        output.collect(word, one);
+        reporter.incrCounter(Counters.INPUT_WORDS, 1);
+      }
+
+      if ((++numRecords % 100) == 0) {
+        reporter.setStatus("Finished processing " + numRecords + " records "
+            + "from the input file");
+      }
+    }
+  }
+
+  public static class Reduce extends MapReduceBase implements
+      Reducer<Text, IntWritable, Text, IntWritable> {
+    @Override
+    public void reduce(Text key, Iterator<IntWritable> values,
+        OutputCollector<Text, IntWritable> output, Reporter reporter)
+        throws IOException {
+      int sum = 0;
+      while (values.hasNext()) {
+        sum += values.next().get();
+      }
+      output.collect(key, new IntWritable(sum));
+    }
+  }
+
+  @Override
+  public void run() throws Exception {
+    logger.info(String.format("Starting %s", getClass().getSimpleName()));
+
+    // hadoop conf should be on the classpath
+    JobConf jobconf = getJobConf();
+    jobconf.setJarByClass(WordCount.class);
+
+    jobconf.setOutputKeyClass(Text.class);
+    jobconf.setOutputValueClass(IntWritable.class);
+
+    jobconf.setMapperClass(Map.class);
+    jobconf.setReducerClass(Reduce.class);
+
+    jobconf.setInputFormat(TextInputFormat.class);
+    jobconf.setOutputFormat(TextOutputFormat.class);
+
+    FileInputFormat.addInputPath(jobconf, new Path(inputPath));
+    FileOutputFormat.setOutputPath(jobconf, new Path(outputPath));
+
+    if (forceOutputOverwrite) {
+      FileSystem fs =
+          FileOutputFormat.getOutputPath(jobconf).getFileSystem(jobconf);
+      fs.delete(FileOutputFormat.getOutputPath(jobconf), true);
+    }
+
+    super.run();
+  }
+
+}
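
Note: a hedged sketch of driving the example job directly, assuming a Hadoop configuration and input data are available on the classpath; the paths are hypothetical. In Azkaban this class would normally be launched through HadoopJavaJob rather than a local main.

    import azkaban.jobtype.examples.java.WordCount;
    import azkaban.utils.Props;

    public class WordCountDriverExample {
      public static void main(String[] args) throws Exception {
        Props props = new Props();
        props.put("input.path", "/tmp/wordcount/input");    // hypothetical HDFS path
        props.put("output.path", "/tmp/wordcount/output");  // hypothetical HDFS path
        props.put("force.output.overwrite", "true");        // delete the output dir if it exists

        new WordCount("wordcount-demo", props).run();
      }
    }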
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopConfigurationInjector.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopConfigurationInjector.java
new file mode 100644
index 0000000..701e61b
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopConfigurationInjector.java
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2014 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package azkaban.jobtype;
+
+import azkaban.flow.CommonJobProperties;
+import azkaban.utils.Props;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStream;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.log4j.Logger;
+
+
+/**
+ * HadoopConfigurationInjector is responsible for inserting links back to the
+ * Azkaban UI in configurations and for automatically injecting designated job
+ * properties into the Hadoop configuration.
+ * <p>
+ * It is assumed that the necessary links have already been loaded into the
+ * properties. After writing the necessary links as an XML file, as required by
+ * Hadoop's configuration, clients may add the links as a default resource
+ * using injectResources() so that they are included in any Configuration
+ * constructed.
+ */
+public class HadoopConfigurationInjector {
+  private static Logger _logger = Logger.getLogger(HadoopConfigurationInjector.class);
+
+  // File to which the Hadoop configuration to inject will be written.
+  private static final String INJECT_FILE = "hadoop-inject.xml";
+
+  // Prefix for properties to be automatically injected into the Hadoop conf.
+  public static final String INJECT_PREFIX = "hadoop-inject.";
+
+  public static final String WORKFLOW_ID_SEPERATOR = "$";
+  private static final String WORKFLOW_ID_CONFIG = "yarn.workflow.id";
+  /**
+   * To be called by the forked process to load the generated links and Hadoop
+   * configuration properties to automatically inject.
+   *
+   * @param props The Azkaban properties
+   */
+  public static void injectResources(Props props) {
+    // Add mapred, yarn and hdfs site configs (in addition to core-site, which
+    // is automatically added) as default resources before we add the injected
+    // configuration. This will cause the injected properties to override the
+    // default site properties (instead of vice-versa). This is safe to do,
+    // even when these site files don't exist for your Hadoop installation.
+    if (props.getBoolean("azkaban.inject.hadoop-site.configs", true)) {
+      Configuration.addDefaultResource("mapred-default.xml");
+      Configuration.addDefaultResource("mapred-site.xml");
+      Configuration.addDefaultResource("yarn-default.xml");
+      Configuration.addDefaultResource("yarn-site.xml");
+      Configuration.addDefaultResource("hdfs-default.xml");
+      Configuration.addDefaultResource("hdfs-site.xml");
+    }
+    Configuration.addDefaultResource(INJECT_FILE);
+  }
+
+  /**
+   * Writes out the XML configuration file that will be injected by the client
+   * as a configuration resource.
+   * <p>
+   * This file will include a series of links injected by Azkaban as well as
+   * any job properties that begin with the designated injection prefix.
+   *
+   * @param props The Azkaban properties
+   * @param workingDir The Azkaban job working directory
+   */
+  public static void prepareResourcesToInject(Props props, String workingDir) {
+    try {
+      Configuration conf = new Configuration(false);
+
+      // First, inject a series of Azkaban links. These are equivalent to
+      // CommonJobProperties.[EXECUTION,WORKFLOW,JOB,JOBEXEC,ATTEMPT]_LINK
+      addHadoopProperties(props);
+
+      // Next, automatically inject any properties that begin with the
+      // designated injection prefix.
+      Map<String, String> confProperties = props.getMapByPrefix(INJECT_PREFIX);
+
+      for (Map.Entry<String, String> entry : confProperties.entrySet()) {
+        String confKey = entry.getKey().replace(INJECT_PREFIX, "");
+        String confVal = entry.getValue();
+        if (confVal != null) {
+          conf.set(confKey, confVal);
+        }
+      }
+
+      // Now write out the configuration file to inject.
+      File file = getConfFile(props, workingDir, INJECT_FILE);
+      OutputStream xmlOut = new FileOutputStream(file);
+      conf.writeXml(xmlOut);
+      xmlOut.close();
+    } catch (Throwable e) {
+      _logger.error("Encountered error while preparing the Hadoop configuration resource file", e);
+    }
+  }
+
+  private static void addHadoopProperty(Props props, String propertyName) {
+      props.put(INJECT_PREFIX + propertyName, props.get(propertyName));
+  }
+
+  private static void addHadoopWorkflowProperty(Props props, String propertyName) {
+    String workflowID = props.get(CommonJobProperties.PROJECT_NAME)
+        + WORKFLOW_ID_SEPERATOR + props.get(CommonJobProperties.FLOW_ID);
+    props.put(INJECT_PREFIX + propertyName, workflowID);
+  }
+
+  private static void addHadoopProperties(Props props) {
+    String[] propsToInject = new String[]{
+        CommonJobProperties.EXEC_ID,
+        CommonJobProperties.FLOW_ID,
+        CommonJobProperties.JOB_ID,
+        CommonJobProperties.PROJECT_NAME,
+        CommonJobProperties.PROJECT_VERSION,
+        CommonJobProperties.EXECUTION_LINK,
+        CommonJobProperties.JOB_LINK,
+        CommonJobProperties.WORKFLOW_LINK,
+        CommonJobProperties.JOBEXEC_LINK,
+        CommonJobProperties.ATTEMPT_LINK,
+        CommonJobProperties.OUT_NODES,
+        CommonJobProperties.IN_NODES,
+        CommonJobProperties.PROJECT_LAST_CHANGED_DATE,
+        CommonJobProperties.PROJECT_LAST_CHANGED_BY,
+        CommonJobProperties.SUBMIT_USER
+    };
+
+    for(String propertyName : propsToInject) {
+      addHadoopProperty(props, propertyName);
+    }
+    addHadoopWorkflowProperty(props, WORKFLOW_ID_CONFIG);
+  }
+
+  /**
+   * Resolve the location of the file containing the configuration file.
+   *
+   * @param props The Azkaban properties
+   * @param workingDir The Azkaban job working directory
+   * @param fileName The desired configuration file name
+   */
+  public static File getConfFile(Props props, String workingDir, String fileName) {
+    File jobDir = new File(workingDir, getDirName(props));
+    if (!jobDir.exists()) {
+      jobDir.mkdir();
+    }
+    return new File(jobDir, fileName);
+  }
+
+  /**
+   * For classpath reasons, we'll put each link file in a separate directory.
+   * This must be called only after the job id has been inserted by the job.
+   *
+   * @param props The Azkaban properties
+   */
+  public static String getDirName(Props props) {
+    String dirSuffix = props.get(CommonJobProperties.NESTED_FLOW_PATH);
+
+    if ((dirSuffix == null) || (dirSuffix.length() == 0)) {
+      dirSuffix = props.get(CommonJobProperties.JOB_ID);
+      if ((dirSuffix == null) || (dirSuffix.length() == 0)) {
+        throw new RuntimeException("azkaban.flow.nested.path and azkaban.job.id were not set");
+      }
+    }
+
+    return "_resources_" + dirSuffix.replace(':', '_');
+  }
+
+  /**
+   * Gets the path to the directory in which the generated links and Hadoop
+   * conf properties files are written.
+   *
+   * @param props The Azkaban properties
+   * @param workingDir The Azkaban job working directory
+   */
+  public static String getPath(Props props, String workingDir) {
+    return new File(workingDir, getDirName(props)).toString();
+  }
+
+  /**
+   * Loads an Azkaban property into the Hadoop configuration.
+   *
+   * @param props The Azkaban properties
+   * @param conf The Hadoop configuration
+   * @param name The property name to load from the Azkaban properties into the Hadoop configuration
+   */
+  public static void loadProp(Props props, Configuration conf, String name) {
+    String prop = props.get(name);
+    if (prop != null) {
+      conf.set(name, prop);
+    }
+  }
+}
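
Note: a hedged sketch of the injection flow. The custom key and paths are hypothetical; in a real run the Azkaban executor also populates the exec/flow/job link properties injected above, and the generated _resources_ directory must be on the child process classpath, as the job types in this PR arrange via getClassPaths().

    import azkaban.flow.CommonJobProperties;
    import azkaban.jobtype.HadoopConfigurationInjector;
    import azkaban.utils.Props;
    import org.apache.hadoop.conf.Configuration;

    public class InjectionExample {
      public static void main(String[] args) {
        Props props = new Props();
        props.put(CommonJobProperties.JOB_ID, "demo-job");            // needed by getDirName()
        props.put("hadoop-inject.my.custom.setting", "some-value");   // prefix marks it for injection

        // Parent process: writes _resources_demo-job/hadoop-inject.xml under the working dir.
        HadoopConfigurationInjector.prepareResourcesToInject(props, "/tmp/workdir");

        // Forked child process (with that directory on its classpath):
        HadoopConfigurationInjector.injectResources(props);
        Configuration conf = new Configuration();
        System.out.println(conf.get("my.custom.setting"));            // "some-value", if the resource is found
      }
    }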
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopHiveJob.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopHiveJob.java
new file mode 100644
index 0000000..7df4569
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopHiveJob.java
@@ -0,0 +1,297 @@
+/*
+ * Copyright 2014 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import static org.apache.hadoop.security.UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION;
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.log4j.Logger;
+
+import azkaban.flow.CommonJobProperties;
+import azkaban.jobExecutor.JavaProcessJob;
+import azkaban.security.commons.HadoopSecurityManager;
+import azkaban.security.commons.HadoopSecurityManagerException;
+import azkaban.utils.Props;
+import azkaban.utils.StringUtils;
+
+public class HadoopHiveJob extends JavaProcessJob {
+
+  public static final String HIVE_SCRIPT = "hive.script";
+  private static final String HIVECONF_PARAM_PREFIX = "hiveconf.";
+  private static final String HIVEVAR_PARAM_PREFIX = "hivevar.";
+  public static final String HADOOP_SECURE_HIVE_WRAPPER =
+      "azkaban.jobtype.HadoopSecureHiveWrapper";
+
+  private String userToProxy = null;
+  private boolean shouldProxy = false;
+  private boolean obtainTokens = false;
+  private File tokenFile = null;
+
+  private HadoopSecurityManager hadoopSecurityManager;
+
+  private boolean debug = false;
+
+  public HadoopHiveJob(String jobid, Props sysProps, Props jobProps, Logger log)
+      throws IOException {
+    super(jobid, sysProps, jobProps, log);
+
+    getJobProps().put(CommonJobProperties.JOB_ID, jobid);
+
+    shouldProxy = getSysProps().getBoolean(HadoopSecurityManager.ENABLE_PROXYING, false);
+    getJobProps().put(HadoopSecurityManager.ENABLE_PROXYING, Boolean.toString(shouldProxy));
+    obtainTokens = getSysProps().getBoolean(HadoopSecurityManager.OBTAIN_BINARY_TOKEN, false);
+
+    debug = getJobProps().getBoolean("debug", false);
+
+    if (shouldProxy) {
+      getLog().info("Initiating hadoop security manager.");
+      try {
+        hadoopSecurityManager = HadoopJobUtils.loadHadoopSecurityManager(getSysProps(), log);
+      } catch (RuntimeException e) {
+        throw new RuntimeException("Failed to get hadoop security manager!" + e);
+      }
+    }
+  }
+
+  @Override
+  public void run() throws Exception {
+    HadoopConfigurationInjector.prepareResourcesToInject(getJobProps(),
+        getWorkingDirectory());
+
+    if (shouldProxy && obtainTokens) {
+      userToProxy = getJobProps().getString("user.to.proxy");
+      getLog().info("Need to proxy. Getting tokens.");
+      // get tokens in to a file, and put the location in props
+      Props props = new Props();
+      props.putAll(getJobProps());
+      props.putAll(getSysProps());
+      HadoopJobUtils.addAdditionalNamenodesToPropsFromMRJob(props, getLog());
+      tokenFile = HadoopJobUtils.getHadoopTokens(hadoopSecurityManager, props, getLog());
+      getJobProps().put("env." + HADOOP_TOKEN_FILE_LOCATION,
+          tokenFile.getAbsolutePath());
+    }
+
+    try {
+      super.run();
+    } catch (Throwable t) {
+      t.printStackTrace();
+      getLog().error("caught error running the job");
+      throw new Exception(t);
+    } finally {
+      if (tokenFile != null) {
+        HadoopJobUtils.cancelHadoopTokens(hadoopSecurityManager, userToProxy, tokenFile, getLog());
+        if (tokenFile.exists()) {
+          tokenFile.delete();
+        }
+      }
+    }
+  }
+
+  @Override
+  protected String getJavaClass() {
+    return HADOOP_SECURE_HIVE_WRAPPER;
+  }
+
+  @Override
+  protected String getJVMArguments() {
+    String args = super.getJVMArguments();
+
+    String typeUserGlobalJVMArgs =
+        getJobProps().getString("jobtype.global.jvm.args", null);
+    if (typeUserGlobalJVMArgs != null) {
+      args += " " + typeUserGlobalJVMArgs;
+    }
+    String typeSysGlobalJVMArgs =
+        getSysProps().getString("jobtype.global.jvm.args", null);
+    if (typeSysGlobalJVMArgs != null) {
+      args += " " + typeSysGlobalJVMArgs;
+    }
+    String typeUserJVMArgs = getJobProps().getString("jobtype.jvm.args", null);
+    if (typeUserJVMArgs != null) {
+      args += " " + typeUserJVMArgs;
+    }
+    String typeSysJVMArgs = getSysProps().getString("jobtype.jvm.args", null);
+    if (typeSysJVMArgs != null) {
+      args += " " + typeSysJVMArgs;
+    }
+
+    if (shouldProxy) {
+      info("Setting up secure proxy info for child process");
+      String secure;
+      secure =
+          " -D" + HadoopSecurityManager.USER_TO_PROXY + "="
+              + getJobProps().getString(HadoopSecurityManager.USER_TO_PROXY);
+      String extraToken =
+          getSysProps().getString(HadoopSecurityManager.OBTAIN_BINARY_TOKEN,
+              "false");
+      if (extraToken != null) {
+        secure +=
+            " -D" + HadoopSecurityManager.OBTAIN_BINARY_TOKEN + "="
+                + extraToken;
+      }
+      info("Secure settings = " + secure);
+      args += secure;
+    } else {
+      info("Not setting up secure proxy info for child process");
+    }
+
+    return args;
+  }
+
+  @Override
+  protected String getMainArguments() {
+    ArrayList<String> list = new ArrayList<String>();
+
+    // for hiveconf
+    Map<String, String> map = getHiveConf();
+    if (map != null) {
+      for (Map.Entry<String, String> entry : map.entrySet()) {
+        list.add("-hiveconf");
+        list.add(StringUtils.shellQuote(
+            entry.getKey() + "=" + entry.getValue(), StringUtils.SINGLE_QUOTE));
+      }
+    }
+
+    if (debug) {
+      list.add("-hiveconf");
+      list.add("hive.root.logger=INFO,console");
+    }
+
+    // for hivevar
+    Map<String, String> hiveVarMap = getHiveVar();
+    if (hiveVarMap != null) {
+      for (Map.Entry<String, String> entry : hiveVarMap.entrySet()) {
+        list.add("-hivevar");
+        list.add(StringUtils.shellQuote(
+            entry.getKey() + "=" + entry.getValue(), StringUtils.SINGLE_QUOTE));
+      }
+    }
+
+    list.add("-f");
+    list.add(getScript());
+
+    return StringUtils.join((Collection<String>) list, " ");
+  }
+
+  @Override
+  protected List<String> getClassPaths() {
+
+    List<String> classPath = super.getClassPaths();
+
+    // To add az-core jar classpath
+    classPath.add(getSourcePathFromClass(Props.class));
+
+    // To add az-common jar classpath
+    classPath.add(getSourcePathFromClass(JavaProcessJob.class));
+    classPath.add(getSourcePathFromClass(HadoopSecureHiveWrapper.class));
+    classPath.add(getSourcePathFromClass(HadoopSecurityManager.class));
+
+    classPath.add(HadoopConfigurationInjector.getPath(getJobProps(),
+        getWorkingDirectory()));
+    List<String> typeClassPath =
+        getSysProps().getStringList("jobtype.classpath", null, ",");
+    if (typeClassPath != null) {
+      // filled in when this jobtype is loaded
+      String pluginDir = getSysProps().get("plugin.dir");
+      for (String jar : typeClassPath) {
+        File jarFile = new File(jar);
+        if (!jarFile.isAbsolute()) {
+          jarFile = new File(pluginDir + File.separatorChar + jar);
+        }
+
+        if (!classPath.contains(jarFile.getAbsolutePath())) {
+          classPath.add(jarFile.getAbsolutePath());
+        }
+      }
+    }
+
+    List<String> typeGlobalClassPath =
+        getSysProps().getStringList("jobtype.global.classpath", null, ",");
+    if (typeGlobalClassPath != null) {
+      for (String jar : typeGlobalClassPath) {
+        if (!classPath.contains(jar)) {
+          classPath.add(jar);
+        }
+      }
+    }
+
+    return classPath;
+  }
+
+  protected String getScript() {
+    return getJobProps().getString(HIVE_SCRIPT);
+  }
+
+  protected Map<String, String> getHiveConf() {
+    return getJobProps().getMapByPrefix(HIVECONF_PARAM_PREFIX);
+  }
+
+  protected Map<String, String> getHiveVar() {
+    return getJobProps().getMapByPrefix(HIVEVAR_PARAM_PREFIX);
+  }
+
+  private static String getSourcePathFromClass(Class<?> containedClass) {
+    File file =
+        new File(containedClass.getProtectionDomain().getCodeSource()
+            .getLocation().getPath());
+
+    if (!file.isDirectory() && file.getName().endsWith(".class")) {
+      String name = containedClass.getName();
+      StringTokenizer tokenizer = new StringTokenizer(name, ".");
+      while (tokenizer.hasMoreTokens()) {
+        tokenizer.nextElement();
+        file = file.getParentFile();
+      }
+
+      return file.getPath();
+    } else {
+      return containedClass.getProtectionDomain().getCodeSource().getLocation()
+          .getPath();
+    }
+  }
+
+  /**
+   * This cancel method, in addition to the default canceling behavior, also kills the MR jobs launched by Hive
+   * on Hadoop
+   */
+  @Override
+  public void cancel() throws InterruptedException {
+    super.cancel();
+
+    info("Cancel called.  Killing the Hive launched MR jobs on the cluster");
+
+    String azExecId = jobProps.getString(CommonJobProperties.EXEC_ID);
+    final String logFilePath =
+        String.format("%s/_job.%s.%s.log", getWorkingDirectory(), azExecId,
+            getId());
+    info("log file path is: " + logFilePath);
+
+    HadoopJobUtils.proxyUserKillAllSpawnedHadoopJobs(logFilePath, jobProps, tokenFile, getLog());
+  }
+}
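
Note: how the hiveconf./hivevar. prefixed job properties end up on the Hive command line can be shown with a simplified standalone sketch of the argument building above (shell quoting omitted for brevity; the script path is hypothetical).

    import java.util.ArrayList;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    public class HiveArgsExample {
      // Turns {"mapred.job.queue.name": "default"} into ["-hiveconf", "mapred.job.queue.name=default"], etc.
      static List<String> toArgs(String flag, Map<String, String> params) {
        List<String> args = new ArrayList<>();
        for (Map.Entry<String, String> e : params.entrySet()) {
          args.add(flag);
          args.add(e.getKey() + "=" + e.getValue());
        }
        return args;
      }

      public static void main(String[] args) {
        Map<String, String> hiveconf = new LinkedHashMap<>();
        hiveconf.put("mapred.job.queue.name", "default");   // would come from "hiveconf.mapred.job.queue.name"
        List<String> cli = new ArrayList<>(toArgs("-hiveconf", hiveconf));
        cli.add("-f");
        cli.add("scripts/demo.hql");                         // hypothetical script path
        System.out.println(String.join(" ", cli));
      }
    }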
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopJavaJob.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopJavaJob.java
new file mode 100644
index 0000000..4d103ad
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopJavaJob.java
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import static org.apache.hadoop.security.UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION;
+
+import java.io.File;
+import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.log4j.Logger;
+
+import azkaban.flow.CommonJobProperties;
+import azkaban.jobExecutor.JavaProcessJob;
+import azkaban.security.commons.HadoopSecurityManager;
+import azkaban.utils.Props;
+
+public class HadoopJavaJob extends JavaProcessJob {
+
+  public static final String RUN_METHOD_PARAM = "method.run";
+  public static final String CANCEL_METHOD_PARAM = "method.cancel";
+  public static final String PROGRESS_METHOD_PARAM = "method.progress";
+
+  public static final String JOB_CLASS = "job.class";
+  public static final String DEFAULT_CANCEL_METHOD = "cancel";
+  public static final String DEFAULT_RUN_METHOD = "run";
+  public static final String DEFAULT_PROGRESS_METHOD = "getProgress";
+
+  private String _runMethod;
+  private String _cancelMethod;
+  private String _progressMethod;
+
+  private Object _javaObject = null;
+
+  private String userToProxy = null;
+  private boolean shouldProxy = false;
+  private boolean obtainTokens = false;
+  private boolean noUserClasspath = false;
+  private File tokenFile = null;
+
+  private HadoopSecurityManager hadoopSecurityManager;
+
+  public HadoopJavaJob(String jobid, Props sysProps, Props jobProps, Logger log)
+      throws RuntimeException {
+    super(jobid, sysProps, jobProps, log);
+
+    getJobProps().put(CommonJobProperties.JOB_ID, jobid);
+    shouldProxy =
+        getSysProps().getBoolean(HadoopSecurityManager.ENABLE_PROXYING, false);
+    getJobProps().put(HadoopSecurityManager.ENABLE_PROXYING,
+        Boolean.toString(shouldProxy));
+    obtainTokens =
+        getSysProps().getBoolean(HadoopSecurityManager.OBTAIN_BINARY_TOKEN,
+            false);
+    noUserClasspath =
+        getSysProps().getBoolean("azkaban.no.user.classpath", false);
+
+    if (shouldProxy) {
+      getLog().info("Initiating hadoop security manager.");
+      try {
+        hadoopSecurityManager =
+            HadoopJobUtils.loadHadoopSecurityManager(getSysProps(), log);
+      } catch (RuntimeException e) {
+        e.printStackTrace();
+        throw new RuntimeException("Failed to get hadoop security manager!"
+            + e.getCause());
+      }
+    }
+  }
+
+  @Override
+  protected String getJVMArguments() {
+    String args = super.getJVMArguments();
+
+    String typeUserGlobalJVMArgs =
+        getJobProps().getString("jobtype.global.jvm.args", null);
+    if (typeUserGlobalJVMArgs != null) {
+      args += " " + typeUserGlobalJVMArgs;
+    }
+    String typeSysGlobalJVMArgs =
+        getSysProps().getString("jobtype.global.jvm.args", null);
+    if (typeSysGlobalJVMArgs != null) {
+      args += " " + typeSysGlobalJVMArgs;
+    }
+    String typeUserJVMArgs = getJobProps().getString("jobtype.jvm.args", null);
+    if (typeUserJVMArgs != null) {
+      args += " " + typeUserJVMArgs;
+    }
+    String typeSysJVMArgs = getSysProps().getString("jobtype.jvm.args", null);
+    if (typeSysJVMArgs != null) {
+      args += " " + typeSysJVMArgs;
+    }
+    return args;
+  }
+
+  @Override
+  protected List<String> getClassPaths() {
+    List<String> classPath;
+    if (!noUserClasspath) {
+      classPath = super.getClassPaths();
+    } else {
+      getLog().info("Supressing user supplied classpath settings.");
+      classPath = new ArrayList<String>();
+    }
+
+    classPath.add(getSourcePathFromClass(HadoopJavaJobRunnerMain.class));
+
+    /**
+     * Todo kunkun-tang: The legacy code uses a quite outdated method to resolve
+     * Azkaban dependencies, and should be replaced later.
+     */
+
+    // To add az-core jar classpath
+    classPath.add(getSourcePathFromClass(Props.class));
+
+    // To add az-common jar classpath
+    classPath.add(getSourcePathFromClass(JavaProcessJob.class));
+    classPath.add(getSourcePathFromClass(HadoopSecurityManager.class));
+
+    classPath.add(HadoopConfigurationInjector.getPath(getJobProps(),
+        getWorkingDirectory()));
+
+    // merging classpaths from plugin.properties
+    mergeClassPaths(classPath,
+        getJobProps().getStringList("jobtype.classpath", null, ","));
+    // merging classpaths from private.properties
+    mergeClassPaths(classPath,
+        getSysProps().getStringList("jobtype.classpath", null, ","));
+
+    List<String> typeGlobalClassPath =
+        getSysProps().getStringList("jobtype.global.classpath", null, ",");
+    if (typeGlobalClassPath != null) {
+      for (String jar : typeGlobalClassPath) {
+        if (!classPath.contains(jar)) {
+          classPath.add(jar);
+        }
+      }
+    }
+
+    return classPath;
+  }
+
+  private void mergeClassPaths(List<String> classPath,
+      List<String> typeClassPath) {
+    if (typeClassPath != null) {
+      // filled in when this jobtype is loaded
+      String pluginDir = getSysProps().get("plugin.dir");
+      for (String jar : typeClassPath) {
+        File jarFile = new File(jar);
+        if (!jarFile.isAbsolute()) {
+          jarFile = new File(pluginDir + File.separatorChar + jar);
+        }
+
+        if (!classPath.contains(jarFile.getAbsolutePath())) {
+          classPath.add(jarFile.getAbsolutePath());
+        }
+      }
+    }
+  }
+
+  @Override
+  public void run() throws Exception {
+    HadoopConfigurationInjector.prepareResourcesToInject(getJobProps(),
+        getWorkingDirectory());
+
+    if (shouldProxy && obtainTokens) {
+      userToProxy = getJobProps().getString("user.to.proxy");
+      getLog().info("Need to proxy. Getting tokens.");
+      Props props = new Props();
+      props.putAll(getJobProps());
+      props.putAll(getSysProps());
+
+      HadoopJobUtils.addAdditionalNamenodesToPropsFromMRJob(props, getLog());
+      tokenFile =
+          HadoopJobUtils
+              .getHadoopTokens(hadoopSecurityManager, props, getLog());
+      getJobProps().put("env." + HADOOP_TOKEN_FILE_LOCATION,
+          tokenFile.getAbsolutePath());
+    }
+    try {
+      super.run();
+    } catch (Exception e) {
+      e.printStackTrace();
+      throw new Exception(e);
+    } finally {
+      if (tokenFile != null) {
+        try {
+          HadoopJobUtils.cancelHadoopTokens(hadoopSecurityManager, userToProxy,
+              tokenFile, getLog());
+        } catch (Throwable t) {
+          t.printStackTrace();
+          getLog().error("Failed to cancel tokens.");
+        }
+        if (tokenFile.exists()) {
+          tokenFile.delete();
+        }
+      }
+    }
+  }
+
+  private static String getSourcePathFromClass(Class<?> containedClass) {
+    File file =
+        new File(containedClass.getProtectionDomain().getCodeSource()
+            .getLocation().getPath());
+
+    if (!file.isDirectory() && file.getName().endsWith(".class")) {
+      String name = containedClass.getName();
+      StringTokenizer tokenizer = new StringTokenizer(name, ".");
+      while (tokenizer.hasMoreTokens()) {
+        tokenizer.nextElement();
+
+        file = file.getParentFile();
+      }
+      return file.getPath();
+    } else {
+      return containedClass.getProtectionDomain().getCodeSource().getLocation()
+          .getPath();
+    }
+  }
+
+  @Override
+  protected String getJavaClass() {
+    return HadoopJavaJobRunnerMain.class.getName();
+  }
+
+  @Override
+  public String toString() {
+    return "JavaJob{" + "_runMethod='" + _runMethod + '\''
+        + ", _cancelMethod='" + _cancelMethod + '\'' + ", _progressMethod='"
+        + _progressMethod + '\'' + ", _javaObject=" + _javaObject + ", props="
+        + getJobProps() + '}';
+  }
+
+  /**
+   * This cancel method, in addition to the default canceling behavior, also
+   * kills the MR jobs launched by this job on Hadoop
+   */
+  @Override
+  public void cancel() throws InterruptedException {
+    super.cancel();
+
+    info("Cancel called.  Killing the launched MR jobs on the cluster");
+
+    String azExecId = jobProps.getString(CommonJobProperties.EXEC_ID);
+    final String logFilePath =
+        String.format("%s/_job.%s.%s.log", getWorkingDirectory(), azExecId,
+            getId());
+    info("log file path is: " + logFilePath);
+
+    HadoopJobUtils.proxyUserKillAllSpawnedHadoopJobs(logFilePath, jobProps,
+        tokenFile, getLog());
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopJavaJobRunnerMain.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopJavaJobRunnerMain.java
new file mode 100644
index 0000000..ecab9ac
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopJavaJobRunnerMain.java
@@ -0,0 +1,427 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import static azkaban.security.commons.SecurityUtils.MAPREDUCE_JOB_CREDENTIALS_BINARY;
+import static org.apache.hadoop.security.UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileInputStream;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Writer;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.security.PrivilegedExceptionAction;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.token.Token;
+import org.apache.log4j.ConsoleAppender;
+import org.apache.log4j.Layout;
+import org.apache.log4j.Level;
+import org.apache.log4j.Logger;
+import org.apache.log4j.PatternLayout;
+
+import azkaban.jobExecutor.ProcessJob;
+import azkaban.security.commons.HadoopSecurityManager;
+import azkaban.utils.JSONUtils;
+import azkaban.utils.Props;
+
+public class HadoopJavaJobRunnerMain {
+
+  public static final String JOB_CLASS = "job.class";
+  public static final String DEFAULT_RUN_METHOD = "run";
+  public static final String DEFAULT_CANCEL_METHOD = "cancel";
+
+  // This is the Job interface method to get the properties generated by the
+  // job.
+  public static final String GET_GENERATED_PROPERTIES_METHOD =
+      "getJobGeneratedProperties";
+
+  public static final String CANCEL_METHOD_PARAM = "method.cancel";
+  public static final String RUN_METHOD_PARAM = "method.run";
+  public static final String[] PROPS_CLASSES = new String[] {
+      "azkaban.utils.Props", "azkaban.common.utils.Props" };
+
+  private static final Layout DEFAULT_LAYOUT = new PatternLayout("%p %m\n");
+
+  public final Logger _logger;
+
+  public String _cancelMethod;
+  public String _jobName;
+  public Object _javaObject;
+  private boolean _isFinished = false;
+
+  private static boolean securityEnabled;
+
+  public static void main(String[] args) throws Exception {
+    @SuppressWarnings("unused")
+    HadoopJavaJobRunnerMain wrapper = new HadoopJavaJobRunnerMain();
+  }
+
+  public HadoopJavaJobRunnerMain() throws Exception {
+    Runtime.getRuntime().addShutdownHook(new Thread() {
+      @Override
+      public void run() {
+        cancelJob();
+      }
+    });
+
+    try {
+      _jobName = System.getenv(ProcessJob.JOB_NAME_ENV);
+      String propsFile = System.getenv(ProcessJob.JOB_PROP_ENV);
+
+      _logger = Logger.getRootLogger();
+      _logger.removeAllAppenders();
+      ConsoleAppender appender = new ConsoleAppender(DEFAULT_LAYOUT);
+      appender.activateOptions();
+      _logger.addAppender(appender);
+      _logger.setLevel(Level.INFO); //Explicitly setting level to INFO
+
+      Properties props = new Properties();
+//      props.load(new BufferedReader(new FileReader(propsFile)));
+
+      BufferedReader br = new BufferedReader(new InputStreamReader(
+          new FileInputStream(propsFile), StandardCharsets.UTF_8));
+      props.load(br);
+
+      HadoopConfigurationInjector.injectResources(new Props(null, props));
+
+      final Configuration conf = new Configuration();
+
+      UserGroupInformation.setConfiguration(conf);
+      securityEnabled = UserGroupInformation.isSecurityEnabled();
+
+      _logger.info("Running job " + _jobName);
+      String className = props.getProperty(JOB_CLASS);
+      if (className == null) {
+        throw new Exception("Class name is not set.");
+      }
+      _logger.info("Class name " + className);
+
+      UserGroupInformation loginUser = null;
+      UserGroupInformation proxyUser = null;
+
+      if (shouldProxy(props)) {
+        String userToProxy = props.getProperty("user.to.proxy");
+        if (securityEnabled) {
+          String filelocation = System.getenv(HADOOP_TOKEN_FILE_LOCATION);
+          _logger.info("Found token file " + filelocation);
+          _logger.info("Security enabled is "
+              + UserGroupInformation.isSecurityEnabled());
+
+          _logger.info("Setting mapreduce.job.credentials.binary to "
+              + filelocation);
+          System.setProperty("mapreduce.job.credentials.binary", filelocation);
+
+          _logger.info("Proxying enabled.");
+
+          loginUser = UserGroupInformation.getLoginUser();
+
+          _logger.info("Current logged in user is " + loginUser.getUserName());
+
+          proxyUser =
+              UserGroupInformation.createProxyUser(userToProxy, loginUser);
+          for (Token<?> token : loginUser.getTokens()) {
+            proxyUser.addToken(token);
+          }
+        } else {
+          proxyUser = UserGroupInformation.createRemoteUser(userToProxy);
+        }
+        _logger.info("Proxied as user " + userToProxy);
+      }
+
+      // Create the object using proxy
+      if (shouldProxy(props)) {
+        _javaObject =
+            getObjectAsProxyUser(props, _logger, _jobName, className, proxyUser);
+      } else {
+        _javaObject = getObject(_jobName, className, props, _logger);
+      }
+
+      if (_javaObject == null) {
+        _logger.info("Could not create java object to run job: " + className);
+        throw new Exception("Could not create running object");
+      }
+      _logger.info("Got object " + _javaObject.toString());
+
+      _cancelMethod =
+          props.getProperty(CANCEL_METHOD_PARAM, DEFAULT_CANCEL_METHOD);
+
+      final String runMethod =
+          props.getProperty(RUN_METHOD_PARAM, DEFAULT_RUN_METHOD);
+      _logger.info("Invoking method " + runMethod);
+
+      if (shouldProxy(props)) {
+        _logger.info("Proxying enabled.");
+        runMethodAsUser(props, _javaObject, runMethod, proxyUser);
+      } else {
+        _logger.info("Proxy check failed, not proxying run.");
+        runMethod(_javaObject, runMethod);
+      }
+
+      _isFinished = true;
+
+      // Get the generated properties and store them to disk, to be read
+      // by ProcessJob.
+      try {
+        final Method generatedPropertiesMethod =
+            _javaObject.getClass().getMethod(GET_GENERATED_PROPERTIES_METHOD,
+                new Class<?>[] {});
+        Object outputGendProps =
+            generatedPropertiesMethod.invoke(_javaObject, new Object[] {});
+
+        if (outputGendProps != null) {
+          final Method toPropertiesMethod =
+              outputGendProps.getClass().getMethod("toProperties",
+                  new Class<?>[] {});
+          Properties properties =
+              (Properties) toPropertiesMethod.invoke(outputGendProps,
+                  new Object[] {});
+
+          Props outputProps = new Props(null, properties);
+          outputGeneratedProperties(outputProps);
+        } else {
+          _logger.info(GET_GENERATED_PROPERTIES_METHOD
+              + " method returned null.  No properties to pass along");
+        }
+      } catch (NoSuchMethodException e) {
+        _logger.info(String.format(
+            "Apparently there isn't a method[%s] on object[%s], using "
+                + "empty Props object instead.",
+            GET_GENERATED_PROPERTIES_METHOD, _javaObject));
+        outputGeneratedProperties(new Props());
+      }
+    } catch (Exception e) {
+      _isFinished = true;
+      throw e;
+    }
+  }
+
+  private void runMethodAsUser(Properties props, final Object obj,
+      final String runMethod, final UserGroupInformation ugi)
+      throws IOException, InterruptedException {
+    ugi.doAs(new PrivilegedExceptionAction<Void>() {
+      @Override
+      public Void run() throws Exception {
+
+        Configuration conf = new Configuration();
+        if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) {
+          conf.set(MAPREDUCE_JOB_CREDENTIALS_BINARY,
+              System.getenv(HADOOP_TOKEN_FILE_LOCATION));
+        }
+
+        runMethod(obj, runMethod);
+        return null;
+      }
+    });
+  }
+
+  private void runMethod(Object obj, String runMethod)
+      throws IllegalAccessException, InvocationTargetException,
+      NoSuchMethodException {
+    obj.getClass().getMethod(runMethod, new Class<?>[] {}).invoke(obj);
+  }
+
+  private void outputGeneratedProperties(Props outputProperties) {
+    _logger.info("Outputting generated properties to "
+        + ProcessJob.JOB_OUTPUT_PROP_FILE);
+
+    if (outputProperties == null) {
+      _logger.info("  no gend props");
+      return;
+    }
+    for (String key : outputProperties.getKeySet()) {
+      _logger
+          .info("  gend prop " + key + " value:" + outputProperties.get(key));
+    }
+
+    String outputFileStr = System.getenv(ProcessJob.JOB_OUTPUT_PROP_FILE);
+    if (outputFileStr == null) {
+      return;
+    }
+
+    Map<String, String> properties = new LinkedHashMap<String, String>();
+    for (String key : outputProperties.getKeySet()) {
+      properties.put(key, outputProperties.get(key));
+    }
+
+    Writer writer = null;
+    try {
+//      writer = new BufferedWriter(new FileWriter(outputFileStr));
+      writer = Files.newBufferedWriter(Paths.get(outputFileStr), Charset.defaultCharset());
+
+      JSONUtils.writePropsNoJarDependency(properties, writer);
+    } catch (Exception e) {
+      _logger.error("Failed to write the generated output properties file", e);
+    } finally {
+      if (writer != null) {
+        try {
+          writer.close();
+        } catch (IOException e) {
+        }
+      }
+    }
+  }
+
+  public void cancelJob() {
+    if (_isFinished) {
+      return;
+    }
+    _logger.info("Attempting to call cancel on this job");
+    if (_javaObject != null) {
+      Method method = null;
+
+      try {
+        method = _javaObject.getClass().getMethod(_cancelMethod);
+      } catch (SecurityException e) {
+      } catch (NoSuchMethodException e) {
+      }
+
+      if (method != null)
+        try {
+          method.invoke(_javaObject);
+        } catch (Exception e) {
+          if (_logger != null) {
+            _logger.error("Cancel method failed! ", e);
+          }
+        }
+      else {
+        throw new RuntimeException("Job " + _jobName
+            + " does not have cancel method " + _cancelMethod);
+      }
+    }
+  }
+
+  private static Object getObjectAsProxyUser(final Properties prop,
+      final Logger logger, final String jobName, final String className,
+      final UserGroupInformation ugi) throws Exception {
+
+    Object obj = ugi.doAs(new PrivilegedExceptionAction<Object>() {
+      @Override
+      public Object run() throws Exception {
+        return getObject(jobName, className, prop, logger);
+      }
+    });
+
+    return obj;
+  }
+
+  private static Object getObject(String jobName, String className,
+      Properties properties, Logger logger) throws Exception {
+
+    Class<?> runningClass =
+        HadoopJavaJobRunnerMain.class.getClassLoader().loadClass(className);
+
+    if (runningClass == null) {
+      throw new Exception("Class " + className
+          + " was not found. Cannot run job.");
+    }
+
+    Class<?> propsClass = null;
+    for (String propClassName : PROPS_CLASSES) {
+      try {
+        propsClass =
+            HadoopJavaJobRunnerMain.class.getClassLoader().loadClass(
+                propClassName);
+      } catch (ClassNotFoundException e) {
+      }
+
+      if (propsClass != null
+          && getConstructor(runningClass, String.class, propsClass) != null) {
+        // is this the props class
+        break;
+      }
+      propsClass = null;
+    }
+
+    Object obj = null;
+    if (propsClass != null
+        && getConstructor(runningClass, String.class, propsClass) != null) {
+      // Create props class
+      Constructor<?> propsCon =
+          getConstructor(propsClass, propsClass, Properties[].class);
+      Object props =
+          propsCon.newInstance(null, new Properties[] { properties });
+
+      Constructor<?> con =
+          getConstructor(runningClass, String.class, propsClass);
+      logger.info("Constructor found " + con.toGenericString());
+      obj = con.newInstance(jobName, props);
+    } else if (getConstructor(runningClass, String.class, Properties.class) != null) {
+
+      Constructor<?> con =
+          getConstructor(runningClass, String.class, Properties.class);
+      logger.info("Constructor found " + con.toGenericString());
+      obj = con.newInstance(jobName, properties);
+    } else if (getConstructor(runningClass, String.class, Map.class) != null) {
+      Constructor<?> con =
+          getConstructor(runningClass, String.class, Map.class);
+      logger.info("Constructor found " + con.toGenericString());
+
+      HashMap<Object, Object> map = new HashMap<Object, Object>();
+      for (Map.Entry<Object, Object> entry : properties.entrySet()) {
+        map.put(entry.getKey(), entry.getValue());
+      }
+      obj = con.newInstance(jobName, map);
+    } else if (getConstructor(runningClass, String.class) != null) {
+      Constructor<?> con = getConstructor(runningClass, String.class);
+      logger.info("Constructor found " + con.toGenericString());
+      obj = con.newInstance(jobName);
+    } else if (getConstructor(runningClass) != null) {
+      Constructor<?> con = getConstructor(runningClass);
+      logger.info("Constructor found " + con.toGenericString());
+      obj = con.newInstance();
+    } else {
+      logger.error("Constructor not found. Listing available Constructors.");
+      for (Constructor<?> c : runningClass.getConstructors()) {
+        logger.info(c.toGenericString());
+      }
+    }
+    return obj;
+  }
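+
+  /*
+   * Illustrative sketch of a user job class this runner can construct and drive
+   * (the class name and bodies below are hypothetical). Constructors are tried
+   * in the order (String, Props), (String, Properties), (String, Map), (String),
+   * and finally the no-arg constructor:
+   *
+   *   public class MyJavaJob {
+   *     public MyJavaJob(String jobName, azkaban.utils.Props props) { ... }
+   *     public void run() throws Exception { ... }      // invoked via method.run
+   *     public void cancel() throws Exception { ... }   // invoked via method.cancel
+   *     public azkaban.utils.Props getJobGeneratedProperties() { ... }
+   *   }
+   */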
+
+  private static Constructor<?> getConstructor(Class<?> c, Class<?>... args) {
+    try {
+      Constructor<?> cons = c.getConstructor(args);
+      return cons;
+    } catch (NoSuchMethodException e) {
+      return null;
+    }
+  }
+
+  public boolean shouldProxy(Properties props) {
+    String shouldProxy =
+        props.getProperty(HadoopSecurityManager.ENABLE_PROXYING);
+
+    return shouldProxy != null && shouldProxy.equals("true");
+  }
+
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopJobUtils.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopJobUtils.java
new file mode 100644
index 0000000..d1fbfe5
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopJobUtils.java
@@ -0,0 +1,596 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.nio.charset.StandardCharsets;
+import java.security.PrivilegedExceptionAction;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.client.api.YarnClient;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.hadoop.yarn.exceptions.YarnException;
+import org.apache.log4j.Logger;
+
+import azkaban.security.commons.HadoopSecurityManager;
+import azkaban.security.commons.HadoopSecurityManagerException;
+import azkaban.utils.Props;
+
+/**
+ * <pre>
+ * Many common methods required by the Hadoop*Job classes are consolidated
+ * here.
+ *
+ * Methods here include getting/setting Hadoop tokens,
+ * methods for manipulating lib folder paths and jar paths passed in from the Azkaban prop file,
+ * and methods for helping to parse logs for application ids
+ * and to kill those applications via YARN (very helpful during the cancel method).
+ *
+ * </pre>
+ *
+ *
+ * @see azkaban.jobtype.HadoopSparkJob
+ * @see HadoopHiveJob
+ * @see HadoopPigJob
+ * @see HadoopJavaJob
+ */
+
+public class HadoopJobUtils {
+  public static String MATCH_ALL_REGEX = ".*";
+
+  public static String MATCH_NONE_REGEX = ".^";
+
+  public static final String HADOOP_SECURITY_MANAGER_CLASS_PARAM = "hadoop.security.manager.class";
+
+  // the regex to look for while looking for application id's in the hadoop log
+  public static final Pattern APPLICATION_ID_PATTERN = Pattern
+          .compile("^(application_\\d+_\\d+).*");
+
+  // Azkaban built in property name
+  public static final String JOBTYPE_GLOBAL_JVM_ARGS = "jobtype.global.jvm.args";
+
+  // Azkaban built in property name
+  public static final String JOBTYPE_JVM_ARGS = "jobtype.jvm.args";
+
+  // Azkaban built in property name
+  public static final String JVM_ARGS = "jvm.args";
+
+  // MapReduce config for specifying additional namenodes for delegation tokens
+  public static final String MAPREDUCE_JOB_OTHER_NAMENODES = "mapreduce.job.hdfs-servers";
+
+  // Azkaban property for listing additional namenodes for delegation tokens
+  private static final String OTHER_NAMENODES_PROPERTY = "other_namenodes";
+
+  /**
+   * Invalidates a Hadoop authentication token file
+   *
+   * @param hadoopSecurityManager
+   * @param userToProxy
+   * @param tokenFile
+   * @param log
+   */
+  public static void cancelHadoopTokens(HadoopSecurityManager hadoopSecurityManager,
+          String userToProxy, File tokenFile, Logger log) {
+    try {
+      hadoopSecurityManager.cancelTokens(tokenFile, userToProxy, log);
+    } catch (HadoopSecurityManagerException e) {
+      log.error("Failed to cancel tokens: " + e.getMessage(), e);
+    } catch (Exception e) {
+      log.error("Failed to cancel tokens: " + e.getMessage(), e);
+    }
+  }
+
+  /**
+   * Based on the HADOOP_SECURITY_MANAGER_CLASS_PARAM setting in the incoming props, finds the
+   * correct HadoopSecurityManager Java class
+   *
+   * @param props
+   * @param log
+   * @return a HadoopSecurityManager object
+   * @throws RuntimeException if the class cannot be found or instantiated
+   */
+  public static HadoopSecurityManager loadHadoopSecurityManager(Props props, Logger log)
+          throws RuntimeException {
+
+    Class<?> hadoopSecurityManagerClass = props.getClass(HADOOP_SECURITY_MANAGER_CLASS_PARAM, true,
+            HadoopJobUtils.class.getClassLoader());
+    log.info("Loading hadoop security manager " + hadoopSecurityManagerClass.getName());
+    HadoopSecurityManager hadoopSecurityManager = null;
+
+    try {
+      Method getInstanceMethod = hadoopSecurityManagerClass.getMethod("getInstance", Props.class);
+      hadoopSecurityManager = (HadoopSecurityManager) getInstanceMethod.invoke(
+              hadoopSecurityManagerClass, props);
+    } catch (InvocationTargetException e) {
+      String errMsg = "Could not instantiate Hadoop Security Manager "
+              + hadoopSecurityManagerClass.getName() + e.getCause();
+      log.error(errMsg);
+      throw new RuntimeException(errMsg, e);
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+
+    return hadoopSecurityManager;
+
+  }
+
+  /**
+   * The same as {@link #addAdditionalNamenodesToProps}, but assumes that the
+   * calling job is MapReduce-based and so uses the
+   * {@link #MAPREDUCE_JOB_OTHER_NAMENODES} from a {@link Configuration} object
+   * to get the list of additional namenodes.
+   * @param props Props to add the new Namenode URIs to.
+   * @see #addAdditionalNamenodesToProps(Props, String)
+   */
+  public static void addAdditionalNamenodesToPropsFromMRJob(Props props, Logger log) {
+    String additionalNamenodes =
+        (new Configuration()).get(MAPREDUCE_JOB_OTHER_NAMENODES);
+    if (additionalNamenodes != null && additionalNamenodes.length() > 0) {
+      log.info("Found property " + MAPREDUCE_JOB_OTHER_NAMENODES +
+          " = " + additionalNamenodes + "; setting additional namenodes");
+      HadoopJobUtils.addAdditionalNamenodesToProps(props, additionalNamenodes);
+    }
+  }
+
+  /**
+   * Takes the list of other Namenodes from which to fetch delegation tokens,
+   * the {@link #OTHER_NAMENODES_PROPERTY} property, from Props and inserts it
+   * back with the addition of the potentially JobType-specific Namenode URIs
+   * from additionalNamenodes. Modifies props in-place.
+   * @param props Props to add the new Namenode URIs to.
+   * @param additionalNamenodes Comma-separated list of Namenode URIs from which to fetch
+   *                            delegation tokens.
+   */
+  public static void addAdditionalNamenodesToProps(Props props, String additionalNamenodes) {
+    String otherNamenodes = props.get(OTHER_NAMENODES_PROPERTY);
+    if (otherNamenodes != null && otherNamenodes.length() > 0) {
+      props.put(OTHER_NAMENODES_PROPERTY, otherNamenodes + "," + additionalNamenodes);
+    } else {
+      props.put(OTHER_NAMENODES_PROPERTY, additionalNamenodes);
+    }
+  }
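+
+  /*
+   * Usage sketch (illustrative only; the URIs below are hypothetical): if props
+   * already contains other_namenodes=hdfs://nn1:8020, then
+   *
+   *   HadoopJobUtils.addAdditionalNamenodesToProps(props, "hdfs://nn2:8020");
+   *
+   * leaves the property as other_namenodes=hdfs://nn1:8020,hdfs://nn2:8020.
+   */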
+
+  /**
+   * Fetches Hadoop tokens as the Azkaban user
+   *
+   * @param hadoopSecurityManager
+   * @param props
+   * @param log
+   * @return
+   * @throws HadoopSecurityManagerException
+   */
+  public static File getHadoopTokens(HadoopSecurityManager hadoopSecurityManager, Props props,
+          Logger log) throws HadoopSecurityManagerException {
+
+    File tokenFile = null;
+    try {
+      tokenFile = File.createTempFile("mr-azkaban", ".token");
+    } catch (Exception e) {
+      throw new HadoopSecurityManagerException("Failed to create the token file.", e);
+    }
+
+    hadoopSecurityManager.prefetchToken(tokenFile, props, log);
+
+    return tokenFile;
+  }
+
+  /**
+   * <pre>
+   * If there's a * specification in the "jar" argument (e.g. jar=./lib/*,./lib2/*),
+   * this method resolves the * into the actual jar names inside the folder, in order.
+   * This is needed because Spark 1.4 doesn't seem to do the resolution for users.
+   *
+   * </pre>
+   *
+   * @param unresolvedJarSpec
+   * @return jar file list, comma separated, all .../* expanded into actual jar names in order
+   *
+   */
+  public static String resolveWildCardForJarSpec(String workingDirectory, String unresolvedJarSpec,
+          Logger log) {
+
+    log.debug("resolveWildCardForJarSpec: unresolved jar specification: " + unresolvedJarSpec);
+    log.debug("working directory: " + workingDirectory);
+
+    if (unresolvedJarSpec == null || unresolvedJarSpec.isEmpty())
+      return "";
+
+    StringBuilder resolvedJarSpec = new StringBuilder();
+
+    String[] unresolvedJarSpecList = unresolvedJarSpec.split(",");
+    for (String s : unresolvedJarSpecList) {
+      // if resolution is needed
+      if (s.endsWith("*")) {
+        // remove last 2 characters to get to the folder
+        String dirName = String.format("%s/%s", workingDirectory, s.substring(0, s.length() - 2));
+
+        File[] jars = null;
+        try {
+          jars = getFilesInFolderByRegex(new File(dirName), ".*jar");
+        } catch (FileNotFoundException fnfe) {
+          log.warn("folder does not exist: " + dirName);
+          continue;
+        }
+
+        // if the folder is there, add them to the jar list
+        for (File jar : jars) {
+          resolvedJarSpec.append(jar.toString()).append(",");
+        }
+      } else { // no need for resolution
+        resolvedJarSpec.append(s).append(",");
+      }
+    }
+
+    log.debug("resolveWildCardForJarSpec: resolvedJarSpec: " + resolvedJarSpec);
+
+    // remove the trailing comma
+    int lastCharIndex = resolvedJarSpec.length() - 1;
+    if (lastCharIndex >= 0 && resolvedJarSpec.charAt(lastCharIndex) == ',') {
+      resolvedJarSpec.deleteCharAt(lastCharIndex);
+    }
+
+    return resolvedJarSpec.toString();
+  }
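+
+  /*
+   * Usage sketch (illustrative only; the jar names below are hypothetical):
+   *
+   *   String resolved = HadoopJobUtils.resolveWildCardForJarSpec(
+   *       workingDir, "./lib/*,extra.jar", log);
+   *   // expands ./lib/* into the jar files found under the working directory's
+   *   // lib folder, leaves extra.jar as is, and joins everything with commas
+   */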
+
+  /**
+   * <pre>
+   * Spark-submit accepts an execution jar or a Python file.
+   * This method looks for the proper user execution jar or Python file.
+   * The user input is expected in the following 3 formats:
+   *   1. ./lib/abc
+   *   2. ./lib/abc.jar
+   *   3. ./lib/abc.py
+   *
+   * This method uses prefix matching to find any jar/py of the form abc*.(jar|py),
+   * so that users can bump jar versions without modifying their Hadoop DSL.
+   *
+   * This method throws an Exception if more than one jar matching the prefix is found.
+   *
+   * @param workingDirectory
+   * @param userSpecifiedJarName
+   * @return the resolved actual jar/py file name to execute
+   */
+  public static String resolveExecutionJarName(String workingDirectory,
+          String userSpecifiedJarName, Logger log) {
+
+    if (log.isDebugEnabled()) {
+      String debugMsg = String.format(
+              "Resolving execution jar name: working directory: %s,  user specified name: %s",
+              workingDirectory, userSpecifiedJarName);
+      log.debug(debugMsg);
+    }
+
+    // in case user decides to specify with abc.jar, instead of only abc
+    if (userSpecifiedJarName.endsWith(".jar")) {
+      userSpecifiedJarName = userSpecifiedJarName.replace(".jar", "");
+    } else if (userSpecifiedJarName.endsWith(".py")) {
+      userSpecifiedJarName = userSpecifiedJarName.replace(".py", "");
+    }
+
+    // can't use java 1.7 stuff, reverting to a slightly ugly implementation
+    String userSpecifiedJarPath = String.format("%s/%s", workingDirectory, userSpecifiedJarName);
+    int lastIndexOfSlash = userSpecifiedJarPath.lastIndexOf("/");
+    final String jarPrefix = userSpecifiedJarPath.substring(lastIndexOfSlash + 1);
+    final String dirName = userSpecifiedJarPath.substring(0, lastIndexOfSlash);
+
+    if (log.isDebugEnabled()) {
+      String debugMsg = String.format("Resolving execution jar name: dirname: %s, jar name: %s",
+              dirName, jarPrefix);
+      log.debug(debugMsg);
+    }
+
+    File[] potentialExecutionJarList;
+    try {
+      potentialExecutionJarList = getFilesInFolderByRegex(new File(dirName), jarPrefix + ".*(jar|py)");
+    } catch (FileNotFoundException e) {
+      throw new IllegalStateException(
+              "execution jar is suppose to be in this folder, but the folder doesn't exist: "
+                      + dirName);
+    }
+
+    if (potentialExecutionJarList.length == 0) {
+      throw new IllegalStateException("unable to find execution jar for Spark at path: "
+              + userSpecifiedJarPath + "*.(jar|py)");
+    } else if (potentialExecutionJarList.length > 1) {
+      throw new IllegalStateException(
+              "I find more than one matching instance of the execution jar at the path, don't know which one to use: "
+                      + userSpecifiedJarPath + "*.(jar|py)");
+    }
+
+    String resolvedJarName = potentialExecutionJarList[0].toString();
+    log.info("Resolving execution jar/py name: resolvedJarName: " + resolvedJarName);
+    return resolvedJarName;
+  }
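+
+  /*
+   * Usage sketch (illustrative only; the file name below is hypothetical): given
+   * a single ./lib/wordcount-1.2.3.jar in the working directory,
+   *
+   *   HadoopJobUtils.resolveExecutionJarName(workingDir, "./lib/wordcount", log);
+   *   // resolves the prefix to .../lib/wordcount-1.2.3.jar; two matching jars
+   *   // would raise an IllegalStateException instead
+   */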
+
+  /**
+   *
+   * @return a list of files in the given folder that match the regex. It may be empty, but will
+   *         never be null
+   * @throws FileNotFoundException
+   */
+  private static File[] getFilesInFolderByRegex(File folder, final String regex)
+          throws FileNotFoundException {
+    // sanity check
+
+    if (!folder.exists()) {
+      throw new FileNotFoundException();
+    }
+    if (!folder.isDirectory()) {
+      throw new IllegalStateException(
+              "execution jar is suppose to be in this folder, but the object present is not a directory: "
+                      + folder);
+    }
+
+    File[] matchingFiles = folder.listFiles(new FilenameFilter() {
+      @Override
+      public boolean accept(File dir, String name) {
+        return name.matches(regex);
+      }
+    });
+
+    if (matchingFiles == null) {
+      throw new IllegalStateException(
+              "the File[] matchingFiles variable is null.  This means an IOException occured while doing listFiles.  Please check disk availability and retry again");
+    }
+
+    return matchingFiles;
+  }
+
+  /**
+   * This method is a decorator around the killAllSpawnedHadoopJobs method.
+   * It takes additional parameters to determine whether killAllSpawnedHadoopJobs
+   * needs to be executed via doAs as a different user.
+   *
+   * @param logFilePath Azkaban log file path
+   * @param jobProps Azkaban job props
+   * @param tokenFile Pass in the tokenFile if the value is known. It is ok to skip if the token file is in the environment variable
+   * @param log a usable logger
+   */
+  public static void proxyUserKillAllSpawnedHadoopJobs(final String logFilePath, Props jobProps, File tokenFile, final Logger log) {
+    Properties properties = new Properties();
+    properties.putAll(jobProps.getFlattened());
+
+    try {
+      if (HadoopSecureWrapperUtils.shouldProxy(properties)) {
+        UserGroupInformation proxyUser =
+            HadoopSecureWrapperUtils.setupProxyUser(properties,
+                tokenFile.getAbsolutePath(), log);
+        proxyUser.doAs(new PrivilegedExceptionAction<Void>() {
+          @Override
+          public Void run() throws Exception {
+            HadoopJobUtils.killAllSpawnedHadoopJobs(logFilePath, log);
+            return null;
+          }
+        });
+      } else {
+        HadoopJobUtils.killAllSpawnedHadoopJobs(logFilePath, log);
+      }
+    } catch (Throwable t) {
+      log.warn("something happened while trying to kill all spawned jobs", t);
+    }
+  }
+
+
+  /**
+   * Given a log file, this method will find all the Hadoop jobs it has launched, and kill them
+   *
+   * Only works with Hadoop2
+   *
+   * @param logFilePath
+   * @param log
+   * @return a Set<String>. The set will contain the applicationIds that this job tried to kill.
+   */
+  public static Set<String> killAllSpawnedHadoopJobs(String logFilePath, Logger log) {
+    Set<String> allSpawnedJobs = findApplicationIdFromLog(logFilePath, log);
+    log.info("applicationIds to kill: " + allSpawnedJobs);
+
+    for (String appId : allSpawnedJobs) {
+      try {
+        killJobOnCluster(appId, log);
+      } catch (Throwable t) {
+        log.warn("something happened while trying to kill this job: " + appId, t);
+      }
+    }
+
+    return allSpawnedJobs;
+  }
+
+  /**
+   * <pre>
+   * Takes in a log file and greps every line for the application_id pattern.
+   * If it finds multiple ids, it will return all of them, de-duped (this is possible in the case of Pig jobs).
+   * This can be used in conjunction with the killJobOnCluster method in this file.
+   * </pre>
+   *
+   * @param logFilePath
+   * @return a Set. May be empty, but will never be null
+   */
+  public static Set<String> findApplicationIdFromLog(String logFilePath, Logger log) {
+
+    File logFile = new File(logFilePath);
+
+    if (!logFile.exists()) {
+      throw new IllegalArgumentException("the logFilePath does not exist: " + logFilePath);
+    }
+    if (!logFile.isFile()) {
+      throw new IllegalArgumentException("the logFilePath specified  is not a valid file: "
+              + logFilePath);
+    }
+    if (!logFile.canRead()) {
+      throw new IllegalArgumentException("unable to read the logFilePath specified: " + logFilePath);
+    }
+
+    BufferedReader br = null;
+    Set<String> applicationIds = new HashSet<String>();
+
+    try {
+//      br = new BufferedReader(new FileReader(logFile));
+      br = new BufferedReader(new InputStreamReader(
+          new FileInputStream(logFile), StandardCharsets.UTF_8));
+      String line;
+
+      // finds all the application IDs
+      while ((line = br.readLine()) != null) {
+        String [] inputs = line.split("\\s");
+        if (inputs != null) {
+          for (String input : inputs) {
+            Matcher m = APPLICATION_ID_PATTERN.matcher(input);
+            if (m.find()) {
+              String appId = m.group(1);
+              applicationIds.add(appId);
+            }
+          }
+        }
+      }
+    } catch (IOException e) {
+      log.error("Error while trying to find applicationId for log", e);
+    } finally {
+      try {
+        if (br != null)
+          br.close();
+      } catch (Exception e) {
+        // do nothing
+      }
+    }
+    return applicationIds;
+  }
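+
+  /*
+   * Usage sketch (illustrative only; the log line below is hypothetical): a log
+   * containing the line
+   *
+   *   Submitted application application_1514359453084_0017
+   *
+   * yields a set containing "application_1514359453084_0017"; tokens that do not
+   * match APPLICATION_ID_PATTERN contribute nothing.
+   */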
+
+  /**
+   * <pre>
+   * Uses YarnClient to kill the job on the cluster.
+   * Using JobClient only works partially:
+   *   If the YARN container has started but the Spark job hasn't, it will kill the job
+   *   If the Spark job has started, the cancel will hang until the Spark job is complete
+   *   If the Spark job is complete, it will return immediately, with a job-not-found error from the job tracker
+   * </pre>
+   *
+   * @param applicationId
+   * @throws IOException
+   * @throws YarnException
+   */
+  public static void killJobOnCluster(String applicationId, Logger log) throws YarnException,
+          IOException {
+
+    YarnConfiguration yarnConf = new YarnConfiguration();
+    YarnClient yarnClient = YarnClient.createYarnClient();
+    yarnClient.init(yarnConf);
+    yarnClient.start();
+
+    String[] split = applicationId.split("_");
+    ApplicationId aid = ApplicationId.newInstance(Long.parseLong(split[1]),
+            Integer.parseInt(split[2]));
+
+    log.info("start klling application: " + aid);
+    yarnClient.killApplication(aid);
+    log.info("successfully killed application: " + aid);
+  }
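+
+  /*
+   * Usage sketch (illustrative only; the id below is hypothetical): for
+   *
+   *   HadoopJobUtils.killJobOnCluster("application_1514359453084_0017", log);
+   *
+   * the cluster timestamp 1514359453084 and id 17 are parsed out of the string
+   * and handed to YarnClient.killApplication.
+   */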
+
+  /**
+   * <pre>
+   * Constructs a javaOpts string based on the Props and the key given; will return
+   *  String.format("-D%s=%s", key, value);
+   * </pre>
+   *
+   * @param props
+   * @param key
+   * @return String.format("-D%s=%s", key, value). Throws RuntimeException if the property is not
+   *         present
+   */
+  public static String javaOptStringFromAzkabanProps(Props props, String key) {
+    String value = props.get(key);
+    if (value == null) {
+      throw new RuntimeException(String.format("Cannot find property [%s], in azkaban props: [%s]",
+              key, value));
+    }
+    return String.format("-D%s=%s", key, value);
+  }
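+
+  /*
+   * Usage sketch (illustrative only; key and value below are hypothetical): with
+   * azkaban.flow.execid=283 present in props,
+   *
+   *   HadoopJobUtils.javaOptStringFromAzkabanProps(props, "azkaban.flow.execid");
+   *   // returns "-Dazkaban.flow.execid=283"
+   */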
+
+  /**
+   * Filter a collection of String commands to match a whitelist regex and not match a blacklist
+   * regex.
+   *
+   * @param commands
+   *          Collection of commands to be filtered
+   * @param whitelistRegex
+   *          whitelist regex to work as inclusion criteria
+   * @param blacklistRegex
+   *          blacklist regex to work as exclusion criteria
+   * @param log
+   *          logger to report violation
+   * @return filtered list of matching commands. Empty list if no command matches all the criteria.
+   */
+  public static List<String> filterCommands(Collection<String> commands, String whitelistRegex,
+          String blacklistRegex, Logger log) {
+    List<String> filteredCommands = new LinkedList<String>();
+    Pattern whitelistPattern = Pattern.compile(whitelistRegex);
+    Pattern blacklistPattern = Pattern.compile(blacklistRegex);
+    for (String command : commands) {
+      if (whitelistPattern.matcher(command).matches()
+              && !blacklistPattern.matcher(command).matches()) {
+        filteredCommands.add(command);
+      } else {
+        log.warn(String.format("Removing restricted command: %s", command));
+      }
+    }
+    return filteredCommands;
+  }
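+
+  /*
+   * Usage sketch (illustrative only; the regexes below are hypothetical):
+   *
+   *   List<String> allowed = HadoopJobUtils.filterCommands(
+   *       commands, ".*", ".*rm -rf.*", log);
+   *   // keeps every command except those containing "rm -rf", logging each removal
+   */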
+
+  /**
+   * <pre>
+   * Constructs a javaOpts string based on the Hadoop Configuration and the key given; will return
+   *  String.format("-D%s=%s", key, value);
+   * </pre>
+   *
+   * @param conf
+   * @param key
+   * @return String.format("-D%s=%s", key, value). Throws RuntimeException if the property is not
+   *         present in the Configuration
+   */
+  public static String javaOptStringFromHadoopConfiguration(Configuration conf, String key) {
+    String value = conf.get(key);
+    if (value == null) {
+      throw new RuntimeException(String.format("Cannot find property [%s], in Hadoop configuration: [%s]",
+              key, value));
+    }
+    return String.format("-D%s=%s", key, value);
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopPigJob.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopPigJob.java
new file mode 100644
index 0000000..0d70425
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopPigJob.java
@@ -0,0 +1,393 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import static org.apache.hadoop.security.UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION;
+
+import java.io.File;
+import java.io.IOException;
+import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.log4j.Logger;
+import org.apache.pig.PigRunner;
+
+import azkaban.flow.CommonJobProperties;
+import azkaban.jobExecutor.JavaProcessJob;
+import azkaban.security.commons.HadoopSecurityManager;
+import azkaban.utils.Props;
+import azkaban.utils.StringUtils;
+
+/*
+ * need lib:
+ * apache pig
+ * hadoop-core*.jar
+ * HadoopSecurePigWrapper
+ * HadoopSecurityManager(corresponding version with hadoop)
+ * abandon support for pig 0.8 and prior versions. don't see a use case here.
+ */
+
+public class HadoopPigJob extends JavaProcessJob {
+
+  public static final String PIG_SCRIPT = "pig.script";
+  public static final String UDF_IMPORT = "udf.import.list";
+  public static final String PIG_ADDITIONAL_JARS = "pig.additional.jars";
+  public static final String DEFAULT_PIG_ADDITIONAL_JARS =
+      "default.pig.additional.jars";
+  public static final String PIG_PARAM_PREFIX = "param.";
+  public static final String PIG_PARAM_FILES = "paramfile";
+  public static final String HADOOP_UGI = "hadoop.job.ugi";
+  public static final String DEBUG = "debug";
+
+  public static String HADOOP_SECURE_PIG_WRAPPER =
+      "azkaban.jobtype.HadoopSecurePigWrapper";
+
+  private String userToProxy = null;
+  private boolean shouldProxy = false;
+  private boolean obtainTokens = false;
+  File tokenFile = null;
+
+  private final boolean userPigJar;
+
+  private HadoopSecurityManager hadoopSecurityManager;
+
+  private File pigLogFile = null;
+
+  public HadoopPigJob(String jobid, Props sysProps, Props jobProps, Logger log)
+      throws IOException {
+    super(jobid, sysProps, jobProps, log);
+
+    HADOOP_SECURE_PIG_WRAPPER = HadoopSecurePigWrapper.class.getName();
+
+    getJobProps().put(CommonJobProperties.JOB_ID, jobid);
+    shouldProxy =
+        getSysProps().getBoolean(HadoopSecurityManager.ENABLE_PROXYING, false);
+    getJobProps().put(HadoopSecurityManager.ENABLE_PROXYING,
+        Boolean.toString(shouldProxy));
+    obtainTokens =
+        getSysProps().getBoolean(HadoopSecurityManager.OBTAIN_BINARY_TOKEN,
+            false);
+    userPigJar = getJobProps().getBoolean("use.user.pig.jar", false);
+
+    if (shouldProxy) {
+      getLog().info("Initiating hadoop security manager.");
+      try {
+        hadoopSecurityManager =
+            HadoopJobUtils.loadHadoopSecurityManager(getSysProps(), log);
+      } catch (RuntimeException e) {
+        throw new RuntimeException("Failed to get hadoop security manager!" + e);
+      }
+    }
+  }
+
+  @Override
+  public void run() throws Exception {
+    HadoopConfigurationInjector.prepareResourcesToInject(getJobProps(),
+        getWorkingDirectory());
+
+    if (shouldProxy && obtainTokens) {
+      userToProxy = getJobProps().getString("user.to.proxy");
+      getLog().info("Need to proxy. Getting tokens.");
+      // get tokens in to a file, and put the location in props
+      Props props = new Props();
+      props.putAll(getJobProps());
+      props.putAll(getSysProps());
+      HadoopJobUtils.addAdditionalNamenodesToPropsFromMRJob(props, getLog());
+      tokenFile =
+          HadoopJobUtils
+              .getHadoopTokens(hadoopSecurityManager, props, getLog());
+      getJobProps().put("env." + HADOOP_TOKEN_FILE_LOCATION,
+          tokenFile.getAbsolutePath());
+    }
+    try {
+      super.run();
+    } catch (Throwable t) {
+      t.printStackTrace();
+      getLog().error("caught error running the job", t);
+      throw new Exception(t);
+    } finally {
+      if (tokenFile != null) {
+        HadoopJobUtils.cancelHadoopTokens(hadoopSecurityManager, userToProxy,
+            tokenFile, getLog());
+        if (tokenFile.exists()) {
+          tokenFile.delete();
+        }
+      }
+    }
+  }
+
+  @Override
+  protected String getJavaClass() {
+    return HADOOP_SECURE_PIG_WRAPPER;
+  }
+
+  @Override
+  protected String getJVMArguments() {
+    String args = super.getJVMArguments();
+
+    String typeGlobalJVMArgs =
+        getSysProps().getString("jobtype.global.jvm.args", null);
+    if (typeGlobalJVMArgs != null) {
+      args += " " + typeGlobalJVMArgs;
+    }
+
+    List<String> udfImport = getUDFImportList();
+    if (udfImport.size() > 0) {
+      args += " -Dudf.import.list=" + super.createArguments(udfImport, ":");
+    }
+
+    List<String> additionalJars = getAdditionalJarsList();
+    if (additionalJars.size() > 0) {
+      args +=
+          " -Dpig.additional.jars="
+              + super.createArguments(additionalJars, ":");
+    }
+
+    String hadoopUGI = getHadoopUGI();
+    if (hadoopUGI != null) {
+      args += " -Dhadoop.job.ugi=" + hadoopUGI;
+    }
+
+    if (shouldProxy) {
+      info("Setting up secure proxy info for child process");
+      String secure;
+      secure =
+          " -D" + HadoopSecurityManager.USER_TO_PROXY + "="
+              + getJobProps().getString(HadoopSecurityManager.USER_TO_PROXY);
+      String extraToken =
+          getSysProps().getString(HadoopSecurityManager.OBTAIN_BINARY_TOKEN,
+              "false");
+      if (extraToken != null) {
+        secure +=
+            " -D" + HadoopSecurityManager.OBTAIN_BINARY_TOKEN + "="
+                + extraToken;
+      }
+      info("Secure settings = " + secure);
+      args += secure;
+    } else {
+      info("Not setting up secure proxy info for child process");
+    }
+
+    return args;
+  }
+
+  @Override
+  protected String getMainArguments() {
+    ArrayList<String> list = new ArrayList<String>();
+    Map<String, String> map = getPigParams();
+    if (map != null) {
+      for (Map.Entry<String, String> entry : map.entrySet()) {
+        list.add("-param "
+            + StringUtils.shellQuote(entry.getKey() + "=" + entry.getValue(),
+                StringUtils.SINGLE_QUOTE));
+      }
+    }
+
+    List<String> paramFiles = getPigParamFiles();
+    if (paramFiles != null) {
+      for (String paramFile : paramFiles) {
+        list.add("-param_file " + paramFile);
+      }
+    }
+
+    if (getDebug()) {
+      list.add("-debug");
+    }
+
+    try {
+      pigLogFile =
+          File.createTempFile("piglogfile", ".log", new File(
+              getWorkingDirectory()));
+      jobProps.put("env." + "PIG_LOG_FILE", pigLogFile.getAbsolutePath());
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+
+    if (pigLogFile != null) {
+      list.add("-logfile " + pigLogFile.getAbsolutePath());
+    }
+
+    list.add(getScript());
+
+    return StringUtils.join((Collection<String>) list, " ");
+  }
+
+  @Override
+  protected List<String> getClassPaths() {
+
+    List<String> classPath = super.getClassPaths();
+
+    // To add az-core jar classpath
+    classPath.add(getSourcePathFromClass(Props.class));
+
+    // To add az-common jar classpath
+    classPath.add(getSourcePathFromClass(JavaProcessJob.class));
+    classPath.add(getSourcePathFromClass(HadoopSecurePigWrapper.class));
+    classPath.add(getSourcePathFromClass(HadoopSecurityManager.class));
+
+    classPath.add(HadoopConfigurationInjector.getPath(getJobProps(),
+        getWorkingDirectory()));
+
+    // assuming pig 0.8 and up
+    if (!userPigJar) {
+      classPath.add(getSourcePathFromClass(PigRunner.class));
+    }
+
+    // merging classpaths from plugin.properties
+    mergeClassPaths(classPath,
+        getJobProps().getStringList("jobtype.classpath", null, ","));
+    // merging classpaths from private.properties
+    mergeClassPaths(classPath,
+        getSysProps().getStringList("jobtype.classpath", null, ","));
+
+    List<String> typeGlobalClassPath =
+        getSysProps().getStringList("jobtype.global.classpath", null, ",");
+    if (typeGlobalClassPath != null) {
+      for (String jar : typeGlobalClassPath) {
+        if (!classPath.contains(jar)) {
+          classPath.add(jar);
+        }
+      }
+    }
+
+    return classPath;
+  }
+
+  private void mergeClassPaths(List<String> classPath,
+      List<String> typeClassPath) {
+    if (typeClassPath != null) {
+      // plugin.dir is filled in when this jobtype is loaded
+      String pluginDir = getSysProps().get("plugin.dir");
+      for (String jar : typeClassPath) {
+        File jarFile = new File(jar);
+        if (!jarFile.isAbsolute()) {
+          jarFile = new File(pluginDir + File.separatorChar + jar);
+        }
+
+        if (!classPath.contains(jarFile.getAbsolutePath())) {
+          classPath.add(jarFile.getAbsolutePath());
+        }
+      }
+    }
+  }
+
+  protected boolean getDebug() {
+    return getJobProps().getBoolean(DEBUG, false);
+  }
+
+  protected String getScript() {
+    return getJobProps().getString(PIG_SCRIPT);
+  }
+
+  protected List<String> getUDFImportList() {
+    List<String> udfImports = new ArrayList<String>();
+    List<String> typeImports =
+        getSysProps().getStringList(UDF_IMPORT, null, ",");
+    List<String> jobImports =
+        getJobProps().getStringList(UDF_IMPORT, null, ",");
+    if (typeImports != null) {
+      udfImports.addAll(typeImports);
+    }
+    if (jobImports != null) {
+      udfImports.addAll(jobImports);
+    }
+    return udfImports;
+  }
+
+  protected List<String> getAdditionalJarsList() {
+    List<String> additionalJars = new ArrayList<String>();
+    mergeAdditionalJars(additionalJars, PIG_ADDITIONAL_JARS);
+    mergeAdditionalJars(additionalJars, DEFAULT_PIG_ADDITIONAL_JARS);
+    return additionalJars;
+  }
+
+  /**
+   * Merges all additional jars for the given additionalJarProperty, first from the
+   * user-specified/plugin.properties and then from private.properties
+   */
+  private void mergeAdditionalJars(List<String> additionalJars,
+      String additionalJarProperty) {
+    List<String> jobJars =
+        getJobProps().getStringList(additionalJarProperty, null, ",");
+    List<String> typeJars =
+        getSysProps().getStringList(additionalJarProperty, null, ",");
+    if (jobJars != null) {
+      additionalJars.addAll(jobJars);
+    }
+    if (typeJars != null) {
+      additionalJars.addAll(typeJars);
+    }
+  }
+
+  protected String getHadoopUGI() {
+    return getJobProps().getString(HADOOP_UGI, null);
+  }
+
+  protected Map<String, String> getPigParams() {
+    return getJobProps().getMapByPrefix(PIG_PARAM_PREFIX);
+  }
+
+  protected List<String> getPigParamFiles() {
+    return getJobProps().getStringList(PIG_PARAM_FILES, null, ",");
+  }
+
+  private static String getSourcePathFromClass(Class<?> containedClass) {
+    File file =
+        new File(containedClass.getProtectionDomain().getCodeSource()
+            .getLocation().getPath());
+
+    if (!file.isDirectory() && file.getName().endsWith(".class")) {
+      String name = containedClass.getName();
+      StringTokenizer tokenizer = new StringTokenizer(name, ".");
+      while (tokenizer.hasMoreTokens()) {
+        tokenizer.nextElement();
+        file = file.getParentFile();
+      }
+
+      return file.getPath();
+    } else {
+      return containedClass.getProtectionDomain().getCodeSource().getLocation()
+          .getPath();
+    }
+  }
+
+  /**
+   * This cancel method, in addition to the default canceling behavior, also
+   * kills the MR jobs launched by Pig on Hadoop
+   */
+  @Override
+  public void cancel() throws InterruptedException {
+    super.cancel();
+
+    info("Cancel called.  Killing the Pig launched MR jobs on the cluster");
+
+    String azExecId = jobProps.getString(CommonJobProperties.EXEC_ID);
+    final String logFilePath =
+        String.format("%s/_job.%s.%s.log", getWorkingDirectory(), azExecId,
+            getId());
+    info("log file path is: " + logFilePath);
+
+    HadoopJobUtils.proxyUserKillAllSpawnedHadoopJobs(logFilePath, jobProps,
+        tokenFile, getLog());
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopSecureHiveWrapper.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopSecureHiveWrapper.java
new file mode 100644
index 0000000..3dabc64
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopSecureHiveWrapper.java
@@ -0,0 +1,284 @@
+/*
+ * Copyright 2012 LinkedIn, Inc
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package azkaban.jobtype;
+
+import static azkaban.security.commons.SecurityUtils.MAPREDUCE_JOB_CREDENTIALS_BINARY;
+import static azkaban.utils.StringUtils.DOUBLE_QUOTE;
+import static azkaban.utils.StringUtils.SINGLE_QUOTE;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVEAUXJARS;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTORECONNECTURLKEY;
+import static org.apache.hadoop.security.UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION;
+
+import java.io.File;
+import java.io.IOException;
+import java.security.PrivilegedExceptionAction;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.cli.CliDriver;
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.cli.OptionsProcessor;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.log4j.Logger;
+
+import azkaban.jobtype.hiveutils.HiveQueryExecutionException;
+import azkaban.utils.Props;
+
+public class HadoopSecureHiveWrapper {
+
+  private static final String DOUBLE_QUOTE_STRING = Character
+      .toString(DOUBLE_QUOTE);
+  private static final String SINGLE_QUOTE_STRING = Character
+      .toString(SINGLE_QUOTE);
+
+  private static final Logger logger = Logger.getRootLogger();
+
+  private static CliSessionState ss;
+  private static String hiveScript;
+
+  public static void main(final String[] args) throws Exception {
+
+    Properties jobProps = HadoopSecureWrapperUtils.loadAzkabanProps();
+    HadoopConfigurationInjector.injectResources(new Props(null, jobProps));
+
+    hiveScript = jobProps.getProperty("hive.script");
+
+    if (HadoopSecureWrapperUtils.shouldProxy(jobProps)) {
+      String tokenFile = System.getenv(HADOOP_TOKEN_FILE_LOCATION);
+      UserGroupInformation proxyUser =
+          HadoopSecureWrapperUtils.setupProxyUser(jobProps, tokenFile, logger);
+      proxyUser.doAs(new PrivilegedExceptionAction<Void>() {
+        @Override
+        public Void run() throws Exception {
+          runHive(args);
+          return null;
+        }
+      });
+    } else {
+      runHive(args);
+    }
+  }
+
+  public static void runHive(String[] args) throws Exception {
+
+    final HiveConf hiveConf = new HiveConf(SessionState.class);
+
+    populateHiveConf(hiveConf, args);
+
+    if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) {
+      System.out.println("Setting hadoop tokens ... ");
+      hiveConf.set(MAPREDUCE_JOB_CREDENTIALS_BINARY,
+          System.getenv(HADOOP_TOKEN_FILE_LOCATION));
+      System.setProperty(MAPREDUCE_JOB_CREDENTIALS_BINARY,
+          System.getenv(HADOOP_TOKEN_FILE_LOCATION));
+    }
+
+    logger.info("HiveConf = " + hiveConf);
+    logger.info("According to the conf, we're talking to the Hive hosted at: "
+        + HiveConf.getVar(hiveConf, METASTORECONNECTURLKEY));
+
+    String orig = HiveConf.getVar(hiveConf, HIVEAUXJARS);
+    String expanded = expandHiveAuxJarsPath(orig);
+    if (orig == null || orig.equals(expanded)) {
+      logger.info("Hive aux jars variable not expanded");
+    } else {
+      logger.info("Expanded aux jars variable from [" + orig + "] to ["
+          + expanded + "]");
+      HiveConf.setVar(hiveConf, HIVEAUXJARS, expanded);
+    }
+
+    OptionsProcessor op = new OptionsProcessor();
+
+    if (!op.process_stage1(new String[] {})) {
+      throw new IllegalArgumentException("Can't process empty args?!?");
+    }
+
+    // hadoop-20 and above - we need to augment classpath using hiveconf
+    // components
+    // see also: code in ExecDriver.java
+    ClassLoader loader = hiveConf.getClassLoader();
+    String auxJars = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVEAUXJARS);
+    logger.info("Got auxJars = " + auxJars);
+
+    if (StringUtils.isNotBlank(auxJars)) {
+      loader =
+          Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","));
+    }
+    hiveConf.setClassLoader(loader);
+    Thread.currentThread().setContextClassLoader(loader);
+
+    // See https://issues.apache.org/jira/browse/HIVE-1411
+    hiveConf.set("datanucleus.plugin.pluginRegistryBundleCheck", "LOG");
+
+    // to force hive to use the jobclient to submit the job, never using
+    // HADOOPBIN (to do localmode)
+    hiveConf.setBoolean("hive.exec.mode.local.auto", false);
+
+    ss = new CliSessionState(hiveConf);
+    SessionState.start(ss);
+
+    logger.info("SessionState = " + ss);
+    ss.out = System.out;
+    ss.err = System.err;
+    ss.in = System.in;
+
+    if (!op.process_stage2(ss)) {
+      throw new IllegalArgumentException(
+          "Can't process arguments from session state");
+    }
+
+    logger.info("Executing query: " + hiveScript);
+
+    CliDriver cli = new CliDriver();
+    Map<String, String> hiveVarMap = getHiveVarMap(args);
+
+    logger.info("hiveVarMap: " + hiveVarMap);
+
+    if (!hiveVarMap.isEmpty()) {
+      cli.setHiveVariables(getHiveVarMap(args));
+    }
+
+    int returnCode = cli.processFile(hiveScript);
+    if (returnCode != 0) {
+      logger.warn("Got exception " + returnCode + " from line: " + hiveScript);
+      throw new HiveQueryExecutionException(returnCode, hiveScript);
+    }
+  }
+
+  /**
+   * Normally hive.aux.jars.path is expanded from just being a path to the full
+   * list of files in the directory by the hive shell script. Since we normally
+   * won't be running from the script, it's up to us to do that work here. We
+   * use a heuristic that if there is no occurrence of ".jar" in the original,
+   * it needs expansion. Otherwise it's already been done for us.
+   *
+   * Also, wrap each file path in a file:// URI.
+   */
+  static String expandHiveAuxJarsPath(String original) throws IOException {
+    if (original == null || original.contains(".jar"))
+      return original;
+
+    File[] files = new File(original).listFiles();
+
+    if (files == null || files.length == 0) {
+      logger
+          .info("No files in to expand in aux jar path. Returning original parameter");
+      return original;
+    }
+
+    return filesToURIString(files);
+
+  }
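+
+  // Illustration (hypothetical paths, not part of the original change): if
+  // hive.aux.jars.path is set to a directory such as "/opt/hive/aux" containing
+  // a.jar and b.jar, expandHiveAuxJarsPath returns a comma-separated list of
+  // file:// URIs for those jars; a value that already contains ".jar" is
+  // returned unchanged.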
+
+  static String filesToURIString(File[] files) throws IOException {
+    StringBuffer sb = new StringBuffer();
+    for (int i = 0; i < files.length; i++) {
+      sb.append("file:///").append(files[i].getCanonicalPath());
+      if (i != files.length - 1)
+        sb.append(",");
+    }
+
+    return sb.toString();
+  }
+
+  /**
+   * Extract hiveconf from command line arguments and populate them into
+   * HiveConf
+   *
+   * An example: -hiveconf 'zipcode=10', -hiveconf hive.root.logger=INFO,console
+   *
+   * @param hiveConf
+   * @param args
+   */
+  private static void populateHiveConf(HiveConf hiveConf, String[] args) {
+
+    if (args == null) {
+      return;
+    }
+
+    int index = 0;
+    for (; index < args.length; index++) {
+      if ("-hiveconf".equals(args[index])) {
+        String hiveConfParam = stripSingleDoubleQuote(args[++index]);
+
+        String[] tokens = hiveConfParam.split("=");
+        if (tokens.length == 2) {
+          String name = tokens[0];
+          String value = tokens[1];
+          logger.info("Setting: " + name + "=" + value + " to hiveConf");
+          hiveConf.set(name, value);
+        } else {
+          logger.warn("Invalid hiveconf: " + hiveConfParam);
+        }
+      }
+    }
+  }
+
+  static Map<String, String> getHiveVarMap(String[] args) {
+
+    if (args == null) {
+      return Collections.emptyMap();
+    }
+
+    Map<String, String> hiveVarMap = new HashMap<String, String>();
+    for (int index = 0; index < args.length; index++) {
+      if ("-hivevar".equals(args[index])) {
+        String hiveVarParam = stripSingleDoubleQuote(args[++index]);
+        // Separate the parameter string at its first occurrence of "="
+        int gap = hiveVarParam.indexOf("=");
+        if (gap == -1) {
+          logger.warn("Invalid hivevar: " + hiveVarParam);
+          continue;
+        }
+        String name = hiveVarParam.substring(0, gap);
+        String value = hiveVarParam.substring(gap + 1);
+        logger.info("Setting hivevar: " + name + "=" + value);
+        hiveVarMap.put(name, value);
+      }
+    }
+    return hiveVarMap;
+  }
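+
+  // Illustration (hypothetical values): getHiveVarMap(new String[] {"-hivevar",
+  // "'snapshot.date=2018-04-06'"}) yields {snapshot.date=2018-04-06}. Because the
+  // split happens at the first "=", a param like "filter=a=b" keeps "a=b" as the value.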
+
+  /**
+   * Strip single quote or double quote at either end of the string
+   *
+   * @param input
+   * @return the string without a leading or trailing single or double quote
+   */
+  private static String stripSingleDoubleQuote(String input) {
+    if (StringUtils.isEmpty(input)) {
+      return input;
+    }
+
+    if (input.startsWith(SINGLE_QUOTE_STRING)
+        || input.startsWith(DOUBLE_QUOTE_STRING)) {
+      input = input.substring(1);
+    }
+
+    if (input.endsWith(SINGLE_QUOTE_STRING)
+        || input.endsWith(DOUBLE_QUOTE_STRING)) {
+      input = input.substring(0, input.length() - 1);
+    }
+
+    return input;
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopSecurePigWrapper.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopSecurePigWrapper.java
new file mode 100644
index 0000000..f5db1bd
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopSecurePigWrapper.java
@@ -0,0 +1,175 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import static org.apache.hadoop.security.UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.security.PrivilegedExceptionAction;
+import java.util.Iterator;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapred.Counters;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.log4j.Logger;
+import org.apache.pig.PigRunner;
+import org.apache.pig.tools.pigstats.JobStats;
+import org.apache.pig.tools.pigstats.PigStats;
+import org.apache.pig.tools.pigstats.PigStats.JobGraph;
+
+import azkaban.jobExecutor.ProcessJob;
+import azkaban.utils.Props;
+
+public class HadoopSecurePigWrapper {
+
+  private static final String PIG_DUMP_HADOOP_COUNTER_PROPERTY = "pig.dump.hadoopCounter";
+
+  private static File pigLogFile;
+
+  private static Props props;
+
+  private static final Logger logger;
+
+  static {
+    logger = Logger.getRootLogger();
+  }
+
+  public static void main(final String[] args) throws Exception {
+    Properties jobProps = HadoopSecureWrapperUtils.loadAzkabanProps();
+    props = new Props(null, jobProps);
+    HadoopConfigurationInjector.injectResources(props);
+
+    // special feature of secure pig wrapper: we will append the pig error file
+    // onto system out
+    pigLogFile = new File(System.getenv("PIG_LOG_FILE"));
+
+    if (HadoopSecureWrapperUtils.shouldProxy(jobProps)) {
+      String tokenFile = System.getenv(HADOOP_TOKEN_FILE_LOCATION);
+      UserGroupInformation proxyUser =
+          HadoopSecureWrapperUtils.setupProxyUser(jobProps, tokenFile, logger);
+      proxyUser.doAs(new PrivilegedExceptionAction<Void>() {
+        @Override
+        public Void run() throws Exception {
+          runPigJob(args);
+          return null;
+        }
+      });
+    } else {
+      runPigJob(args);
+    }
+  }
+
+  @SuppressWarnings("deprecation")
+  public static void runPigJob(String[] args) throws Exception {
+    PigStats stats = null;
+    if (props.getBoolean("pig.listener.visualizer", false) == true) {
+      stats = PigRunner.run(args, new AzkabanPigListener(props));
+    } else {
+      stats = PigRunner.run(args, null);
+    }
+
+    dumpHadoopCounters(stats);
+
+    if (stats.isSuccessful()) {
+      return;
+    }
+
+    if (pigLogFile != null) {
+      handleError(pigLogFile);
+    }
+
+    // see jira ticket PIG-3313. Will remove these when we use pig binary with
+    // that patch.
+    // /////////////////////
+    System.out.println("Trying to do self kill, in case pig could not.");
+    Set<Thread> threadSet = Thread.getAllStackTraces().keySet();
+    Thread[] threadArray = threadSet.toArray(new Thread[threadSet.size()]);
+    for (Thread t : threadArray) {
+      if (!t.isDaemon() && !t.equals(Thread.currentThread())) {
+        System.out.println("Killing thread " + t);
+        t.stop();
+      }
+    }
+    System.exit(1);
+    // ////////////////////
+    throw new RuntimeException("Pig job failed.");
+  }
+
+  /**
+   * Dump Hadoop counters for each of the M/R jobs in the given PigStats.
+   *
+   * @param pigStats
+   */
+  private static void dumpHadoopCounters(PigStats pigStats) {
+    try {
+      if (props.getBoolean(PIG_DUMP_HADOOP_COUNTER_PROPERTY, false)) {
+        if (pigStats != null) {
+          JobGraph jGraph = pigStats.getJobGraph();
+          Iterator<JobStats> iter = jGraph.iterator();
+          while (iter.hasNext()) {
+            JobStats jobStats = iter.next();
+            System.out.println("\n === Counters for job: "
+                + jobStats.getJobId() + " ===");
+            Counters counters = jobStats.getHadoopCounters();
+            if (counters != null) {
+              for (Counters.Group group : counters) {
+                System.out.println(" Counter Group: " + group.getDisplayName()
+                    + " (" + group.getName() + ")");
+                System.out.println("  number of counters in this group: "
+                    + group.size());
+                for (Counters.Counter counter : group) {
+                  System.out.println("  - " + counter.getDisplayName() + ": "
+                      + counter.getCounter());
+                }
+              }
+            } else {
+              System.out.println("There are no counters");
+            }
+          }
+        } else {
+          System.out.println("pigStats is null, can't dump Hadoop counters");
+        }
+      }
+    } catch (Exception e) {
+      System.out.println("Unexpected error: " + e.getMessage());
+      e.printStackTrace(System.out);
+    }
+  }
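+
+  // Illustration (hypothetical job file, assuming the usual pig jobtype properties):
+  //   type=pig
+  //   pig.script=src/wordcount.pig
+  //   pig.dump.hadoopCounter=true
+  // With pig.dump.hadoopCounter=true, the counters of every M/R job in the Pig run
+  // are printed to stdout by this method.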
+
+  @SuppressWarnings("DefaultCharset")
+  private static void handleError(File pigLog) throws Exception {
+    System.out.println();
+    System.out.println("Pig logfile dump:");
+    System.out.println();
+    try {
+      BufferedReader reader = new BufferedReader(new FileReader(pigLog));
+      String line = reader.readLine();
+      while (line != null) {
+        System.err.println(line);
+        line = reader.readLine();
+      }
+      reader.close();
+    } catch (FileNotFoundException e) {
+      System.err.println("pig log file: " + pigLog + "  not found.");
+    }
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopSecureSparkWrapper.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopSecureSparkWrapper.java
new file mode 100644
index 0000000..e5297db
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopSecureSparkWrapper.java
@@ -0,0 +1,450 @@
+/*
+ * Copyright 2015 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import azkaban.utils.Props;
+import com.google.common.collect.Maps;
+
+import java.security.PrivilegedExceptionAction;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.commons.lang.math.NumberUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.yarn.conf.YarnConfiguration;
+import org.apache.log4j.Logger;
+import org.apache.spark.SparkConf;
+import org.apache.spark.network.util.JavaUtils;
+import org.apache.spark.util.Utils;
+
+import static azkaban.flow.CommonJobProperties.ATTEMPT_LINK;
+import static azkaban.flow.CommonJobProperties.EXECUTION_LINK;
+import static azkaban.flow.CommonJobProperties.JOB_LINK;
+import static azkaban.flow.CommonJobProperties.WORKFLOW_LINK;
+import static org.apache.hadoop.security.UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION;
+
+/**
+ * <pre>
+ * A Spark wrapper (more specifically a spark-submit wrapper) that works with Azkaban.
+ * This class runs in a separate process with JVM/ENV properties, classpath and main args
+ *  built from {@link HadoopSparkJob}.
+ * This class's main() receives the input args built from {@link HadoopSparkJob},
+ *  and passes them on to spark-submit to launch the spark job.
+ * This process will be the client of the spark job.
+ *
+ * </pre>
+ *
+ * @see HadoopSparkJob
+ */
+public class HadoopSecureSparkWrapper {
+
+  private static final Logger logger = Logger.getRootLogger();
+  private static final String EMPTY_STRING = "";
+
+  //SPARK CONF PARAM
+  private static final String SPARK_CONF_EXTRA_DRIVER_OPTIONS = "spark.driver.extraJavaOptions";
+  private static final String SPARK_CONF_NUM_EXECUTORS = "spark.executor.instances";
+  private static final String SPARK_CONF_SHUFFLE_SERVICE_ENABLED = "spark.shuffle.service.enabled";
+  private static final String SPARK_CONF_DYNAMIC_ALLOC_ENABLED = "spark.dynamicAllocation.enabled";
+  private static final String SPARK_CONF_QUEUE = "spark.yarn.queue";
+  private static final String SPARK_EXECUTOR_NODE_LABEL_EXP = "spark.yarn.executor.nodeLabelExpression";
+  private static final String SPARK_EXECUTOR_MEMORY_OVERHEAD = "spark.yarn.executor.memoryOverhead";
+  private static final String SPARK_EXECUTOR_MEMORY = "spark.executor.memory";
+  private static final String SPARK_EXECUTOR_DEFAULT_MEMORY = "1024M";
+  private static final String SPARK_EXECUTOR_CORES = "spark.executor.cores";
+  private static final String SPARK_EXECUTOR_DEFAULT_CORES = "1";
+
+  //YARN CONF PARAM
+  private static final String YARN_CONF_NODE_LABELING_ENABLED = "yarn.node-labels.enabled";
+  public static final String DEFAULT_QUEUE = "default";
+
+  /**
+   * Entry point: a Java wrapper to the spark-submit command
+   * Args are built in HadoopSparkJob.
+   *
+   * @param args
+   * @throws Exception
+   */
+  public static void main(final String[] args) throws Exception {
+
+    Properties jobProps = HadoopSecureWrapperUtils.loadAzkabanProps();
+    HadoopConfigurationInjector.injectResources(new Props(null, jobProps));
+
+    if (HadoopSecureWrapperUtils.shouldProxy(jobProps)) {
+      String tokenFile = System.getenv(HADOOP_TOKEN_FILE_LOCATION);
+      UserGroupInformation proxyUser =
+          HadoopSecureWrapperUtils.setupProxyUser(jobProps, tokenFile, logger);
+      proxyUser.doAs(new PrivilegedExceptionAction<Void>() {
+        @Override
+        public Void run() throws Exception {
+          runSpark(args);
+          return null;
+        }
+      });
+    } else {
+      runSpark(args);
+    }
+  }
+
+  /**
+   * Actually adjusts cmd args based on execution environment and calls the spark-submit command
+   *
+   * @param args
+   */
+  private static void runSpark(String[] args) {
+
+    if (args.length == 0) {
+      throw new RuntimeException("SparkSubmit cannot run with zero args");
+    }
+
+    // The arg strings passed here are long strings delimited by SparkJobArg.delimiter.
+    // Merge everything together and re-split on our ^Z delimiter character instead of
+    // the default "space" character
+    StringBuilder concat = new StringBuilder();
+    concat.append(args[0]);
+    for (int i = 1; i < args.length; i++) {
+      concat.append(" " + args[i]);
+    }
+    String[] newArgs = concat.toString().split(SparkJobArg.delimiter);
+
+    // Sample: [--driver-java-options, , --master, yarn-cluster, --class, myclass,
+    // --conf, queue=default, --executor-memory, 1g, --num-executors, 15, my.jar, myparams]
+    logger.info("Args before adjusting driver java opts: " + Arrays.toString(newArgs));
+
+    // Adjust driver java opts param
+    handleDriverJavaOpts(newArgs);
+
+    // If dynamic allocation policy for this jobtype is turned on, adjust related param
+    newArgs = handleDynamicResourceAllocation(newArgs);
+
+    // If yarn cluster enables node labeling, adjust related param
+    newArgs = handleNodeLabeling(newArgs);
+
+    // Realign params after adjustment
+    newArgs = removeNullsFromArgArray(newArgs);
+    logger.info("Args after adjusting driver java opts: " + Arrays.toString(newArgs));
+
+    org.apache.spark.deploy.SparkSubmit$.MODULE$.main(newArgs);
+  }
+
+  private static void handleDriverJavaOpts(String[] argArray) {
+    Configuration conf = new Configuration();
+    // Driver java opts are always the first element (param name) and second element (value) in the argArray
+    // Get current driver java opts here
+    StringBuilder driverJavaOptions = new StringBuilder(argArray[1]);
+    // In spark-submit, when both --driver-java-options and the conf spark.driver.extraJavaOptions are used,
+    // spark-submit will only pick --driver-java-options, an arg we always have.
+    // So if the user gives --conf spark.driver.extraJavaOptions=XX, we append the value to --driver-java-options
+    for (int i = 0; i < argArray.length; i++) {
+      if (argArray[i].equals(SparkJobArg.SPARK_CONF_PREFIX.sparkParamName)
+        && argArray[i+1].startsWith(SPARK_CONF_EXTRA_DRIVER_OPTIONS)) {
+        driverJavaOptions.append(" ").append(argArray[++i].substring(SPARK_CONF_EXTRA_DRIVER_OPTIONS.length() + 1));
+      }
+    }
+
+    // Append additional driver java opts about the azkaban context
+    String[] requiredJavaOpts = { WORKFLOW_LINK, JOB_LINK, EXECUTION_LINK, ATTEMPT_LINK };
+    for (int i = 0; i < requiredJavaOpts.length; i++) {
+        driverJavaOptions.append(" ").append(HadoopJobUtils.javaOptStringFromHadoopConfiguration(conf,
+                  requiredJavaOpts[i]));
+    }
+    // Update driver java opts
+    argArray[1] = driverJavaOptions.toString();
+  }
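+
+  // Illustration (hypothetical values): given argArray
+  //   ["--driver-java-options", "-Xms2g", ..., "--conf", "spark.driver.extraJavaOptions=-Dfoo=bar"]
+  // handleDriverJavaOpts rewrites argArray[1] to "-Xms2g -Dfoo=bar" plus the azkaban
+  // context opts (workflow/job/execution/attempt links) taken from the hadoop conf.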
+
+  private static String[] handleDynamicResourceAllocation(String[] argArray) {
+    // HadoopSparkJob will set env var on this process if we enforce dynamic allocation policy for spark jobtype.
+    // This policy can be enabled through spark jobtype plugin's conf property.
+    // Enabling dynamic allocation policy for azkaban spark jobtype is different from enabling dynamic allocation
+    // feature for Spark. This config inside the Spark jobtype enforces that the dynamic allocation feature is used for all
+    // Spark applications submitted via Azkaban Spark job type.
+    String dynamicAllocProp = System.getenv(HadoopSparkJob.SPARK_DYNAMIC_RES_ENV_VAR);
+    boolean dynamicAllocEnabled = dynamicAllocProp != null && dynamicAllocProp.equals(Boolean.TRUE.toString());
+    if (dynamicAllocEnabled) {
+      for (int i = 0; i < argArray.length; i++) {
+        if (argArray[i] == null) {
+          continue;
+        }
+
+        // If the user tries to disable dynamic allocation for their application
+        // by setting some conf params to false, we need to ignore these settings to ensure the application
+        // uses dynamic allocation for spark
+        if (argArray[i].equals(SparkJobArg.SPARK_CONF_PREFIX.sparkParamName) // --conf
+            && (argArray[i + 1].startsWith(SPARK_CONF_SHUFFLE_SERVICE_ENABLED) // spark.shuffle.service.enabled
+            || argArray[i + 1].startsWith(SPARK_CONF_DYNAMIC_ALLOC_ENABLED)) // spark.dynamicAllocation.enabled
+            ) {
+
+          logger.info(
+              "Azbakan enforces dynamic resource allocation. Ignore user param: " + argArray[i] + " " + argArray[i
+                  + 1]);
+          argArray[i] = null;
+          argArray[++i] = null;
+        }
+      }
+      // If dynamic allocation is enabled, make sure application is scheduled in right queue
+      argArray = handleQueueEnforcement(argArray);
+    }
+    return argArray;
+  }
+
+  /**
+   * This method is used to enforce queue for Spark application. Rules are explained below.
+   * a) If dynamic resource allocation is enabled for selected spark version and application requires large container
+   *    then schedule it into default queue by a default conf(spark.yarn.queue) in spark-defaults.conf.
+   * b) If dynamic resource allocation is enabled for selected spark version and application requires small container
+   *    then schedule it into Org specific queue.
+   * c) If dynamic resource allocation is disabled for selected spark version then schedule application into default
+   *    queue by a default conf(spark.yarn.queue) in spark-defaults.conf.
+   * @param argArray
+   * @return
+   */
+  protected static String[] handleQueueEnforcement(String[] argArray) {
+    SparkConf sparkConf = getSparkProperties();
+    Configuration conf = new Configuration();
+
+    int queueParameterIndex = getUserSpecifiedQueueParameterIndex(argArray);
+    boolean requiredSparkDefaultQueue = false;
+    if (sparkConf.getBoolean(SPARK_CONF_DYNAMIC_ALLOC_ENABLED, false)) {
+      if (isLargeContainerRequired(argArray, conf, sparkConf)) {
+        // Case A
+        requiredSparkDefaultQueue = true;
+        logger.info("Spark application requires Large containers. Scheduling this application into default queue by a "
+            + "default conf(spark.yarn.queue) in spark-defaults.conf.");
+      } else {
+        // Case B
+        logger.info(
+            "Dynamic allocation is enabled for selected spark version and application requires small container. "
+                + "Hence, scheduling this application into Org specific queue");
+        if (queueParameterIndex == -1) {
+          LinkedList<String> argList = new LinkedList<String>(Arrays.asList(argArray));
+          argList.addFirst(SPARK_CONF_QUEUE + "=" + DEFAULT_QUEUE);
+          argList.addFirst(SparkJobArg.SPARK_CONF_PREFIX.sparkParamName);
+          argArray = argList.toArray(new String[argList.size()]);
+        }
+      }
+    } else {
+      // Case C
+      logger.info("Spark version, selected for this application, doesn't support dynamic allocation. Scheduling this "
+          + "application into default queue by a default conf(spark.yarn.queue) in spark-defaults.conf.");
+      requiredSparkDefaultQueue = true;
+    }
+
+    if (queueParameterIndex != -1 && requiredSparkDefaultQueue) {
+      logger.info("Azbakan enforces spark.yarn.queue queue. Ignore user param: " + argArray[queueParameterIndex] + " "
+          + argArray[queueParameterIndex + 1]);
+      argArray[queueParameterIndex] = null;
+      argArray[queueParameterIndex + 1] = null;
+    }
+    return argArray;
+  }
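+
+  // Illustration (hypothetical values): with spark.dynamicAllocation.enabled=true in
+  // spark-defaults.conf and a large-container request, a user-supplied
+  // "--conf spark.yarn.queue=myqueue" (or "--queue myqueue") is nulled out so the
+  // default queue from spark-defaults.conf applies; for a small-container request with
+  // no user-supplied queue, "--conf spark.yarn.queue=default" is prepended.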
+
+  /**
+   * This method is used to check whether a large container is required for the application or not.
+   * To decide that, it uses the following parameters:
+   * User job parameters (or their default values): spark.executor.cores, spark.executor.memory, spark.yarn.executor.memoryOverhead
+   * Jobtype plugin parameters: spark.min.mem.vore.ratio, spark.min.memory-gb.size
+   * If rounded memory / spark.executor.cores >= spark.min.mem.vore.ratio or rounded memory >= spark.min.memory-gb.size,
+   * then a large container is required to schedule this application.
+   * @param argArray
+   * @param conf
+   * @param sparkConf
+   * @return
+   */
+  private static boolean isLargeContainerRequired(String[] argArray, Configuration conf, SparkConf sparkConf) {
+    Map<String, String> executorParameters = getUserSpecifiedExecutorParameters(argArray);
+    String executorVcore = executorParameters.get(SPARK_EXECUTOR_CORES);
+    String executorMem = executorParameters.get(SPARK_EXECUTOR_MEMORY);
+    String executorMemOverhead = executorParameters.get(SPARK_EXECUTOR_MEMORY_OVERHEAD);
+    if (executorVcore == null) {
+      executorVcore = sparkConf.get(SPARK_EXECUTOR_CORES, SPARK_EXECUTOR_DEFAULT_CORES);
+    }
+    if (executorMem == null) {
+      executorMem = sparkConf.get(SPARK_EXECUTOR_MEMORY, SPARK_EXECUTOR_DEFAULT_MEMORY);
+    }
+    if (executorMemOverhead == null) {
+      executorMemOverhead = sparkConf.get(SPARK_EXECUTOR_MEMORY_OVERHEAD, null);
+    }
+
+    double roundedMemoryGbSize = getRoundedMemoryGb(executorMem, executorMemOverhead, conf);
+
+    double minRatio = Double.parseDouble(System.getenv(HadoopSparkJob.SPARK_MIN_MEM_VCORE_RATIO_ENV_VAR));
+    double minMemSize = Double.parseDouble(System.getenv(HadoopSparkJob.SPARK_MIN_MEM_SIZE_ENV_VAR));
+
+    logger.info(
+        "RoundedMemoryGbSize: " + roundedMemoryGbSize + ", ExecutorVcore: " + executorVcore + ", MinRatio: " + minRatio
+            + ", MinMemSize: " + minMemSize);
+    return roundedMemoryGbSize / (double) Integer.parseInt(executorVcore) >= minRatio
+        || roundedMemoryGbSize >= minMemSize;
+  }
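+
+  // Worked example (hypothetical settings): executor memory 8G with no overhead rounds
+  // to 9 GB (8192 MB + max(819, 384) = 9011 MB, rounded up to the 1024 MB minimum
+  // allocation). With spark.executor.cores=2 the ratio is 4.5, so if the plugin sets
+  // spark.min.mem.vore.ratio=4 this counts as a large container.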
+
+  protected static String[] handleNodeLabeling(String[] argArray) {
+    // HadoopSparkJob will set env var on this process if we enable node labeling policy for spark jobtype.
+    // We also detect whether the yarn cluster settings have node labeling enabled
+    // Enabling node labeling policy for spark job type is different from enabling node labeling
+    // feature for Yarn. This config inside Spark job type is to enforce node labeling feature for all
+    // Spark applications submitted via Azkaban Spark job type.
+    Configuration conf = new Configuration();
+    boolean nodeLabelingYarn = conf.getBoolean(YARN_CONF_NODE_LABELING_ENABLED, false);
+    String nodeLabelingProp = System.getenv(HadoopSparkJob.SPARK_NODE_LABELING_ENV_VAR);
+    boolean nodeLabelingPolicy = nodeLabelingProp != null && nodeLabelingProp.equals(Boolean.TRUE.toString());
+    String autoNodeLabelProp = System.getenv(HadoopSparkJob.SPARK_AUTO_NODE_LABELING_ENV_VAR);
+    boolean autoNodeLabeling = autoNodeLabelProp != null && autoNodeLabelProp.equals(Boolean.TRUE.toString());
+    String desiredNodeLabel = System.getenv(HadoopSparkJob.SPARK_DESIRED_NODE_LABEL_ENV_VAR);
+
+    SparkConf sparkConf = getSparkProperties();
+
+    if (nodeLabelingYarn && nodeLabelingPolicy) {
+      ignoreUserSpecifiedNodeLabelParameter(argArray, autoNodeLabeling);
+
+      // If auto node labeling is enabled, automatically sets spark.yarn.executor.nodeLabelExpression
+      // config based on user requested resources.
+      if (autoNodeLabeling) {
+        if (isLargeContainerRequired(argArray, conf, sparkConf)) {
+          LinkedList<String> argList = new LinkedList<String>(Arrays.asList(argArray));
+          argList.addFirst(SPARK_EXECUTOR_NODE_LABEL_EXP + "=" + desiredNodeLabel);
+          argList.addFirst(SparkJobArg.SPARK_CONF_PREFIX.sparkParamName);
+          argArray = argList.toArray(new String[argList.size()]);
+        }
+      }
+    }
+    return argArray;
+  }
+
+  /**
+   * This method is used to ignore the user specified node label parameter. When auto node labeling is enabled,
+   * job type should ignore user supplied node label expression for Spark executors.
+   * @param argArray
+   * @param autoNodeLabeling
+   */
+  private static void ignoreUserSpecifiedNodeLabelParameter(String[] argArray, boolean autoNodeLabeling) {
+    for (int i = 0; i < argArray.length; i++) {
+      if (argArray[i] == null) {
+        continue;
+      }
+      if (autoNodeLabeling) {
+        // This config will be automatically set by the job type based on the mem-to-vcore resource ratio requested by
+        // the user application.
+        if (argArray[i].equals(SparkJobArg.SPARK_CONF_PREFIX.sparkParamName) && argArray[i + 1]
+            .startsWith(SPARK_EXECUTOR_NODE_LABEL_EXP)) {
+          logger.info(
+              "Azbakan auto-sets node label expression. Ignore user param: " + argArray[i] + " " + argArray[i + 1]);
+          argArray[i] = null;
+          argArray[++i] = null;
+          continue;
+        }
+      }
+    }
+  }
+
+  /**
+   * This method is used to get User specified executor parameters. It is capturing executor-memory, executor-cores and
+   * spark.yarn.executor.memoryOverhead.
+   * @param argArray
+   * @return
+   */
+  private static Map<String, String> getUserSpecifiedExecutorParameters(String[] argArray) {
+    Map<String, String> executorParameters = Maps.newHashMap();
+    for (int i = 0; i < argArray.length; i++) {
+      if (argArray[i] == null) {
+        continue;
+      }
+      if (argArray[i].equals(SparkJobArg.EXECUTOR_CORES.sparkParamName)) {
+        executorParameters.put(SPARK_EXECUTOR_CORES, argArray[++i]);
+      }
+      if (argArray[i].equals(SparkJobArg.EXECUTOR_MEMORY.sparkParamName)) {
+        executorParameters.put(SPARK_EXECUTOR_MEMORY, argArray[++i]);
+      }
+      if (argArray[i].equals(SparkJobArg.SPARK_CONF_PREFIX.sparkParamName) && argArray[i + 1]
+          .startsWith(SPARK_EXECUTOR_MEMORY_OVERHEAD)) {
+        executorParameters.put(SPARK_EXECUTOR_MEMORY_OVERHEAD, argArray[i + 1].split("=")[1].trim());
+      }
+    }
+    return executorParameters;
+  }
+
+  /**
+   * This method is used to retrieve index of queue parameter passed by User.
+   * @param argArray
+   * @return
+   */
+  private static int getUserSpecifiedQueueParameterIndex(String[] argArray) {
+    int queueParameterIndex = -1;
+    for (int i = 0; i < argArray.length; i++) {
+      if (argArray[i] == null) {
+        continue;
+      }
+      // Fetch index of queue parameter passed by User.
+      // (--queue test or --conf spark.yarn.queue=test)
+      if ((argArray[i].equals(SparkJobArg.SPARK_CONF_PREFIX.sparkParamName) && argArray[i + 1]
+          .startsWith(SPARK_CONF_QUEUE)) || (argArray[i].equals(SparkJobArg.QUEUE.sparkParamName))) {
+        queueParameterIndex = i++;
+        break;
+      }
+    }
+    return queueParameterIndex;
+  }
+
+  /**
+   * This method is used to get Spark properties which will fetch properties from spark-defaults.conf file.
+   * @return
+   */
+  private static SparkConf getSparkProperties() {
+    String sparkPropertyFile = HadoopSecureSparkWrapper.class.getClassLoader()
+        .getResource("spark-defaults.conf").getPath();
+    SparkConf sparkConf = new SparkConf(false);
+    sparkConf.setAll(Utils.getPropertiesFromFile(sparkPropertyFile));
+    return sparkConf;
+  }
+
+  /**
+   * Get the memory GB size of Spark executor containers. The logic is as follows:
+   * 1) Transforms requested memory String into a number representing amount of MBs requested.
+   * 2a) If memory overhead is not set by the user, use the default logic to calculate it,
+   * which is to add max(requestedMemInMB * 10%, 384) to the requested memory size.
+   * 2b) If memory overhead is set by the user, directly add it.
+   * 3) Use the logic inside YARN to round up the container size according to defined min
+   * allocation for memory size.
+   * 4) Return the memory GB size.
+   * @param mem requested executor memory size, of the format 2G or 1024M
+   * @param memOverhead user defined memory overhead
+   * @param config Hadoop Configuration object
+   * @return the rounded executor memory GB size
+   */
+  private static double getRoundedMemoryGb(String mem, String memOverhead,
+      Configuration config) {
+    long memoryMb = JavaUtils.byteStringAsMb(mem);
+    if (memOverhead == null || !NumberUtils.isDigits(memOverhead)) {
+      memoryMb += Math.max(memoryMb / 10, 384);
+    } else {
+      memoryMb += Long.parseLong(memOverhead);
+    }
+    int increment = config.getInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB,
+        YarnConfiguration.DEFAULT_RM_SCHEDULER_MINIMUM_ALLOCATION_MB);
+    return Math.ceil(memoryMb * 1.0 / increment) * increment / 1024;
+  }
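+
+  // Worked example: getRoundedMemoryGb("2G", null, conf) = 2048 MB + max(204, 384)
+  // = 2432 MB, rounded up to 3072 MB with the default 1024 MB minimum allocation,
+  // i.e. 3.0 GB.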
+
+  protected static String[] removeNullsFromArgArray(String[] argArray) {
+    List<String> argList = new ArrayList<String>(Arrays.asList(argArray));
+    argList.removeAll(Collections.singleton(null));
+    return argList.toArray(new String[argList.size()]);
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopSecureWrapperUtils.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopSecureWrapperUtils.java
new file mode 100644
index 0000000..77e7d3a
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopSecureWrapperUtils.java
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2015 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import static org.apache.hadoop.security.UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.token.Token;
+import org.apache.log4j.Logger;
+
+import azkaban.jobExecutor.ProcessJob;
+import azkaban.security.commons.HadoopSecurityManager;
+
+/**
+ * <pre>
+ * There are many common methods that are required by the HadoopSecure*Wrapper classes. They are all consolidated
+ * here.
+ * </pre>
+ *
+ * @see HadoopSecurePigWrapper
+ * @see HadoopSecureHiveWrapper
+ * @see HadoopSecureSparkWrapper
+ */
+public class HadoopSecureWrapperUtils {
+
+  /**
+   * Perform all the magic required to get the proxyUser in a security-enabled grid
+   *
+   * @param userToProxy
+   * @return a UserGroupInformation object for the specified userToProxy, which will also contain
+   *         the logged in user's tokens
+   * @throws IOException
+   */
+  private static UserGroupInformation createSecurityEnabledProxyUser(String userToProxy, String filelocation, Logger log
+          ) throws IOException {
+
+    if (!new File(filelocation).exists()) {
+      throw new RuntimeException("hadoop token file doesn't exist.");
+    }
+
+    log.info("Found token file.  Setting " + HadoopSecurityManager.MAPREDUCE_JOB_CREDENTIALS_BINARY
+            + " to " + filelocation);
+    System.setProperty(HadoopSecurityManager.MAPREDUCE_JOB_CREDENTIALS_BINARY, filelocation);
+
+    UserGroupInformation loginUser = UserGroupInformation.getLoginUser();
+    log.info("Current logged in user is " + loginUser.getUserName());
+
+    UserGroupInformation proxyUser = UserGroupInformation.createProxyUser(userToProxy, loginUser);
+
+    for (Token<?> token : loginUser.getTokens()) {
+      proxyUser.addToken(token);
+    }
+    proxyUser.addCredentials(loginUser.getCredentials());
+    return proxyUser;
+  }
+
+  /**
+   * Sets up the UserGroupInformation proxyUser object so that calling code can call doAs. Returns null
+   * if the jobProps do not call for a proxyUser
+   *
+   * @param jobPropsIn
+   * @param tokenFile
+   *          pass tokenFile if known. Pass null if the tokenFile is in the environment variable
+   *          already.
+   * @param log
+   * @return returns null if no need to run as proxyUser, otherwise returns valid proxyUser that can
+   *         doAs
+   */
+  public static UserGroupInformation setupProxyUser(Properties jobProps,
+      String tokenFile, Logger log) {
+    UserGroupInformation proxyUser = null;
+
+    if (!HadoopSecureWrapperUtils.shouldProxy(jobProps)) {
+      log.info("submitting job as original submitter, not proxying");
+      return proxyUser;
+    }
+
+    // set up hadoop related configurations
+    final Configuration conf = new Configuration();
+    UserGroupInformation.setConfiguration(conf);
+    boolean securityEnabled = UserGroupInformation.isSecurityEnabled();
+
+    // setting up proxy user if required
+    try {
+      String userToProxy = jobProps.getProperty(HadoopSecurityManager.USER_TO_PROXY);
+      if (securityEnabled) {
+        proxyUser =
+            HadoopSecureWrapperUtils.createSecurityEnabledProxyUser(
+                userToProxy, tokenFile, log);
+        log.info("security enabled, proxying as user " + userToProxy);
+      } else {
+        proxyUser = UserGroupInformation.createRemoteUser(userToProxy);
+        log.info("security not enabled, proxying as user " + userToProxy);
+      }
+    } catch (IOException e) {
+      log.error("HadoopSecureWrapperUtils.setupProxyUser threw an IOException",
+          e);
+    }
+
+    return proxyUser;
+  }
+
+  /**
+   * Loading the properties file, which is a combination of the jobProps file and sysProps file
+   *
+   * @return a Property file, which is the combination of the jobProps file and sysProps file
+   * @throws IOException
+   * @throws FileNotFoundException
+   */
+  @SuppressWarnings("DefaultCharset")
+  public static Properties loadAzkabanProps() throws IOException, FileNotFoundException {
+    String propsFile = System.getenv(ProcessJob.JOB_PROP_ENV);
+    Properties props = new Properties();
+    try (BufferedReader reader = new BufferedReader(new FileReader(propsFile))) {
+      props.load(reader);
+    }
+    return props;
+  }
+
+  /**
+   * Looks for particular properties inside the Properties object passed in, and determines whether
+   * proxying should happen or not
+   *
+   * @param props
+   * @return a boolean value of whether the job should proxy or not
+   */
+  public static boolean shouldProxy(Properties props) {
+    String shouldProxy = props.getProperty(HadoopSecurityManager.ENABLE_PROXYING);
+    return shouldProxy != null && shouldProxy.equals("true");
+  }
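+
+  // Note: proxying is enabled only when the ENABLE_PROXYING property is the exact
+  // string "true"; an unset value or, e.g., "True" returns false.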
+
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopShell.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopShell.java
new file mode 100644
index 0000000..b9764e9
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopShell.java
@@ -0,0 +1,146 @@
+/*
+ * Copyright 2016 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import static org.apache.hadoop.security.UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import org.apache.log4j.Logger;
+
+import azkaban.flow.CommonJobProperties;
+import azkaban.jobExecutor.ProcessJob;
+import azkaban.security.commons.HadoopSecurityManager;
+import azkaban.utils.Props;
+
+/**
+ * HadoopShell is a Hadoop security enabled "command" jobtype. This jobtype
+ * adheres to the same format and other details as the "command" jobtype.
+ *
+ * @author gaggarwa
+ *
+ */
+public class HadoopShell extends ProcessJob {
+	private String userToProxy = null;
+	private boolean shouldProxy = false;
+	private boolean obtainTokens = false;
+	private File tokenFile = null;
+	public static final String HADOOP_OPTS = ENV_PREFIX + "HADOOP_OPTS";
+	public static final String HADOOP_GLOBAL_OPTS = "hadoop.global.opts";
+	public static final String WHITELIST_REGEX = "command.whitelist.regex";
+	public static final String BLACKLIST_REGEX = "command.blacklist.regex";
+
+	private HadoopSecurityManager hadoopSecurityManager;
+
+	public HadoopShell(String jobid, Props sysProps, Props jobProps, Logger log) throws RuntimeException {
+		super(jobid, sysProps, jobProps, log);
+
+		shouldProxy = getSysProps().getBoolean(HadoopSecurityManager.ENABLE_PROXYING, false);
+		getJobProps().put(HadoopSecurityManager.ENABLE_PROXYING, Boolean.toString(shouldProxy));
+		obtainTokens = getSysProps().getBoolean(HadoopSecurityManager.OBTAIN_BINARY_TOKEN, false);
+
+		if (shouldProxy) {
+			getLog().info("Initiating hadoop security manager.");
+			try {
+				hadoopSecurityManager = HadoopJobUtils.loadHadoopSecurityManager(getSysProps(), log);
+			} catch (RuntimeException e) {
+				e.printStackTrace();
+				throw new RuntimeException("Failed to get hadoop security manager!" + e.getCause());
+			}
+		}
+	}
+
+	@Override
+	public void run() throws Exception {
+		setupHadoopOpts(getJobProps());
+		HadoopConfigurationInjector.prepareResourcesToInject(getJobProps(), getWorkingDirectory());
+		if (shouldProxy && obtainTokens) {
+			userToProxy = getJobProps().getString("user.to.proxy");
+			getLog().info("Need to proxy. Getting tokens.");
+			Props props = new Props();
+			props.putAll(getJobProps());
+			props.putAll(getSysProps());
+
+			tokenFile = HadoopJobUtils.getHadoopTokens(hadoopSecurityManager, props, getLog());
+			getJobProps().put("env." + HADOOP_TOKEN_FILE_LOCATION, tokenFile.getAbsolutePath());
+		}
+		try {
+			super.run();
+		} catch (Exception e) {
+			e.printStackTrace();
+			throw new Exception(e);
+		} finally {
+			if (tokenFile != null) {
+				try {
+					HadoopJobUtils.cancelHadoopTokens(hadoopSecurityManager, userToProxy, tokenFile, getLog());
+				} catch (Throwable t) {
+					t.printStackTrace();
+					getLog().error("Failed to cancel tokens.");
+				}
+				if (tokenFile.exists()) {
+					tokenFile.delete();
+				}
+			}
+		}
+	}
+
+	/**
+	 * Append HADOOP_GLOBAL_OPTS to HADOOP_OPTS in the given props
+	 *
+	 * @param props
+	 */
+	private void setupHadoopOpts(Props props) {
+		if (props.containsKey(HADOOP_GLOBAL_OPTS)) {
+			String hadoopGlobalOps = props.getString(HADOOP_GLOBAL_OPTS);
+			if (props.containsKey(HADOOP_OPTS)) {
+				String hadoopOps = props.getString(HADOOP_OPTS);
+				props.put(HADOOP_OPTS, String.format("%s %s", hadoopOps, hadoopGlobalOps));
+			} else {
+				props.put(HADOOP_OPTS, hadoopGlobalOps);
+			}
+		}
+	}
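+
+	// Illustration (hypothetical values, assuming ProcessJob.ENV_PREFIX resolves to "env."):
+	// with hadoop.global.opts=-Dhadoop.root.logger=INFO,console and an existing
+	// env.HADOOP_OPTS=-Xmx1g, the job ends up with env.HADOOP_OPTS="-Xmx1g -Dhadoop.root.logger=INFO,console".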
+
+	@Override
+	protected List<String> getCommandList() {
+		// Use the same parsing logic as in the default "command" job
+		List<String> commands = super.getCommandList();
+		return HadoopJobUtils.filterCommands(commands, getSysProps().getString(WHITELIST_REGEX, HadoopJobUtils.MATCH_ALL_REGEX), // ".*" will match everything
+				getSysProps().getString(BLACKLIST_REGEX, HadoopJobUtils.MATCH_NONE_REGEX), getLog()); // ".^" will match nothing
+	}
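+
+	// Illustration (hypothetical plugin settings): with
+	//   command.whitelist.regex=hadoop.*
+	//   command.blacklist.regex=.*\brm\b.*
+	// presumably only commands that match the whitelist regex and do not match the
+	// blacklist regex are kept; by default ".*" whitelists everything and ".^" blacklists nothing.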
+
+	/**
+	 * This cancel method, in addition to the default canceling behavior, also
+	 * kills the MR jobs launched by this job on Hadoop
+	 */
+	@Override
+	public void cancel() throws InterruptedException {
+		super.cancel();
+
+		info("Cancel called.  Killing the launched Hadoop jobs on the cluster");
+
+		String azExecId = jobProps.getString(CommonJobProperties.EXEC_ID);
+		final String logFilePath = String.format("%s/_job.%s.%s.log", getWorkingDirectory(), azExecId, getId());
+		info("log file path is: " + logFilePath);
+
+		HadoopJobUtils.proxyUserKillAllSpawnedHadoopJobs(logFilePath, jobProps, tokenFile, getLog());
+	}
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopSparkJob.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopSparkJob.java
new file mode 100644
index 0000000..2373b0c
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/HadoopSparkJob.java
@@ -0,0 +1,693 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import static azkaban.security.commons.HadoopSecurityManager.ENABLE_PROXYING;
+import static azkaban.security.commons.HadoopSecurityManager.OBTAIN_BINARY_TOKEN;
+import static org.apache.hadoop.security.UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION;
+
+import azkaban.flow.CommonJobProperties;
+import azkaban.jobExecutor.JavaProcessJob;
+import azkaban.security.commons.HadoopSecurityManager;
+import azkaban.utils.Props;
+import azkaban.utils.StringUtils;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.StringTokenizer;
+import org.apache.commons.lang.math.NumberUtils;
+import org.apache.log4j.Logger;
+import org.apache.tools.ant.DirectoryScanner;
+
+/**
+ * <pre>
+ * The Azkaban adaptor for running a Spark Submit job.
+ * Use this in conjunction with  {@link azkaban.jobtype.HadoopSecureSparkWrapper}
+ *
+ * This class is used by azkaban executor to build the classpath, main args, env and jvm props
+ * for HadoopSecureSparkWrapper. Executor will then launch the job process and run
+ * HadoopSecureSparkWrapper. HadoopSecureSparkWrapper will be the Spark client wrapper
+ * that uses the main args to launch spark-submit.
+ *
+ * Expect the following jobtype properties:
+ *
+ * spark.home (client default SPARK_HOME if user doesn't give a spark-version)
+ *             Conf will be either SPARK_CONF_DIR (we do not override it) or {spark.home}/conf
+ *
+ * spark.1.6.0.home (spark.{version}.home is REQUIRED for the {version} that we want to support.
+ *                  e.g. user can use spark 1.6.0 by setting spark-version=1.6.0 in their job property.
+ *                  This class will then look for plugin property spark.1.6.0.home to get the proper spark
+ *                  bin/conf to launch the client)
+ *
+ * spark.1.6.0.conf (OPTIONAL. spark.{version}.conf is the conf used for the {version}.
+ *                  If not specified, the conf of this {version} will be spark.{version}.home/conf)
+ *
+ * spark.base.dir To reduce dependency on azkaban-jobtype plugin properties every time a new spark binary is available,
+ *                this property needs to be set. It specifies the path where spark binaries are kept.
+ *                If spark.{sparkVersion}.home is set in commonprivate.properties/private.properties,
+ *                then that will be returned. If spark.{sparkVersion}.home is not set and spark.base.dir is set then
+ *                it will retrieve Spark directory inside spark.base.dir, matching spark.home.prefix + sparkVersion pattern.
+ *
+ * spark.dynamic.res.alloc.enforced (set to true if we want to enforce dynamic resource allocation policy.
+ *                  Enabling dynamic allocation policy for spark job type is different from enabling dynamic
+ *                  allocation feature for Spark. This config inside Spark job type is to enforce dynamic
+ *                  allocation feature for all Spark applications submitted via Azkaban Spark job type.
+ *                  If set to true, our client wrapper will make sure the user does not override dynamic
+ *                  allocation related conf. It expects the presence of SPARK-13723 in the version of
+ *                  Spark deployed in the cluster, so user explicitly setting num-executors will not
+ *                  disable dynamic allocation. If this parameter is enabled, we suggest the spark cluster
+ *                  should set up dynamic allocation properly and set related conf in spark-defaults.conf)
+ *
+ * spark.node.labeling.enforced (set to true if we want to enforce node labeling policy.
+ *                  Enabling node labeling policy for spark job type is different from enabling node
+ *                  labeling feature in YARN. This config inside Spark job type is to enforce node
+ *                  labeling is used for all Spark applications submitted via Azkaban Spark job type.
+ *                  If set to true, our client wrapper will ignore user specified queue. If this
+ *                  is enabled, we suggest to enable node labeling in yarn cluster, and also set
+ *                  queue param in spark-defaults.conf)
+ *
+ *
+ * </pre>
+ *
+ * @see azkaban.jobtype.HadoopSecureSparkWrapper
+ */
+public class HadoopSparkJob extends JavaProcessJob {
+
+  // SPARK_HOME ENV VAR for HadoopSecureSparkWrapper(Spark Client)
+  public static final String SPARK_HOME_ENV_VAR = "SPARK_HOME";
+  // SPARK_CONF_DIR ENV VAR for HadoopSecureSparkWrapper(Spark Client)
+  public static final String SPARK_CONF_DIR_ENV_VAR = "SPARK_CONF_DIR";
+  // SPARK JOBTYPE PROPERTY spark.dynamic.res.alloc.enforced
+  public static final String SPARK_DYNAMIC_RES_JOBTYPE_PROPERTY = "spark.dynamic.res.alloc.enforced";
+  // HadoopSecureSparkWrapper ENV VAR if spark.dynamic.res.alloc.enforced is set to true
+  public static final String SPARK_DYNAMIC_RES_ENV_VAR = "SPARK_DYNAMIC_RES_ENFORCED";
+  // SPARK JOBTYPE PROPERTY spark.node.labeling.enforced
+  public static final String SPARK_NODE_LABELING_JOBTYPE_PROPERTY = "spark.node.labeling.enforced";
+  // HadoopSecureSparkWrapper ENV VAR if spark.node.labeling.enforced is set to true
+  public static final String SPARK_NODE_LABELING_ENV_VAR = "SPARK_NODE_LABELING_ENFORCED";
+  // Jobtype property for whether to enable auto node labeling for Spark applications
+  // submitted via the Spark jobtype.
+  public static final String SPARK_AUTO_NODE_LABELING_JOBTYPE_PROPERTY = "spark.auto.node.labeling.enabled";
+  // Env var to be passed to {@HadoopSecureSparkWrapper} for whether auto node labeling
+  // is enabled
+  public static final String SPARK_AUTO_NODE_LABELING_ENV_VAR = "SPARK_AUTO_NODE_LABELING_ENABLED";
+  // Jobtype property to configure the desired node label expression when auto node
+  // labeling is enabled and min mem/vcore ratio is met.
+  public static final String SPARK_DESIRED_NODE_LABEL_JOBTYPE_PROPERTY = "spark.desired.node.label";
+  // Env var to be passed to {@HadoopSecureSparkWrapper} for the desired node label expression
+  public static final String SPARK_DESIRED_NODE_LABEL_ENV_VAR = "SPARK_DESIRED_NODE_LABEL";
+  // Jobtype property to configure the minimum mem/vcore ratio for a Spark application's
+  // executor to be submitted with the desired node label expression.
+  public static final String SPARK_MIN_MEM_VCORE_RATIO_JOBTYPE_PROPERTY = "spark.min.mem.vore.ratio";
+  // Env var to be passed to {@HadoopSecureSparkWrapper} for the value of minimum
+  // mem/vcore ratio
+  public static final String SPARK_MIN_MEM_VCORE_RATIO_ENV_VAR = "SPARK_MIN_MEM_VCORE_RATIO";
+  // Jobtype property to configure the minimum memory size (in GB) for a Spark application's
+  // executor to be submitted with the desired node label expression.
+  public static final String SPARK_MIN_MEM_SIZE_JOBTYPE_PROPERTY = "spark.min.memory-gb.size";
+  // Env var to be passed to {@HadoopSecureSparkWrapper} for the value of minimum
+  // memory size in GB
+  public static final String SPARK_MIN_MEM_SIZE_ENV_VAR = "SPARK_MIN_MEM_GB_SIZE";
+  // Jobtype property to denote base directory where spark binaries are placed
+  public static final String SPARK_BASE_DIR = "spark.base.dir";
+  // Jobtype property to configure prefix of directory of Spark binaries
+  public static final String SPARK_HOME_PREFIX = "spark.home.prefix";
+  // Jobtype property to configure regex which will be replaced by SPARK_VERSION_REGEX_TO_REPLACE_WITH in Spark version
+  // provided by user as a spark-version parameter
+  public static final String SPARK_VERSION_REGEX_TO_REPLACE = "spark.version.regex.to.replace";
+  // Jobtype property to configure regex which will be replacing SPARK_VERSION_REGEX_TO_REPLACE in Spark version
+  // provided by user as a spark-version parameter.
+  public static final String SPARK_VERSION_REGEX_TO_REPLACE_WITH = "spark.version.regex.to.replace.with";
+  // Jobtype property to configure reference document for available spark versions which can be referred by users
+  // in case they don't know which are the valid spark versions
+  public static final String SPARK_REFERENCE_DOCUMENT = "spark.reference.document";
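+
+  // Illustration of the jobtype plugin properties documented above (hypothetical values):
+  //   spark.home=/export/apps/spark/default
+  //   spark.1.6.0.home=/export/apps/spark/spark-1.6.0
+  //   spark.base.dir=/export/apps/spark
+  //   spark.home.prefix=spark-
+  //   spark.dynamic.res.alloc.enforced=true
+  //   spark.node.labeling.enforced=true
+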
+  // Azkaban/Java params
+  private static final String HADOOP_SECURE_SPARK_WRAPPER =
+      HadoopSecureSparkWrapper.class.getName();
+  // Spark configuration property to specify additional Namenodes to fetch tokens for
+  private static final String SPARK_CONF_ADDITIONAL_NAMENODES = "spark.yarn.access.namenodes";
+
+  // security variables
+  private String userToProxy = null;
+
+  private boolean shouldProxy = false;
+
+  private boolean obtainTokens = false;
+
+  private File tokenFile = null;
+
+  private HadoopSecurityManager hadoopSecurityManager;
+
+  public HadoopSparkJob(final String jobid, final Props sysProps, final Props jobProps,
+      final Logger log) {
+    super(jobid, sysProps, jobProps, log);
+
+    getJobProps().put(CommonJobProperties.JOB_ID, jobid);
+
+    this.shouldProxy = getSysProps().getBoolean(ENABLE_PROXYING, false);
+    getJobProps().put(ENABLE_PROXYING, Boolean.toString(this.shouldProxy));
+    this.obtainTokens = getSysProps().getBoolean(OBTAIN_BINARY_TOKEN, false);
+
+    if (this.shouldProxy) {
+      getLog().info("Initiating hadoop security manager.");
+      try {
+        this.hadoopSecurityManager =
+            HadoopJobUtils.loadHadoopSecurityManager(getSysProps(), log);
+      } catch (final RuntimeException e) {
+        throw new RuntimeException("Failed to get hadoop security manager!" + e);
+      }
+    }
+  }
+
+  static String testableGetMainArguments(final Props jobProps, final String workingDir,
+      final Logger log) {
+
+    // if we ever need to recreate a failure scenario in the test case
+    log.debug(jobProps);
+    log.debug(workingDir);
+
+    final List<String> argList = new ArrayList<>();
+
+    // special case handling for DRIVER_JAVA_OPTIONS
+    argList.add(SparkJobArg.DRIVER_JAVA_OPTIONS.sparkParamName);
+    final StringBuilder driverJavaOptions = new StringBuilder();
+    // note the default java opts are communicated through the hadoop conf and
+    // added in the
+    // HadoopSecureSparkWrapper
+    if (jobProps.containsKey(SparkJobArg.DRIVER_JAVA_OPTIONS.azPropName)) {
+      driverJavaOptions.append(" "
+          + jobProps.getString(SparkJobArg.DRIVER_JAVA_OPTIONS.azPropName));
+    }
+    argList.add(driverJavaOptions.toString());
+
+    // Note that execution_jar and params must appear in order, and as the last
+    // 2 params
+    // Because of the position they are specified in the SparkJobArg class, this
+    // should not be an
+    // issue
+    for (final SparkJobArg sparkJobArg : SparkJobArg.values()) {
+      if (!sparkJobArg.needSpecialTreatment) {
+        handleStandardArgument(jobProps, argList, sparkJobArg);
+      } else if (sparkJobArg.equals(SparkJobArg.SPARK_JARS)) {
+        sparkJarsHelper(jobProps, workingDir, log, argList);
+      } else if (sparkJobArg.equals(SparkJobArg.SPARK_CONF_PREFIX)) {
+        sparkConfPrefixHelper(jobProps, argList);
+      } else if (sparkJobArg.equals(SparkJobArg.DRIVER_JAVA_OPTIONS)) {
+        // do nothing because already handled above
+      } else if (sparkJobArg.equals(SparkJobArg.SPARK_FLAG_PREFIX)) {
+        sparkFlagPrefixHelper(jobProps, argList);
+      } else if (sparkJobArg.equals(SparkJobArg.EXECUTION_JAR)) {
+        executionJarHelper(jobProps, workingDir, log, argList);
+      } else if (sparkJobArg.equals(SparkJobArg.PARAMS)) {
+        paramsHelper(jobProps, argList);
+      } else if (sparkJobArg.equals(SparkJobArg.SPARK_VERSION)) {
+        // do nothing since this arg is not a spark-submit argument
+        // it is only used in getClassPaths() below
+      }
+    }
+    return StringUtils
+        .join((Collection<String>) argList, SparkJobArg.delimiter);
+  }
+
+  private static void paramsHelper(final Props jobProps, final List<String> argList) {
+    if (jobProps.containsKey(SparkJobArg.PARAMS.azPropName)) {
+      final String params = jobProps.getString(SparkJobArg.PARAMS.azPropName);
+      final String[] paramsList = params.split(" ");
+      for (final String s : paramsList) {
+        argList.add(s);
+      }
+    }
+  }
+
+  private static void executionJarHelper(final Props jobProps, final String workingDir,
+      final Logger log, final List<String> argList) {
+    if (jobProps.containsKey(SparkJobArg.EXECUTION_JAR.azPropName)) {
+      final String executionJarName =
+          HadoopJobUtils.resolveExecutionJarName(workingDir,
+              jobProps.getString(SparkJobArg.EXECUTION_JAR.azPropName), log);
+      argList.add(executionJarName);
+    }
+  }
+
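+  /**
+   * Turns boolean "flag" job properties that carry the SPARK_FLAG_PREFIX Azkaban prefix into
+   * bare spark-submit switches: a property {prefix}foo=true is emitted as
+   * SPARK_FLAG_PREFIX.sparkParamName + "foo"; entries whose value is not "true" are skipped.
+   * (The concrete prefix and flag strings live in SparkJobArg and are not repeated here.)
+   */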
+  private static void sparkFlagPrefixHelper(final Props jobProps, final List<String> argList) {
+    for (final Entry<String, String> entry : jobProps.getMapByPrefix(
+        SparkJobArg.SPARK_FLAG_PREFIX.azPropName).entrySet()) {
+      if ("true".equalsIgnoreCase(entry.getValue())) {
+        argList.add(SparkJobArg.SPARK_FLAG_PREFIX.sparkParamName
+            + entry.getKey());
+      }
+    }
+  }
+
+  private static void sparkJarsHelper(final Props jobProps, final String workingDir,
+      final Logger log, final List<String> argList) {
+    final String propSparkJars =
+        jobProps.getString(SparkJobArg.SPARK_JARS.azPropName, "");
+    final String jarList =
+        HadoopJobUtils
+            .resolveWildCardForJarSpec(workingDir, propSparkJars, log);
+    if (jarList.length() > 0) {
+      argList.add(SparkJobArg.SPARK_JARS.sparkParamName);
+      argList.add(jarList);
+    }
+  }
+
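+  /**
+   * Expands job properties carrying the SPARK_CONF_PREFIX Azkaban prefix into spark-submit
+   * conf arguments: each {prefix}key=value property is emitted as the conf flag
+   * (SPARK_CONF_PREFIX.sparkParamName) followed by "key=value". For example, a prefixed
+   * property for spark.executor.memory=4g would surface as the flag plus the string
+   * "spark.executor.memory=4g" in the argument list.
+   * (The concrete prefix and flag strings live in SparkJobArg.)
+   */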
+  private static void sparkConfPrefixHelper(final Props jobProps, final List<String> argList) {
+    for (final Entry<String, String> entry : jobProps.getMapByPrefix(
+        SparkJobArg.SPARK_CONF_PREFIX.azPropName).entrySet()) {
+      argList.add(SparkJobArg.SPARK_CONF_PREFIX.sparkParamName);
+      final String sparkConfKeyVal =
+          String.format("%s=%s", entry.getKey(), entry.getValue());
+      argList.add(sparkConfKeyVal);
+    }
+  }
+
+  private static void handleStandardArgument(final Props jobProps,
+      final List<String> argList, final SparkJobArg sparkJobArg) {
+    if (jobProps.containsKey(sparkJobArg.azPropName)) {
+      argList.add(sparkJobArg.sparkParamName);
+      argList.add(jobProps.getString(sparkJobArg.azPropName));
+    }
+  }
+
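+  /**
+   * Returns a classpath entry for the jar or classes directory that the given class was
+   * loaded from: when the code-source location points at the .class file itself, walks up
+   * one parent directory per package component to reach the classpath root; otherwise the
+   * code-source location (typically a jar path or classes directory) is returned as-is.
+   */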
+  private static String getSourcePathFromClass(final Class<?> containedClass) {
+    File file =
+        new File(containedClass.getProtectionDomain().getCodeSource()
+            .getLocation().getPath());
+
+    if (!file.isDirectory() && file.getName().endsWith(".class")) {
+      final String name = containedClass.getName();
+      final StringTokenizer tokenizer = new StringTokenizer(name, ".");
+      while (tokenizer.hasMoreTokens()) {
+        tokenizer.nextElement();
+        file = file.getParentFile();
+      }
+
+      return file.getPath();
+    } else {
+      return containedClass.getProtectionDomain().getCodeSource().getLocation()
+          .getPath();
+    }
+  }
+
+  /**
+   * Add additional namenodes specified in the Spark Configuration
+   * ({@link #SPARK_CONF_ADDITIONAL_NAMENODES}) to the Props provided.
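+   * <p>
+   * For example, a spark-defaults.conf entry like (hypothetical cluster URIs)
+   * <pre>
+   *   spark.yarn.access.namenodes=hdfs://nn-a:8020,hdfs://nn-b:8020
+   * </pre>
+   * results in delegation tokens being requested for both namenodes.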
+   * @param props Props to add additional namenodes to.
+   * @see HadoopJobUtils#addAdditionalNamenodesToProps(Props, String)
+   */
+  void addAdditionalNamenodesFromConf(final Props props) {
+    final String sparkConfDir = getSparkLibConf()[1];
+    final File sparkConfFile = new File(sparkConfDir, "spark-defaults.conf");
+    try {
+      final InputStreamReader inReader =
+          new InputStreamReader(new FileInputStream(sparkConfFile), StandardCharsets.UTF_8);
+      // Use Properties to avoid needing Spark on our classpath
+      final Properties sparkProps = new Properties();
+      sparkProps.load(inReader);
+      inReader.close();
+      final String additionalNamenodes =
+          sparkProps.getProperty(SPARK_CONF_ADDITIONAL_NAMENODES);
+      if (additionalNamenodes != null && additionalNamenodes.length() > 0) {
+        getLog().info("Found property " + SPARK_CONF_ADDITIONAL_NAMENODES +
+            " = " + additionalNamenodes + "; setting additional namenodes");
+        HadoopJobUtils.addAdditionalNamenodesToProps(props, additionalNamenodes);
+      }
+    } catch (final IOException e) {
+      getLog().warn("Unable to load Spark configuration; not adding any additional " +
+          "namenode delegation tokens.", e);
+    }
+  }
+
+  @Override
+  public void run() throws Exception {
+    HadoopConfigurationInjector.prepareResourcesToInject(getJobProps(),
+        getWorkingDirectory());
+
+    if (this.shouldProxy && this.obtainTokens) {
+      this.userToProxy = getJobProps().getString(HadoopSecurityManager.USER_TO_PROXY);
+      getLog().info("Need to proxy. Getting tokens.");
+      // get tokens in to a file, and put the location in props
+      final Props props = new Props();
+      props.putAll(getJobProps());
+      props.putAll(getSysProps());
+      addAdditionalNamenodesFromConf(props);
+      this.tokenFile =
+          HadoopJobUtils
+              .getHadoopTokens(this.hadoopSecurityManager, props, getLog());
+      getJobProps().put("env." + HADOOP_TOKEN_FILE_LOCATION,
+          this.tokenFile.getAbsolutePath());
+    }
+
+    // If dynamic resource allocation or node labeling is enabled in the jobtype properties,
+    // set the corresponding env vars so the client wrapper (HadoopSecureSparkWrapper) can modify
+    // the Spark job conf before calling spark-submit, enforcing dynamic allocation or node
+    // labeling for every Spark job.
+    if (getSysProps().getBoolean(SPARK_DYNAMIC_RES_JOBTYPE_PROPERTY, Boolean.FALSE)) {
+      getJobProps().put("env." + SPARK_DYNAMIC_RES_ENV_VAR, Boolean.TRUE.toString());
+    }
+
+    if (getSysProps().getBoolean(SPARK_NODE_LABELING_JOBTYPE_PROPERTY, Boolean.FALSE)) {
+      getJobProps().put("env." + SPARK_NODE_LABELING_ENV_VAR, Boolean.TRUE.toString());
+    }
+
+    if (getSysProps().getBoolean(SPARK_AUTO_NODE_LABELING_JOBTYPE_PROPERTY, Boolean.FALSE)) {
+      final String desiredNodeLabel = getSysProps().get(SPARK_DESIRED_NODE_LABEL_JOBTYPE_PROPERTY);
+      if (desiredNodeLabel == null) {
+        throw new RuntimeException(SPARK_DESIRED_NODE_LABEL_JOBTYPE_PROPERTY  + " must be configured when " +
+            SPARK_AUTO_NODE_LABELING_JOBTYPE_PROPERTY + " is set to true.");
+      }
+      getJobProps().put("env." + SPARK_AUTO_NODE_LABELING_ENV_VAR, Boolean.TRUE.toString());
+      getJobProps().put("env." + SPARK_DESIRED_NODE_LABEL_ENV_VAR, desiredNodeLabel);
+    }
+
+    if (getSysProps().getBoolean(SPARK_DYNAMIC_RES_JOBTYPE_PROPERTY, Boolean.FALSE) || getSysProps()
+        .getBoolean(SPARK_AUTO_NODE_LABELING_JOBTYPE_PROPERTY, Boolean.FALSE)) {
+      final String minMemVcoreRatio = getSysProps().get(SPARK_MIN_MEM_VCORE_RATIO_JOBTYPE_PROPERTY);
+      final String minMemSize = getSysProps().get(SPARK_MIN_MEM_SIZE_JOBTYPE_PROPERTY);
+      if (minMemVcoreRatio == null || minMemSize == null) {
+        throw new RuntimeException(SPARK_MIN_MEM_SIZE_JOBTYPE_PROPERTY + " and " +
+            SPARK_MIN_MEM_VCORE_RATIO_JOBTYPE_PROPERTY + " must be configured.");
+      }
+      if (!NumberUtils.isNumber(minMemVcoreRatio)) {
+        throw new RuntimeException(SPARK_MIN_MEM_VCORE_RATIO_JOBTYPE_PROPERTY + " is configured as " +
+            minMemVcoreRatio + ", but it must be a number.");
+      }
+      if (!NumberUtils.isNumber(minMemSize)) {
+        throw new RuntimeException(SPARK_MIN_MEM_SIZE_JOBTYPE_PROPERTY + " is configured as " +
+            minMemSize + ", but it must be a number.");
+      }
+      getJobProps().put("env." + SPARK_MIN_MEM_VCORE_RATIO_ENV_VAR, minMemVcoreRatio);
+      getJobProps().put("env." + SPARK_MIN_MEM_SIZE_ENV_VAR, minMemSize);
+    }
+    try {
+      super.run();
+    } catch (final Throwable t) {
+      t.printStackTrace();
+      getLog().error("caught error running the job");
+      throw new Exception(t);
+    } finally {
+      if (this.tokenFile != null) {
+        HadoopJobUtils.cancelHadoopTokens(this.hadoopSecurityManager, this.userToProxy,
+            this.tokenFile, getLog());
+        if (this.tokenFile.exists()) {
+          this.tokenFile.delete();
+        }
+      }
+    }
+  }
+
+  @Override
+  protected String getJavaClass() {
+    return HADOOP_SECURE_SPARK_WRAPPER;
+  }
+
+  @Override
+  protected String getJVMArguments() {
+    String args = super.getJVMArguments();
+
+    final String typeUserGlobalJVMArgs =
+        getJobProps().getString(HadoopJobUtils.JOBTYPE_GLOBAL_JVM_ARGS, null);
+    if (typeUserGlobalJVMArgs != null) {
+      args += " " + typeUserGlobalJVMArgs;
+    }
+    final String typeSysGlobalJVMArgs =
+        getSysProps().getString(HadoopJobUtils.JOBTYPE_GLOBAL_JVM_ARGS, null);
+    if (typeSysGlobalJVMArgs != null) {
+      args += " " + typeSysGlobalJVMArgs;
+    }
+    final String typeUserJVMArgs =
+        getJobProps().getString(HadoopJobUtils.JOBTYPE_JVM_ARGS, null);
+    if (typeUserJVMArgs != null) {
+      args += " " + typeUserJVMArgs;
+    }
+    final String typeSysJVMArgs =
+        getSysProps().getString(HadoopJobUtils.JOBTYPE_JVM_ARGS, null);
+    if (typeSysJVMArgs != null) {
+      args += " " + typeSysJVMArgs;
+    }
+
+    final String typeUserJVMArgs2 =
+        getJobProps().getString(HadoopJobUtils.JVM_ARGS, null);
+    if (typeUserJVMArgs2 != null) {
+      args += " " + typeUserJVMArgs2;
+    }
+    final String typeSysJVMArgs2 =
+        getSysProps().getString(HadoopJobUtils.JVM_ARGS, null);
+    if (typeSysJVMArgs2 != null) {
+      args += " " + typeSysJVMArgs2;
+    }
+
+    if (this.shouldProxy) {
+      info("Setting up secure proxy info for child process");
+      String secure;
+      secure =
+          " -D" + HadoopSecurityManager.USER_TO_PROXY + "="
+              + getJobProps().getString(HadoopSecurityManager.USER_TO_PROXY);
+      final String extraToken =
+          getSysProps().getString(HadoopSecurityManager.OBTAIN_BINARY_TOKEN,
+              "false");
+      if (extraToken != null) {
+        secure +=
+            " -D" + HadoopSecurityManager.OBTAIN_BINARY_TOKEN + "="
+                + extraToken;
+      }
+      info("Secure settings = " + secure);
+      args += secure;
+    } else {
+      info("Not setting up secure proxy info for child process");
+    }
+    return args;
+  }
+
+  @Override
+  protected String getMainArguments() {
+    // Build the main() arguments for HadoopSecureSparkWrapper, which are then
+    // passed to spark-submit
+    return testableGetMainArguments(this.jobProps, getWorkingDirectory(), getLog());
+  }
+
+  @Override
+  protected List<String> getClassPaths() {
+    // The classpath for the process that runs HadoopSecureSparkWrapper
+    final String pluginDir = getSysProps().get("plugin.dir");
+    final List<String> classPath = super.getClassPaths();
+
+    // To add az-core jar classpath
+    classPath.add(getSourcePathFromClass(Props.class));
+
+    // To add az-common jar classpath
+    classPath.add(getSourcePathFromClass(JavaProcessJob.class));
+    classPath.add(getSourcePathFromClass(HadoopSecureHiveWrapper.class));
+    classPath.add(getSourcePathFromClass(HadoopSecurityManager.class));
+
+    classPath.add(HadoopConfigurationInjector.getPath(getJobProps(),
+        getWorkingDirectory()));
+
+    final List<String> typeClassPath =
+        getSysProps().getStringList("jobtype.classpath", null, ",");
+    info("Adding jobtype.classpath: " + typeClassPath);
+    if (typeClassPath != null) {
+      // fill in this when load this jobtype
+      for (final String jar : typeClassPath) {
+        File jarFile = new File(jar);
+        if (!jarFile.isAbsolute()) {
+          jarFile = new File(pluginDir + File.separatorChar + jar);
+        }
+        if (!classPath.contains(jarFile.getAbsolutePath())) {
+          classPath.add(jarFile.getAbsolutePath());
+        }
+      }
+    }
+
+    // Decide spark home/conf and append Spark classpath for the client.
+    final String[] sparkHomeConf = getSparkLibConf();
+
+    classPath.add(sparkHomeConf[0] + "/*");
+    classPath.add(sparkHomeConf[1]);
+
+    final List<String> typeGlobalClassPath =
+        getSysProps().getStringList("jobtype.global.classpath", null, ",");
+    info("Adding jobtype.global.classpath: " + typeGlobalClassPath);
+    if (typeGlobalClassPath != null) {
+      for (final String jar : typeGlobalClassPath) {
+        if (!classPath.contains(jar)) {
+          classPath.add(jar);
+        }
+      }
+    }
+
+    info("Final classpath: " + classPath);
+    return classPath;
+  }
+
+  /**
+   * Retrieves the Spark home and conf locations, using the following logic:
+   * a) If the user has specified a Spark version in the job properties, e.g. spark-version=1.6.0, then
+   *    i) if spark.{sparkVersion}.home is set in commonprivate.properties/private.properties, that value is used;
+   *   ii) if spark.{sparkVersion}.home is not set and spark.base.dir is set, the Spark directory inside
+   *       spark.base.dir matching the spark.home.prefix + sparkVersion pattern is used.
+   * b) If the user has not specified a Spark version, the default spark.home configured in the jobtype
+   *    plugin's config is used.
+   * c) If no Spark home is found by either of the above, a RuntimeException is thrown.
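+   * <p>
+   * A minimal sketch of the assumed jobtype configuration (paths and versions are hypothetical):
+   * <pre>
+   *   # jobtype private.properties
+   *   spark.1.6.0.home=/opt/spark-1.6.0
+   *   spark.1.6.0.conf=/opt/spark-1.6.0/conf
+   *   # fallback used when no per-version home is configured
+   *   spark.base.dir=/opt
+   *   spark.home.prefix=spark-
+   * </pre>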
+   * @return a two-element array: the Spark lib dir path followed by the Spark conf dir path
+   */
+  protected String[] getSparkLibConf() {
+    String sparkHome = null;
+    String sparkConf = null;
+    // If user has specified version in job property. e.g. spark-version=1.6.0
+    final String jobSparkVer = getJobProps().get(SparkJobArg.SPARK_VERSION.azPropName);
+    if (jobSparkVer != null) {
+      info("This job sets spark version: " + jobSparkVer);
+      // Spark jobtype supports this version through plugin's jobtype config
+      sparkHome = getSparkHome(jobSparkVer);
+      sparkConf = getSysProps().get("spark." + jobSparkVer + ".conf");
+      if (sparkConf == null) {
+        sparkConf = sparkHome + "/conf";
+      }
+      info("Using job specific spark: " + sparkHome + " and conf: " + sparkConf);
+      // Override the SPARK_HOME SPARK_CONF_DIR env for HadoopSecureSparkWrapper process(spark client)
+      getJobProps().put("env." + SPARK_HOME_ENV_VAR, sparkHome);
+      getJobProps().put("env." + SPARK_CONF_DIR_ENV_VAR, sparkConf);
+    } else {
+      // User job doesn't give spark-version
+      // Use default spark.home. Configured in the jobtype plugin's config
+      sparkHome = getSysProps().get("spark.home");
+      if (sparkHome == null) {
+        // Use system default SPARK_HOME env
+        sparkHome = System.getenv(SPARK_HOME_ENV_VAR);
+      }
+      sparkConf = (System.getenv(SPARK_CONF_DIR_ENV_VAR) != null) ?
+        System.getenv(SPARK_CONF_DIR_ENV_VAR) : (sparkHome + "/conf");
+      info("Using system default spark: " + sparkHome + " and conf: " + sparkConf);
+    }
+
+    if (sparkHome == null) {
+      throw new RuntimeException("SPARK is not available on the azkaban machine.");
+    } else {
+      final File homeDir = new File(sparkHome);
+      if (!homeDir.exists()) {
+        throw new RuntimeException("SPARK home dir does not exist.");
+      }
+      final File confDir = new File(sparkConf);
+      if (!confDir.exists()) {
+        error("SPARK conf dir does not exist. Will use SPARK_HOME/conf as default.");
+        sparkConf = sparkHome + "/conf";
+      }
+      final File defaultSparkConf = new File(sparkConf + "/spark-defaults.conf");
+      if (!defaultSparkConf.exists()) {
+        throw new RuntimeException("Default Spark config file spark-defaults.conf cannot"
+            + " be found at " + defaultSparkConf);
+      }
+    }
+
+    return new String[]{getSparkLibDir(sparkHome), sparkConf};
+  }
+
+  /**
+   * Gets the Spark home from the plugin's jobtype config.
+   * If spark.{sparkVersion}.home is set in commonprivate.properties/private.properties, that value is returned.
+   * If spark.{sparkVersion}.home is not set and spark.base.dir is set, the Spark directory inside
+   * spark.base.dir matching the spark.home.prefix + sparkVersion pattern is returned. A regex pattern can be
+   * passed via properties to reformat the version string before matching.
+   * @param sparkVersion the Spark version requested by the job, e.g. 1.6.0
+   * @return the resolved Spark home directory
+   */
+  private String getSparkHome(final String sparkVersion) {
+    String sparkHome = getSysProps().get("spark." + sparkVersion + ".home");
+    if (sparkHome == null) {
+      info("Couldn't find spark." + sparkVersion + ".home property.");
+      final String sparkDir = getSysProps().get(SPARK_BASE_DIR);
+      final String sparkHomePrefix =
+          getSysProps().get(SPARK_HOME_PREFIX) != null ? getSysProps().get(SPARK_HOME_PREFIX) : "*";
+      final String replaceTo = getSysProps().get(SPARK_VERSION_REGEX_TO_REPLACE);
+      final String replaceWith =
+          getSysProps().get(SPARK_VERSION_REGEX_TO_REPLACE_WITH) != null ? getSysProps()
+          .get(SPARK_VERSION_REGEX_TO_REPLACE_WITH) : "";
+      final String versionPatternToMatch =
+          sparkHomePrefix + (replaceTo != null ? sparkVersion
+              .replace(replaceTo, replaceWith) : sparkVersion) + "*";
+      info("Looking for spark under " + sparkDir + " with prefix " + sparkHomePrefix + " for version "
+          + sparkVersion + ".");
+      final DirectoryScanner scanner = new DirectoryScanner();
+      scanner.setBasedir(sparkDir);
+      scanner.setIncludes(new String[]{versionPatternToMatch});
+      scanner.scan();
+      final String[] directories = scanner.getIncludedDirectories();
+      if (directories != null && directories.length > 0) {
+        sparkHome = sparkDir + "/" + directories[directories.length - 1];
+      } else {
+        final String sparkReferenceDoc = getSysProps().get(SPARK_REFERENCE_DOCUMENT);
+        final String exceptionMessage =
+            sparkReferenceDoc == null ? "SPARK version specified by User is not available."
+            : "SPARK version specified by User is not available. Available versions are mentioned at: "
+                + sparkReferenceDoc;
+        throw new RuntimeException(exceptionMessage);
+      }
+    }
+    return sparkHome;
+  }
+
+  /**
+   * Given the dir path of Spark Home, return the dir path of Spark lib.
+   * It is either sparkHome/lib or sparkHome/jars based on the version of
+   * Spark chosen by user.
+   * @param sparkHome dir path of Spark Home
+   * @return dir path of Spark lib
+   */
+  private String getSparkLibDir(final String sparkHome) {
+    // sparkHome should have already been checked when this method is invoked
+    final File homeDir = new File(sparkHome);
+    File libDir = new File(homeDir, "lib");
+    if (libDir.exists()) {
+      return libDir.getAbsolutePath();
+    } else {
+      libDir = new File(homeDir, "jars");
+      if (libDir.exists()) {
+        return libDir.getAbsolutePath();
+      } else {
+        throw new RuntimeException("SPARK lib dir does not exist.");
+      }
+    }
+  }
+
+  /**
+   * This cancel method, in addition to the default canceling behavior, also
+   * kills the Spark job on Hadoop
+   */
+  @Override
+  public void cancel() throws InterruptedException {
+    super.cancel();
+
+    info("Cancel called.  Killing the Spark job on the cluster");
+
+    final String azExecId = this.jobProps.getString(CommonJobProperties.EXEC_ID);
+    final String logFilePath =
+        String.format("%s/_job.%s.%s.log", getWorkingDirectory(), azExecId,
+            getId());
+    info("log file path is: " + logFilePath);
+
+    HadoopJobUtils.proxyUserKillAllSpawnedHadoopJobs(logFilePath, this.jobProps,
+        this.tokenFile, getLog());
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/AddExternalPartitionHQL.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/AddExternalPartitionHQL.java
new file mode 100644
index 0000000..9569e32
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/AddExternalPartitionHQL.java
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.azkaban.hive.actions;
+
+class AddExternalPartitionHQL implements HQL {
+  // ALTER TABLE table_name ADD PARTITION (partCol = 'value1') location 'loc1';
+  private final String table;
+  private final String partition;
+  private final String value;
+  private final String location;
+  private final boolean ifNotExists;
+
+  public AddExternalPartitionHQL(String table, String partition, String value,
+      String location, boolean ifNotExists) {
+    // @TODO: Null checks
+    this.table = table;
+    this.partition = partition;
+    this.value = value;
+    this.location = location;
+    this.ifNotExists = ifNotExists;
+  }
+
+  @Override
+  public String toHQL() {
+    String ifNot = ifNotExists ? "IF NOT EXISTS " : "";
+
+    return "ALTER TABLE " + table + " ADD " + ifNot + "PARTITION (" + partition
+        + " = '" + value + "') LOCATION '" + location + "';";
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/AlterTableLocationQL.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/AlterTableLocationQL.java
new file mode 100644
index 0000000..53e9f59
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/AlterTableLocationQL.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.azkaban.hive.actions;
+
+class AlterTableLocationQL implements HQL {
+  private final String table;
+  private final String newLocation;
+
+  public AlterTableLocationQL(String table, String newLocation) {
+    // @TODO: Null checks
+    this.table = table;
+    this.newLocation = newLocation;
+  }
+
+  @Override
+  public String toHQL() {
+    return "ALTER TABLE " + table + " SET LOCATION '" + newLocation + "';";
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/Constants.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/Constants.java
new file mode 100644
index 0000000..450b28b
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/Constants.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.azkaban.hive.actions;
+
+class Constants {
+  static final String DROP_ALL_PARTITIONS_AND_ADD_LATEST =
+      "drop.all.partitions.and.add.newest";
+  static final String UPDATE_TABLE_LOCATION_TO_LATEST =
+      "update.table.location.to.latest";
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/DropAllPartitionsAddLatest.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/DropAllPartitionsAddLatest.java
new file mode 100644
index 0000000..8f2479f
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/DropAllPartitionsAddLatest.java
@@ -0,0 +1,160 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.azkaban.hive.actions;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.log4j.Logger;
+
+import azkaban.jobtype.hiveutils.HiveQueryExecutionException;
+import azkaban.jobtype.hiveutils.HiveQueryExecutor;
+import azkaban.jobtype.hiveutils.azkaban.HiveAction;
+import azkaban.jobtype.hiveutils.azkaban.HiveViaAzkabanException;
+import azkaban.jobtype.hiveutils.util.AzkHiveAction;
+import azkaban.jobtype.hiveutils.util.AzkabanJobPropertyDescription;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Properties;
+
+import static azkaban.jobtype.hiveutils.azkaban.Utils.verifyProperty;
+import static azkaban.jobtype.hiveutils.azkaban.hive.actions.Constants.DROP_ALL_PARTITIONS_AND_ADD_LATEST;
+
+/**
+ * Drop all the existing partitions in the specified table, then add the
+ * latest/greatest/highest directory in the specified subdirectory as the
+ * only partition in the table. This action is suitable for tables with
+ * full replacement policies where we only want one partition.
+ * <p>
+ * For example, if we have a table foo with derived_date partitions that
+ * correspond to the directories /foo/2012-01-01, /foo/2012-01-02 and
+ * /foo/2012-01-03, and currently only derived_date="2012-01-02" is registered,
+ * we want to drop that partition and add "2012-01-03" as the only partition.
+ * <p>
+ * This action makes several assumptions:
+ * <ul>
+ * <li>The databases are named the same as the directories in tableLocations, i.e.
+ * /derived/member corresponds to some database with a table named member</li>
+ * <li>The table has only a single partition column</li>
+ * <li>The partition name corresponds to directories within the specified
+ * tableLocations</li>
+ * <li>The partition values are the same name as the directories within the
+ * specified tableLocations</li>
+ * <li>The partition directories are named in ascending order such that a sort
+ * will give us the one to add and we will drop all but that one</li>
+ * </ul>
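+ * <p>
+ * A sketch of the job properties this action reads (values are hypothetical):
+ * <pre>
+ *   azk.hive.action=drop.all.partitions.and.add.newest
+ *   hive.database=u_jhoman
+ *   hive.tables=member,company
+ *   hive.partition=datepartition
+ *   hive.tables.location=/data/derived
+ * </pre>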
+ */
+@AzkHiveAction(DROP_ALL_PARTITIONS_AND_ADD_LATEST)
+public class DropAllPartitionsAddLatest implements HiveAction {
+  private final static Logger LOG =
+      Logger
+          .getLogger("com.linkedin.hive.azkaban.hive.actions.DropAllPartitionsAddLatest");
+
+  public static final String DROP_AND_ADD = DROP_ALL_PARTITIONS_AND_ADD_LATEST;
+
+  @AzkabanJobPropertyDescription("Comma-separated list of tables to drop/add partitions to.  All tables must be within the same database")
+  public static final String HIVE_TABLE = "hive.tables";
+  @AzkabanJobPropertyDescription("Database to drop/add partitions within")
+  public static final String HIVE_DATABASE = "hive.database";
+  @AzkabanJobPropertyDescription("Name of partition of drop/add from table, eg. datepartition")
+  public static final String HIVE_PARTITION = "hive.partition";
+  @AzkabanJobPropertyDescription("Directory on hdfs where external table resides, eg /data/derived/. Tables should be in this directory.")
+  public static final String HIVE_TABLES_LOCATION = "hive.tables.location";
+
+  private final String database;
+  private final String[] tables;
+  private final String partition;
+  private HiveQueryExecutor hqe;
+  private String tableLocations;
+
+  public DropAllPartitionsAddLatest(Properties p, HiveQueryExecutor hqe)
+      throws HiveViaAzkabanException {
+    // The goal here is to get to a fluent API ala
+    // LinkedInHive.get("magic")
+    //     .forDatabase("u_jhoman")
+    //     .forTable("zoiks")
+    //     .dropPartition("date-stamp","2012-01-01")
+    //     .addPartition("date-stamp","2012-01-02", "/some/path").go();
+    this.database = verifyProperty(p, HIVE_DATABASE);
+    this.tables = verifyProperty(p, HIVE_TABLE).split(",");
+    this.partition = verifyProperty(p, HIVE_PARTITION);
+    this.tableLocations = verifyProperty(p, HIVE_TABLES_LOCATION);
+    this.hqe = hqe;
+  }
+
+  @Override
+  public void execute() throws HiveViaAzkabanException {
+    ArrayList<HQL> hql = new ArrayList<HQL>();
+    hql.add(new UseDatabaseHQL(database));
+
+    Configuration conf = new Configuration();
+    try {
+      FileSystem fs = FileSystem.get(conf);
+
+      for (String table : tables) {
+        LOG.info("Determining HQL commands for table " + table);
+        hql.addAll(addAndDrop(fs, tableLocations, table));
+      }
+      fs.close();
+    } catch (IOException e) {
+      throw new HiveViaAzkabanException(
+          "Exception fetching the directories/partitions from HDFS", e);
+    }
+
+    StringBuffer query = new StringBuffer();
+    for (HQL q : hql) {
+      query.append(q.toHQL()).append("\n");
+    }
+
+    System.out.println("Query to execute:\n" + query.toString());
+    try {
+      hqe.executeQuery(query.toString());
+    } catch (HiveQueryExecutionException e) {
+      throw new HiveViaAzkabanException("Problem executing query ["
+          + query.toString() + "] on Hive", e);
+    }
+
+  }
+
+  private ArrayList<HQL> addAndDrop(FileSystem fs, String basepath, String table)
+      throws IOException, HiveViaAzkabanException {
+    ArrayList<HQL> toDropAndAdd = new ArrayList<HQL>();
+    ArrayList<String> directories = null;
+
+    directories = Utils.fetchDirectories(fs, basepath + "/" + table, false);
+
+    if (directories.size() == 0) {
+      throw new HiveViaAzkabanException(
+          "No directories to remove or add found in " + tableLocations);
+    }
+
+    Collections.sort(directories);
+
+    String toAdd = directories.remove(directories.size() - 1);
+
+    LOG.info("For table " + table + ", going to add " + toAdd
+        + " and attempt to drop " + directories.size() + " others");
+    for (String directory : directories) {
+      toDropAndAdd.add(new DropPartitionHQL(table, partition, directory, true));
+    }
+
+    toDropAndAdd.add(new AddExternalPartitionHQL(table, partition, toAdd,
+        toAdd, true));
+    return toDropAndAdd;
+  }
+
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/DropPartitionHQL.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/DropPartitionHQL.java
new file mode 100644
index 0000000..51a134a
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/DropPartitionHQL.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.azkaban.hive.actions;
+
+class DropPartitionHQL implements HQL {
+  private final String table;
+  private final String partition;
+  private final String value;
+  private final boolean ifExists;
+
+  DropPartitionHQL(String table, String partition, String value,
+      boolean ifExists) {
+    // @TODO: Null checks
+    this.table = table;
+    this.partition = partition;
+    this.value = value;
+    this.ifExists = ifExists;
+  }
+
+  @Override
+  public String toHQL() {
+    String exists = ifExists ? "IF EXISTS " : "";
+
+    return "ALTER TABLE " + table + " DROP " + exists + "PARTITION ("
+        + partition + "='" + value + "');";
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/ExecuteHiveQuery.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/ExecuteHiveQuery.java
new file mode 100644
index 0000000..cd6019e
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/ExecuteHiveQuery.java
@@ -0,0 +1,200 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.azkaban.hive.actions;
+
+import java.io.FileInputStream;
+import java.nio.charset.StandardCharsets;
+import org.apache.log4j.Logger;
+
+import azkaban.jobtype.hiveutils.HiveQueryExecutionException;
+import azkaban.jobtype.hiveutils.HiveQueryExecutor;
+import azkaban.jobtype.hiveutils.azkaban.Utils;
+import azkaban.jobtype.hiveutils.azkaban.HiveAction;
+import azkaban.jobtype.hiveutils.azkaban.HiveViaAzkabanException;
+import azkaban.jobtype.hiveutils.util.AzkabanJobPropertyDescription;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.InputStreamReader;
+import java.io.IOException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Properties;
+
+/**
+ * Execute the provided Hive query. Queries can be specified to Azkaban either
+ * directly or as a pointer to a file provided with workflow.
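+ * <p>
+ * Exactly one of the following forms must be set (values are hypothetical):
+ * <pre>
+ *   hive.query=SHOW TABLES;
+ *   # or
+ *   hive.query.file=scripts/report.hql
+ *   # or
+ *   hive.query.url=http://example.com/report.hql
+ *   # or a zero-padded multi-line query
+ *   hive.query.01=USE u_jhoman;
+ *   hive.query.02=SELECT COUNT(*) FROM member;
+ * </pre>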
+ */
+public class ExecuteHiveQuery implements HiveAction {
+  private final static Logger LOG = Logger
+      .getLogger("com.linkedin.hive.azkaban.hive.actions.ExecuteHiveQuery");
+  @AzkabanJobPropertyDescription("Verbatim query to execute. Can also specify hive.query.nn where nn is a series of padded numbers, which will be executed in order")
+  public static final String HIVE_QUERY = "hive.query";
+  @AzkabanJobPropertyDescription("File to load query from.  Should be in same zip.")
+  public static final String HIVE_QUERY_FILE = "hive.query.file";
+  @AzkabanJobPropertyDescription("URL to retrieve the query from.")
+  public static final String HIVE_QUERY_URL = "hive.query.url";
+
+  private final HiveQueryExecutor hqe;
+  private final String q;
+
+  public ExecuteHiveQuery(Properties properties, HiveQueryExecutor hqe)
+      throws HiveViaAzkabanException {
+    String singleLine = properties.getProperty(HIVE_QUERY);
+    String multiLine = extractMultilineQuery(properties);
+    String queryFile = extractQueryFromFile(properties);
+    String queryURL = extractQueryFromURL(properties);
+
+    this.q = determineQuery(singleLine, multiLine, queryFile, queryURL);
+    this.hqe = hqe;
+  }
+
+  @SuppressWarnings("Finally")
+  private String extractQueryFromFile(Properties properties)
+      throws HiveViaAzkabanException {
+    String file = properties.getProperty(HIVE_QUERY_FILE);
+
+    if (file == null)
+      return null;
+
+    LOG.info("Attempting to read query from file: " + file);
+
+    StringBuilder contents = new StringBuilder();
+    BufferedReader br = null;
+    try {
+//      br = new BufferedReader(new FileReader(file));
+      br = new BufferedReader(new InputStreamReader(
+          new FileInputStream(file), StandardCharsets.UTF_8));
+
+      String line;
+
+      while ((line = br.readLine()) != null) {
+        contents.append(line);
+        contents.append(System.getProperty("line.separator"));
+      }
+
+    } catch (IOException e) {
+      throw new HiveViaAzkabanException(e);
+    } finally {
+      if (br != null)
+        try {
+          br.close();
+        } catch (IOException e) {
+          // TODO: Just throw IOException and catch-wrap in the constructor...
+          throw new HiveViaAzkabanException(e);
+        }
+    }
+
+    return contents.toString();
+  }
+
+  @SuppressWarnings("Finally")
+  private String extractQueryFromURL(Properties properties)
+      throws HiveViaAzkabanException {
+    String url = properties.getProperty(HIVE_QUERY_URL);
+
+    if (url == null)
+      return null;
+
+    LOG.info("Attempting to retrieve query from URL: " + url);
+
+    StringBuilder contents = new StringBuilder();
+    BufferedReader br = null;
+
+    try {
+      URL queryURL = new URL(url);
+
+      br = new BufferedReader(new InputStreamReader(queryURL.openStream(), StandardCharsets.UTF_8));
+      String line;
+
+      while ((line = br.readLine()) != null) {
+        contents.append(line);
+        contents.append(System.getProperty("line.separator"));
+      }
+    } catch (IOException e) {
+      throw new HiveViaAzkabanException(e);
+    } finally {
+      if (br != null)
+        try {
+          br.close();
+        } catch (IOException e) {
+          // TODO: Just throw IOException and catch-wrap in the constructor...
+          throw new HiveViaAzkabanException(e);
+        }
+    }
+
+    return contents.toString();
+  }
+
+  private String determineQuery(String singleLine, String multiLine,
+      String queryFromFile, String queryFromURL) throws HiveViaAzkabanException {
+    int specifiedValues = 0;
+
+    for (String s : new String[] { singleLine, multiLine, queryFromFile,
+        queryFromURL }) {
+      if (s != null)
+        specifiedValues++;
+    }
+
+    if (specifiedValues == 0)
+      throw new HiveViaAzkabanException("Must specify " + HIVE_QUERY + " xor "
+          + HIVE_QUERY + ".nn xor " + HIVE_QUERY_FILE + " xor "
+          + HIVE_QUERY_URL + " in properties. Exiting.");
+
+    if (specifiedValues != 1)
+      throw new HiveViaAzkabanException("Must specify only " + HIVE_QUERY
+          + " or " + HIVE_QUERY + ".nn or " + HIVE_QUERY_FILE + " or "
+          + HIVE_QUERY_URL + " in properties, not more than one. Exiting.");
+
+    if (singleLine != null) {
+      LOG.info("Returning " + HIVE_QUERY + " = " + singleLine);
+      return singleLine;
+    } else if (multiLine != null) {
+      LOG.info("Returning consolidated " + HIVE_QUERY + ".nn = " + multiLine);
+      return multiLine;
+    } else if (queryFromFile != null) {
+      LOG.info("Returning query from file " + queryFromFile);
+      return queryFromFile;
+    } else {
+      LOG.info("Returning query from URL " + queryFromURL);
+      return queryFromURL;
+    }
+  }
+
+  private String extractMultilineQuery(Properties properties) {
+    ArrayList<String> lines = new ArrayList<String>();
+
+    for (int i = 0; i < 100; i++) {
+      String padded = String.format("%02d", i);
+      String value = properties.getProperty(HIVE_QUERY + "." + padded);
+      if (value != null) {
+        lines.add(value);
+      }
+    }
+
+    return Utils.joinNewlines(lines);
+  }
+
+  @Override
+  public void execute() throws HiveViaAzkabanException {
+    try {
+      hqe.executeQuery(q);
+    } catch (HiveQueryExecutionException e) {
+      throw new HiveViaAzkabanException(e);
+    }
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/HQL.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/HQL.java
new file mode 100644
index 0000000..5ff8eee
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/HQL.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.azkaban.hive.actions;
+
+interface HQL {
+  String toHQL();
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/UpdateTableLocationToLatest.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/UpdateTableLocationToLatest.java
new file mode 100644
index 0000000..3e37e8f
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/UpdateTableLocationToLatest.java
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.azkaban.hive.actions;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.log4j.Logger;
+
+import azkaban.jobtype.hiveutils.HiveQueryExecutionException;
+import azkaban.jobtype.hiveutils.HiveQueryExecutor;
+import azkaban.jobtype.hiveutils.azkaban.HiveAction;
+import azkaban.jobtype.hiveutils.azkaban.HiveViaAzkabanException;
+import azkaban.jobtype.hiveutils.util.AzkHiveAction;
+import azkaban.jobtype.hiveutils.util.AzkabanJobPropertyDescription;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Properties;
+
+import static azkaban.jobtype.hiveutils.azkaban.Utils.verifyProperty;
+
+/**
+ * Alter the specified table's location to the 'latest' directory found in the
+ * specified base directory, where latest is defined as greatest lexically.
+ * <p>
+ * For example, if we have a base dir /foo with the directories /foo/2012-01-01,
+ * /foo/2012-01-02 and /foo/2012-01-03, and we specify table 'bar', this action
+ * will execute: ALTER TABLE bar SET LOCATION '/foo/2012-01-03';
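+ * <p>
+ * A sketch of the job properties this action reads (values are hypothetical):
+ * <pre>
+ *   azk.hive.action=update.table.location.to.latest
+ *   hive.database=u_jhoman
+ *   hive.tables=bar
+ *   hive.tables.locations=/foo
+ * </pre>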
+ */
+@AzkHiveAction(Constants.UPDATE_TABLE_LOCATION_TO_LATEST)
+public class UpdateTableLocationToLatest implements HiveAction {
+  private final static Logger LOG = Logger
+      .getLogger(UpdateTableLocationToLatest.class);
+
+  public static final String UPDATE_TABLE_LOCATION_TO_LATEST =
+      Constants.UPDATE_TABLE_LOCATION_TO_LATEST;
+
+  @AzkabanJobPropertyDescription("Comma-separated list of tables to update.  All tables must be within the same database")
+  public static final String HIVE_TABLES = "hive.tables";
+  @AzkabanJobPropertyDescription("Comma-separated list of new tables locations base paths. These dirs will be searched for latest directory.  Must correspond to hive.tables.")
+  public static final String HIVE_TABLES_LOCATIONS = "hive.tables.locations";
+  @AzkabanJobPropertyDescription("Database within tables to update are located")
+  public static final String HIVE_DATABASE = "hive.database";
+
+  private final String database;
+  private final String[] tables;
+  private final String[] tablesLocations;
+  private final HiveQueryExecutor hqe;
+
+  public UpdateTableLocationToLatest(Properties p, HiveQueryExecutor hqe)
+      throws HiveViaAzkabanException {
+    this.database = verifyProperty(p, HIVE_DATABASE);
+    this.tables = verifyProperty(p, HIVE_TABLES).split(",");
+    this.tablesLocations = verifyProperty(p, HIVE_TABLES_LOCATIONS).split(",");
+
+    if (tables.length != tablesLocations.length) {
+      throw new HiveViaAzkabanException(HIVE_TABLES + " and "
+          + HIVE_TABLES_LOCATIONS + " don't have same number of elements");
+    }
+
+    this.hqe = hqe;
+  }
+
+  @Override
+  public void execute() throws HiveViaAzkabanException {
+    ArrayList<HQL> hql = new ArrayList<HQL>();
+    hql.add(new UseDatabaseHQL(database));
+
+    Configuration conf = new Configuration();
+    try {
+      FileSystem fs = FileSystem.get(conf);
+
+      for (int i = 0; i < tables.length; i++) {
+        LOG.info("Determining HQL commands for table " + tables[i]);
+        hql.add(latestURI(fs, tablesLocations[i], tables[i]));
+      }
+      fs.close();
+    } catch (IOException e) {
+      throw new HiveViaAzkabanException(
+          "Exception fetching the directories from HDFS", e);
+    }
+    StringBuffer query = new StringBuffer();
+    for (HQL q : hql) {
+      query.append(q.toHQL()).append("\n");
+    }
+
+    System.out.println("Query to execute:\n" + query.toString());
+    try {
+      hqe.executeQuery(query.toString());
+    } catch (HiveQueryExecutionException e) {
+      throw new HiveViaAzkabanException("Problem executing query ["
+          + query.toString() + "] on Hive", e);
+    }
+
+  }
+
+  private HQL latestURI(FileSystem fs, String basePath, String table)
+      throws HiveViaAzkabanException, IOException {
+    ArrayList<String> directories = null;
+
+    // Alter Table Set Location requires full URI...
+    // https://issues.apache.org/jira/browse/HIVE-3860
+    directories = Utils.fetchDirectories(fs, basePath, true);
+
+    if (directories.size() == 0) {
+      throw new HiveViaAzkabanException(
+          "No directories to set as new location in " + basePath);
+    }
+
+    Collections.sort(directories);
+
+    String toAdd = directories.remove(directories.size() - 1);
+
+    return new AlterTableLocationQL(table, toAdd);
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/useDatabaseHQL.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/useDatabaseHQL.java
new file mode 100644
index 0000000..e0e41b7
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/useDatabaseHQL.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.azkaban.hive.actions;
+
+class UseDatabaseHQL implements HQL {
+  private final String database;
+
+  UseDatabaseHQL(String database) {
+    this.database = database;
+  }
+
+  @Override
+  public String toHQL() {
+    return "USE " + database + ";";
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/Utils.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/Utils.java
new file mode 100644
index 0000000..f7c51c0
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/hive/actions/Utils.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.azkaban.hive.actions;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.log4j.Logger;
+
+import azkaban.jobtype.hiveutils.azkaban.HiveViaAzkabanException;
+
+import java.io.IOException;
+import java.util.ArrayList;
+
+class Utils {
+  private final static Logger LOG = Logger.getLogger(Utils.class);
+
+  static ArrayList<String> fetchDirectories(FileSystem fs, String location,
+      boolean returnFullPath) throws IOException, HiveViaAzkabanException {
+    LOG.info("Fetching directories in " + location);
+    Path p = new Path(location);
+    FileStatus[] statuses = fs.listStatus(p);
+
+    if (statuses == null || statuses.length == 0) {
+      throw new HiveViaAzkabanException("Couldn't find any directories in "
+          + location);
+    }
+
+    ArrayList<String> files = new ArrayList<String>(statuses.length);
+    for (FileStatus status : statuses) {
+      if (!status.isDir())
+        continue;
+      if (status.getPath().getName().startsWith("."))
+        continue;
+
+      files.add(returnFullPath ? status.getPath().toString() : status.getPath()
+          .getName());
+    }
+    return files;
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/HiveAction.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/HiveAction.java
new file mode 100644
index 0000000..672265e
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/HiveAction.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.azkaban;
+
+public interface HiveAction {
+  public void execute() throws HiveViaAzkabanException;
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/HiveViaAzkaban.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/HiveViaAzkaban.java
new file mode 100644
index 0000000..a44424f
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/HiveViaAzkaban.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.azkaban;
+
+import java.util.Properties;
+
+import azkaban.jobtype.hiveutils.HiveQueryExecutor;
+import azkaban.jobtype.hiveutils.HiveUtils;
+import azkaban.jobtype.hiveutils.azkaban.hive.actions.DropAllPartitionsAddLatest;
+import azkaban.jobtype.hiveutils.azkaban.hive.actions.ExecuteHiveQuery;
+import azkaban.jobtype.hiveutils.azkaban.hive.actions.UpdateTableLocationToLatest;
+
+/**
+ * Simple Java driver class to execute a Hive query provided via the Properties
+ * file. The query can be specified via:
+ * <ul>
+ * <li>hive.query = a single-line query that will be fed to Hive</li>
+ * <li>hive.query.nn = a two-digit padded series of lines that will be joined and fed to Hive as one big query</li>
+ * </ul>
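+ * <p>
+ * A minimal illustration (the action value comes from this package's constants; the query is
+ * hypothetical):
+ * <pre>
+ *   azk.hive.action=execute.query
+ *   hive.query=SHOW TABLES;
+ * </pre>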
+ */
+public class HiveViaAzkaban {
+  final private static String AZK_HIVE_ACTION = "azk.hive.action";
+  public static final String EXECUTE_QUERY = "execute.query";
+
+  private Properties p;
+
+  public HiveViaAzkaban(String jobName, Properties p) {
+    this.p = p;
+  }
+
+  public void run() throws HiveViaAzkabanException {
+    if (p == null) {
+      throw new HiveViaAzkabanException("Properties is null.  Can't continue");
+    }
+
+    if (!p.containsKey(AZK_HIVE_ACTION)) {
+      throw new HiveViaAzkabanException("Must specify a " + AZK_HIVE_ACTION
+          + " key and value.");
+    }
+    HiveQueryExecutor hqe = HiveUtils.getHiveQueryExecutor();
+    HiveAction action = null;
+    String hive_action = p.getProperty(AZK_HIVE_ACTION);
+    // TODO: Factory time
+    if (hive_action.equals(EXECUTE_QUERY)) {
+      action = new ExecuteHiveQuery(p, hqe);
+    } else if (hive_action.equals(DropAllPartitionsAddLatest.DROP_AND_ADD)) {
+      action = new DropAllPartitionsAddLatest(p, hqe);
+    } else if (hive_action
+        .equals(UpdateTableLocationToLatest.UPDATE_TABLE_LOCATION_TO_LATEST)) {
+      action = new UpdateTableLocationToLatest(p, hqe);
+    } else {
+      throw new HiveViaAzkabanException("Unknown value (" + hive_action
+          + ") for value " + AZK_HIVE_ACTION);
+    }
+
+    action.execute();
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/HiveViaAzkabanException.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/HiveViaAzkabanException.java
new file mode 100644
index 0000000..0e7f6f2
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/HiveViaAzkabanException.java
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.azkaban;
+
+public class HiveViaAzkabanException extends Exception {
+  private static final long serialVersionUID = 1L;
+
+  public HiveViaAzkabanException(String s) {
+    super(s);
+  }
+
+  public HiveViaAzkabanException(Exception e) {
+    super(e);
+  }
+
+  public HiveViaAzkabanException(String s, Exception e) {
+    super(s, e);
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/Utils.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/Utils.java
new file mode 100644
index 0000000..49d69b5
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/azkaban/Utils.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.azkaban;
+
+import java.util.Collection;
+import java.util.Properties;
+
+public class Utils {
+  public static String joinNewlines(Collection<String> strings) {
+    if (strings == null || strings.size() == 0)
+      return null;
+
+    StringBuilder sb = new StringBuilder();
+
+    for (String s : strings) {
+      String trimmed = s.trim();
+      sb.append(trimmed);
+      if (!trimmed.endsWith("\n"))
+        sb.append("\n");
+    }
+
+    return sb.toString();
+  }
+
+  // Hey, look! It's this method again! It's the freaking Where's Waldo of
+  // methods...
+  public static String verifyProperty(Properties p, String key)
+      throws HiveViaAzkabanException {
+    String value = p.getProperty(key);
+    if (value == null) {
+      throw new HiveViaAzkabanException("Can't find property " + key
+          + " in provided Properties. Bailing");
+    }
+    // TODO: Add a log entry here for the value
+    return value;
+
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveMetaStoreBrowserException.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveMetaStoreBrowserException.java
new file mode 100644
index 0000000..09a4dc6
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveMetaStoreBrowserException.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils;
+
+/**
+ * Thrown when unexpected Hive metastore browsing problems come up
+ */
+public class HiveMetaStoreBrowserException extends Exception {
+  private static final long serialVersionUID = 1L;
+
+  public HiveMetaStoreBrowserException(String msg) {
+    super(msg);
+  }
+
+  public HiveMetaStoreBrowserException(Throwable t) {
+    super(t);
+  }
+
+  public HiveMetaStoreBrowserException(String msg, Throwable t) {
+    super(msg, t);
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveModule.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveModule.java
new file mode 100644
index 0000000..e86ac44
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveModule.java
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.apache.hadoop.hive.ql.session.SessionState;
+
+/**
+ * Guice-like module for creating a Hive instance. Easily turned back into a
+ * full Guice module when we have need of it.
+ */
+class HiveModule {
+  /**
+   * Return a Driver that's connected to the real, honest-to-goodness Hive
+   *
+   * @TODO: Better error checking
+   * @return Driver that's connected to Hive
+   */
+  Driver provideHiveDriver() {
+    HiveConf hiveConf = provideHiveConf();
+    SessionState.start(hiveConf);
+
+    return new Driver(hiveConf);
+  }
+
+  HiveConf provideHiveConf() {
+    return new HiveConf(SessionState.class);
+  }
+
+  protected void configure() { /* Nothing to do */
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveQueryException.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveQueryException.java
new file mode 100644
index 0000000..39eb84d
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveQueryException.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils;
+
+public class HiveQueryException extends Exception {
+  private static final long serialVersionUID = 1L;
+  private final String query;
+  private final int code;
+  private final String message;
+
+  public HiveQueryException(String query, int code, String message) {
+    this.query = query;
+    this.code = code;
+    this.message = message;
+  }
+
+  public int getCode() {
+    return code;
+  }
+
+  @Override
+  public String getMessage() {
+    return message;
+  }
+
+  public String getQuery() {
+    return query;
+  }
+
+  @Override
+  public String toString() {
+    return "HiveQueryException{" + "query='" + query + '\'' + ", code=" + code
+        + ", message='" + message + '\'' + '}';
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveQueryExecutionException.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveQueryExecutionException.java
new file mode 100644
index 0000000..f7831f2
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveQueryExecutionException.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils;
+
+/**
+ * Thrown when a query sent for execution ends unsuccessfully.
+ */
+public class HiveQueryExecutionException extends Exception {
+  private static final long serialVersionUID = 1L;
+
+  /**
+   * Query that caused the failure.
+   */
+  private final String query;
+
+  /**
+   * Error code defined by Hive
+   */
+  private final int returnCode;
+
+  public HiveQueryExecutionException(int returnCode, String query) {
+    this.returnCode = returnCode;
+    this.query = query;
+  }
+
+  public String getLine() {
+    return query;
+  }
+
+  public int getReturnCode() {
+    return returnCode;
+  }
+
+  @Override
+  public String toString() {
+    return "HiveQueryExecutionException{" + "query='" + query + '\''
+        + ", returnCode=" + returnCode + '}';
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveQueryExecutor.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveQueryExecutor.java
new file mode 100644
index 0000000..08205d1
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveQueryExecutor.java
@@ -0,0 +1,56 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils;
+
+import java.io.InputStream;
+import java.io.PrintStream;
+
+/**
+ * Utility to execute queries against a Hive database.
+ */
+public interface HiveQueryExecutor {
+  /**
+   * Execute the specified query or queries.
+   *
+   * @param q Query to be executed. Queries may include \n and multiple,
+   *          ;-delimited statements. The entire string is passed to Hive.
+   *
+   * @throws HiveQueryExecutionException if Hive cannot execute a query.
+   */
+  public void executeQuery(String q) throws HiveQueryExecutionException;
+
+  /**
+   * Redirect the query execution's stdout
+   *
+   * @param out
+   */
+  public void setOut(PrintStream out);
+
+  /**
+   * Redirect the query execution's stdin
+   *
+   * @param in
+   */
+  public void setIn(InputStream in);
+
+  /**
+   * Redirect the query execution's stderr
+   *
+   * @param err
+   */
+  public void setErr(PrintStream err);
+}
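
A minimal usage sketch of this interface, assuming the executor is obtained via HiveUtils.getHiveQueryExecutor() (added later in this diff) and that a working Hive client configuration is on the classpath; the statements and output file name are hypothetical.

import java.io.PrintStream;

import azkaban.jobtype.hiveutils.HiveQueryExecutionException;
import azkaban.jobtype.hiveutils.HiveQueryExecutor;
import azkaban.jobtype.hiveutils.HiveUtils;

public class HiveQueryExecutorSketch {
  public static void main(String[] args) throws Exception {
    HiveQueryExecutor executor = HiveUtils.getHiveQueryExecutor();

    // Send query output to a file instead of stdout (file name is hypothetical).
    try (PrintStream out = new PrintStream("query-output.txt")) {
      executor.setOut(out);
      try {
        // Multiple ;-delimited statements may be passed as one string.
        executor.executeQuery("SHOW TABLES;\nSELECT COUNT(*) FROM word_counts;");
      } catch (HiveQueryExecutionException e) {
        System.err.println("Hive returned code " + e.getReturnCode() + " for: " + e.getLine());
      }
    }
  }
}
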
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveQueryExecutorModule.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveQueryExecutorModule.java
new file mode 100644
index 0000000..78ad6d0
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveQueryExecutorModule.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils;
+
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.session.SessionState;
+
+import static azkaban.security.commons.SecurityUtils.MAPREDUCE_JOB_CREDENTIALS_BINARY;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVEHISTORYFILELOC;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.SCRATCHDIR;
+import static org.apache.hadoop.security.UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION;
+
+/**
+ * Guice-like module for creating a Hive instance. Easily turned back into a
+ * full Guice module when we have need of it.
+ */
+class HiveQueryExecutorModule {
+  private HiveConf hiveConf = null;
+  private CliSessionState ss = null;
+
+  HiveConf provideHiveConf() {
+    if (this.hiveConf != null) {
+      return this.hiveConf;
+    } else {
+      this.hiveConf = new HiveConf(SessionState.class);
+    }
+
+    troublesomeConfig(HIVEHISTORYFILELOC, hiveConf);
+    troublesomeConfig(SCRATCHDIR, hiveConf);
+
+    if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) {
+      System.out.println("Setting hadoop tokens ... ");
+      hiveConf.set(MAPREDUCE_JOB_CREDENTIALS_BINARY,
+          System.getenv(HADOOP_TOKEN_FILE_LOCATION));
+      System.setProperty(MAPREDUCE_JOB_CREDENTIALS_BINARY,
+          System.getenv(HADOOP_TOKEN_FILE_LOCATION));
+    }
+
+    return hiveConf;
+  }
+
+  private void troublesomeConfig(HiveConf.ConfVars value, HiveConf hc) {
+    System.out.println("Troublesome config " + value + " = "
+        + HiveConf.getVar(hc, value));
+  }
+
+  CliSessionState provideCliSessionState() {
+    if (ss != null) {
+      return ss;
+    }
+    ss = new CliSessionState(provideHiveConf());
+    SessionState.start(ss);
+    return ss;
+  }
+
+  protected void configure() {
+    /** Nothing to do **/
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveUtils.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveUtils.java
new file mode 100644
index 0000000..d34b264
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/HiveUtils.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.cli.CliDriver;
+import org.apache.log4j.Logger;
+
+/**
+ * Grab bag of utilities for working with Hive. End users should obtain
+ * instances of the provided interfaces from these methods.
+ */
+public class HiveUtils {
+  private final static Logger LOG =
+      Logger.getLogger("com.linkedin.hive.HiveUtils");
+
+  private HiveUtils() {
+  }
+
+  public static HiveQueryExecutor getHiveQueryExecutor() {
+    HiveQueryExecutorModule hqem = new HiveQueryExecutorModule();
+    try {
+      return new RealHiveQueryExecutor(hqem.provideHiveConf(),
+          hqem.provideCliSessionState(), new CliDriver());
+    } catch (Exception e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Normally hive.aux.jars.path is expanded from just being a path to the full
+   * list of files in the directory by the hive shell script. Since we normally
+   * won't be running from the script, it's up to us to do that work here. We
+   * use a heuristic that if there is no occurrence of ".jar" in the original,
+   * it needs expansion. Otherwise it's already been done for us.
+   *
+   * Also, surround the files with URI niceties (a file:/// prefix for each).
+   */
+  static String expandHiveAuxJarsPath(String original) throws IOException {
+    if (original == null || original.contains(".jar"))
+      return original;
+
+    File[] files = new File(original).listFiles();
+
+    if (files == null || files.length == 0) {
+      LOG.info("No files in to expand in aux jar path. Returning original parameter");
+      return original;
+    }
+
+    return filesToURIString(files);
+
+  }
+
+  static String filesToURIString(File[] files) throws IOException {
+    StringBuffer sb = new StringBuffer();
+    for (int i = 0; i < files.length; i++) {
+      sb.append("file:///").append(files[i].getCanonicalPath());
+      if (i != files.length - 1)
+        sb.append(",");
+    }
+
+    return sb.toString();
+  }
+}
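
To make the expansion heuristic above concrete, a short sketch of both branches. expandHiveAuxJarsPath is package-private, so this would have to live in azkaban.jobtype.hiveutils; the paths are hypothetical.

package azkaban.jobtype.hiveutils;

public class AuxJarsExpansionSketch {
  public static void main(String[] args) throws Exception {
    // Contains ".jar", so the heuristic treats it as already expanded and returns it unchanged.
    String alreadyExpanded = "file:///opt/hive/aux/udfs.jar,file:///opt/hive/aux/serde.jar";
    System.out.println(HiveUtils.expandHiveAuxJarsPath(alreadyExpanded));

    // A bare directory: each file underneath becomes a file:/// URI, joined with commas,
    // mirroring what the hive shell script would normally have done.
    String bareDirectory = "/opt/hive/aux";
    System.out.println(HiveUtils.expandHiveAuxJarsPath(bareDirectory));
  }
}
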
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/RealHiveQueryExecutor.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/RealHiveQueryExecutor.java
new file mode 100644
index 0000000..05ccfc6
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/RealHiveQueryExecutor.java
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.cli.CliDriver;
+import org.apache.hadoop.hive.cli.CliSessionState;
+import org.apache.hadoop.hive.cli.OptionsProcessor;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.log4j.Logger;
+
+import java.io.InputStream;
+import java.io.PrintStream;
+
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVEAUXJARS;
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.METASTORECONNECTURLKEY;
+
+class RealHiveQueryExecutor implements HiveQueryExecutor {
+  private final static Logger LOG = Logger
+      .getLogger("com.linkedin.hive.HiveQueryExecutor");
+  private final CliDriver cli;
+  private final CliSessionState ss;
+
+  public RealHiveQueryExecutor(HiveConf hiveConf, CliSessionState ss,
+      CliDriver cli) throws Exception {
+    LOG.info("HiveConf = " + hiveConf);
+    LOG.info("According to the conf, we're talking to the Hive hosted at: "
+        + HiveConf.getVar(hiveConf, METASTORECONNECTURLKEY));
+
+    // Expand out the hive aux jars since there was no shell script to do it
+    // for us
+    String orig = HiveConf.getVar(hiveConf, HIVEAUXJARS);
+    String expanded = HiveUtils.expandHiveAuxJarsPath(orig);
+    if (orig == null || orig.equals(expanded)) {
+      LOG.info("Hive aux jars variable not expanded");
+    } else {
+      LOG.info("Expanded aux jars variable from [" + orig + "] to [" + expanded
+          + "]");
+      HiveConf.setVar(hiveConf, HIVEAUXJARS, expanded);
+    }
+
+    OptionsProcessor op = new OptionsProcessor();
+
+    if (!op.process_stage1(new String[] {})) {
+      throw new IllegalArgumentException("Can't process empty args?!?");
+    }
+
+    if (!ShimLoader.getHadoopShims().usesJobShell()) {
+      // hadoop-20 and above - we need to augment classpath using hiveconf
+      // components
+      // see also: code in ExecDriver.java
+      ClassLoader loader = hiveConf.getClassLoader();
+      String auxJars = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVEAUXJARS);
+      LOG.info("Got auxJars = " + auxJars);
+
+      if (StringUtils.isNotBlank(auxJars)) {
+        loader =
+            Utilities.addToClassPath(loader, StringUtils.split(auxJars, ","));
+      }
+      hiveConf.setClassLoader(loader);
+      Thread.currentThread().setContextClassLoader(loader);
+    }
+
+    this.ss = ss;
+    LOG.info("SessionState = " + ss);
+    ss.out = System.out;
+    ss.err = System.err;
+    ss.in = System.in;
+
+    if (!op.process_stage2(ss)) {
+      throw new IllegalArgumentException(
+          "Can't process arguments from session state");
+    }
+    this.cli = cli;
+    LOG.info("Cli = " + cli);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void executeQuery(String q) throws HiveQueryExecutionException {
+    LOG.info("Executing query: " + q);
+
+    int returnCode = cli.processLine(q);
+    if (returnCode != 0) {
+      LOG.warn("Got exception " + returnCode + " from line: " + q);
+      throw new HiveQueryExecutionException(returnCode, q);
+    }
+
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void setOut(PrintStream out) {
+    ss.out = out;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void setIn(InputStream in) {
+    ss.in = in;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void setErr(PrintStream err) {
+    ss.err = err;
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/ResultSchema.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/ResultSchema.java
new file mode 100644
index 0000000..ea520a8
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/ResultSchema.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils;
+
+/**
+ * Simple class to represent the resulting schema of a Hive query, which may or
+ * may not have been run. We use this simple version of the results rather than
+ * exposing Hive's internal classes in order to avoid tying end users to any
+ * particular version of Hive or its classes.
+ */
+public class ResultSchema {
+  final String name;
+  final String type;
+  final String comment;
+
+  public ResultSchema(String name, String type, String comment) {
+    this.name = name;
+    this.type = type;
+    this.comment = comment;
+  }
+
+  public String getComment() {
+    return comment;
+  }
+
+  public String getName() {
+    return name;
+  }
+
+  public String getType() {
+    return type;
+  }
+
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/util/AzkabanJobPropertyDescription.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/util/AzkabanJobPropertyDescription.java
new file mode 100644
index 0000000..5664ce2
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/util/AzkabanJobPropertyDescription.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.util;
+
+import java.lang.annotation.Documented;
+
+/**
+ * Description of parameter passed to this class via the Azkaban property to
+ * which the annotation is attached.
+ */
+@Documented
+public @interface AzkabanJobPropertyDescription {
+  // @TODO: Actually add the value in since it doesn't show up in the
+  // javadoc... siargh.
+  String value();
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/util/AzkHiveAction.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/util/AzkHiveAction.java
new file mode 100644
index 0000000..ac266be
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/util/AzkHiveAction.java
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.util;
+
+import java.lang.annotation.Documented;
+
+@Documented
+public @interface AzkHiveAction {
+  String value();
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/util/IntendedAudience.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/util/IntendedAudience.java
new file mode 100644
index 0000000..0c2a909
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/hiveutils/util/IntendedAudience.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.hiveutils.util;
+
+import java.lang.annotation.Documented;
+
+/**
+ * Who in LinkedIn this class is aimed at. If specified, other users may have
+ * their complaints fall upon deaf ears. Caveat utilitor!
+ */
+@Documented
+public @interface IntendedAudience {
+  String value();
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/JavaJob.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/JavaJob.java
new file mode 100644
index 0000000..78fc041
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/JavaJob.java
@@ -0,0 +1,165 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import java.io.File;
+import java.util.List;
+import java.util.StringTokenizer;
+
+import org.apache.log4j.Logger;
+
+import azkaban.flow.CommonJobProperties;
+import azkaban.jobExecutor.JavaProcessJob;
+import azkaban.security.commons.SecurityUtils;
+import azkaban.utils.Props;
+
+public class JavaJob extends JavaProcessJob {
+
+  public static final String RUN_METHOD_PARAM = "method.run";
+  public static final String CANCEL_METHOD_PARAM = "method.cancel";
+  public static final String PROGRESS_METHOD_PARAM = "method.progress";
+
+  public static final String JOB_CLASS = "job.class";
+  public static final String DEFAULT_CANCEL_METHOD = "cancel";
+  public static final String DEFAULT_RUN_METHOD = "run";
+  public static final String DEFAULT_PROGRESS_METHOD = "getProgress";
+
+  private String _runMethod;
+  private String _cancelMethod;
+  private String _progressMethod;
+
+  private Object _javaObject = null;
+  private String props;
+
+  public JavaJob(String jobid, Props sysProps, Props jobProps, Logger log) {
+    super(jobid, sysProps, new Props(sysProps, jobProps), log);
+
+    getJobProps().put(CommonJobProperties.JOB_ID, jobid);
+  }
+
+  @Override
+  protected String getJVMArguments() {
+    String args = super.getJVMArguments();
+
+    String typeGlobalJVMArgs =
+        getSysProps().getString("jobtype.global.jvm.args", null);
+    if (typeGlobalJVMArgs != null) {
+      args += " " + typeGlobalJVMArgs;
+    }
+    return args;
+  }
+
+  @Override
+  @SuppressWarnings("CollectionIncompatibleType")
+  protected List<String> getClassPaths() {
+    List<String> classPath = super.getClassPaths();
+
+    classPath.add(getSourcePathFromClass(JavaJobRunnerMain.class));
+    // To add az-core jar classpath
+    classPath.add(getSourcePathFromClass(Props.class));
+
+    // To add az-common jar classpath
+    classPath.add(getSourcePathFromClass(JavaProcessJob.class));
+    classPath.add(getSourcePathFromClass(SecurityUtils.class));
+
+    classPath.add(HadoopConfigurationInjector.getPath(getJobProps(),
+        getWorkingDirectory()));
+
+    String loggerPath = getSourcePathFromClass(Logger.class);
+    if (!classPath.contains(loggerPath)) {
+      classPath.add(loggerPath);
+    }
+
+    // Add hadoop home to classpath
+    String hadoopHome = System.getenv("HADOOP_HOME");
+    if (hadoopHome == null) {
+      info("HADOOP_HOME not set, using default hadoop config.");
+    } else {
+      info("Using hadoop config found in " + hadoopHome);
+      classPath.add(new File(hadoopHome, "conf").getPath());
+    }
+
+    List<String> typeClassPath =
+        getSysProps().getStringList("jobtype.classpath", null, ",");
+    if (typeClassPath != null) {
+      // filled in when this jobtype is loaded
+      String pluginDir = getSysProps().get("plugin.dir");
+      for (String jar : typeClassPath) {
+        File jarFile = new File(jar);
+        if (!jarFile.isAbsolute()) {
+          jarFile = new File(pluginDir + File.separatorChar + jar);
+        }
+
+        if (!classPath.contains(jarFile.getAbsoluteFile())) {
+          classPath.add(jarFile.getAbsolutePath());
+        }
+      }
+    }
+
+    List<String> typeGlobalClassPath =
+        getSysProps().getStringList("jobtype.global.classpath", null, ",");
+    if (typeGlobalClassPath != null) {
+      for (String jar : typeGlobalClassPath) {
+        if (!classPath.contains(jar)) {
+          classPath.add(jar);
+        }
+      }
+    }
+
+    return classPath;
+  }
+
+  private static String getSourcePathFromClass(Class<?> containedClass) {
+    File file =
+        new File(containedClass.getProtectionDomain().getCodeSource()
+            .getLocation().getPath());
+
+    if (!file.isDirectory() && file.getName().endsWith(".class")) {
+      String name = containedClass.getName();
+      StringTokenizer tokenizer = new StringTokenizer(name, ".");
+      while (tokenizer.hasMoreTokens()) {
+        tokenizer.nextElement();
+        file = file.getParentFile();
+      }
+      return file.getPath();
+    } else {
+      return containedClass.getProtectionDomain().getCodeSource().getLocation()
+          .getPath();
+    }
+  }
+
+  @Override
+  protected String getJavaClass() {
+    return JavaJobRunnerMain.class.getName();
+  }
+
+  @Override
+  public String toString() {
+    return "JavaJob{" + "_runMethod='" + _runMethod + '\''
+        + ", _cancelMethod='" + _cancelMethod + '\'' + ", _progressMethod='"
+        + _progressMethod + '\'' + ", _javaObject=" + _javaObject + ", props="
+        + props + '}';
+  }
+
+
+  @Override
+  public void run() throws Exception {
+    HadoopConfigurationInjector.prepareResourcesToInject(getJobProps(),
+        getWorkingDirectory());
+    super.run();
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/JavaJobRunnerMain.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/JavaJobRunnerMain.java
new file mode 100644
index 0000000..0634e2c
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/JavaJobRunnerMain.java
@@ -0,0 +1,360 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import azkaban.jobExecutor.ProcessJob;
+import azkaban.utils.JSONUtils;
+import azkaban.utils.Props;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.log4j.ConsoleAppender;
+import org.apache.log4j.Layout;
+import org.apache.log4j.Logger;
+import org.apache.log4j.PatternLayout;
+
+import azkaban.security.commons.SecurityUtils;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.Writer;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.security.PrivilegedExceptionAction;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Properties;
+
+public class JavaJobRunnerMain {
+
+  public static final String JOB_CLASS = "job.class";
+  public static final String DEFAULT_RUN_METHOD = "run";
+  public static final String DEFAULT_CANCEL_METHOD = "cancel";
+
+  // This is the Job interface method to get the properties generated by the
+  // job.
+  public static final String GET_GENERATED_PROPERTIES_METHOD =
+      "getJobGeneratedProperties";
+
+  public static final String CANCEL_METHOD_PARAM = "method.cancel";
+  public static final String RUN_METHOD_PARAM = "method.run";
+  public static final String[] PROPS_CLASSES = new String[] {
+    "azkaban.utils.Props",
+    "azkaban.common.utils.Props"
+  };
+
+  private static final Layout DEFAULT_LAYOUT = new PatternLayout("%p %m\n");
+
+  public final Logger _logger;
+
+  public String _cancelMethod;
+  public String _jobName;
+  public Object _javaObject;
+  private boolean _isFinished = false;
+
+  public static void main(String[] args) throws Exception {
+    @SuppressWarnings("unused")
+    JavaJobRunnerMain wrapper = new JavaJobRunnerMain();
+  }
+
+  @SuppressWarnings("DefaultCharset")
+  public JavaJobRunnerMain() throws Exception {
+    Runtime.getRuntime().addShutdownHook(new Thread() {
+      @Override
+      public void run() {
+        cancelJob();
+      }
+    });
+
+    try {
+      _jobName = System.getenv(ProcessJob.JOB_NAME_ENV);
+      String propsFile = System.getenv(ProcessJob.JOB_PROP_ENV);
+
+      _logger = Logger.getRootLogger();
+      _logger.removeAllAppenders();
+      ConsoleAppender appender = new ConsoleAppender(DEFAULT_LAYOUT);
+      appender.activateOptions();
+      _logger.addAppender(appender);
+
+      Properties props = new Properties();
+      props.load(new BufferedReader(new FileReader(propsFile)));
+
+      _logger.info("Running job " + _jobName);
+      String className = props.getProperty(JOB_CLASS);
+      if (className == null) {
+        throw new Exception("Class name is not set.");
+      }
+      _logger.info("Class name " + className);
+
+      HadoopConfigurationInjector.injectResources(new Props(null, props));
+
+      // Create the object using proxy
+      if (SecurityUtils.shouldProxy(props)) {
+        _javaObject = getObjectAsProxyUser(props, _logger, _jobName, className);
+      } else {
+        _javaObject = getObject(_jobName, className, props, _logger);
+      }
+      if (_javaObject == null) {
+        _logger.info("Could not create java object to run job: " + className);
+        throw new Exception("Could not create running object");
+      }
+
+      _cancelMethod =
+          props.getProperty(CANCEL_METHOD_PARAM, DEFAULT_CANCEL_METHOD);
+
+      final String runMethod =
+          props.getProperty(RUN_METHOD_PARAM, DEFAULT_RUN_METHOD);
+      _logger.info("Invoking method " + runMethod);
+
+      if (SecurityUtils.shouldProxy(props)) {
+        _logger.info("Proxying enabled.");
+        runMethodAsProxyUser(props, _javaObject, runMethod);
+      } else {
+        _logger.info("Proxy check failed, not proxying run.");
+        runMethod(_javaObject, runMethod);
+      }
+      _isFinished = true;
+
+      // Get the generated properties and store them to disk, to be read
+      // by ProcessJob.
+      try {
+        final Method generatedPropertiesMethod =
+            _javaObject.getClass().getMethod(GET_GENERATED_PROPERTIES_METHOD,
+                new Class<?>[] {});
+        Object outputGendProps =
+            generatedPropertiesMethod.invoke(_javaObject, new Object[] {});
+        if (outputGendProps != null) {
+          final Method toPropertiesMethod =
+              outputGendProps.getClass().getMethod("toProperties",
+                  new Class<?>[] {});
+          Properties properties =
+              (Properties) toPropertiesMethod.invoke(outputGendProps,
+                  new Object[] {});
+
+          Props outputProps = new Props(null, properties);
+          outputGeneratedProperties(outputProps);
+        } else {
+          outputGeneratedProperties(new Props());
+        }
+
+      } catch (NoSuchMethodException e) {
+        _logger.info(String.format(
+            "Apparently there isn't a method[%s] on object[%s], "
+                + "using empty Props object instead.",
+            GET_GENERATED_PROPERTIES_METHOD, _javaObject));
+        outputGeneratedProperties(new Props());
+      }
+    } catch (Exception e) {
+      _isFinished = true;
+      throw e;
+    }
+  }
+
+  private void runMethodAsProxyUser(Properties props, final Object obj,
+      final String runMethod) throws IOException, InterruptedException {
+    UserGroupInformation ugi =
+        SecurityUtils.getProxiedUser(props, _logger, new Configuration());
+    _logger.info("user " + ugi + " authenticationMethod "
+        + ugi.getAuthenticationMethod());
+    _logger.info("user " + ugi + " hasKerberosCredentials "
+        + ugi.hasKerberosCredentials());
+    SecurityUtils.getProxiedUser(props, _logger, new Configuration()).doAs(
+        new PrivilegedExceptionAction<Void>() {
+          @Override
+          public Void run() throws Exception {
+            runMethod(obj, runMethod);
+            return null;
+          }
+        });
+  }
+
+  private void runMethod(Object obj, String runMethod)
+      throws IllegalAccessException, InvocationTargetException,
+      NoSuchMethodException {
+    obj.getClass().getMethod(runMethod, new Class<?>[] {}).invoke(obj);
+  }
+
+  @SuppressWarnings("DefaultCharset")
+  private void outputGeneratedProperties(Props outputProperties) {
+
+    if (outputProperties == null) {
+      _logger.info("  no gend props");
+      return;
+    }
+    for (String key : outputProperties.getKeySet()) {
+      _logger
+          .info("  gend prop " + key + " value:" + outputProperties.get(key));
+    }
+
+    String outputFileStr = System.getenv(ProcessJob.JOB_OUTPUT_PROP_FILE);
+    if (outputFileStr == null) {
+      return;
+    }
+
+    _logger.info("Outputting generated properties to " + outputFileStr);
+
+    Map<String, String> properties = new LinkedHashMap<String, String>();
+    for (String key : outputProperties.getKeySet()) {
+      properties.put(key, outputProperties.get(key));
+    }
+
+    Writer writer = null;
+    try {
+      writer = new BufferedWriter(new FileWriter(outputFileStr));
+      JSONUtils.writePropsNoJarDependency(properties, writer);
+    } catch (Exception e) {
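+      // Swallow write failures: generated properties are best-effort output.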
+    } finally {
+      if (writer != null) {
+        try {
+          writer.close();
+        } catch (IOException e) {
+        }
+      }
+    }
+  }
+
+  public void cancelJob() {
+    if (_isFinished) {
+      return;
+    }
+    _logger.info("Attempting to call cancel on this job");
+    if (_javaObject == null) {
+      return;
+    }
+
+    Method method = null;
+    try {
+      method = _javaObject.getClass().getMethod(_cancelMethod);
+    } catch (SecurityException e) {
+    } catch (NoSuchMethodException e) {
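+      // Leave method null; the missing cancel method is reported below.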
+    }
+
+    if (method != null) {
+      try {
+        method.invoke(_javaObject);
+      } catch (Exception e) {
+        if (_logger != null) {
+          _logger.error("Cancel method failed! ", e);
+        }
+      }
+    } else {
+      throw new RuntimeException("Job " + _jobName
+          + " does not have cancel method " + _cancelMethod);
+    }
+  }
+
+  private static Object getObjectAsProxyUser(final Properties props,
+      final Logger logger, final String jobName, final String className)
+      throws Exception {
+    Object obj =
+        SecurityUtils.getProxiedUser(props, logger, new Configuration()).doAs(
+            new PrivilegedExceptionAction<Object>() {
+              @Override
+              public Object run() throws Exception {
+                return getObject(jobName, className, props, logger);
+              }
+            });
+
+    return obj;
+  }
+
+  private static Object getObject(String jobName, String className,
+      Properties properties, Logger logger) throws Exception {
+
+    Class<?> runningClass =
+        JavaJobRunnerMain.class.getClassLoader().loadClass(className);
+
+    if (runningClass == null) {
+      throw new Exception("Class " + className
+          + " was not found. Cannot run job.");
+    }
+
+    Class<?> propsClass = null;
+    for (String propClassName : PROPS_CLASSES) {
+      try {
+        propsClass =
+            JavaJobRunnerMain.class.getClassLoader().loadClass(propClassName);
+      } catch (ClassNotFoundException e) {
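+        // This Props variant is not on the classpath; try the next candidate.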
+      }
+
+      if (propsClass != null
+          && getConstructor(runningClass, String.class, propsClass) != null) {
+        // found a Props class that the job's constructor accepts
+        break;
+      }
+      propsClass = null;
+    }
+
+    Object obj = null;
+    if (propsClass != null
+        && getConstructor(runningClass, String.class, propsClass) != null) {
+      // Create props class
+      Constructor<?> propsCon =
+          getConstructor(propsClass, propsClass, Properties[].class);
+      Object props =
+          propsCon.newInstance(null, new Properties[] { properties });
+
+      Constructor<?> con =
+          getConstructor(runningClass, String.class, propsClass);
+      logger.info("Constructor found " + con.toGenericString());
+      obj = con.newInstance(jobName, props);
+    } else if (getConstructor(runningClass, String.class, Properties.class) != null) {
+      Constructor<?> con =
+          getConstructor(runningClass, String.class, Properties.class);
+      logger.info("Constructor found " + con.toGenericString());
+      obj = con.newInstance(jobName, properties);
+    } else if (getConstructor(runningClass, String.class, Map.class) != null) {
+      Constructor<?> con =
+          getConstructor(runningClass, String.class, Map.class);
+      logger.info("Constructor found " + con.toGenericString());
+
+      HashMap<Object, Object> map = new HashMap<Object, Object>();
+      for (Map.Entry<Object, Object> entry : properties.entrySet()) {
+        map.put(entry.getKey(), entry.getValue());
+      }
+      obj = con.newInstance(jobName, map);
+    } else if (getConstructor(runningClass, String.class) != null) {
+      Constructor<?> con = getConstructor(runningClass, String.class);
+      logger.info("Constructor found " + con.toGenericString());
+      obj = con.newInstance(jobName);
+    } else if (getConstructor(runningClass) != null) {
+      Constructor<?> con = getConstructor(runningClass);
+      logger.info("Constructor found " + con.toGenericString());
+      obj = con.newInstance();
+    } else {
+      logger.error("Constructor not found. Listing available Constructors.");
+      for (Constructor<?> c : runningClass.getConstructors()) {
+        logger.info(c.toGenericString());
+      }
+    }
+    return obj;
+  }
+
+  private static Constructor<?> getConstructor(Class<?> c, Class<?>... args) {
+    try {
+      Constructor<?> cons = c.getConstructor(args);
+      return cons;
+    } catch (NoSuchMethodException e) {
+      return null;
+    }
+  }
+}
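
The constructor probing in getObject(...) accepts several job-class shapes. As an illustrative sketch (class, property names, and behavior are hypothetical), a job the runner could instantiate through its (String, Properties) constructor, run via the default run method, cancel via the default cancel method, and query for generated properties:

import java.util.Properties;

public class WordCountDriverJob {
  private final String jobName;
  private final Properties props;
  private volatile boolean cancelled = false;

  // Matches the (String, Properties) constructor shape probed by getObject(...).
  public WordCountDriverJob(String jobName, Properties props) {
    this.jobName = jobName;
    this.props = props;
  }

  // Default run method ("method.run" falls back to "run").
  public void run() throws Exception {
    if (cancelled) {
      throw new InterruptedException(jobName + " was cancelled before it started");
    }
    // ... job logic driven by this.props would go here ...
  }

  // Default cancel method ("method.cancel" falls back to "cancel").
  public void cancel() {
    cancelled = true;
  }

  // Optional hook: looked up reflectively as getJobGeneratedProperties; the runner then
  // calls toProperties() on the returned object and writes the result to the
  // JOB_OUTPUT_PROP_FILE for downstream jobs.
  public GeneratedOutput getJobGeneratedProperties() {
    return new GeneratedOutput();
  }

  public static class GeneratedOutput {
    public Properties toProperties() {
      Properties generated = new Properties();
      generated.setProperty("records.processed", "0");
      return generated;
    }
  }
}
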
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/AbstractHadoopJob.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/AbstractHadoopJob.java
new file mode 100644
index 0000000..1380b0e
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/AbstractHadoopJob.java
@@ -0,0 +1,340 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.javautils;
+
+import java.io.IOException;
+import java.io.File;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.filecache.DistributedCache;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.Counters;
+import org.apache.hadoop.mapred.Counters.Counter;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobID;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.mapred.TaskReport;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.log4j.Logger;
+
+import azkaban.jobtype.MapReduceJobState;
+import azkaban.jobtype.StatsUtils;
+import azkaban.utils.Props;
+import azkaban.utils.JSONUtils;
+
+import static azkaban.security.commons.SecurityUtils.MAPREDUCE_JOB_CREDENTIALS_BINARY;
+import static org.apache.hadoop.security.UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION;
+
+public abstract class AbstractHadoopJob {
+  private static final Logger logger = Logger
+      .getLogger(AbstractHadoopJob.class);
+
+  public static String COMMON_FILE_DATE_PATTERN = "yyyy-MM-dd-HH-mm";
+  public static final String HADOOP_PREFIX = "hadoop-conf.";
+
+  private RunningJob runningJob;
+  private final Props props;
+  private final String jobName;
+
+  private JobConf jobconf;
+  private JobClient jobClient;
+  private Configuration conf;
+
+  private boolean visualizer;
+  private MapReduceJobState mapReduceJobState;
+  private String jobStatsFileName;
+
+  public AbstractHadoopJob(String name, Props props) {
+    this.props = props;
+    this.jobName = name;
+    conf = new Configuration();
+    jobconf = new JobConf(conf);
+    jobconf.setJobName(name);
+
+    visualizer = props.getBoolean("mr.listener.visualizer", false);
+    if (visualizer) {
+      jobStatsFileName = props.getString("azkaban.job.attachment.file");
+    }
+  }
+
+  public JobConf getJobConf() {
+    return jobconf;
+  }
+
+  public Configuration getConf() {
+    return conf;
+  }
+
+  public String getJobName() {
+    return this.jobName;
+  }
+
+  public void run() throws Exception {
+    JobConf conf = getJobConf();
+
+    if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) {
+      conf.set(MAPREDUCE_JOB_CREDENTIALS_BINARY,
+          System.getenv(HADOOP_TOKEN_FILE_LOCATION));
+    }
+
+    jobClient = new JobClient(conf);
+    runningJob = jobClient.submitJob(conf);
+    logger.info("See " + runningJob.getTrackingURL() + " for details.");
+    jobClient.monitorAndPrintJob(conf, runningJob);
+
+    if (!runningJob.isSuccessful()) {
+      throw new Exception("Hadoop job:" + getJobName() + " failed!");
+    }
+
+    // dump all counters
+    Counters counters = runningJob.getCounters();
+    for (String groupName : counters.getGroupNames()) {
+      Counters.Group group = counters.getGroup(groupName);
+      logger.info("Group: " + group.getDisplayName());
+      for (Counter counter : group)
+        logger.info(counter.getDisplayName() + ":\t" + counter.getValue());
+    }
+    updateMapReduceJobState(conf);
+  }
+
+  @SuppressWarnings("rawtypes")
+  public JobConf createJobConf(Class<? extends Mapper> mapperClass)
+      throws IOException, URISyntaxException {
+    JobConf conf = createJobConf(mapperClass, null);
+    conf.setNumReduceTasks(0);
+    return conf;
+  }
+
+  @SuppressWarnings("rawtypes")
+  public JobConf createJobConf(Class<? extends Mapper> mapperClass,
+      Class<? extends Reducer> reducerClass,
+      Class<? extends Reducer> combinerClass) throws IOException,
+      URISyntaxException {
+    JobConf conf = createJobConf(mapperClass, reducerClass);
+    conf.setCombinerClass(combinerClass);
+    return conf;
+  }
+
+  @SuppressWarnings("rawtypes")
+  public JobConf createJobConf(Class<? extends Mapper> mapperClass,
+      Class<? extends Reducer> reducerClass) throws IOException,
+      URISyntaxException {
+    JobConf conf = new JobConf();
+    // set custom class loader with custom find resource strategy.
+
+    conf.setJobName(getJobName());
+    conf.setMapperClass(mapperClass);
+    if (reducerClass != null) {
+      conf.setReducerClass(reducerClass);
+    }
+
+    if (props.getBoolean("is.local", false)) {
+      conf.set("mapred.job.tracker", "local");
+      conf.set("fs.default.name", "file:///");
+      conf.set("mapred.local.dir", "/tmp/map-red");
+
+      logger.info("Running locally, no hadoop jar set.");
+    } else {
+      HadoopUtils.setClassLoaderAndJar(conf, getClass());
+      logger.info("Setting hadoop jar file for class:" + getClass()
+          + "  to " + conf.getJar());
+      logger.info("*************************************************************************");
+      logger.info("          Running on Real Hadoop Cluster("
+          + conf.get("mapred.job.tracker") + ")           ");
+      logger.info("*************************************************************************");
+    }
+
+    // set JVM options if present
+    if (props.containsKey("mapred.child.java.opts")) {
+      conf.set("mapred.child.java.opts",
+          props.getString("mapred.child.java.opts"));
+      logger.info("mapred.child.java.opts set to "
+          + props.getString("mapred.child.java.opts"));
+    }
+
+    // set input and output paths if they are present
+    if (props.containsKey("input.paths")) {
+      List<String> inputPaths = props.getStringList("input.paths");
+      if (inputPaths.size() == 0)
+        throw new IllegalArgumentException(
+            "Must specify at least one value for property 'input.paths'");
+      for (String path : inputPaths) {
+        HadoopUtils.addAllSubPaths(conf, new Path(path));
+      }
+    }
+
+    if (props.containsKey("output.path")) {
+      String location = props.get("output.path");
+      FileOutputFormat.setOutputPath(conf, new Path(location));
+
+      // For testing purposes only: remove the output path if it already exists
+      if (props.getBoolean("force.output.overwrite", false)) {
+        FileSystem fs =
+            FileOutputFormat.getOutputPath(conf).getFileSystem(conf);
+        fs.delete(FileOutputFormat.getOutputPath(conf), true);
+      }
+    }
+
+    // Adds External jars to hadoop classpath
+    String externalJarList = props.getString("hadoop.external.jarFiles", null);
+    if (externalJarList != null) {
+      FileSystem fs = FileSystem.get(conf);
+      String[] jarFiles = externalJarList.split(",");
+      for (String jarFile : jarFiles) {
+        logger.info("Adding extenral jar File:" + jarFile);
+        DistributedCache.addFileToClassPath(new Path(jarFile), conf, fs);
+      }
+    }
+
+    // Adds distributed cache files
+    String cacheFileList = props.getString("hadoop.cache.files", null);
+    if (cacheFileList != null) {
+      String[] cacheFiles = cacheFileList.split(",");
+      for (String cacheFile : cacheFiles) {
+        logger.info("Adding Distributed Cache File:" + cacheFile);
+        DistributedCache.addCacheFile(new URI(cacheFile), conf);
+      }
+    }
+
+    // Adds distributed cache archives
+    String archiveFileList = props.getString("hadoop.cache.archives", null);
+    if (archiveFileList != null) {
+      String[] archiveFiles = archiveFileList.split(",");
+      for (String archiveFile : archiveFiles) {
+        logger.info("Adding Distributed Cache Archive File:" + archiveFile);
+        DistributedCache.addCacheArchive(new URI(archiveFile), conf);
+      }
+    }
+
+    String hadoopCacheJarDir =
+        props.getString("hdfs.default.classpath.dir", null);
+    if (hadoopCacheJarDir != null) {
+      FileSystem fs = FileSystem.get(conf);
+      if (fs != null) {
+        FileStatus[] status = fs.listStatus(new Path(hadoopCacheJarDir));
+
+        if (status != null) {
+          for (int i = 0; i < status.length; ++i) {
+            if (!status[i].isDir()) {
+              Path path =
+                  new Path(hadoopCacheJarDir, status[i].getPath().getName());
+              logger.info("Adding Jar to Distributed Cache Archive File:"
+                  + path);
+
+              DistributedCache.addFileToClassPath(path, conf, fs);
+            }
+          }
+        } else {
+          logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir
+              + " is empty.");
+        }
+      } else {
+        logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir
+            + " filesystem doesn't exist");
+      }
+    }
+
+    for (String key : getProps().getKeySet()) {
+      String lowerCase = key.toLowerCase();
+      if (lowerCase.startsWith(HADOOP_PREFIX)) {
+        String newKey = key.substring(HADOOP_PREFIX.length());
+        conf.set(newKey, getProps().get(key));
+      }
+    }
+
+    HadoopUtils.setPropsInJob(conf, getProps());
+
+    // put in tokens
+    if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) {
+      conf.set(MAPREDUCE_JOB_CREDENTIALS_BINARY,
+          System.getenv(HADOOP_TOKEN_FILE_LOCATION));
+    }
+
+    return conf;
+  }
+
+  public Props getProps() {
+    return this.props;
+  }
+
+  public void cancel() throws Exception {
+    if (runningJob != null) {
+      runningJob.killJob();
+    }
+  }
+
+  private void updateMapReduceJobState(JobConf jobConf) {
+    if (runningJob == null || !visualizer) {
+      return;
+    }
+
+    try {
+      JobID jobId = runningJob.getID();
+      TaskReport[] mapTaskReport = jobClient.getMapTaskReports(jobId);
+      TaskReport[] reduceTaskReport = jobClient.getReduceTaskReports(jobId);
+      mapReduceJobState =
+          new MapReduceJobState(runningJob, mapTaskReport, reduceTaskReport);
+      writeMapReduceJobState(jobConf);
+    } catch (IOException e) {
+      logger.error("Cannot update MapReduceJobState");
+    }
+  }
+
+  private Object statsToJson(JobConf jobConf) {
+    List<Object> jsonObj = new ArrayList<Object>();
+    Map<String, Object> jobJsonObj = new HashMap<String, Object>();
+    Properties conf = StatsUtils.getJobConf(jobConf);
+    jobJsonObj.put("state", mapReduceJobState.toJson());
+    jobJsonObj.put("conf", StatsUtils.propertiesToJson(conf));
+    jsonObj.add(jobJsonObj);
+    return jsonObj;
+  }
+
+  private void writeMapReduceJobState(JobConf jobConf) {
+    File mrStateFile = null;
+    try {
+      mrStateFile = new File(jobStatsFileName);
+      JSONUtils.toJSON(statsToJson(jobConf), mrStateFile);
+    } catch (Exception e) {
+      logger.error("Cannot write JSON file.");
+    }
+  }
+
+  public double getProgress() throws IOException {
+    if (runningJob == null) {
+      return 0.0;
+    }
+    return (double) (runningJob.mapProgress() + runningJob.reduceProgress()) / 2.0d;
+  }
+
+  public Counters getCounters() throws IOException {
+    return runningJob.getCounters();
+  }
+
+}
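
A minimal sketch of a concrete subclass, assuming hypothetical paths and relying on createJobConf(...) to apply input.paths, output.path, and any hadoop-conf.-prefixed job properties; is.local is set only so the sketch does not need a cluster, and the input directory is assumed to exist on the local filesystem.

import java.io.IOException;
import java.net.URISyntaxException;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

import azkaban.jobtype.javautils.AbstractHadoopJob;
import azkaban.utils.Props;

public class IdentityHadoopJob extends AbstractHadoopJob {

  public IdentityHadoopJob(String name, Props props) {
    super(name, props);
  }

  // Builds a JobConf through the helper above; input.paths, output.path,
  // hadoop.external.jarFiles and any "hadoop-conf."-prefixed keys from the
  // job props are applied inside createJobConf(...).
  public JobConf buildConf() throws IOException, URISyntaxException {
    return createJobConf(IdentityMapper.class, IdentityReducer.class);
  }

  public static void main(String[] args) throws Exception {
    Props props = new Props();
    props.put("is.local", "true");                               // keep the sketch off a real cluster
    props.put("input.paths", "/tmp/words/input");                // assumed to exist locally
    props.put("output.path", "/tmp/words/output");
    props.put("hadoop-conf.mapreduce.job.queuename", "default"); // prefix is stripped

    JobConf conf = new IdentityHadoopJob("identity-copy", props).buildConf();
    System.out.println("Configured job: " + conf.getJobName()
        + ", queue=" + conf.get("mapreduce.job.queuename"));
  }
}
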
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/FileUtils.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/FileUtils.java
new file mode 100644
index 0000000..74094ac
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/FileUtils.java
@@ -0,0 +1,86 @@
+package azkaban.jobtype.javautils;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.commons.io.filefilter.AndFileFilter;
+import org.apache.commons.io.filefilter.FileFileFilter;
+import org.apache.commons.io.filefilter.WildcardFileFilter;
+import org.apache.log4j.Logger;
+
+
+public class FileUtils {
+  private static Logger logger = Logger.getLogger(FileUtils.class);
+
+  /**
+   * Deletes a file or directory recursively.
+   * (Apache Commons FileUtils.deleteDirectory has a bug and does not work reliably here.)
+   *
+   * @param file file or directory to delete
+   * @throws IOException
+   */
+  public static void deleteFileOrDirectory(File file) throws IOException {
+    if (!file.isDirectory()) {
+      file.delete();
+      return;
+    }
+
+    if (file.list().length == 0) { //Nothing under directory. Just delete it.
+      file.delete();
+      return;
+    }
+
+    for (String temp : file.list()) { //Delete files or directory under current directory.
+      File fileDelete = new File(file, temp);
+      deleteFileOrDirectory(fileDelete);
+    }
+    //Now there is nothing under directory, delete it.
+    deleteFileOrDirectory(file);
+  }
+
+  public static boolean tryDeleteFileOrDirectory(File file) {
+    try {
+      deleteFileOrDirectory(file);
+      return true;
+    } catch (Exception e) {
+      logger.warn("Failed to delete " + file.getAbsolutePath(), e);
+      return false;
+    }
+  }
+
+  /**
+   * Finds files; each path entry may use the wildcards * or ?.
+   *
+   * @param filesStr File path(s) delimited by the given delimiter
+   * @param delimiter Separator between file paths
+   * @return Absolute paths of the matching files
+   */
+  public static Collection<String> listFiles(String filesStr, String delimiter) {
+    ValidationUtils.validateNotEmpty(filesStr, "filesStr");
+
+    List<String> files = new ArrayList<String>();
+    for (String s : filesStr.split(delimiter)) {
+      File f = new File(s);
+      if (!f.getName().contains("*") && !f.getName().contains("?")) {
+        files.add(f.getAbsolutePath());
+        continue;
+      }
+
+      FileFilter fileFilter = new AndFileFilter(new WildcardFileFilter(f.getName()), FileFileFilter.FILE);
+      File parent = f.getParentFile() == null ? f : f.getParentFile();
+      File[] filteredFiles = parent.listFiles(fileFilter);
+      if(filteredFiles == null) {
+        continue;
+      }
+
+      for (File file : filteredFiles) {
+        files.add(file.getAbsolutePath());
+      }
+    }
+    return files;
+  }
+}
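A hypothetical usage sketch (not part of this diff) for FileUtils.listFiles, showing how a delimiter-separated path string with wildcards expands into absolute paths; the paths themselves are invented.

import java.util.Collection;

import azkaban.jobtype.javautils.FileUtils;

public class ListFilesDemo {
  public static void main(String[] args) {
    // Plain entries are kept as-is; wildcard entries are expanded against their parent directory.
    Collection<String> files = FileUtils.listFiles("/tmp/input.csv,/tmp/jars/*.jar", ",");
    for (String f : files) {
      System.out.println(f);
    }
  }
}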
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/HadoopUtils.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/HadoopUtils.java
new file mode 100644
index 0000000..d4ad29e
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/HadoopUtils.java
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.javautils;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.net.URL;
+import java.net.URLDecoder;
+import java.util.Enumeration;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.log4j.Logger;
+
+import azkaban.utils.Props;
+
+public class HadoopUtils {
+
+  private static final Logger logger = Logger.getLogger(HadoopUtils.class);
+
+  public static void setClassLoaderAndJar(JobConf conf, Class<?> jobClass) {
+    conf.setClassLoader(Thread.currentThread().getContextClassLoader());
+    String jar =
+        findContainingJar(jobClass, Thread.currentThread()
+            .getContextClassLoader());
+    if (jar != null) {
+      conf.setJar(jar);
+    }
+  }
+
+  public static String findContainingJar(String fileName, ClassLoader loader) {
+    try {
+      for (Enumeration<?> itr = loader.getResources(fileName); itr
+          .hasMoreElements();) {
+        URL url = (URL) itr.nextElement();
+        logger.info("findContainingJar finds url:" + url);
+        if ("jar".equals(url.getProtocol())) {
+          String toReturn = url.getPath();
+          if (toReturn.startsWith("file:")) {
+            toReturn = toReturn.substring("file:".length());
+          }
+          toReturn = URLDecoder.decode(toReturn, "UTF-8");
+          return toReturn.replaceAll("!.*$", "");
+        }
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    return null;
+  }
+
+  public static String findContainingJar(Class<?> my_class, ClassLoader loader) {
+    String class_file = my_class.getName().replaceAll("\\.", "/") + ".class";
+    return findContainingJar(class_file, loader);
+  }
+
+  public static boolean shouldPathBeIgnored(Path path) throws IOException {
+    return path.getName().startsWith("_");
+  }
+
+  public static JobConf addAllSubPaths(JobConf conf, Path path)
+      throws IOException {
+    if (shouldPathBeIgnored(path)) {
+      throw new IllegalArgumentException(String.format(
+          "Path[%s] should be ignored.", path));
+    }
+
+    final FileSystem fs = path.getFileSystem(conf);
+
+    if (fs.exists(path)) {
+      for (FileStatus status : fs.listStatus(path)) {
+        if (!shouldPathBeIgnored(status.getPath())) {
+          if (status.isDir()) {
+            addAllSubPaths(conf, status.getPath());
+          } else {
+            FileInputFormat.addInputPath(conf, status.getPath());
+          }
+        }
+      }
+    }
+    return conf;
+  }
+
+  public static void setPropsInJob(Configuration conf, Props props) {
+    ByteArrayOutputStream output = new ByteArrayOutputStream();
+    try {
+      props.storeFlattened(output);
+      conf.set("azkaban.props", new String(output.toByteArray(), "UTF-8"));
+    } catch (IOException e) {
+      throw new RuntimeException("This is not possible!", e);
+    }
+  }
+
+  public static void saveProps(Props props, String file) throws IOException {
+    Path path = new Path(file);
+
+    final Configuration conf = new Configuration();
+    final FileSystem fs = path.getFileSystem(conf);
+
+    saveProps(fs, props, file);
+  }
+
+  public static void saveProps(FileSystem fs, Props props, String file)
+      throws IOException {
+    Path path = new Path(file);
+
+    // create directory if it does not exist.
+    Path parent = path.getParent();
+    if (!fs.exists(parent))
+      fs.mkdirs(parent);
+
+    // write out properties
+    OutputStream output = fs.create(path);
+    try {
+      props.storeFlattened(output);
+    } finally {
+      output.close();
+    }
+  }
+}
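A minimal sketch (not part of this diff) of how setPropsInJob serializes Azkaban props into a Hadoop Configuration under the "azkaban.props" key. The property key/value is illustrative, and it assumes Props exposes a put(String, String) setter.

import org.apache.hadoop.conf.Configuration;

import azkaban.jobtype.javautils.HadoopUtils;
import azkaban.utils.Props;

public class SetPropsDemo {
  public static void main(String[] args) {
    Props props = new Props();
    props.put("user.to.proxy", "azkaban"); // illustrative key/value

    Configuration conf = new Configuration();
    HadoopUtils.setPropsInJob(conf, props);

    // The flattened properties are now readable on the task side under "azkaban.props".
    System.out.println(conf.get("azkaban.props"));
  }
}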
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/JobUtils.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/JobUtils.java
new file mode 100644
index 0000000..409cb5a
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/JobUtils.java
@@ -0,0 +1,17 @@
+package azkaban.jobtype.javautils;
+
+import org.apache.log4j.ConsoleAppender;
+import org.apache.log4j.Logger;
+import org.apache.log4j.PatternLayout;
+
+public class JobUtils {
+
+  public static Logger initJobLogger() {
+    Logger rootLogger = Logger.getRootLogger();
+    rootLogger.removeAllAppenders();
+    ConsoleAppender appender = new ConsoleAppender(new PatternLayout("%p %m\n"));
+    appender.activateOptions();
+    rootLogger.addAppender(appender);
+    return rootLogger;
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/ValidationUtils.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/ValidationUtils.java
new file mode 100644
index 0000000..73c886d
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/ValidationUtils.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2014-2016 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.javautils;
+
+import java.util.Arrays;
+import java.util.Objects;
+
+import org.apache.commons.lang.StringUtils;
+
+import azkaban.utils.Props;
+
+public class ValidationUtils {
+
+  public static void validateNotEmpty(String s, String name) {
+    if(StringUtils.isEmpty(s)) {
+      throw new IllegalArgumentException(name + " cannot be empty.");
+    }
+  }
+
+  /**
+   * Validates that either all of the keys exist or none of them exist.
+   * @param props
+   * @param keys
+   * @throws IllegalArgumentException if only some of the keys exist
+   */
+  public static void validateAllOrNone(Props props, String... keys) {
+    Objects.requireNonNull(keys);
+
+    boolean allExist = true;
+    boolean someExist = false;
+    for(String key : keys) {
+      Object val = props.get(key);
+      allExist &= val != null;
+      someExist |= val != null;
+    }
+
+    if(someExist && !allExist) {
+      throw new IllegalArgumentException("Either all of properties exist or none of them should exist for " + Arrays.toString(keys));
+    }
+  }
+
+  /**
+   * Validates that all of the keys are present in props.
+   * @param props
+   * @param keys
+   * @throws UndefinedPropertyException if a key does not exist in the properties
+   */
+  public static void validateAllNotEmpty(Props props, String... keys) {
+    for(String key : keys) {
+      props.getString(key);
+    }
+  }
+
+  public static void validateAtleastOneNotEmpty(Props props, String... keys) {
+    boolean exist = false;
+    for(String key : keys) {
+      Object val = props.get(key);
+      exist |= val != null;
+    }
+    if(!exist) {
+      throw new IllegalArgumentException("At least one of these keys should exist " + Arrays.toString(keys));
+    }
+  }
+
+  public static void validateSomeValuesNotEmpty(int notEmptyVals, String... vals) {
+    int count = 0;
+    for(String val : vals) {
+      if(!StringUtils.isEmpty(val)) {
+        count++;
+      }
+    }
+    if (count != notEmptyVals) {
+      throw new IllegalArgumentException("Number of not empty vals " + count + " is not desired number " + notEmptyVals);
+    }
+  }
+}
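A short illustrative use of validateAllOrNone (not part of this diff): a connector job type might require that a pair of related keys is either fully configured or omitted entirely. The key names are hypothetical, and Props is assumed to expose a put(String, String) setter.

import azkaban.jobtype.javautils.ValidationUtils;
import azkaban.utils.Props;

public class ValidationDemo {
  public static void main(String[] args) {
    Props props = new Props();
    // Hypothetical keys for illustration only.
    props.put("source.jdbc.url", "jdbc:mysql://db.example.com:3306/demo");
    // "source.jdbc.user" is missing, so only some of the keys exist:
    try {
      ValidationUtils.validateAllOrNone(props, "source.jdbc.url", "source.jdbc.user");
    } catch (IllegalArgumentException expected) {
      System.out.println(expected.getMessage());
    }
  }
}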
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/Whitelist.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/Whitelist.java
new file mode 100644
index 0000000..a96afb2
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/javautils/Whitelist.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2016 LinkedIn Corp. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
+ * this file except in compliance with the License. You may obtain a copy of the
+ * License at  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied.
+ */
+package azkaban.jobtype.javautils;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.util.Set;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.log4j.Logger;
+
+import azkaban.flow.CommonJobProperties;
+import azkaban.utils.Props;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Sets;
+
+/**
+ * Whitelist utility. It builds a whitelist from a newline-separated file and validates whether an id is whitelisted.
+ * The main use case is controlling which users are onboarded onto connector job types via their "user.to.proxy" value.
+ */
+public class Whitelist {
+  public static final String WHITE_LIST_FILE_PATH_KEY = "whitelist.file.path";
+
+  private static final String PROXY_USER_KEY = "user.to.proxy";
+  private static Logger logger = Logger.getLogger(Whitelist.class);
+
+  private final Set<String> whitelistSet;
+
+  /**
+   * Creates a whitelist instance by reading the whitelist file from the given
+   * file system.
+   *
+   * @param whitelistFilePath
+   * @param fs
+   */
+  public Whitelist(String whitelistFilePath, FileSystem fs) {
+    this.whitelistSet = retrieveWhitelist(fs, new Path(whitelistFilePath));
+    if(logger.isDebugEnabled()) {
+      logger.debug("Whitelist: " + whitelistSet);
+    }
+  }
+
+  public Whitelist(Props props, FileSystem fs) {
+    this(props.getString(WHITE_LIST_FILE_PATH_KEY), fs);
+  }
+
+  /**
+   * Checks if id is in whitelist.
+   * @param id
+   * @throws UnsupportedOperationException if id is not whitelisted
+   */
+  public void validateWhitelisted(String id) {
+    if (whitelistSet.contains(id)) {
+      return;
+    }
+    throw new UnsupportedOperationException(id + " is not authorized");
+  }
+
+  /**
+   * Uses the proxy user (or the submit user if no proxy user is set) from the properties and checks whether it is whitelisted.
+   * @param props
+   * @throws UnsupportedOperationException if the resolved user is not whitelisted
+   */
+  public void validateWhitelisted(Props props) {
+    String id = null;
+    if (props.containsKey(PROXY_USER_KEY)) {
+      id = props.get(PROXY_USER_KEY);
+      Preconditions.checkArgument(!StringUtils.isEmpty(id), PROXY_USER_KEY + " is required.");
+    } else if (props.containsKey(CommonJobProperties.SUBMIT_USER)) {
+      id = props.get(CommonJobProperties.SUBMIT_USER);
+      Preconditions.checkArgument(!StringUtils.isEmpty(id), CommonJobProperties.SUBMIT_USER + " is required.");
+    } else {
+      throw new IllegalArgumentException("Property neither has " + PROXY_USER_KEY + " nor " + CommonJobProperties.SUBMIT_USER);
+    }
+    validateWhitelisted(id);
+  }
+
+  /**
+   * Reads the whitelist file at the given path and returns the set of
+   * whitelisted ids, one id per line.
+   *
+   */
+  @VisibleForTesting
+  Set<String> retrieveWhitelist(FileSystem fs, Path path) {
+    try {
+      Preconditions.checkArgument(fs.exists(path), "File does not exist at " + path);
+      Preconditions.checkArgument(fs.isFile(path), "Whitelist path is not a file. " + path);
+
+      Set<String> result = Sets.newHashSet();
+      try (BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(path),
+          StandardCharsets.UTF_8))) {
+        String s = null;
+        while (!StringUtils.isEmpty((s = br.readLine()))) {
+          result.add(s);
+        }
+      }
+      return result;
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "Whitelist [whitelistSet=" + whitelistSet + "]";
+  }
+}
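A hedged sketch (not part of this diff) of wiring the whitelist into a job type. The whitelist path and id are invented, and the FileSystem comes from the local default configuration purely for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

import azkaban.jobtype.javautils.Whitelist;

public class WhitelistDemo {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    // Newline-separated ids, e.g. one proxy user per line (path is hypothetical).
    Whitelist whitelist = new Whitelist("/tmp/whitelist.txt", fs);
    whitelist.validateWhitelisted("azkaban"); // throws UnsupportedOperationException if not listed
  }
}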
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/JobDagNode.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/JobDagNode.java
new file mode 100644
index 0000000..0afb5c0
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/JobDagNode.java
@@ -0,0 +1,140 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+public class JobDagNode {
+  protected String name;
+
+  protected List<String> parents = new ArrayList<String>();
+  protected List<String> successors = new ArrayList<String>();
+
+  protected MapReduceJobState mapReduceJobState;
+  protected Properties jobConfiguration;
+
+  protected int level = 0;
+
+  public JobDagNode() {
+  }
+
+  public JobDagNode(String name) {
+    this.name = name;
+  }
+
+  public String getName() {
+    return name;
+  }
+
+  public void setName(String name) {
+    this.name = name;
+  }
+
+  public int getLevel() {
+    return level;
+  }
+
+  public void setLevel(int level) {
+    this.level = level;
+  }
+
+  public void addParent(JobDagNode parent) {
+    parents.add(parent.getName());
+  }
+
+  public void setParents(List<String> parents) {
+    this.parents = parents;
+  }
+
+  public List<String> getParents() {
+    return parents;
+  }
+
+  public void addSuccessor(JobDagNode successor) {
+    successors.add(successor.getName());
+  }
+
+  public void setSuccessors(List<String> successors) {
+    this.successors = successors;
+  }
+
+  public List<String> getSuccessors() {
+    return successors;
+  }
+
+  public void setMapReduceJobState(MapReduceJobState mapReduceJobState) {
+    this.mapReduceJobState = mapReduceJobState;
+  }
+
+  public MapReduceJobState getMapReduceJobState() {
+    return mapReduceJobState;
+  }
+
+  public void setJobConfiguration(Properties jobConfiguration) {
+    this.jobConfiguration = jobConfiguration;
+  }
+
+  public Properties getJobConfiguration() {
+    return jobConfiguration;
+  }
+
+  public Object toJson() {
+    Map<String, Object> jsonObj = new HashMap<String, Object>();
+    jsonObj.put("name", name);
+    jsonObj.put("level", Integer.toString(level));
+    jsonObj.put("parents", parents);
+    jsonObj.put("successors", successors);
+    if (jobConfiguration != null) {
+      jsonObj.put("jobConfiguration",
+          StatsUtils.propertiesToJson(jobConfiguration));
+    }
+    if (mapReduceJobState != null) {
+      jsonObj.put("mapReduceJobState", mapReduceJobState.toJson());
+    }
+    return jsonObj;
+  }
+
+  @SuppressWarnings("unchecked")
+  public static JobDagNode fromJson(Object obj) throws Exception {
+    Map<String, Object> jsonObj = (HashMap<String, Object>) obj;
+    String name = (String) jsonObj.get("name");
+
+    JobDagNode node = new JobDagNode(name);
+    node.setParents((ArrayList<String>) jsonObj.get("parents"));
+    node.setSuccessors((ArrayList<String>) jsonObj.get("successors"));
+    node.setLevel(Integer.parseInt((String) jsonObj.get("level")));
+
+    // Grab configuration if it is available.
+    if (jsonObj.containsKey("jobConfiguration")) {
+      node.setJobConfiguration(StatsUtils.propertiesFromJson(jsonObj
+          .get("jobConfiguration")));
+    }
+
+    // Grab MapReduceJobState.
+    if (jsonObj.containsKey("mapReduceJobState")) {
+      MapReduceJobState mapReduceJobState =
+          MapReduceJobState.fromJson(jsonObj.get("mapReduceJobState"));
+      node.setMapReduceJobState(mapReduceJobState);
+    }
+
+    return node;
+  }
+}
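An illustrative round trip (not part of this diff) showing that JobDagNode serializes to a plain map via toJson() and can be rebuilt with fromJson(); the node names are made up.

import azkaban.jobtype.JobDagNode;

public class DagNodeDemo {
  public static void main(String[] args) throws Exception {
    JobDagNode scope = new JobDagNode("scope-1");
    JobDagNode join = new JobDagNode("join-2");
    join.addParent(scope);
    scope.addSuccessor(join);
    join.setLevel(1);

    // toJson() returns a Map<String, Object>; fromJson() reverses it.
    JobDagNode copy = JobDagNode.fromJson(join.toJson());
    System.out.println(copy.getName() + " parents=" + copy.getParents());
  }
}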
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/MapReduceJobState.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/MapReduceJobState.java
new file mode 100644
index 0000000..670bd80
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/MapReduceJobState.java
@@ -0,0 +1,301 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.mapred.Counters;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.hadoop.mapred.TIPStatus;
+import org.apache.hadoop.mapred.TaskReport;
+
+/**
+ * Container that holds state of a MapReduce job
+ */
+public class MapReduceJobState {
+  private String jobId;
+  private String jobName;
+  private String trackingURL;
+  private String failureInfo;
+  private boolean isComplete;
+  private boolean isSuccessful;
+  private float mapProgress;
+  private float reduceProgress;
+  private long jobStartTime;
+  private long jobLastUpdateTime;
+
+  private int totalMappers;
+  private int finishedMappersCount;
+
+  private int totalReducers;
+  private int finishedReducersCount;
+
+  private Counters counters;
+
+  public MapReduceJobState() {
+  }
+
+  public MapReduceJobState(String jobId, String jobName, String trackingURL,
+      String failureInfo, boolean isComplete, boolean isSuccessful,
+      float mapProgress, float reduceProgress, long jobStartTime,
+      long jobLastUpdateTime, int totalMappers, int finishedMappersCount,
+      int totalReducers, int finishedReducersCount, Counters counters) {
+    this.jobId = jobId;
+    this.jobName = jobName;
+    this.trackingURL = trackingURL;
+    this.failureInfo = failureInfo;
+    this.isComplete = isComplete;
+    this.isSuccessful = isSuccessful;
+    this.mapProgress = mapProgress;
+    this.reduceProgress = reduceProgress;
+    this.jobStartTime = jobStartTime;
+    this.jobLastUpdateTime = jobLastUpdateTime;
+    this.totalMappers = totalMappers;
+    this.finishedMappersCount = finishedMappersCount;
+    this.totalReducers = totalReducers;
+    this.finishedReducersCount = finishedReducersCount;
+    this.counters = counters;
+  }
+
+  public MapReduceJobState(RunningJob runningJob, TaskReport[] mapTaskReport,
+      TaskReport[] reduceTaskReport) throws IOException {
+    jobId = runningJob.getID().toString();
+    jobName = runningJob.getJobName();
+    trackingURL = runningJob.getTrackingURL();
+    isComplete = runningJob.isComplete();
+    isSuccessful = runningJob.isSuccessful();
+    mapProgress = runningJob.mapProgress();
+    reduceProgress = runningJob.reduceProgress();
+    failureInfo = runningJob.getFailureInfo();
+
+    totalMappers = mapTaskReport.length;
+    totalReducers = reduceTaskReport.length;
+
+    for (TaskReport report : mapTaskReport) {
+      if (report.getStartTime() < jobStartTime || jobStartTime == 0L) {
+        jobStartTime = report.getStartTime();
+      }
+
+      TIPStatus status = report.getCurrentStatus();
+      if (status != TIPStatus.PENDING && status != TIPStatus.RUNNING) {
+        finishedMappersCount++;
+      }
+    }
+
+    for (TaskReport report : reduceTaskReport) {
+      if (jobLastUpdateTime < report.getFinishTime()) {
+        jobLastUpdateTime = report.getFinishTime();
+      }
+
+      TIPStatus status = report.getCurrentStatus();
+      if (status != TIPStatus.PENDING && status != TIPStatus.RUNNING) {
+        finishedReducersCount++;
+      }
+    }
+
+    // If not all reducers have finished (or no finish time was observed), fall back to the current time.
+    if (finishedReducersCount != reduceTaskReport.length
+        || jobLastUpdateTime == 0) {
+      jobLastUpdateTime = System.currentTimeMillis();
+    }
+
+    counters = runningJob.getCounters();
+  }
+
+  public String getJobId() {
+    return jobId;
+  }
+
+  public void setJobId(String jobId) {
+    this.jobId = jobId;
+  }
+
+  public String getJobName() {
+    return jobName;
+  }
+
+  public void setJobName(String jobName) {
+    this.jobName = jobName;
+  }
+
+  public String getTrackingURL() {
+    return trackingURL;
+  }
+
+  public void setTrackingURL(String trackingURL) {
+    this.trackingURL = trackingURL;
+  }
+
+  public String getFailureInfo() {
+    return failureInfo;
+  }
+
+  public void setFailureInfo(String failureInfo) {
+    this.failureInfo = failureInfo;
+  }
+
+  public boolean isComplete() {
+    return isComplete;
+  }
+
+  public void setComplete(boolean complete) {
+    isComplete = complete;
+  }
+
+  public boolean isSuccessful() {
+    return isSuccessful;
+  }
+
+  public void setSuccessful(boolean successful) {
+    isSuccessful = successful;
+  }
+
+  public float getMapProgress() {
+    return mapProgress;
+  }
+
+  public void setMapProgress(float mapProgress) {
+    this.mapProgress = mapProgress;
+  }
+
+  public float getReduceProgress() {
+    return reduceProgress;
+  }
+
+  public void setReduceProgress(float reduceProgress) {
+    this.reduceProgress = reduceProgress;
+  }
+
+  public int getTotalMappers() {
+    return totalMappers;
+  }
+
+  public void setTotalMappers(int totalMappers) {
+    this.totalMappers = totalMappers;
+  }
+
+  public int getTotalReducers() {
+    return totalReducers;
+  }
+
+  public void setTotalReducers(int totalReducers) {
+    this.totalReducers = totalReducers;
+  }
+
+  public int getFinishedMappersCount() {
+    return finishedMappersCount;
+  }
+
+  public void setFinishedMappersCount(int finishedMappersCount) {
+    this.finishedMappersCount = finishedMappersCount;
+  }
+
+  public int getFinishedReducersCount() {
+    return finishedReducersCount;
+  }
+
+  public void setFinishedReducersCount(int finishedReducersCount) {
+    this.finishedReducersCount = finishedReducersCount;
+  }
+
+  public long getJobStartTime() {
+    return jobStartTime;
+  }
+
+  public void setJobStartTime(long jobStartTime) {
+    this.jobStartTime = jobStartTime;
+  }
+
+  public long getJobLastUpdateTime() {
+    return jobLastUpdateTime;
+  }
+
+  public void setJobLastUpdateTime(long jobLastUpdateTime) {
+    this.jobLastUpdateTime = jobLastUpdateTime;
+  }
+
+  public Counters getCounters() {
+    return this.counters;
+  }
+
+  public void setCounters(Counters counters) {
+    this.counters = counters;
+  }
+
+  public Object toJson() {
+    Map<String, Object> jsonObj = new HashMap<String, Object>();
+    jsonObj.put("jobId", jobId);
+    jsonObj.put("jobName", jobName);
+    jsonObj.put("trackingURL", trackingURL);
+    jsonObj.put("failureInfo", failureInfo);
+    jsonObj.put("isComplete", String.valueOf(isComplete));
+    jsonObj.put("isSuccessful", String.valueOf(isSuccessful));
+    jsonObj.put("mapProgress", String.valueOf(mapProgress));
+    jsonObj.put("reduceProgress", String.valueOf(reduceProgress));
+    jsonObj.put("jobStartTime", String.valueOf(jobStartTime));
+    jsonObj.put("jobLastUpdateTime", String.valueOf(jobLastUpdateTime));
+
+    jsonObj.put("totalMappers", String.valueOf(totalMappers));
+    jsonObj.put("finishedMappersCount", String.valueOf(finishedMappersCount));
+
+    jsonObj.put("totalReducers", String.valueOf(totalReducers));
+    jsonObj.put("finishedReducersCount", String.valueOf(finishedReducersCount));
+
+    jsonObj.put("counters", StatsUtils.countersToJson(counters));
+    String countersString =
+        (counters != null) ? counters.makeEscapedCompactString() : "";
+    jsonObj.put("countersString", countersString);
+    return jsonObj;
+  }
+
+  @SuppressWarnings("unchecked")
+  public static MapReduceJobState fromJson(Object obj) throws Exception {
+    Map<String, Object> jsonObj = (HashMap<String, Object>) obj;
+    String jobId = (String) jsonObj.get("jobId");
+    String jobName = (String) jsonObj.get("jobName");
+    String trackingUrl = (String) jsonObj.get("trackingURL");
+    boolean isComplete =
+        Boolean.parseBoolean((String) jsonObj.get("isComplete"));
+    boolean isSuccessful =
+        Boolean.parseBoolean((String) jsonObj.get("isSuccessful"));
+    String failureInfo = (String) jsonObj.get("failureInfo");
+    float mapProgress = Float.parseFloat((String) jsonObj.get("mapProgress"));
+    float reduceProgress =
+        Float.parseFloat((String) jsonObj.get("reduceProgress"));
+    long jobStartTime = Long.parseLong((String) jsonObj.get("jobStartTime"));
+    long jobLastUpdateTime =
+        Long.parseLong((String) jsonObj.get("jobLastUpdateTime"));
+
+    int totalMappers = Integer.parseInt((String) jsonObj.get("totalMappers"));
+    int finishedMappersCount =
+        Integer.parseInt((String) jsonObj.get("finishedMappersCount"));
+
+    int totalReducers = Integer.parseInt((String) jsonObj.get("totalReducers"));
+    int finishedReducersCount =
+        Integer.parseInt((String) jsonObj.get("finishedReducersCount"));
+
+    String countersString = (String) jsonObj.get("countersString");
+    Counters counters = Counters.fromEscapedCompactString(countersString);
+
+    return new MapReduceJobState(jobId, jobName, trackingUrl, failureInfo,
+        isComplete, isSuccessful, mapProgress, reduceProgress, jobStartTime,
+        jobLastUpdateTime, totalMappers, finishedMappersCount, totalReducers,
+        finishedReducersCount, counters);
+  }
+}
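A small illustration (not part of this diff) of building a MapReduceJobState by hand and inspecting its JSON form; all values are placeholders, since inside the job wrapper they come from RunningJob and TaskReports.

import org.apache.hadoop.mapred.Counters;

import azkaban.jobtype.MapReduceJobState;

public class MapReduceJobStateDemo {
  public static void main(String[] args) {
    // Placeholder values for a half-finished map phase.
    MapReduceJobState state = new MapReduceJobState(
        "job_1234567890123_0001", "demo-job", "http://resourcemanager.example.com/proxy",
        null, false, false, 0.5f, 0.0f,
        System.currentTimeMillis(), System.currentTimeMillis(),
        10, 5, 2, 0, new Counters());

    // toJson() returns a Map<String, Object> of string-encoded fields.
    System.out.println(state.toJson());
  }
}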
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/pig/PigIoStats.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/pig/PigIoStats.java
new file mode 100644
index 0000000..2fe1f68
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/pig/PigIoStats.java
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.pig;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.pig.tools.pigstats.InputStats;
+import org.apache.pig.tools.pigstats.OutputStats;
+
+public class PigIoStats {
+  private long bytes;
+  private long records;
+  private String location;
+  private String name;
+
+  public PigIoStats(OutputStats stats) {
+    this.bytes = stats.getBytes();
+    this.records = stats.getNumberRecords();
+    this.name = stats.getName();
+    this.location = stats.getLocation();
+  }
+
+  public PigIoStats(InputStats stats) {
+    this.bytes = stats.getBytes();
+    this.records = stats.getNumberRecords();
+    this.name = stats.getName();
+    this.location = stats.getLocation();
+  }
+
+  public PigIoStats(String name, String location, long bytes, long records) {
+    this.bytes = bytes;
+    this.records = records;
+    this.name = name;
+    this.location = location;
+  }
+
+  public long getBytes() {
+    return this.bytes;
+  }
+
+  public long getNumberRecords() {
+    return this.records;
+  }
+
+  public String getLocation() {
+    return this.location;
+  }
+
+  public String getName() {
+    return this.name;
+  }
+
+  public Object toJson() {
+    Map<String, String> jsonObj = new HashMap<String, String>();
+    jsonObj.put("bytes", Long.toString(bytes));
+    jsonObj.put("location", location);
+    jsonObj.put("name", name);
+    jsonObj.put("numberRecords", Long.toString(records));
+    return jsonObj;
+  }
+
+  public static PigIoStats fromJson(Object obj) {
+    @SuppressWarnings("unchecked")
+    Map<String, Object> jsonObj = (HashMap<String, Object>) obj;
+
+    String name = (String) jsonObj.get("name");
+    long bytes = Long.parseLong((String) jsonObj.get("bytes"));
+    long records = Long.parseLong((String) jsonObj.get("numberRecords"));
+    String location = (String) jsonObj.get("location");
+    return new PigIoStats(name, location, bytes, records);
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/pig/PigJobDagNode.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/pig/PigJobDagNode.java
new file mode 100644
index 0000000..5f364b9
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/pig/PigJobDagNode.java
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.pig;
+
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import azkaban.jobtype.JobDagNode;
+import azkaban.jobtype.MapReduceJobState;
+import azkaban.jobtype.StatsUtils;
+
+import org.apache.pig.tools.pigstats.JobStats;
+
+public class PigJobDagNode extends JobDagNode {
+  private String jobId;
+
+  private List<String> features;
+  private List<String> aliases;
+
+  private PigJobStats jobStats;
+
+  public PigJobDagNode(String name, String[] aliases, String[] features) {
+    super(name);
+    this.aliases = Arrays.asList(aliases);
+    this.features = Arrays.asList(features);
+  }
+
+  public PigJobDagNode(String name, List<String> aliases, List<String> features) {
+    super(name);
+    this.aliases = aliases;
+    this.features = features;
+  }
+
+  public String getJobId() {
+    return jobId;
+  }
+
+  public void setJobId(String jobId) {
+    this.jobId = jobId;
+  }
+
+  public List<String> getAliases() {
+    return aliases;
+  }
+
+  public List<String> getFeatures() {
+    return features;
+  }
+
+  public void setJobStats(PigJobStats pigJobStats) {
+    this.jobStats = pigJobStats;
+  }
+
+  public void setJobStats(JobStats jobStats) {
+    this.jobStats = new PigJobStats(jobStats);
+  }
+
+  public PigJobStats getJobStats() {
+    return jobStats;
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public Object toJson() {
+    Map<String, Object> jsonObj = new HashMap<String, Object>();
+    jsonObj.put("name", name);
+    jsonObj.put("jobId", jobId);
+    jsonObj.put("level", Integer.toString(level));
+    jsonObj.put("aliases", Arrays.asList(aliases));
+    jsonObj.put("features", Arrays.asList(features));
+    jsonObj.put("parents", parents);
+    jsonObj.put("successors", successors);
+    if (jobConfiguration != null) {
+      jsonObj.put("jobConfiguration",
+          StatsUtils.propertiesToJson(jobConfiguration));
+    }
+    if (jobStats != null) {
+      jsonObj.put("jobStats", jobStats.toJson());
+    }
+    if (mapReduceJobState != null) {
+      jsonObj.put("mapReduceJobState", mapReduceJobState.toJson());
+    }
+    return jsonObj;
+  }
+
+  @SuppressWarnings("unchecked")
+  public static PigJobDagNode fromJson(Object obj) throws Exception {
+    Map<String, Object> jsonObj = (HashMap<String, Object>) obj;
+    String name = (String) jsonObj.get("name");
+    List<String> aliases = (ArrayList<String>) jsonObj.get("aliases");
+    List<String> features = (ArrayList<String>) jsonObj.get("features");
+
+    PigJobDagNode node = new PigJobDagNode(name, aliases, features);
+    node.setJobId((String) jsonObj.get("jobId"));
+    node.setParents((ArrayList<String>) jsonObj.get("parents"));
+    node.setSuccessors((ArrayList<String>) jsonObj.get("successors"));
+    node.setLevel(Integer.parseInt((String) jsonObj.get("level")));
+
+    // Grab configuration if it is available.
+    if (jsonObj.containsKey("jobConfiguration")) {
+      node.setJobConfiguration(StatsUtils.propertiesFromJson(jsonObj
+          .get("jobConfiguration")));
+    }
+
+    // Grab PigJobStats;
+    if (jsonObj.containsKey("jobStats")) {
+      PigJobStats pigJobStats = PigJobStats.fromJson(jsonObj.get("jobStats"));
+      node.setJobStats(pigJobStats);
+    }
+
+    // Grab MapReduceJobState.
+    if (jsonObj.containsKey("mapReduceJobState")) {
+      MapReduceJobState mapReduceJobState =
+          MapReduceJobState.fromJson(jsonObj.get("mapReduceJobState"));
+      node.setMapReduceJobState(mapReduceJobState);
+    }
+
+    return node;
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/pig/PigJobStats.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/pig/PigJobStats.java
new file mode 100644
index 0000000..9d42eaf
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/pig/PigJobStats.java
@@ -0,0 +1,328 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype.pig;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.pig.tools.pigstats.JobStats;
+import org.apache.pig.tools.pigstats.InputStats;
+import org.apache.pig.tools.pigstats.OutputStats;
+
+public class PigJobStats {
+  private int numberMaps;
+  private int numberReduces;
+
+  private long minMapTime;
+  private long maxMapTime;
+  private long avgMapTime;
+
+  private long minReduceTime;
+  private long maxReduceTime;
+  private long avgReduceTime;
+
+  private long bytesWritten;
+  private long hdfsBytesWritten;
+
+  private long mapInputRecords;
+  private long mapOutputRecords;
+  private long reduceInputRecords;
+  private long reduceOutputRecords;
+
+  private long proactiveSpillCountObjects;
+  private long proactiveSpillCountRecs;
+
+  private long recordsWritten;
+  private long smmSpillCount;
+
+  private String errorMessage;
+
+  private List<PigIoStats> inputStats;
+  private List<PigIoStats> outputStats;
+
+  public PigJobStats() {
+  }
+
+  public PigJobStats(
+      int numberMaps,
+      int numberReduces,
+      long minMapTime,
+      long maxMapTime,
+      long avgMapTime,
+      long minReduceTime,
+      long maxReduceTime,
+      long avgReduceTime,
+      long bytesWritten,
+      long hdfsBytesWritten,
+      long mapInputRecords,
+      long mapOutputRecords,
+      long reduceInputRecords,
+      long reduceOutputRecords,
+      long proactiveSpillCountObjects,
+      long proactiveSpillCountRecs,
+      long recordsWritten,
+      long smmSpillCount,
+      String errorMessage,
+      List<PigIoStats> inputStats,
+      List<PigIoStats> outputStats) {
+    this.numberMaps = numberMaps;
+    this.numberReduces = numberReduces;
+
+    this.minMapTime = minMapTime;
+    this.maxMapTime = maxMapTime;
+    this.avgMapTime = avgMapTime;
+
+    this.minReduceTime = minReduceTime;
+    this.maxReduceTime = maxReduceTime;
+    this.avgReduceTime = avgReduceTime;
+
+    this.bytesWritten = bytesWritten;
+    this.hdfsBytesWritten = hdfsBytesWritten;
+
+    this.mapInputRecords = mapInputRecords;
+    this.mapOutputRecords = mapOutputRecords;
+    this.reduceInputRecords = reduceInputRecords;
+    this.reduceOutputRecords = reduceOutputRecords;
+
+    this.proactiveSpillCountObjects = proactiveSpillCountObjects;
+    this.proactiveSpillCountRecs = proactiveSpillCountRecs;
+
+    this.recordsWritten = recordsWritten;
+    this.smmSpillCount = smmSpillCount;
+
+    this.errorMessage = errorMessage;
+    this.inputStats = inputStats;
+    this.outputStats = outputStats;
+  }
+
+  public PigJobStats(JobStats stats) {
+    numberMaps = stats.getNumberMaps();
+    minMapTime = stats.getMinMapTime();
+    maxMapTime = stats.getMaxMapTime();
+    avgMapTime = stats.getAvgMapTime();
+
+    numberReduces = stats.getNumberReduces();
+    minReduceTime = stats.getMinReduceTime();
+    maxReduceTime = stats.getMaxReduceTime();
+    avgReduceTime = stats.getAvgREduceTime();
+
+    bytesWritten = stats.getBytesWritten();
+    hdfsBytesWritten = stats.getHdfsBytesWritten();
+
+    mapInputRecords = stats.getMapInputRecords();
+    mapOutputRecords = stats.getMapOutputRecords();
+    reduceInputRecords = stats.getReduceInputRecords();
+    reduceOutputRecords = stats.getReduceOutputRecords();
+
+    proactiveSpillCountObjects = stats.getProactiveSpillCountObjects();
+    proactiveSpillCountRecs = stats.getProactiveSpillCountRecs();
+
+    recordsWritten = stats.getRecordWrittern();
+    smmSpillCount = stats.getSMMSpillCount();
+
+    errorMessage = stats.getErrorMessage();
+
+    List<InputStats> inputs = stats.getInputs();
+    inputStats = new ArrayList<PigIoStats>();
+    for (InputStats input : inputs) {
+      inputStats.add(new PigIoStats(input.getName(), input.getLocation(), input
+          .getBytes(), input.getNumberRecords()));
+    }
+
+    List<OutputStats> outputs = stats.getOutputs();
+    outputStats = new ArrayList<PigIoStats>();
+    for (OutputStats output : outputs) {
+      outputStats.add(new PigIoStats(output.getName(), output.getLocation(),
+          output.getBytes(), output.getNumberRecords()));
+    }
+  }
+
+  public int getNumberMaps() {
+    return numberMaps;
+  }
+
+  public int getNumberReduces() {
+    return numberReduces;
+  }
+
+  public long getMinMapTime() {
+    return minMapTime;
+  }
+
+  public long getMaxMapTime() {
+    return maxMapTime;
+  }
+
+  public long getAvgMapTime() {
+    return avgMapTime;
+  }
+
+  public long getMinReduceTime() {
+    return minReduceTime;
+  }
+
+  public long getMaxReduceTime() {
+    return maxReduceTime;
+  }
+
+  public long getAvgReduceTime() {
+    return avgReduceTime;
+  }
+
+  public long getBytesWritten() {
+    return bytesWritten;
+  }
+
+  public long getHdfsBytesWritten() {
+    return hdfsBytesWritten;
+  }
+
+  public long getMapInputRecords() {
+    return mapInputRecords;
+  }
+
+  public long getMapOutputRecords() {
+    return mapOutputRecords;
+  }
+
+  public long getReduceInputRecords() {
+    return reduceInputRecords;
+  }
+
+  public long getReduceOutputRecords() {
+    return reduceOutputRecords;
+  }
+
+  public long getProactiveSpillCountObjects() {
+    return proactiveSpillCountObjects;
+  }
+
+  public long getProactiveSpillCountRecs() {
+    return proactiveSpillCountRecs;
+  }
+
+  public long getRecordsWritten() {
+    return recordsWritten;
+  }
+
+  public long getSmmSpillCount() {
+    return smmSpillCount;
+  }
+
+  public String getErrorMessage() {
+    return errorMessage;
+  }
+
+  public List<PigIoStats> getInputStats() {
+    return inputStats;
+  }
+
+  public List<PigIoStats> getOutputStats() {
+    return outputStats;
+  }
+
+  private static List<Object> statsToJson(List<PigIoStats> stats) {
+    List<Object> jsonObj = new ArrayList<Object>();
+    for (PigIoStats stat : stats) {
+      jsonObj.add(stat.toJson());
+    }
+    return jsonObj;
+  }
+
+  private static List<PigIoStats> statsFromJson(Object obj) {
+    List<PigIoStats> stats = new ArrayList<PigIoStats>();
+
+    @SuppressWarnings("unchecked")
+    List<Object> jsonStats = (ArrayList<Object>) obj;
+
+    for (Object jsonStat : jsonStats) {
+      stats.add(PigIoStats.fromJson(jsonStat));
+    }
+    return stats;
+  }
+
+  public Object toJson() {
+    Map<String, Object> jsonObj = new HashMap<String, Object>();
+    jsonObj.put("numberMaps", Integer.toString(numberMaps));
+    jsonObj.put("numberReduces", Integer.toString(numberReduces));
+    jsonObj.put("minMapTime", Long.toString(minMapTime));
+    jsonObj.put("maxMapTime", Long.toString(maxMapTime));
+    jsonObj.put("avgMapTime", Long.toString(avgMapTime));
+    jsonObj.put("minReduceTime", Long.toString(minReduceTime));
+    jsonObj.put("maxReduceTime", Long.toString(maxReduceTime));
+    jsonObj.put("avgReduceTime", Long.toString(avgReduceTime));
+    jsonObj.put("bytesWritten", Long.toString(bytesWritten));
+    jsonObj.put("hdfsBytesWritten", Long.toString(hdfsBytesWritten));
+    jsonObj.put("mapInputRecords", Long.toString(mapInputRecords));
+    jsonObj.put("mapOutputRecords", Long.toString(mapOutputRecords));
+    jsonObj.put("reduceInputRecords", Long.toString(reduceInputRecords));
+    jsonObj.put("reduceOutputRecords", Long.toString(reduceOutputRecords));
+    jsonObj.put("proactiveSpillCountObjects",
+        Long.toString(proactiveSpillCountObjects));
+    jsonObj.put("proactiveSpillCountRecs",
+        Long.toString(proactiveSpillCountRecs));
+    jsonObj.put("recordsWritten", Long.toString(recordsWritten));
+    jsonObj.put("smmSpillCount", Long.toString(smmSpillCount));
+    jsonObj.put("errorMessage", errorMessage);
+    jsonObj.put("inputStats", statsToJson(inputStats));
+    jsonObj.put("outputStats", statsToJson(outputStats));
+    return jsonObj;
+  }
+
+  @SuppressWarnings("unchecked")
+  public static PigJobStats fromJson(Object obj) throws Exception {
+    Map<String, Object> jsonObj = (HashMap<String, Object>) obj;
+    int numberMaps = Integer.parseInt((String) jsonObj.get("numberMaps"));
+    int numberReduces = Integer.parseInt((String) jsonObj.get("numberReduces"));
+    long minMapTime = Long.parseLong((String) jsonObj.get("minMapTime"));
+    long maxMapTime = Long.parseLong((String) jsonObj.get("maxMapTime"));
+    long avgMapTime = Long.parseLong((String) jsonObj.get("avgMapTime"));
+    long minReduceTime = Long.parseLong((String) jsonObj.get("minReduceTime"));
+    long maxReduceTime = Long.parseLong((String) jsonObj.get("maxReduceTime"));
+    long avgReduceTime = Long.parseLong((String) jsonObj.get("avgReduceTime"));
+    long bytesWritten = Long.parseLong((String) jsonObj.get("bytesWritten"));
+    long hdfsBytesWritten =
+        Long.parseLong((String) jsonObj.get("hdfsBytesWritten"));
+    long mapInputRecords =
+        Long.parseLong((String) jsonObj.get("mapInputRecords"));
+    long mapOutputRecords =
+        Long.parseLong((String) jsonObj.get("mapOutputRecords"));
+    long reduceInputRecords =
+        Long.parseLong((String) jsonObj.get("reduceInputRecords"));
+    long reduceOutputRecords =
+        Long.parseLong((String) jsonObj.get("reduceOutputRecords"));
+    long proactiveSpillCountObjects =
+        Long.parseLong((String) jsonObj.get("proactiveSpillCountObjects"));
+    long proactiveSpillCountRecs =
+        Long.parseLong((String) jsonObj.get("proactiveSpillCountRecs"));
+    long recordsWritten =
+        Long.parseLong((String) jsonObj.get("recordsWritten"));
+    long smmSpillCount = Long.parseLong((String) jsonObj.get("smmSpillCount"));
+    List<PigIoStats> inputs = statsFromJson(jsonObj.get("inputStats"));
+    List<PigIoStats> outputs = statsFromJson(jsonObj.get("outputStats"));
+    String errorMessage = (String) jsonObj.get("errorMessage");
+
+    return new PigJobStats(numberMaps, numberReduces, minMapTime, maxMapTime,
+        avgMapTime, minReduceTime, maxReduceTime, avgReduceTime, bytesWritten,
+        hdfsBytesWritten, mapInputRecords, mapOutputRecords,
+        reduceInputRecords, reduceOutputRecords, proactiveSpillCountObjects,
+        proactiveSpillCountRecs, recordsWritten, smmSpillCount, errorMessage,
+        inputs, outputs);
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/PigProcessJob.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/PigProcessJob.java
new file mode 100644
index 0000000..0f6b0af
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/PigProcessJob.java
@@ -0,0 +1,258 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import azkaban.jobExecutor.JavaProcessJob;
+import azkaban.security.commons.HadoopSecurityManager;
+import azkaban.security.commons.SecurityUtils;
+import azkaban.utils.Props;
+import azkaban.utils.StringUtils;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.StringTokenizer;
+import org.apache.log4j.Logger;
+
+public class PigProcessJob extends JavaProcessJob {
+
+  public static final String PIG_SCRIPT = "pig.script";
+  public static final String UDF_IMPORT = "udf.import.list";
+  public static final String PIG_ADDITIONAL_JARS = "pig.additional.jars";
+  public static final String PIG_PARAM_PREFIX = "param.";
+  public static final String PIG_PARAM_FILES = "paramfile";
+  public static final String HADOOP_UGI = "hadoop.job.ugi";
+  public static final String DEBUG = "debug";
+
+  public static final String PIG_JAVA_CLASS = "org.apache.pig.Main";
+  public static final String SECURE_PIG_WRAPPER =
+      "azkaban.jobtype.SecurePigWrapper";
+
+  public PigProcessJob(final String jobid, final Props sysProps, final Props jobProps,
+      final Logger log) {
+    super(jobid, sysProps, new Props(sysProps, jobProps), log);
+  }
+
+  private static String getSourcePathFromClass(final Class<?> containedClass) {
+    File file =
+        new File(containedClass.getProtectionDomain().getCodeSource()
+            .getLocation().getPath());
+
+    if (!file.isDirectory() && file.getName().endsWith(".class")) {
+      final String name = containedClass.getName();
+      final StringTokenizer tokenizer = new StringTokenizer(name, ".");
+      while (tokenizer.hasMoreTokens()) {
+        tokenizer.nextElement();
+        file = file.getParentFile();
+      }
+
+      return file.getPath();
+    } else {
+      return containedClass.getProtectionDomain().getCodeSource().getLocation()
+          .getPath();
+    }
+  }
+
+  @Override
+  protected String getJavaClass() {
+    return SecurityUtils.shouldProxy(getSysProps().toProperties())
+        ? SECURE_PIG_WRAPPER
+        : PIG_JAVA_CLASS;
+  }
+
+  @Override
+  protected String getJVMArguments() {
+    String args = super.getJVMArguments();
+    final String typeGlobalJVMArgs =
+        getSysProps().getString("jobtype.global.jvm.args", null);
+    if (typeGlobalJVMArgs != null) {
+      args += " " + typeGlobalJVMArgs;
+    }
+
+    final List<String> udfImport = getUDFImportList();
+    if (udfImport != null) {
+      args += " -Dudf.import.list=" + super.createArguments(udfImport, ":");
+    }
+
+    final List<String> additionalJars = getAdditionalJarsList();
+    if (additionalJars.size() > 0) {
+      args +=
+          " -Dpig.additional.jars="
+              + super.createArguments(additionalJars, ":");
+    }
+
+    final String hadoopUGI = getHadoopUGI();
+    if (hadoopUGI != null) {
+      args += " -Dhadoop.job.ugi=" + hadoopUGI;
+    }
+
+    if (SecurityUtils.shouldProxy(getSysProps().toProperties())) {
+      info("Setting up secure proxy info for child process");
+      String secure;
+      final Properties p = getSysProps().toProperties();
+      secure =
+          " -D" + SecurityUtils.PROXY_USER + "="
+              + p.getProperty(SecurityUtils.PROXY_USER);
+      secure +=
+          " -D" + SecurityUtils.PROXY_KEYTAB_LOCATION + "="
+              + p.getProperty(SecurityUtils.PROXY_KEYTAB_LOCATION);
+
+      secure +=
+          " -D" + HadoopSecurityManager.USER_TO_PROXY + "="
+              + getJobProps().get(HadoopSecurityManager.USER_TO_PROXY);
+
+      final String extraToken = p.getProperty(SecurityUtils.OBTAIN_BINARY_TOKEN);
+      if (extraToken != null) {
+        secure += " -D" + SecurityUtils.OBTAIN_BINARY_TOKEN + "=" + extraToken;
+      }
+      info("Secure settings = " + secure);
+      args += secure;
+    } else {
+      info("Not setting up secure proxy info for child process");
+    }
+
+    return args;
+  }
+
+  @Override
+  protected String getMainArguments() {
+    final ArrayList<String> list = new ArrayList<>();
+    final Map<String, String> map = getPigParams();
+    if (map != null) {
+      for (final Map.Entry<String, String> entry : map.entrySet()) {
+        list.add("-param "
+            + StringUtils.shellQuote(entry.getKey() + "=" + entry.getValue(),
+                StringUtils.SINGLE_QUOTE));
+      }
+    }
+
+    final List<String> paramFiles = getPigParamFiles();
+    if (paramFiles != null) {
+      for (final String paramFile : paramFiles) {
+        list.add("-param_file " + paramFile);
+      }
+    }
+
+    if (getDebug()) {
+      list.add("-debug");
+    }
+
+    list.add(getScript());
+
+    return StringUtils.join((Collection<String>) list, " ");
+  }
+
+  @Override
+  @SuppressWarnings("CollectionIncompatibleType")
+  protected List<String> getClassPaths() {
+    final List<String> classPath = super.getClassPaths();
+
+    // Add hadoop home setting.
+    final String hadoopHome = System.getenv("HADOOP_HOME");
+    if (hadoopHome == null) {
+      info("HADOOP_HOME not set, using default hadoop config.");
+    } else {
+      info("Using hadoop config found in " + hadoopHome);
+      classPath.add(new File(hadoopHome, "conf").getPath());
+    }
+
+    classPath.add(getSourcePathFromClass(Props.class));
+    if (SecurityUtils.shouldProxy(getSysProps().toProperties())) {
+      classPath.add(getSourcePathFromClass(SecurePigWrapper.class));
+    }
+
+    final List<String> typeClassPath =
+        getSysProps().getStringList("jobtype.classpath", null, ",");
+    if (typeClassPath != null) {
+      // fill in this when load this jobtype
+      final String pluginDir = getSysProps().get("plugin.dir");
+      for (final String jar : typeClassPath) {
+        File jarFile = new File(jar);
+        if (!jarFile.isAbsolute()) {
+          jarFile = new File(pluginDir + File.separatorChar + jar);
+        }
+
+        if (!classPath.contains(jarFile.getAbsolutePath())) {
+          classPath.add(jarFile.getAbsolutePath());
+        }
+      }
+    }
+
+    final List<String> typeGlobalClassPath =
+        getSysProps().getStringList("jobtype.global.classpath", null, ",");
+    if (typeGlobalClassPath != null) {
+      for (final String jar : typeGlobalClassPath) {
+        if (!classPath.contains(jar)) {
+          classPath.add(jar);
+        }
+      }
+    }
+    return classPath;
+  }
+
+  protected boolean getDebug() {
+    return getJobProps().getBoolean(DEBUG, false);
+  }
+
+  protected String getScript() {
+    return getJobProps().getString(PIG_SCRIPT);
+  }
+
+  protected List<String> getUDFImportList() {
+    final List<String> udfImports = new ArrayList<>();
+    final List<String> typeImports =
+        getSysProps().getStringList(UDF_IMPORT, null, ",");
+    final List<String> jobImports =
+        getJobProps().getStringList(UDF_IMPORT, null, ",");
+    if (typeImports != null) {
+      udfImports.addAll(typeImports);
+    }
+    if (jobImports != null) {
+      udfImports.addAll(jobImports);
+    }
+    return udfImports;
+  }
+
+  protected List<String> getAdditionalJarsList() {
+    final List<String> additionalJars = new ArrayList<>();
+    final List<String> typeJars =
+        getSysProps().getStringList(PIG_ADDITIONAL_JARS, null, ",");
+    final List<String> jobJars =
+        getJobProps().getStringList(PIG_ADDITIONAL_JARS, null, ",");
+    if (typeJars != null) {
+      additionalJars.addAll(typeJars);
+    }
+    if (jobJars != null) {
+      additionalJars.addAll(jobJars);
+    }
+    return additionalJars;
+  }
+
+  protected String getHadoopUGI() {
+    return getJobProps().getString(HADOOP_UGI, null);
+  }
+
+  protected Map<String, String> getPigParams() {
+    return getJobProps().getMapByPrefix(PIG_PARAM_PREFIX);
+  }
+
+  protected List<String> getPigParamFiles() {
+    return getJobProps().getStringList(PIG_PARAM_FILES, null, ",");
+  }
+}
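
For reference, getMainArguments() above is what turns the job's properties into the Pig command line: every property under the pig-parameter prefix becomes a shell-quoted -param, each entry of the parameter-file list becomes a -param_file, an optional -debug flag is appended, and the script path goes last. As a hypothetical illustration (the exact property keys come from constants such as PIG_SCRIPT and PIG_PARAM_PREFIX declared earlier in the file, not shown in this hunk), a job configured as

    pig.script=src/wordcount.pig
    param.inData=/data/input
    param.outData=/data/output

would be rendered roughly as

    -param 'inData=/data/input' -param 'outData=/data/output' src/wordcount.pig
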
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/SecurePigWrapper.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/SecurePigWrapper.java
new file mode 100644
index 0000000..49ff618
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/SecurePigWrapper.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier;
+import org.apache.hadoop.security.token.Token;
+import org.apache.log4j.Logger;
+
+import azkaban.security.commons.SecurityUtils;
+
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.security.PrivilegedExceptionAction;
+import java.util.Properties;
+
+public class SecurePigWrapper {
+
+  public static final String OBTAIN_BINARY_TOKEN = "obtain.binary.token";
+  public static final String MAPREDUCE_JOB_CREDENTIALS_BINARY =
+      "mapreduce.job.credentials.binary";
+
+  public static void main(final String[] args) throws IOException,
+      InterruptedException {
+    final Logger logger = Logger.getRootLogger();
+    final Properties p = System.getProperties();
+    final Configuration conf = new Configuration();
+
+    SecurityUtils.getProxiedUser(p, logger, conf).doAs(
+        new PrivilegedExceptionAction<Void>() {
+          @Override
+          public Void run() throws Exception {
+            prefetchToken();
+            org.apache.pig.Main.main(args);
+            return null;
+          }
+
+          // For Pig jobs that need to do extra communication with the
+          // JobTracker, it's necessary to pre-fetch a token and include it in
+          // the credentials cache
+          private void prefetchToken() throws InterruptedException, IOException {
+            String shouldPrefetch = p.getProperty(OBTAIN_BINARY_TOKEN);
+            if (shouldPrefetch != null && shouldPrefetch.equals("true")) {
+              logger.info("Pre-fetching token");
+              Job job =
+                  new Job(conf, "totally phony, extremely fake, not real job");
+
+              JobConf jc = new JobConf(conf);
+              JobClient jobClient = new JobClient(jc);
+              logger.info("Pre-fetching: Got new JobClient: " + jc);
+              Token<DelegationTokenIdentifier> mrdt =
+                  jobClient.getDelegationToken(new Text("hi"));
+              job.getCredentials().addToken(new Text("howdy"), mrdt);
+
+              File temp = File.createTempFile("mr-azkaban", ".token");
+              temp.deleteOnExit();
+
+              FileOutputStream fos = null;
+              DataOutputStream dos = null;
+              try {
+                fos = new FileOutputStream(temp);
+                dos = new DataOutputStream(fos);
+                job.getCredentials().writeTokenStorageToStream(dos);
+              } finally {
+                if (dos != null) {
+                  dos.close();
+                }
+                if (fos != null) {
+                  fos.close();
+                }
+              }
+              logger.info("Setting " + MAPREDUCE_JOB_CREDENTIALS_BINARY
+                  + " to " + temp.getAbsolutePath());
+              System.setProperty(MAPREDUCE_JOB_CREDENTIALS_BINARY,
+                  temp.getAbsolutePath());
+            } else {
+              logger.info("Not pre-fetching token");
+            }
+          }
+        });
+  }
+}
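
SecurePigWrapper writes the pre-fetched delegation token to a temp file and advertises it through the mapreduce.job.credentials.binary property. As a rough sketch only (not part of this PR, and an assumption about how a consumer of that property might behave), a child process could load the credentials back like this:

    // Hypothetical consumer of the token file written by SecurePigWrapper.
    // Not part of this PR; shown only to illustrate the credentials-binary handoff.
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.security.Credentials;
    import org.apache.hadoop.security.UserGroupInformation;

    public class CredentialsBinaryReaderSketch {
      public static void main(String[] args) throws Exception {
        // SecurePigWrapper sets this system property to the temp token file it wrote.
        String tokenFile = System.getProperty("mapreduce.job.credentials.binary");
        if (tokenFile != null) {
          // Read the serialized token storage and attach it to the current user,
          // so later JobClient calls can authenticate with the delegation token.
          Credentials creds =
              Credentials.readTokenStorageFile(new Path(tokenFile), new Configuration());
          UserGroupInformation.getCurrentUser().addCredentials(creds);
        }
      }
    }
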
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/SparkJobArg.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/SparkJobArg.java
new file mode 100644
index 0000000..8db12d8
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/SparkJobArg.java
@@ -0,0 +1,59 @@
+package azkaban.jobtype;
+
+public enum SparkJobArg {
+
+  // standard spark submit arguments, ordered in the spark-submit --help order
+  MASTER("master", false), // just to trick the eclipse formatter
+  DEPLOY_MODE("deploy-mode", false), //
+  CLASS("class", false), //
+  NAME("name", false), //
+  SPARK_JARS("jars", true), //
+  SPARK_PACKAGES("packages", false),
+  PACKAGES("packages", false), //
+  REPOSITORIES("repositories", false), //
+  PY_FILES("py-files", false), //
+  FILES("files", false), //
+  SPARK_CONF_PREFIX("conf.", "--conf", true), //
+  PROPERTIES_FILE("properties-file", false), //
+  DRIVER_MEMORY("driver-memory", false), //
+  DRIVER_JAVA_OPTIONS("driver-java-options", true), //
+  DRIVER_LIBRARY_PATH("driver-library-path", false), //
+  DRIVER_CLASS_PATH("driver-class-path", false), //
+  EXECUTOR_MEMORY("executor-memory", false), //
+  PROXY_USER("proxy-user", false), //
+  SPARK_FLAG_PREFIX("flag.", "--", true), // --help, --verbose, --supervise, --version
+
+  // Yarn only Arguments
+  EXECUTOR_CORES("executor-cores", false), //
+  DRIVER_CORES("driver-cores", false), //
+  QUEUE("queue", false), //
+  NUM_EXECUTORS("num-executors", false), //
+  ARCHIVES("archives", false), //
+  PRINCIPAL("principal", false), //
+  KEYTAB("keytab", false), //
+
+  // Not spark-submit arguments: these only exist in Azkaban
+  EXECUTION_JAR("execution-jar", null, true), //
+  PARAMS("params", null, true), //
+  SPARK_VERSION("spark-version", null, true),
+  ;
+
+  public static final String delimiter = "\u001A";
+
+  SparkJobArg(String propName, boolean specialTreatment) {
+    this(propName, "--" + propName, specialTreatment);
+  }
+
+  SparkJobArg(String azPropName, String sparkParamName, boolean specialTreatment) {
+    this.azPropName = azPropName;
+    this.sparkParamName = sparkParamName;
+    this.needSpecialTreatment = specialTreatment;
+  }
+
+  final String azPropName;
+
+  final String sparkParamName;
+
+  final boolean needSpecialTreatment;
+
+}
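
Each SparkJobArg pairs an Azkaban job property name (azPropName) with the spark-submit flag it maps to (sparkParamName); the one-argument constructor simply derives the flag by prepending "--". The two prefix entries fan out (conf.* to --conf key=value pairs, flag.* to bare --flags), while the last three entries never reach spark-submit and are consumed by Azkaban itself. A purely illustrative mapping, with hypothetical property values (the actual argument assembly lives in HadoopSparkJob, elsewhere in this PR):

    class=com.example.MyApp        ->  --class com.example.MyApp
    executor-memory=4g             ->  --executor-memory 4g
    conf.spark.executor.cores=2    ->  --conf spark.executor.cores=2
    flag.verbose=true              ->  --verbose
    execution-jar=./lib/app.jar    ->  handled by Azkaban, no spark-submit flag
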
diff --git a/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/StatsUtils.java b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/StatsUtils.java
new file mode 100644
index 0000000..20839eb
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/main/java/azkaban/jobtype/StatsUtils.java
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2012 LinkedIn Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package azkaban.jobtype;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.Counters;
+import org.apache.hadoop.mapred.Counters.Counter;
+import org.apache.hadoop.mapred.Counters.Group;
+import org.apache.hadoop.mapred.RunningJob;
+import org.apache.log4j.Logger;
+import org.apache.pig.impl.util.ObjectSerializer;
+
+public class StatsUtils {
+
+  private static Logger logger = Logger.getLogger(StatsUtils.class);
+
+  private static final Set<String> JOB_CONF_KEYS = new HashSet<String>(
+      Arrays.asList(new String[] {
+          "mapred.job.map.memory.mb",
+          "mapred.job.reduce.memory.mb",
+          "mapred.child.java.opts",
+          "mapred.cache.files",
+          "mapred.cache.archives",
+          "mapred.cache.files.filesizes",
+          "mapred.min.split.size",
+          "mapred.max.split.size",
+          "mapred.output.compress",
+          "mapred.output.compression.type",
+          "mapred.output.compression.codec",
+          "mapred.compress.map.output",
+          "mapred.map.output.compression.codec",
+          "mapred.queue.names",
+          "mapred.job.queue.name",
+          "io.sort.mb"
+      }));
+
+  public static Properties getJobConf(RunningJob runningJob) {
+    try {
+      Path path = new Path(runningJob.getJobFile());
+      Configuration conf = new Configuration(false);
+      FileSystem fs = FileSystem.get(new Configuration());
+      InputStream in = fs.open(path);
+      conf.addResource(in);
+      return getJobConf(conf);
+    } catch (FileNotFoundException e) {
+      logger.warn("Job conf not found.");
+    } catch (IOException e) {
+      logger.warn("Error while retrieving job conf: " + e.getMessage());
+    }
+    return null;
+  }
+
+  public static Properties getJobConf(Configuration conf) {
+    if (conf == null) {
+      return null;
+    }
+
+    Properties jobConfProperties = null;
+    try {
+      jobConfProperties = new Properties();
+      for (Map.Entry<String, String> entry : conf) {
+        if (entry.getKey().equals("pig.mapPlan")
+            || entry.getKey().equals("pig.reducePlan")) {
+          jobConfProperties.setProperty(entry.getKey(), ObjectSerializer
+              .deserialize(entry.getValue()).toString());
+        } else if (JOB_CONF_KEYS.contains(entry.getKey())) {
+          jobConfProperties.setProperty(entry.getKey(), entry.getValue());
+        }
+      }
+    } catch (IOException e) {
+      logger.warn("Error while reading job conf: " + e.getMessage());
+    }
+    return jobConfProperties;
+  }
+
+  public static Object propertiesToJson(Properties properties) {
+    Map<String, String> jsonObj = new HashMap<String, String>();
+    if (properties != null) {
+      Set<String> keys = properties.stringPropertyNames();
+      for (String key : keys) {
+        jsonObj.put(key, properties.getProperty(key));
+      }
+    }
+    return jsonObj;
+  }
+
+  public static Properties propertiesFromJson(Object obj) {
+    @SuppressWarnings("unchecked")
+    Map<String, String> jsonObj = (HashMap<String, String>) obj;
+
+    Properties properties = new Properties();
+    for (Map.Entry<String, String> entry : jsonObj.entrySet()) {
+      properties.setProperty(entry.getKey(), entry.getValue());
+    }
+    return properties;
+  }
+
+  public static Object countersToJson(Counters counters) {
+    Map<String, Object> jsonObj = new HashMap<String, Object>();
+
+    if (counters == null) {
+      return jsonObj;
+    }
+
+    Collection<String> counterGroups = counters.getGroupNames();
+    for (String groupName : counterGroups) {
+      Map<String, String> counterStats = new HashMap<String, String>();
+      Group group = counters.getGroup(groupName);
+      Iterator<Counter> it = group.iterator();
+      while (it.hasNext()) {
+        Counter counter = it.next();
+        counterStats.put(counter.getDisplayName(),
+            String.valueOf(counter.getCounter()));
+      }
+      jsonObj.put(groupName, counterStats);
+    }
+    return jsonObj;
+  }
+}
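
countersToJson above flattens Hadoop counters into a two-level map: group name to (counter display name to string value). Serialized as JSON, a hypothetical result would look roughly like

    {
      "Map-Reduce Framework": {
        "Map input records": "1000",
        "Reduce output records": "250"
      },
      "File System Counters": {
        "HDFS: Number of bytes read": "1048576"
      }
    }

Group and counter names here are illustrative; the values are always strings because the code emits String.valueOf(counter.getCounter()).
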
diff --git a/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/connectors/gobblin/TestGobblinHadoopJob.java b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/connectors/gobblin/TestGobblinHadoopJob.java
new file mode 100644
index 0000000..f537d93
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/connectors/gobblin/TestGobblinHadoopJob.java
@@ -0,0 +1,38 @@
+package azkaban.jobtype.connectors.gobblin;
+
+
+public class TestGobblinHadoopJob {
+//  private static final String JOB_ID = "test_job_id";
+//  private static final Logger LOG = Logger.getLogger(TestGobblinHadoopJob.class);
+//
+//  @Test
+//  public void testPrintableJobProperties() {
+//    Set<String> passWordKeys = Sets.newHashSet("source.conn.password", "jdbc.publisher.password", "password", "pass_word", "pass.word", "passWord");
+
+//    Props sysPros = new Props();
+//    //Add dummy directory path so that GobblinHadoopJob can be instantiated.
+//    sysPros.put(GobblinConstants.GOBBLIN_PRESET_DIR_KEY, this.getClass()
+//                                                             .getProtectionDomain()
+//                                                             .getCodeSource()
+//                                                             .getLocation()
+//                                                             .getPath() + "/" + "dummy");
+//    Props jobPros = new Props();
+//
+//    GobblinHadoopJob job = new GobblinHadoopJob(JOB_ID, sysPros, jobPros, LOG);
+//    Set<String> normalKeys = Sets.newHashSet(UUID.randomUUID().toString(), UUID.randomUUID().toString(), UUID.randomUUID().toString(), UUID.randomUUID().toString());
+//
+//    Props props = new Props();
+//    putDummyValue(props, normalKeys);
+//    Assert.assertTrue(job.printableJobProperties(props).keySet().containsAll(normalKeys) && job.printableJobProperties(props).keySet().size() == normalKeys.size());
+//
+//    putDummyValue(props, passWordKeys);
+//    //Adding password keys should not change output of printableJobProperties
+//    Assert.assertTrue(job.printableJobProperties(props).keySet().containsAll(normalKeys) && job.printableJobProperties(props).keySet().size() == normalKeys.size());
+//  }
+//
+//  private void putDummyValue(Props props, Set<String> keys) {
+//    for (String key : keys) {
+//      props.put(key, "dummy");
+//    }
+//  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestFileUtils.java b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestFileUtils.java
new file mode 100644
index 0000000..a1445eb
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestFileUtils.java
@@ -0,0 +1,92 @@
+package azkaban.jobtype;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+
+import org.apache.commons.lang.RandomStringUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+import azkaban.jobtype.javautils.FileUtils;
+
+import com.google.common.io.Files;
+
+@SuppressWarnings("DefaultCharset")
+public class TestFileUtils {
+  private static final int DIRECTORY_LEVEL = 5;
+  private static final int NUM_FILES = 5;
+  private static final String DELIMITER = ",";
+
+  @Test
+  public void testDirectoryDelete() throws IOException {
+    File root = Files.createTempDir();
+    File parent = root;
+    File child = null;
+    for (int i=0; i < DIRECTORY_LEVEL; i++) {
+      child = createTmpDirWithRandomFiles(parent);
+      parent = child;
+    }
+
+    Assert.assertTrue("Failed to create " + root.getAbsolutePath(), root.exists());
+    FileUtils.deleteFileOrDirectory(root);
+    Assert.assertTrue("Failed to delete " + root.getAbsolutePath(), !root.exists());
+  }
+
+  @Test
+  public void testFileDelete() throws IOException {
+    File f = File.createTempFile(this.getClass().getSimpleName(), ".txt");
+    BufferedWriter w = new BufferedWriter(new FileWriter(f));
+    w.write(RandomStringUtils.randomAlphanumeric(1024));
+    w.close();
+
+    Assert.assertTrue("Failed to create " + f.getAbsolutePath(), f.exists());
+    FileUtils.deleteFileOrDirectory(f);
+    Assert.assertTrue("Failed to delete " + f.getAbsolutePath(), !f.exists());
+  }
+
+  private File createTmpDirWithRandomFiles(File parentDir) throws IOException {
+    File dir = Files.createTempDir();
+    for (int i = 0; i < NUM_FILES; i++) {
+      File f = new File(dir, ""+i+".txt");
+      f.createNewFile();
+
+      BufferedWriter w = new BufferedWriter(new FileWriter(f));
+      w.write(RandomStringUtils.randomAlphanumeric(1024));
+      w.close();
+    }
+
+    File tmp = new File(parentDir, dir.getName());
+    Files.move(dir, tmp);
+    return tmp;
+  }
+
+  @Test
+  public void testlistFiles() throws IOException {
+    File root = Files.createTempDir();
+    root.deleteOnExit();
+
+    File dir = createTmpDirWithRandomFiles(root);
+
+    //List using wild card
+    Collection<String> actual = FileUtils.listFiles(dir.getAbsolutePath() + File.separator + "*", DELIMITER);
+    Collection<String> expected = new HashSet<String>();
+    for (int i = 0; i < NUM_FILES; i++) {
+      expected.add(dir.getAbsolutePath() + File.separator + i +".txt");
+    }
+    Assert.assertEquals("Failed to list all files with wildcard", expected, new HashSet<String>(actual));
+
+    //List using explicit path
+    actual = FileUtils.listFiles(dir.getAbsolutePath() + File.separator + "1.txt" + DELIMITER + dir.getAbsolutePath() + File.separator + "2.txt", DELIMITER);
+    expected = new HashSet<String>();
+    expected.add(dir.getAbsolutePath() + File.separator + "1.txt");
+    expected.add(dir.getAbsolutePath() + File.separator + "2.txt");
+
+    Assert.assertEquals("Failed to list all files", expected, new HashSet<String>(actual));
+
+    FileUtils.deleteFileOrDirectory(root);
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopJobUtilsAdditionalNamenodes.java b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopJobUtilsAdditionalNamenodes.java
new file mode 100644
index 0000000..dce233c
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopJobUtilsAdditionalNamenodes.java
@@ -0,0 +1,23 @@
+package azkaban.jobtype;
+
+import azkaban.utils.Props;
+import org.junit.Assert;
+import org.junit.Test;
+
+
+public class TestHadoopJobUtilsAdditionalNamenodes {
+
+  @Test
+  public void testAdditionalNamenodes() {
+    Props testProps = new Props();
+    HadoopJobUtils.addAdditionalNamenodesToProps(testProps, "hdfs://testNN:9000");
+    Assert.assertEquals("hdfs://testNN:9000", testProps.get("other_namenodes"));
+
+    testProps = new Props();
+    testProps.put("other_namenodes", "hdfs://testNN1:9000,hdfs://testNN2:9000");
+    HadoopJobUtils.addAdditionalNamenodesToProps(testProps, "hdfs://testNN:9000");
+    Assert.assertEquals("hdfs://testNN1:9000,hdfs://testNN2:9000,hdfs://testNN:9000",
+        testProps.get("other_namenodes"));
+  }
+
+}
diff --git a/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopJobUtilsExecutionJar.java b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopJobUtilsExecutionJar.java
new file mode 100644
index 0000000..feb3c2e
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopJobUtilsExecutionJar.java
@@ -0,0 +1,117 @@
+package azkaban.jobtype;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Set;
+import java.util.HashSet;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.log4j.Logger;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import azkaban.utils.Props;
+
+public class TestHadoopJobUtilsExecutionJar {
+  Props jobProps = null;
+
+  Logger logger = Logger.getRootLogger();
+
+  String workingDirString = "/tmp/TestHadoopSpark";
+
+  File workingDirFile = new File(workingDirString);
+
+  File libFolderFile = new File(workingDirFile, "lib");
+
+  String executionJarName = "hadoop-spark-job-test-execution-x.y.z-a.b.c.jar";
+
+  File executionJarFile = new File(libFolderFile, "hadoop-spark-job-test-execution-x.y.z-a.b.c.jar");
+
+  File libraryJarFile = new File(libFolderFile, "library.jar");
+
+  String delim = SparkJobArg.delimiter;
+
+  @Before
+  public void beforeMethod() throws IOException {
+    if (workingDirFile.exists())
+      FileUtils.deleteDirectory(workingDirFile);
+    workingDirFile.mkdirs();
+    libFolderFile.mkdirs();
+    executionJarFile.createNewFile();
+    libraryJarFile.createNewFile();
+
+  }
+
+  // with no lib folder, the wildcard should resolve to an empty string
+  @Test
+  public void testNoLibFolder() throws IOException {
+    FileUtils.deleteDirectory(libFolderFile);
+    String retval = HadoopJobUtils.resolveWildCardForJarSpec(workingDirString, "./lib/*", logger);
+
+    Assert.assertEquals(retval, "");
+  }
+
+  // an empty lib folder should also resolve to an empty string
+  @Test
+  public void testLibFolderHasNothingInIt() throws IOException {
+    FileUtils.deleteDirectory(libFolderFile);
+    libFolderFile.mkdirs();
+    String retval = HadoopJobUtils.resolveWildCardForJarSpec(workingDirString, "./lib/*", logger);
+
+    Assert.assertEquals(retval, "");
+  }
+
+
+  @Test
+  public void testOneLibFolderExpansion() throws IOException {
+    String retval = HadoopJobUtils.resolveWildCardForJarSpec(workingDirString, "./lib/*", logger);
+    Set<String> retvalSet = new HashSet<String>(Arrays.asList(retval.split(",")));
+
+    Set<String> expected = new HashSet<String>();
+    expected.add("/tmp/TestHadoopSpark/./lib/library.jar");
+    expected.add("/tmp/TestHadoopSpark/./lib/hadoop-spark-job-test-execution-x.y.z-a.b.c.jar");
+
+    Assert.assertTrue("Expected size is different from retrieval size. Expected: " + expected + " , Actual: " + retvalSet,
+                      expected.size() == retvalSet.size());
+    expected.removeAll(retvalSet);
+    Assert.assertTrue("Expected values are not equal to Actual values. Expected: " + expected + " , Actual: " + retvalSet,
+                      expected.isEmpty() );
+  }
+
+  @Test
+  public void testTwoLibFolderExpansionAllFilesResolved() throws IOException {
+    File lib2FolderFile = new File(workingDirFile, "lib2");
+    lib2FolderFile.mkdirs();
+    File lib2test1Jar = new File(lib2FolderFile, "test1.jar");
+    lib2test1Jar.createNewFile();
+    File lib2test2Jar = new File(lib2FolderFile, "test2.jar");
+    lib2test2Jar.createNewFile();
+    String retval = HadoopJobUtils.resolveWildCardForJarSpec(workingDirString, "./lib/*,./lib2/*",
+            logger);
+
+    Assert.assertTrue(retval.contains("/tmp/TestHadoopSpark/./lib/library.jar"));
+    Assert.assertTrue(retval.contains("/tmp/TestHadoopSpark/./lib/hadoop-spark-job-test-execution-x.y.z-a.b.c.jar"));
+    Assert.assertTrue(retval.contains("/tmp/TestHadoopSpark/./lib2/test1.jar"));
+    Assert.assertTrue(retval.contains("/tmp/TestHadoopSpark/./lib2/test2.jar"));
+  }
+
+  @Test
+  public void testTwoLibFolderExpansionExpandsInOrder() throws IOException {
+
+    executionJarFile.delete();
+
+    File lib2FolderFile = new File(workingDirFile, "lib2");
+    lib2FolderFile.mkdirs();
+    File lib2test1Jar = new File(lib2FolderFile, "test1.jar");
+    lib2test1Jar.createNewFile();
+
+    String retval = HadoopJobUtils.resolveWildCardForJarSpec(workingDirString, "./lib/*,./lib2/*",
+            logger);
+
+    Assert.assertEquals(
+            retval,
+            "/tmp/TestHadoopSpark/./lib/library.jar,/tmp/TestHadoopSpark/./lib2/test1.jar");
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopJobUtilsFilterCommands.java b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopJobUtilsFilterCommands.java
new file mode 100644
index 0000000..6967c4f
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopJobUtilsFilterCommands.java
@@ -0,0 +1,71 @@
+package azkaban.jobtype;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.log4j.Logger;
+import org.junit.Before;
+import org.junit.Test;
+
+import junit.framework.Assert;
+
+/**
+ * Test class for filterCommands method in HadoopJobUtils
+ */
+public class TestHadoopJobUtilsFilterCommands {
+  private Logger logger = Logger.getRootLogger();
+
+  private List<String> originalCommands;
+
+  @Before
+  public void beforeMethod() throws IOException {
+    originalCommands = new LinkedList<String>();
+    originalCommands.add("kinit blah@blah");
+    originalCommands.add("hadoop fs -ls");
+    originalCommands.add("hadoop fs -mkdir");
+    originalCommands.add("kdestroy");
+  }
+
+  @Test
+  public void testEmptyInputList() {
+    List<String> filteredCommands = HadoopJobUtils.filterCommands(Collections.<String> emptyList(),
+            HadoopJobUtils.MATCH_ALL_REGEX, HadoopJobUtils.MATCH_NONE_REGEX, logger);
+    Assert.assertTrue("filtering output of an empty collection should be empty collection",
+            filteredCommands.isEmpty());
+  }
+
+  @Test
+  public void testNoCommandMatchCriteria() {
+    List<String> filteredCommands = HadoopJobUtils.filterCommands(originalCommands, "hadoop.*",
+            "hadoop.*", logger);
+    Assert.assertTrue("filtering output of with no matching command should be empty collection",
+            filteredCommands.isEmpty());
+  }
+
+  @Test
+  public void testWhitelistCriteria() {
+    List<String> filteredCommands = HadoopJobUtils.filterCommands(originalCommands, "hadoop.*",
+            HadoopJobUtils.MATCH_NONE_REGEX, logger);
+    Assert.assertEquals(filteredCommands.get(0), "hadoop fs -ls");
+    Assert.assertEquals(filteredCommands.get(1), "hadoop fs -mkdir");
+  }
+
+  @Test
+  public void testBlackListCriteria() {
+    List<String> filteredCommands = HadoopJobUtils.filterCommands(originalCommands,
+            HadoopJobUtils.MATCH_ALL_REGEX, ".*kinit.*", logger);
+    Assert.assertEquals(filteredCommands.get(0), "hadoop fs -ls");
+    Assert.assertEquals(filteredCommands.get(1), "hadoop fs -mkdir");
+    Assert.assertEquals(filteredCommands.get(2), "kdestroy");
+  }
+
+  @Test
+  public void testMultipleCriteria() {
+    List<String> filteredCommands = HadoopJobUtils.filterCommands(originalCommands, "hadoop.*",
+            ".*kinit.*", logger);
+    Assert.assertEquals(filteredCommands.get(0), "hadoop fs -ls");
+    Assert.assertEquals(filteredCommands.get(1), "hadoop fs -mkdir");
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopJobUtilsFindApplicationIdFromLog.java b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopJobUtilsFindApplicationIdFromLog.java
new file mode 100644
index 0000000..258f5b7
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopJobUtilsFindApplicationIdFromLog.java
@@ -0,0 +1,121 @@
+package azkaban.jobtype;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.Set;
+
+import org.apache.log4j.Logger;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+@SuppressWarnings("DefaultCharset")
+public class TestHadoopJobUtilsFindApplicationIdFromLog {
+
+  File tempFile = null;
+
+  BufferedWriter bw = null;
+
+  Logger logger = Logger.getRootLogger();
+
+  @Before
+  public void beforeMethod() throws IOException {
+    tempFile = File.createTempFile("test_hadoop_job_utils_find_application_id_from_log", null);
+    bw = new BufferedWriter(new FileWriter(tempFile));
+
+  }
+
+  @Test
+  public void testNoApplicationId() throws IOException {
+    bw.write("28-08-2015 14:05:24 PDT spark INFO - 15/08/28 21:05:24 INFO client.RMProxy: Connecting to ResourceManager at eat1-nertzrm02.grid.linkedin.com/172.20.158.95:8032\n");
+    bw.write("28-08-2015 14:05:24 PDT spark INFO - 15/08/28 21:05:24 INFO yarn.Client: Requesting a new application from cluster with 134 NodeManagers\n");
+    bw.write("28-08-2015 14:05:24 PDT spark INFO - 15/08/28 21:05:24 INFO yarn.Client: Verifying our application has not requested more than the maximum memory capability of the cluster (55296 MB per container)\n");
+    bw.write("28-08-2015 14:05:24 PDT spark INFO - 15/08/28 21:05:24 INFO yarn.Client: Will allocate AM container, with 4505 MB memory including 409 MB overhead\n");
+    bw.write("28-08-2015 14:05:24 PDT spark INFO - 15/08/28 21:05:24 INFO yarn.Client: Setting up container launch context for our AM\n");
+    bw.write("28-08-2015 14:05:24 PDT spark INFO - 15/08/28 21:05:24 INFO yarn.Client: Preparing resources for our AM container\n");
+    bw.close();
+
+    Set<String> appId = HadoopJobUtils.findApplicationIdFromLog(tempFile.toString(), logger);
+
+    Assert.assertEquals(0, appId.size());
+
+  }
+
+  @Test
+  public void testOneApplicationId() throws IOException {
+    bw.write("28-08-2015 14:05:32 PDT spark INFO - 15/08/28 21:05:32 INFO spark.SecurityManager: SecurityManager: authentication enabled; ui acls enabled; users with view permissions: Set(*); users with modify permissions: Set(azkaban, jyu)\n");
+    bw.write("28-08-2015 14:05:32 PDT spark INFO - 15/08/28 21:05:32 INFO yarn.Client: Submitting application 3099 to ResourceManager\n");
+    bw.write("28-08-2015 14:05:33 PDT spark INFO - 15/08/28 21:05:33 INFO impl.YarnClientImpl: Submitted application application_1440264346270_3099\n");
+    bw.close();
+
+    Set<String> appId = HadoopJobUtils.findApplicationIdFromLog(tempFile.toString(), logger);
+
+    Assert.assertEquals(1, appId.size());
+    Assert.assertTrue(appId.contains("application_1440264346270_3099"));
+  }
+
+  @Test
+  public void testMultipleSameApplicationIdWhenSparkStarts() throws IOException {
+    bw.write("28-08-2015 14:05:34 PDT spark INFO - 15/08/28 21:05:34 INFO yarn.Client: Application report for application_1440264346270_3099 (state: ACCEPTED)\n");
+    bw.write("28-08-2015 14:05:34 PDT spark INFO - 15/08/28 21:05:34 INFO yarn.Client: \n");
+    bw.write("28-08-2015 14:05:34 PDT spark INFO -   client token: Token { kind: YARN_CLIENT_TOKEN, service:  }\n");
+    bw.write("28-08-2015 14:05:34 PDT spark INFO -   diagnostics: N/A\n");
+    bw.write("28-08-2015 14:05:34 PDT spark INFO -   ApplicationMaster host: N/A\n");
+    bw.write("28-08-2015 14:05:34 PDT spark INFO -   ApplicationMaster RPC port: -1\n");
+    bw.write("28-08-2015 14:05:34 PDT spark INFO -   queue: default\n");
+    bw.write("28-08-2015 14:05:34 PDT spark INFO -   start time: 1440795932813\n");
+    bw.write("28-08-2015 14:05:34 PDT spark INFO -   final status: UNDEFINED\n");
+    bw.write("28-08-2015 14:05:34 PDT spark INFO -   tracking URL: http://eat1-nertzwp02.grid.linkedin.com:8080/proxy/application_1440264346270_3099/\n");
+    bw.write("28-08-2015 14:05:34 PDT spark INFO -   user: jyu\n");
+    bw.write("28-08-2015 14:05:35 PDT spark INFO - 15/08/28 21:05:35 INFO yarn.Client: Application report for application_1440264346270_3099 (state: ACCEPTED)\n");
+    bw.close();
+
+    Set<String> appId = HadoopJobUtils.findApplicationIdFromLog(tempFile.toString(), logger);
+
+    Assert.assertEquals(1, appId.size());
+    Assert.assertTrue(appId.contains("application_1440264346270_3099"));
+  }
+
+  @Test
+  public void testMultipleSameApplicationIdForSparkAfterRunningFor17Hours() throws IOException {
+    bw.write("28-08-2015 14:11:50 PDT spark INFO - 15/08/28 21:11:50 INFO yarn.Client: Application report for application_1440264346270_3099 (state: RUNNING)\n");
+    bw.write("28-08-2015 14:11:51 PDT spark INFO - 15/08/28 21:11:51 INFO yarn.Client: Application report for application_1440264346270_3099 (state: RUNNING)\n");
+    bw.write("28-08-2015 14:11:52 PDT spark INFO - 15/08/28 21:11:52 INFO yarn.Client: Application report for application_1440264346270_3099 (state: RUNNING)\n");
+    bw.write("28-08-2015 14:11:53 PDT spark INFO - 15/08/28 21:11:53 INFO yarn.Client: Application report for application_1440264346270_3099 (state: RUNNING)\n");
+    bw.write("28-08-2015 14:11:54 PDT spark INFO - 15/08/28 21:11:54 INFO yarn.Client: Application report for application_1440264346270_3099 (state: RUNNING)\n");
+    bw.close();
+
+    Set<String> appId = HadoopJobUtils.findApplicationIdFromLog(tempFile.toString(), logger);
+
+    Assert.assertEquals(1, appId.size());
+    Assert.assertTrue(appId.contains("application_1440264346270_3099"));
+  }
+
+  @Test
+  public void testLogWithMultipleApplicationIdsAppearingMultipleTimes() throws IOException {
+    bw.write("28-08-2015 12:29:38 PDT Training_clickSelectFeatures INFO - INFO Submitted application application_1440264346270_3044\n");
+    bw.write("28-08-2015 12:29:38 PDT Training_clickSelectFeatures INFO - INFO The url to track the job: http://eat1-nertzwp02.grid.linkedin.com:8080/proxy/application_1440264346270_3044/\n");
+    bw.write("28-08-2015 12:29:38 PDT Training_clickSelectFeatures INFO - INFO See http://eat1-nertzwp02.grid.linkedin.com:8080/proxy/application_1440264346270_3044/ for details.\n");
+    bw.write("28-08-2015 12:29:38 PDT Training_clickSelectFeatures INFO - INFO Running job: job_1440264346270_3044\n");
+    bw.write("28-08-2015 12:30:21 PDT Training_clickSelectFeatures INFO - INFO Closing idle connection Socket[addr=eat1-hcl5481.grid.linkedin.com/172.20.138.228,port=42492,localport=42382] to server eat1-hcl5481.grid.linkedin.com/172.20.138.228:42492\n");
+    bw.write("28-08-2015 12:30:37 PDT Training_clickSelectFeatures INFO - INFO Closing idle connection Socket[addr=eat1-nertznn01.grid.linkedin.com/172.20.158.57,port=9000,localport=30453] to server eat1-nertznn01.grid.linkedin.com/172.20.158.57:9000\n");
+    bw.write("28-08-2015 12:31:09 PDT Training_clickSelectFeatures INFO - INFO Job job_1440264346270_3044 running in uber mode : false\n");
+    bw.write("28-08-2015 12:29:38 PDT Training_clickSelectFeatures INFO - INFO Submitted application application_1440264346270_3088\n");
+    bw.write("28-08-2015 12:29:38 PDT Training_clickSelectFeatures INFO - INFO The url to track the job: http://eat1-nertzwp02.grid.linkedin.com:8080/proxy/application_1440264346270_3088/\n");
+    bw.write("28-08-2015 12:29:38 PDT Training_clickSelectFeatures INFO - INFO See http://eat1-nertzwp02.grid.linkedin.com:8080/proxy/application_1440264346270_3088/ for details.\n");
+    bw.write("28-08-2015 12:29:38 PDT Training_clickSelectFeatures INFO - INFO Running job: job_1440264346270_3088\n");
+    bw.write("28-08-2015 12:30:21 PDT Training_clickSelectFeatures INFO - INFO Closing idle connection Socket[addr=eat1-hcl5481.grid.linkedin.com/172.20.138.228,port=42492,localport=42382] to server eat1-hcl5481.grid.linkedin.com/172.20.138.228:42492\n");
+    bw.write("28-08-2015 12:30:37 PDT Training_clickSelectFeatures INFO - INFO Closing idle connection Socket[addr=eat1-nertznn01.grid.linkedin.com/172.20.158.57,port=9000,localport=30453] to server eat1-nertznn01.grid.linkedin.com/172.20.158.57:9000\n");
+    bw.write("28-08-2015 12:31:09 PDT Training_clickSelectFeatures INFO - INFO Job job_1440264346270_3088 running in uber mode : false\n");
+    bw.close();
+
+    Set<String> appId = HadoopJobUtils.findApplicationIdFromLog(tempFile.toString(), logger);
+
+    Assert.assertEquals(2, appId.size());
+    Assert.assertTrue(appId.contains("application_1440264346270_3044"));
+    Assert.assertTrue(appId.contains("application_1440264346270_3088"));
+  }
+
+}
diff --git a/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopJobUtilsResolveJarSpec.java b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopJobUtilsResolveJarSpec.java
new file mode 100644
index 0000000..a87bcc5
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopJobUtilsResolveJarSpec.java
@@ -0,0 +1,55 @@
+package azkaban.jobtype;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.log4j.Logger;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import azkaban.utils.Props;
+
+public class TestHadoopJobUtilsResolveJarSpec {
+  Props jobProps = null;
+
+  Logger logger = Logger.getRootLogger();
+
+  String workingDirString = "/tmp/TestHadoopSpark";
+
+  File workingDirFile = new File(workingDirString);
+
+  File libFolderFile = new File(workingDirFile, "lib");
+
+  String executionJarName = "hadoop-spark-job-test-execution-x.y.z-a.b.c.jar";
+
+  File executionJarFile = new File(libFolderFile, "hadoop-spark-job-test-execution-x.y.z-a.b.c.jar");
+
+  File libraryJarFile = new File(libFolderFile, "library.jar");
+
+  String delim = SparkJobArg.delimiter;
+
+  @Before
+  public void beforeMethod() throws IOException {
+    if (workingDirFile.exists())
+      FileUtils.deleteDirectory(workingDirFile);
+    workingDirFile.mkdirs();
+    libFolderFile.mkdirs();
+    executionJarFile.createNewFile();
+    libraryJarFile.createNewFile();
+
+  }
+
+  // a missing jar should make resolveExecutionJarName throw IllegalStateException
+  @Test(expected = IllegalStateException.class)
+  public void testJarDoesNotExist() throws IOException {
+    HadoopJobUtils.resolveExecutionJarName(workingDirString, "./lib/abc.jar", logger);
+  }
+
+  @Test(expected = IllegalStateException.class)
+  public void testNoLibFolder() throws IOException {
+    FileUtils.deleteDirectory(libFolderFile);
+    HadoopJobUtils.resolveExecutionJarName(workingDirString, "./lib/abc.jar", logger);
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopSecureHiveWrapper.java b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopSecureHiveWrapper.java
new file mode 100644
index 0000000..5d48578
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestHadoopSecureHiveWrapper.java
@@ -0,0 +1,23 @@
+package azkaban.jobtype;
+
+import java.util.Map;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestHadoopSecureHiveWrapper {
+  // Ensure that hivevars with equal signs in them are parsed
+  // properly, as the string may be split incorrectly around them.
+  @Test
+  public void testParamsWithEqualSigns() {
+    String[] args = {"-hivevar", "'testKey1=testVal1'",
+        "-hivevar", "'testKey2=testVal2==something=anything'",
+        "-hivevar", "'testKey3=testVal3=anything'"};
+    Map<String, String> hiveVarMap = HadoopSecureHiveWrapper.getHiveVarMap(args);
+    Assert.assertTrue(hiveVarMap.size() == 3);
+    Assert.assertTrue(hiveVarMap.get("testKey1").equals("testVal1"));
+    Assert.assertTrue(hiveVarMap.get("testKey2").equals("testVal2==something=anything"));
+    Assert.assertTrue(hiveVarMap.get("testKey3").equals("testVal3=anything"));
+  }
+}
+
diff --git a/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestValidationUtils.java b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestValidationUtils.java
new file mode 100644
index 0000000..a5fbfaf
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestValidationUtils.java
@@ -0,0 +1,41 @@
+package azkaban.jobtype;
+
+import org.junit.Test;
+
+import azkaban.jobtype.javautils.ValidationUtils;
+import azkaban.utils.Props;
+import azkaban.utils.UndefinedPropertyException;
+
+public class TestValidationUtils {
+
+  private static final Props PROPS = new Props();
+  static {
+    PROPS.put("a", "a");
+    PROPS.put("b", "b");
+    PROPS.put("c", "c");
+    PROPS.put("d", "d");
+  }
+
+  @Test
+  public void testAllExistSuccess() {
+    String[] keys = {"a", "b", "c", "d"};
+    ValidationUtils.validateAllOrNone(PROPS, keys);
+    ValidationUtils.validateAllNotEmpty(PROPS, keys);
+  }
+
+  @Test(expected=UndefinedPropertyException.class)
+  public void testAllExistFail() {
+    ValidationUtils.validateAllNotEmpty(PROPS, "x", "y");
+  }
+
+  @Test(expected=UndefinedPropertyException.class)
+  public void testAllExistFail2() {
+    ValidationUtils.validateAllNotEmpty(PROPS, "a", "y");
+  }
+
+  @Test
+  public void testNoneExistSuccess() {
+    ValidationUtils.validateAllOrNone(PROPS, "z");
+    ValidationUtils.validateAllOrNone(PROPS, "e", "f", "g");
+  }
+}
diff --git a/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestWhitelist.java b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestWhitelist.java
new file mode 100644
index 0000000..a615b5c
--- /dev/null
+++ b/az-hadoop-jobtype-plugin/src/test/java/azkaban/jobtype/TestWhitelist.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2016 LinkedIn Corp. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
+ * this file except in compliance with the License. You may obtain a copy of the
+ * License at  http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied.
+ */
+package azkaban.jobtype;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import azkaban.flow.CommonJobProperties;
+import azkaban.jobtype.javautils.Whitelist;
+import azkaban.utils.Props;
+
+public class TestWhitelist {
+  private static final String PROXY_USER_KEY = "user.to.proxy";
+  private String[] whitelisted = {"whitelisted_1", "whitelisted_2"};
+  private File temp;
+  private Whitelist whitelist;
+
+  @Before
+  @SuppressWarnings("DefaultCharset")
+  public void setup() throws IOException, URISyntaxException {
+    temp = File.createTempFile(TestWhitelist.class.getSimpleName(), null);
+    temp.deleteOnExit();
+
+    try (BufferedWriter bw = new BufferedWriter(new FileWriter(temp))) {
+      for (String s : whitelisted) {
+        bw.write(s);
+        bw.newLine();
+      }
+    }
+
+    FileSystem fs = FileSystem.get(new URI("file:///"), new Configuration());
+    whitelist = new Whitelist(temp.getAbsolutePath(), fs);
+  }
+
+  @After
+  public void cleanup() {
+    if (temp != null) {
+      temp.delete();
+    }
+  }
+
+  @Test
+  public void testWhiteListed() throws IOException, URISyntaxException {
+    for (String s : whitelisted) {
+      whitelist.validateWhitelisted(s);
+
+      Props props = new Props();
+      props.put(PROXY_USER_KEY, s);
+      whitelist.validateWhitelisted(props);
+
+      props = new Props();
+      props.put(CommonJobProperties.SUBMIT_USER, s);
+      whitelist.validateWhitelisted(props);
+    }
+  }
+
+  @Test
+  public void testNotWhiteListed() throws IOException, URISyntaxException {
+
+    String id = "not_white_listed";
+    try {
+      whitelist.validateWhitelisted(id);
+      Assert.fail("Should throw UnsupportedOperationException");
+    } catch (Exception e) {
+      Assert.assertTrue(e instanceof UnsupportedOperationException);
+    }
+  }
+
+  @Test
+  public void testProxyUserWhitelisted() throws IOException, URISyntaxException {
+    String notAuthorized = "not_white_listed";
+
+    for (String s : whitelisted) {
+      Props props = new Props();
+      props.put(PROXY_USER_KEY, s);
+      props.put(CommonJobProperties.SUBMIT_USER, notAuthorized);
+      whitelist.validateWhitelisted(props);
+    }
+  }
+
+  @Test
+  public void testProxyUserNotAuthorized() throws IOException, URISyntaxException {
+    String notAuthorized = "not_white_listed";
+
+    for (String authorized : whitelisted) {
+      Props props = new Props();
+      props.put(PROXY_USER_KEY, notAuthorized);
+      props.put(CommonJobProperties.SUBMIT_USER, authorized);
+      try {
+        whitelist.validateWhitelisted(props);
+        Assert.fail("Should throw UnsupportedOperationException");
+      } catch (Exception e) {
+        Assert.assertTrue(e instanceof UnsupportedOperationException);
+      }
+    }
+  }
+}
diff --git a/az-jobtype-plugin/src/examples/pig-wc/src/wordcountpig.pig b/az-jobtype-plugin/src/examples/pig-wc/src/wordcountpig.pig
new file mode 100644
index 0000000..dabbfa1
--- /dev/null
+++ b/az-jobtype-plugin/src/examples/pig-wc/src/wordcountpig.pig
@@ -0,0 +1,10 @@
+RMF -skipTrash $inData;
+RMF -skipTrash $outData;
+copyFromLocal $inDataLocal $inData;
+
+A = load '$inData';
+B = foreach A generate flatten(TOKENIZE((chararray)$0)) as word;
+C = filter B by word matches '\\w+';
+D = group C by word;
+E = foreach D generate COUNT(C), group;
+store E into '$outData';
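
The word-count script above parameterizes its paths ($inDataLocal, $inData, $outData), so a job file for it would feed those in through the pig jobtype's parameter mechanism (see getPigParams in PigJob earlier in this diff). A hypothetical pig-demo.job, assuming the conventional property keys and example-local paths:

    type=pig
    user.to.proxy=azkaban

    pig.script=src/wordcountpig.pig
    param.inDataLocal=res/input
    param.inData=wordcount/input
    param.outData=wordcount/output
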
diff --git a/azkaban-hadoop-security-plugin/src/main/java/azkaban/security/commons/HadoopSecurityManager.java b/azkaban-hadoop-security-plugin/src/main/java/azkaban/security/commons/HadoopSecurityManager.java
index 226b1d0..68b7475 100644
--- a/azkaban-hadoop-security-plugin/src/main/java/azkaban/security/commons/HadoopSecurityManager.java
+++ b/azkaban-hadoop-security-plugin/src/main/java/azkaban/security/commons/HadoopSecurityManager.java
@@ -30,6 +30,7 @@ public abstract class HadoopSecurityManager {
 
   public static final String PROXY_KEYTAB_LOCATION = "proxy.keytab.location";
   public static final String PROXY_USER = "proxy.user";
+  public static final String USER_TO_PROXY = "user.to.proxy";
   public static final String OBTAIN_BINARY_TOKEN = "obtain.binary.token";
   public static final String MAPREDUCE_JOB_CREDENTIALS_BINARY =
       "mapreduce.job.credentials.binary";

build.gradle 4(+3 -1)

diff --git a/build.gradle b/build.gradle
index 9f33f3f..e9a5cbb 100644
--- a/build.gradle
+++ b/build.gradle
@@ -54,7 +54,7 @@ allprojects {
 ext.versions = [
         hadoop: '2.6.1',
         hive  : '1.1.0',
-        pig   : '0.15.0',
+        pig   : '0.11.0',
         restli: '1.15.7',
         slf4j : '1.7.18',
 ]
@@ -64,6 +64,7 @@ ext.deps = [
         assertj             : 'org.assertj:assertj-core:3.8.0',
         awaitility          : 'org.awaitility:awaitility:3.0.0',
         collections         : 'commons-collections:commons-collections:3.2.2',
+        commonsCli          : 'commons-cli:commons-cli:1.3.1',
         commonsLang         : 'commons-lang:commons-lang:2.6',
         commonsCompress     : 'org.apache.commons:commons-compress:1.16.1',
         dbcp2               : 'org.apache.commons:commons-dbcp2:2.1.1',
@@ -108,6 +109,7 @@ ext.deps = [
         restliTools         : 'com.linkedin.pegasus:restli-tools:' + versions.restli,
         slf4j               : 'org.slf4j:slf4j-api:' + versions.slf4j,
         slf4jLog4j          : 'org.slf4j:slf4j-log4j12:' + versions.slf4j,
+        sparkCore           : 'org.apache.spark:spark-core_2.10:1.4.0',
         snakeyaml           : 'org.yaml:snakeyaml:1.18',
         velocity            : 'org.apache.velocity:velocity:1.7',
         velocityTools       : 'org.apache.velocity:velocity-tools:2.0',

settings.gradle 2(+1 -1)

diff --git a/settings.gradle b/settings.gradle
index def0782..a0d63cf 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -28,4 +28,4 @@ include 'azkaban-web-server'
 include 'az-flow-trigger-dependency-plugin'
 include 'test'
 include 'az-reportal'
-
+include 'az-hadoop-jobtype-plugin'