azkaban-aplcache

Bring back "New status: KILLING (#1172)" (#1509)

9/29/2017 1:38:21 PM

Details

diff --git a/azkaban-common/src/main/java/azkaban/executor/ExecutorManager.java b/azkaban-common/src/main/java/azkaban/executor/ExecutorManager.java
index 0942600..50f238f 100644
--- a/azkaban-common/src/main/java/azkaban/executor/ExecutorManager.java
+++ b/azkaban-common/src/main/java/azkaban/executor/ExecutorManager.java
@@ -1206,7 +1206,7 @@ public class ExecutorManager extends EventHandler implements
           continue;
           // case UNKNOWN:
         case READY:
-          node.setStatus(Status.KILLED);
+          node.setStatus(Status.KILLING);
           break;
         default:
           node.setStatus(Status.FAILED);
diff --git a/azkaban-common/src/main/java/azkaban/executor/Status.java b/azkaban-common/src/main/java/azkaban/executor/Status.java
index e8ce784..bd7eac3 100644
--- a/azkaban-common/src/main/java/azkaban/executor/Status.java
+++ b/azkaban-common/src/main/java/azkaban/executor/Status.java
@@ -25,6 +25,7 @@ public enum Status {
   RUNNING(30),
   PAUSED(40),
   SUCCEEDED(50),
+  KILLING(55),
   KILLED(60),
   FAILED(70),
   FAILED_FINISHING(80),
diff --git a/azkaban-common/src/main/java/azkaban/utils/WebUtils.java b/azkaban-common/src/main/java/azkaban/utils/WebUtils.java
index 2ff8e5a..e365c98 100644
--- a/azkaban-common/src/main/java/azkaban/utils/WebUtils.java
+++ b/azkaban-common/src/main/java/azkaban/utils/WebUtils.java
@@ -97,6 +97,8 @@ public class WebUtils {
         return "Paused";
       case SKIPPED:
         return "Skipped";
+      case KILLING:
+        return "Killing";
       default:
     }
     return "Unknown";
diff --git a/azkaban-common/src/test/java/azkaban/executor/InteractiveTestJob.java b/azkaban-common/src/test/java/azkaban/executor/InteractiveTestJob.java
index d2daa38..506cffd 100644
--- a/azkaban-common/src/test/java/azkaban/executor/InteractiveTestJob.java
+++ b/azkaban-common/src/test/java/azkaban/executor/InteractiveTestJob.java
@@ -34,6 +34,7 @@ public class InteractiveTestJob extends AbstractProcessJob {
   private Props generatedProperties = new Props();
   private volatile boolean isWaiting = true;
   private volatile boolean succeed = true;
+  private volatile boolean ignoreCancel = false; // volatile: cancel() reads it without a lock
 
   public InteractiveTestJob(final String jobId, final Props sysProps, final Props jobProps,
       final Logger log) {
@@ -156,6 +157,12 @@ public class InteractiveTestJob extends AbstractProcessJob {
     }
   }
 
+  public void ignoreCancel() { // Test hook: afterwards cancel() only logs and skips failJob().
+    synchronized (this) {
+      this.ignoreCancel = true; // one-way switch; nothing in the visible code resets it
+    }
+  }
+
   @Override
   public Props getJobGeneratedProperties() {
     return this.generatedProperties;
@@ -164,6 +171,8 @@ public class InteractiveTestJob extends AbstractProcessJob {
   @Override
   public void cancel() throws InterruptedException {
     info("Killing job");
-    failJob();
+    if (!this.ignoreCancel) {
+      failJob();
+    }
   }
 }
diff --git a/azkaban-exec-server/src/main/java/azkaban/execapp/FlowRunner.java b/azkaban-exec-server/src/main/java/azkaban/execapp/FlowRunner.java
index 76be252..4bb2c3d 100644
--- a/azkaban-exec-server/src/main/java/azkaban/execapp/FlowRunner.java
+++ b/azkaban-exec-server/src/main/java/azkaban/execapp/FlowRunner.java
@@ -113,14 +113,13 @@ public class FlowRunner extends EventHandler implements Runnable {
 
   private String jobLogFileSize = "5MB";
   private int jobLogNumFiles = 4;
-
-  private boolean flowPaused = false;
-  private boolean flowFailed = false;
-  private boolean flowFinished = false;
-  private boolean flowKilled = false;
+  private volatile boolean flowPaused = false;
+  private volatile boolean flowFailed = false;
+  private volatile boolean flowFinished = false;
+  private volatile boolean flowKilled = false;
 
   // The following is state that will trigger a retry of all failed jobs
-  private boolean retryFailedJobs = false;
+  private volatile boolean retryFailedJobs = false;
 
   /**
    * Constructor. This will create its own ExecutorService for thread pools
@@ -468,8 +467,7 @@ public class FlowRunner extends EventHandler implements Runnable {
     // Instant kill or skip if necessary.
     boolean jobsRun = false;
     for (final ExecutableNode node : nodesToCheck) {
-      if (Status.isStatusFinished(node.getStatus())
-          || Status.isStatusRunning(node.getStatus())) {
+      if (notReadyToRun(node.getStatus())) {
         // Really shouldn't get in here.
         continue;
       }
@@ -485,6 +483,12 @@ public class FlowRunner extends EventHandler implements Runnable {
     return false;
   }
 
+  private boolean notReadyToRun(final Status status) { // true => the caller's loop skips the node
+    return Status.isStatusFinished(status)
+        || Status.isStatusRunning(status)
+        || Status.KILLING == status; // KILLING is tested explicitly, in addition to the two helpers
+  }
+
   private boolean runReadyJob(final ExecutableNode node) throws IOException {
     if (Status.isStatusFinished(node.getStatus())
         || Status.isStatusRunning(node.getStatus())) {
@@ -547,7 +551,7 @@ public class FlowRunner extends EventHandler implements Runnable {
   }
 
   private void propagateStatus(final ExecutableFlowBase base, final Status status) {
-    if (!Status.isStatusFinished(base.getStatus())) {
+    if (!Status.isStatusFinished(base.getStatus()) && base.getStatus() != Status.KILLING) {
       this.logger.info("Setting " + base.getNestedId() + " to " + status);
       base.setStatus(status);
       if (base.getParentFlow() != null) {
@@ -574,6 +578,7 @@ public class FlowRunner extends EventHandler implements Runnable {
       final ExecutableNode node = flow.getExecutableNode(end);
 
       if (node.getStatus() == Status.KILLED
+          || node.getStatus() == Status.KILLING
           || node.getStatus() == Status.FAILED
           || node.getStatus() == Status.CANCELLED) {
         succeeded = false;
@@ -601,6 +606,11 @@ public class FlowRunner extends EventHandler implements Runnable {
             + durationSec + " seconds");
         flow.setStatus(Status.FAILED);
         break;
+      case KILLING:
+        this.logger
+            .info("Setting flow '" + id + "' status to KILLED in " + durationSec + " seconds");
+        flow.setStatus(Status.KILLED);
+        break;
       case FAILED:
       case KILLED:
       case CANCELLED:
@@ -869,7 +879,7 @@ public class FlowRunner extends EventHandler implements Runnable {
         if (this.flowFailed) {
           this.flow.setStatus(Status.FAILED_FINISHING);
         } else if (this.flowKilled) {
-          this.flow.setStatus(Status.KILLED);
+          this.flow.setStatus(Status.KILLING);
         } else {
           this.flow.setStatus(Status.RUNNING);
         }
@@ -892,7 +902,7 @@ public class FlowRunner extends EventHandler implements Runnable {
         return;
       }
       this.logger.info("Kill has been called on flow " + this.execId);
-      this.flow.setStatus(Status.KILLED);
+      this.flow.setStatus(Status.KILLING);
       // If the flow is paused, then we'll also unpause
       this.flowPaused = false;
       this.flowKilled = true;
@@ -943,6 +953,8 @@ public class FlowRunner extends EventHandler implements Runnable {
         continue;
       } else if (node.getStatus() == Status.RUNNING) {
         continue;
+      } else if (node.getStatus() == Status.KILLING) {
+        continue;
       } else if (node.getStatus() == Status.SKIPPED) {
         node.setStatus(Status.DISABLED);
         node.setEndTime(-1);
diff --git a/azkaban-exec-server/src/main/java/azkaban/execapp/JobRunner.java b/azkaban-exec-server/src/main/java/azkaban/execapp/JobRunner.java
index 85f5a4c..d570c98 100644
--- a/azkaban-exec-server/src/main/java/azkaban/execapp/JobRunner.java
+++ b/azkaban-exec-server/src/main/java/azkaban/execapp/JobRunner.java
@@ -405,7 +405,7 @@ public class JobRunner extends EventHandler implements Runnable {
         nodeStatus = changeStatus(Status.SKIPPED, time);
         quickFinish = true;
       } else if (this.isKilled()) {
-        nodeStatus = changeStatus(Status.KILLED, time);
+        nodeStatus = changeStatus(Status.KILLING, time);
         quickFinish = true;
       }
 
@@ -743,9 +743,10 @@ public class JobRunner extends EventHandler implements Runnable {
   }
 
   private Status runJob() {
-    Status finalStatus = this.node.getStatus();
+    Status finalStatus;
     try {
       this.job.run();
+      finalStatus = this.node.getStatus();
     } catch (final Throwable e) {
       synchronized (this.syncObject) {
         if (this.props.getBoolean("job.succeed.on.failure", false)) {
@@ -770,7 +771,7 @@ public class JobRunner extends EventHandler implements Runnable {
     }
 
     synchronized (this.syncObject) {
-      // If the job is still running, set the status to Success.
+      // If the job is still running (but not killed), set the status to Success.
       if (!Status.isStatusFinished(finalStatus) && !isKilled()) {
         finalStatus = changeStatus(Status.SUCCEEDED);
       }
@@ -811,6 +812,7 @@ public class JobRunner extends EventHandler implements Runnable {
         return;
       }
       logError("Kill has been called.");
+      this.changeStatus(Status.KILLING);
       this.killed = true;
 
       final BlockingStatus status = this.currentBlockStatus;
@@ -836,7 +838,6 @@ public class JobRunner extends EventHandler implements Runnable {
             "Failed trying to cancel job. Maybe it hasn't started running yet or just finished.");
       }
 
-      this.changeStatus(Status.KILLED);
     }
   }
 
diff --git a/azkaban-exec-server/src/test/java/azkaban/execapp/FlowRunnerTest.java b/azkaban-exec-server/src/test/java/azkaban/execapp/FlowRunnerTest.java
index f1cf66c..73fd90f 100644
--- a/azkaban-exec-server/src/test/java/azkaban/execapp/FlowRunnerTest.java
+++ b/azkaban-exec-server/src/test/java/azkaban/execapp/FlowRunnerTest.java
@@ -247,7 +247,12 @@ public class FlowRunnerTest extends FlowRunnerTestBase {
     assertStatus("job2", Status.SUCCEEDED);
     waitJobsStarted(this.runner, "job3", "job4", "job6");
 
+    InteractiveTestJob.getTestJob("job3").ignoreCancel();
     this.runner.kill("me");
+    assertStatus("job3", Status.KILLING);
+    assertFlowStatus(this.runner.getExecutableFlow(), Status.KILLING);
+    InteractiveTestJob.getTestJob("job3").failJob();
+
     Assert.assertTrue(this.runner.isKilled());
 
     assertStatus("job5", Status.CANCELLED);
diff --git a/azkaban-web-server/src/main/less/azkaban-graph.less b/azkaban-web-server/src/main/less/azkaban-graph.less
index 29af355..16af17a 100644
--- a/azkaban-web-server/src/main/less/azkaban-graph.less
+++ b/azkaban-web-server/src/main/less/azkaban-graph.less
@@ -94,6 +94,15 @@
   fill: #FFF;
 }
 
+.KILLING > g > rect {
+  fill: #FF9999;
+  stroke: #FF9999;
+}
+
+.KILLING > g > text {
+  fill: #FFF;
+}
+
 .CANCELLED > g > rect {
   fill: #FF9999;
   stroke: #FF9999;
diff --git a/azkaban-web-server/src/main/less/flow.less b/azkaban-web-server/src/main/less/flow.less
index 8829a14..351bf8a 100644
--- a/azkaban-web-server/src/main/less/flow.less
+++ b/azkaban-web-server/src/main/less/flow.less
@@ -65,6 +65,18 @@
     background-color: @flow-killed-color;
   }
 
+  // #ff9999 = killing vs. #3398cc = running
+  &.KILLING {
+    background-color: @flow-killing-color;
+    background-image: -webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));
+    background-image: -webkit-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);
+    background-image: -moz-linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);
+    background-image: linear-gradient(45deg, rgba(255, 255, 255, 0.15) 25%, transparent 25%, transparent 50%, rgba(255, 255, 255, 0.15) 50%, rgba(255, 255, 255, 0.15) 75%, transparent 75%, transparent);
+    background-size: 40px 40px;
+    -webkit-animation: progress-bar-stripes 2s linear infinite;
+            animation: progress-bar-stripes 2s linear infinite;
+  }
+
   &.RUNNING {
     background-color: @flow-running-color;
     background-image: -webkit-gradient(linear, 0 100%, 100% 0, color-stop(0.25, rgba(255, 255, 255, 0.15)), color-stop(0.25, transparent), color-stop(0.5, transparent), color-stop(0.5, rgba(255, 255, 255, 0.15)), color-stop(0.75, rgba(255, 255, 255, 0.15)), color-stop(0.75, transparent), to(transparent));
@@ -119,6 +131,10 @@ td {
       background-color: @flow-killed-color;
     }
 
+    &.KILLING {
+      background-color: @flow-killing-color;
+    }
+
     &.PAUSED {
       background-color: @flow-paused-color;
     }
@@ -177,6 +193,10 @@ td {
     color: @flow-killed-color;
   }
 
+  &.KILLING {
+    color: @flow-killing-color;
+  }
+
   &.CANCELLED {
     color: @flow-cancelled-color;
   }
@@ -322,6 +342,10 @@ li.tree-list-item {
       background-position: 0px 0px;
     }
 
+    &.KILLING .icon {
+      background-position: 0px 0px;
+    }
+
     &.CANCELLED .icon {
       background-position: 0px 0px;
       opacity: 0.5;
diff --git a/azkaban-web-server/src/main/less/variables.less b/azkaban-web-server/src/main/less/variables.less
index 2b7c694..15edc23 100644
--- a/azkaban-web-server/src/main/less/variables.less
+++ b/azkaban-web-server/src/main/less/variables.less
@@ -2,6 +2,7 @@
 @flow-succeeded-color: #5cb85c;
 @flow-failed-color: #d9534f;
 @flow-killed-color: #d9534f;
+@flow-killing-color: #ff9999;
 @flow-paused-color: #c82123;
 @flow-running-color: #3398cc;
 @flow-failed-finishing-color: #f19153;
diff --git a/azkaban-web-server/src/main/resources/azkaban/webapp/servlet/velocity/historypage.vm b/azkaban-web-server/src/main/resources/azkaban/webapp/servlet/velocity/historypage.vm
index 752cb1f..3147a5b 100644
--- a/azkaban-web-server/src/main/resources/azkaban/webapp/servlet/velocity/historypage.vm
+++ b/azkaban-web-server/src/main/resources/azkaban/webapp/servlet/velocity/historypage.vm
@@ -245,6 +245,7 @@
                     <option value=30>Running</option>
                     <option value=40>Paused</option>
                     <option value=50>Succeed</option>
+                    <option value=55>Killing</option>
                     <option value=60>Killed</option>
                     <option value=70>Failed</option>
                     <option value=80>Failed Finishing</option>
diff --git a/azkaban-web-server/src/web/js/azkaban/util/job-status.js b/azkaban-web-server/src/web/js/azkaban/util/job-status.js
index 0203d10..304d6bc 100644
--- a/azkaban-web-server/src/web/js/azkaban/util/job-status.js
+++ b/azkaban-web-server/src/web/js/azkaban/util/job-status.js
@@ -26,6 +26,7 @@ var statusStringMap = {
   "FAILED_FINISHING": "Running w/Failure",
   "RUNNING": "Running",
   "WAITING": "Waiting",
+  "KILLING": "Killing",
   "KILLED": "Killed",
   "CANCELLED": "Cancelled",
   "DISABLED": "Disabled",
diff --git a/azkaban-web-server/src/web/js/azkaban/view/exflow.js b/azkaban-web-server/src/web/js/azkaban/view/exflow.js
index 4b0d4fd..cae3c45 100644
--- a/azkaban-web-server/src/web/js/azkaban/view/exflow.js
+++ b/azkaban-web-server/src/web/js/azkaban/view/exflow.js
@@ -206,6 +206,8 @@ azkaban.FlowTabView = Backbone.View.extend({
     else if (data.status == "KILLED") {
       $("#executebtn").show();
     }
+    else if (data.status == "KILLING") {
+    }
   },
 
   handleCancelClick: function (evt) {
@@ -457,6 +459,12 @@ var updaterFunction = function () {
         updaterFunction();
       }, 2 * 60 * 1000);
     }
+    else if (data.status == "KILLING") {
+      // 30 s updates - should finish soon now
+      setTimeout(function () {
+        updaterFunction();
+      }, 30 * 1000);
+    }
     else if (data.status != "SUCCEEDED" && data.status != "FAILED") {
       // 2 min updates
       setTimeout(function () {
diff --git a/azkaban-web-server/src/web/js/azkaban/view/time-graph.js b/azkaban-web-server/src/web/js/azkaban/view/time-graph.js
index fe8a2b6..26e99b6 100644
--- a/azkaban-web-server/src/web/js/azkaban/view/time-graph.js
+++ b/azkaban-web-server/src/web/js/azkaban/view/time-graph.js
@@ -93,6 +93,9 @@ azkaban.TimeGraphView = Backbone.View.extend({
       else if (status == 'PAUSED') {
         return '#c92123';
       }
+      else if (status == 'KILLING') {
+        return '#ff9999';
+      }
       else if (status == 'FAILED' ||
           status == 'FAILED_FINISHING' ||
           status == 'KILLED') {