GoogleContainerTools · tejal29 · Nov 15, 2021 · Nov 11, 2021 · Nov 12, 2021 · Nov 12, 2021
@@ -980,6 +980,7 @@ For Cancelled Error code, use range 800 to 850.<br>
 | STATUSCHECK_UNKNOWN_UNSCHEDULABLE | 502 | Container is unschedulable due to unknown reasons |
 | STATUSCHECK_CONTAINER_WAITING_UNKNOWN | 503 | Container is waiting due to unknown reason |
 | STATUSCHECK_UNKNOWN_EVENT | 509 | Container event reason unknown |
+| STATUSCHECK_INTERNAL_ERROR | 514 | Status Check internal error |
 | DEPLOY_UNKNOWN | 504 | Deploy failed due to unknown reason |
 | SYNC_UNKNOWN | 505 | SYNC failed due to unknown reason |
 | BUILD_UNKNOWN | 506 | Build failed due to unknown reason |

@@ -67,9 +67,10 @@ const (
 )
 
 type counter struct {
-	total   int
-	pending int32
-	failed  int32
+	total     int
+	pending   int32
+	failed    int32
+	cancelled int32
 }
 
 type Config interface {
@@ -220,17 +221,19 @@ func (s *monitor) statusCheck(ctx context.Context, out io.Writer) (proto.StatusC
 
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
+	var exitStatusCode proto.StatusCode
 
 	for _, d := range resources {
 		wg.Add(1)
 		go func(r *resource.Resource) {
 			defer wg.Done()
 			// keep updating the resource status until it fails/succeeds/times out
 			pollResourceStatus(ctx, s.cfg, r)
-			rcCopy := c.markProcessed(r.Status().Error())
+			rcCopy := c.markProcessed(ctx, r.StatusCode())
 			s.printStatusCheckSummary(out, r, rcCopy)
-			// if one deployment fails, cancel status checks for all deployments.
-			if r.Status().Error() != nil && r.StatusCode() != proto.StatusCode_STATUSCHECK_USER_CANCELLED {
+			// if one resource fails, cancel status checks for all resources.
+			if resourceFailed(r.StatusCode()) {
+				exitStatusCode = r.StatusCode()
 				cancel()
 			}
 		}(d)
@@ -243,8 +246,7 @@ func (s *monitor) statusCheck(ctx context.Context, out io.Writer) (proto.StatusC
 
 	// Wait for all deployment statuses to be fetched
 	wg.Wait()
-	cancel()
-	return getSkaffoldDeployStatus(c, resources)
+	return getSkaffoldDeployStatus(c, exitStatusCode)
 }
 
 func getStandalonePods(ctx context.Context, client kubernetes.Interface, ns string, l *label.DefaultLabeller, deadlineDuration time.Duration) ([]*resource.Resource, error) {
@@ -382,18 +384,21 @@ func pollResourceStatus(ctx context.Context, cfg kubectl.Config, r *resource.Res
 	}
 }
 
-func getSkaffoldDeployStatus(c *counter, rs []*resource.Resource) (proto.StatusCode, error) {
+func getSkaffoldDeployStatus(c *counter, sc proto.StatusCode) (proto.StatusCode, error) {
+	// Report the overall check as user-cancelled only when there is at least one resource and every one of them was cancelled.
+	if int(c.cancelled) == c.total && c.total > 0 {
+		return proto.StatusCode_STATUSCHECK_USER_CANCELLED, fmt.Errorf("status check cancelled")
+	}
+	// Return success if no failures were found.
 	if c.failed == 0 {
 		return proto.StatusCode_STATUSCHECK_SUCCESS, nil
 	}
+	// At least one resource failed: build the summary error; if sc carries no failure (success or the zero value), fall back to STATUSCHECK_INTERNAL_ERROR.
 	err := fmt.Errorf("%d/%d deployment(s) failed", c.failed, c.total)
-	for _, r := range rs {
-		if r.StatusCode() != proto.StatusCode_STATUSCHECK_SUCCESS &&
-			r.StatusCode() != proto.StatusCode_STATUSCHECK_USER_CANCELLED {
-			return r.StatusCode(), err
-		}
+	if sc == proto.StatusCode_STATUSCHECK_SUCCESS || sc == 0 {
+		return proto.StatusCode_STATUSCHECK_INTERNAL_ERROR, err
 	}
-	return proto.StatusCode_STATUSCHECK_USER_CANCELLED, err
+	return sc, err
 }
 
 func getDeadline(d int) time.Duration {
@@ -483,8 +488,12 @@ func newCounter(i int) *counter {
 	}
 }
 
-func (c *counter) markProcessed(err error) counter {
-	if err != nil && err != context.Canceled {
+func (c *counter) markProcessed(ctx context.Context, sc proto.StatusCode) counter {
+	if resourceCancelled(sc) {
+		log.Entry(ctx).Debug("marking resource status check cancelled", sc)
+		atomic.AddInt32(&c.cancelled, 1)
+	} else if resourceFailed(sc) {
+		log.Entry(ctx).Debugf("marking resource failed due to error code %s", sc)
 		atomic.AddInt32(&c.failed, 1)
 	}
 	atomic.AddInt32(&c.pending, -1)
@@ -493,12 +502,21 @@ func (c *counter) markProcessed(err error) counter {
 
 func (c *counter) copy() counter {
 	return counter{
-		total:   c.total,
-		pending: c.pending,
-		failed:  c.failed,
+		total:     c.total,
+		pending:   c.pending,
+		failed:    c.failed,
+		cancelled: c.cancelled,
 	}
 }
 
+func resourceFailed(sc proto.StatusCode) bool {
+	return sc != proto.StatusCode_STATUSCHECK_SUCCESS && sc != proto.StatusCode_STATUSCHECK_USER_CANCELLED
+}
+
+func resourceCancelled(sc proto.StatusCode) bool {
+	return sc == proto.StatusCode_STATUSCHECK_USER_CANCELLED
+}
+
 type NoopMonitor struct {
 	status.NoopMonitor
 }

@@ -230,97 +230,82 @@ func TestGetDeployments(t *testing.T) {
 	}
 }
 
-func TestGetDeployStatus(t *testing.T) {
+func TestExitErrorMessage(t *testing.T) {
 	tests := []struct {
 		description  string
 		counter      *counter
-		deployments  []*resource.Resource
+		sc           proto.StatusCode
 		expected     string
 		expectedCode proto.StatusCode
 		shouldErr    bool
 	}{
 		{
-			description: "one error",
-			counter:     &counter{total: 2, failed: 1},
-			deployments: []*resource.Resource{
-				resource.NewResource("foo", resource.ResourceTypes.Deployment, "test", time.Second).
-					WithPodStatuses([]proto.StatusCode{proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE}),
-			},
+			description:  "one error",
+			counter:      &counter{total: 2, failed: 1},
 			expected:     "1/2 deployment(s) failed",
-			expectedCode: proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE,
+			sc:           proto.StatusCode_STATUSCHECK_POD_INITIALIZING,
+			expectedCode: proto.StatusCode_STATUSCHECK_POD_INITIALIZING,
 			shouldErr:    true,
 		},
 		{
-			description: "no error",
-			counter:     &counter{total: 2},
-			deployments: []*resource.Resource{
-				withStatus(
-					resource.NewResource("r1", resource.ResourceTypes.Deployment, "test", 1),
-					&proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_SUCCESS},
-				),
-				withStatus(
-					resource.NewResource("r2", resource.ResourceTypes.Deployment, "test", 1),
-					&proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_SUCCESS},
-				),
-			},
+			description:  "no error",
+			sc:           proto.StatusCode_STATUSCHECK_SUCCESS,
+			expectedCode: proto.StatusCode_STATUSCHECK_SUCCESS,
+			counter:      &counter{total: 2},
 		},
 		{
-			description: "multiple errors",
-			counter:     &counter{total: 3, failed: 2},
-			expected:    "2/3 deployment(s) failed",
-			deployments: []*resource.Resource{
-				resource.NewResource("foo", resource.ResourceTypes.Deployment, "test", time.Second).
-					WithPodStatuses([]proto.StatusCode{proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE}),
-			},
-			expectedCode: proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE,
+			description:  "multiple errors",
+			counter:      &counter{total: 3, failed: 2},
+			expected:     "2/3 deployment(s) failed",
+			sc:           proto.StatusCode_STATUSCHECK_CONFIG_CONNECTOR_FAILED,
+			expectedCode: proto.StatusCode_STATUSCHECK_CONFIG_CONNECTOR_FAILED,
 			shouldErr:    true,
 		},
 		{
-			description: "0 deployments",
-			counter:     &counter{},
+			description:  "0 deployments",
+			counter:      &counter{total: 0},
+			expectedCode: proto.StatusCode_STATUSCHECK_SUCCESS,
 		},
 		{
-			description: "unable to retrieve pods for deployment",
-			counter:     &counter{total: 1, failed: 1},
-			deployments: []*resource.Resource{
-				withStatus(
-					resource.NewResource("deployment", resource.ResourceTypes.Deployment, "test", 1),
-					&proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_DEPLOYMENT_FETCH_ERR},
-				),
-			},
-			shouldErr:    true,
+			description:  "unable to retrieve pods for deployment",
+			counter:      &counter{total: 1, failed: 1},
+			sc:           proto.StatusCode_STATUSCHECK_DEPLOYMENT_FETCH_ERR,
 			expectedCode: proto.StatusCode_STATUSCHECK_DEPLOYMENT_FETCH_ERR,
+			shouldErr:    true,
 		},
 		{
-			description: "one deployment failed and others cancelled and or succeeded",
-			counter:     &counter{total: 3, failed: 2},
-			deployments: []*resource.Resource{
-				withStatus(
-					resource.NewResource("deployment-cancelled", resource.ResourceTypes.Deployment, "test", 1),
-					&proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_USER_CANCELLED},
-				),
-				withStatus(
-					resource.NewResource("deployment-success", resource.ResourceTypes.Deployment, "test", 1),
-					&proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_SUCCESS},
-				),
-				withStatus(
-					resource.NewResource("deployment", resource.ResourceTypes.Deployment, "test", 1),
-					&proto.ActionableErr{ErrCode: proto.StatusCode_STATUSCHECK_DEPLOYMENT_FETCH_ERR},
-				),
-			},
+			description:  "one deployment failed and others cancelled and or succeeded",
+			counter:      &counter{total: 3, failed: 2},
+			sc:           proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE,
+			expectedCode: proto.StatusCode_STATUSCHECK_NODE_DISK_PRESSURE,
+			expected:     "2/3 deployment(s) failed",
+			shouldErr:    true,
+		},
+		{
+			description:  "deployments did not stabilize within deadline returns the pod error",
+			counter:      &counter{total: 1, failed: 1},
+			sc:           proto.StatusCode_STATUSCHECK_UNHEALTHY,
+			expected:     "1/1 deployment(s) failed",
+			expectedCode: proto.StatusCode_STATUSCHECK_UNHEALTHY,
+			shouldErr:    true,
+		},
+		{
+			description:  "user cancelled session",
+			counter:      &counter{total: 2, failed: 0, cancelled: 2},
+			sc:           proto.StatusCode_STATUSCHECK_USER_CANCELLED,
+			expected:     "status check cancelled",
+			expectedCode: proto.StatusCode_STATUSCHECK_USER_CANCELLED,
 			shouldErr:    true,
-			expectedCode: proto.StatusCode_STATUSCHECK_DEPLOYMENT_FETCH_ERR,
 		},
 	}
 
 	for _, test := range tests {
 		testutil.Run(t, test.description, func(t *testutil.T) {
-			testEvent.InitializeState([]latestV1.Pipeline{{}})
-			errCode, err := getSkaffoldDeployStatus(test.counter, test.deployments)
+			actual, err := getSkaffoldDeployStatus(test.counter, test.sc)
 			t.CheckError(test.shouldErr, err)
+			t.CheckDeepEqual(test.expectedCode, actual)
 			if test.shouldErr {
 				t.CheckErrorContains(test.expected, err)
-				t.CheckDeepEqual(test.expectedCode, errCode)
 			}
 		})
 	}
@@ -529,29 +514,37 @@ func TestResourceMarkProcessed(t *testing.T) {
 	tests := []struct {
 		description string
 		c           *counter
-		err         error
+		sc          proto.StatusCode
 		expected    counter
 	}{
 		{
 			description: "when deployment failed, counter is updated",
 			c:           newCounter(10),
-			err:         errors.New("some ae"),
+			sc:          proto.StatusCode_STATUSCHECK_DEADLINE_EXCEEDED,
 			expected:    counter{total: 10, failed: 1, pending: 9},
 		},
+		{
+			description: "when deployment is cancelled, failed is not updated",
+			c:           newCounter(10),
+			sc:          proto.StatusCode_STATUSCHECK_USER_CANCELLED,
+			expected:    counter{total: 10, failed: 0, pending: 9},
+		},
 		{
 			description: "when deployment is successful, counter is updated",
 			c:           newCounter(10),
+			sc:          proto.StatusCode_STATUSCHECK_SUCCESS,
 			expected:    counter{total: 10, failed: 0, pending: 9},
 		},
 		{
 			description: "counter when 1 deployment is updated correctly",
 			c:           newCounter(1),
+			sc:          proto.StatusCode_STATUSCHECK_SUCCESS,
 			expected:    counter{total: 1, failed: 0, pending: 0},
 		},
 	}
 	for _, test := range tests {
 		testutil.Run(t, test.description, func(t *testutil.T) {
-			t.CheckDeepEqual(test.expected, test.c.markProcessed(test.err), cmp.AllowUnexported(counter{}))
+			t.CheckDeepEqual(test.expected, test.c.markProcessed(context.Background(), test.sc), cmp.AllowUnexported(counter{}))
 		})
 	}
 }