From 8acf20f70bd341612d6fc06294957dc99ca1bd6c Mon Sep 17 00:00:00 2001 From: tithakka Date: Tue, 30 Jun 2026 22:58:59 -0500 Subject: [PATCH] HYPERFLEET-1306 - fix: add preStop hook and rollout strategy to API deployment --- charts/README.md | 3 +++ charts/templates/deployment.yaml | 11 +++++++++++ charts/values.yaml | 25 +++++++++++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/charts/README.md b/charts/README.md index 0567b239..e4e0312d 100644 --- a/charts/README.md +++ b/charts/README.md @@ -127,6 +127,9 @@ helm install hyperfleet-api oci://REGISTRY/hyperfleet-api \ | service | object | `{"type":"ClusterIP"}` | Kubernetes Service configuration | | service.type | string | `"ClusterIP"` | Service type (`ClusterIP`, `LoadBalancer`, `NodePort`) | | resources | object | `{"limits":{"cpu":"500m","memory":"512Mi"},"requests":{"cpu":"100m","memory":"128Mi"}}` | CPU and memory resource requests and limits | +| lifecycle | object | `{"preStop":{"exec":{"command":["/bin/sh","-c","sleep 5"]}}}` | Container lifecycle hooks. Use `preStop` to delay SIGTERM during rolling updates, giving the LoadBalancer time to drain the old pod. See HYPERFLEET-1306. | +| strategy | object | RollingUpdate (maxSurge 1, maxUnavailable 0) | Deployment rollout strategy. `maxUnavailable: 0` ensures zero-downtime during rolling updates — the old pod stays until the new one is Ready. | +| terminationGracePeriodSeconds | int | `30` | Total seconds Kubernetes allows for pod termination, including the preStop hook, before SIGKILL. Must be > preStop sleep (5s) + API server shutdown (10s) + buffer. The health server uses a separate 20s timeout for OTel cleanup. 
| | nodeSelector | object | `{}` | Node selector constraints for pod scheduling | | tolerations | list | `[]` | Tolerations for pod scheduling | | affinity | object | `{}` | Affinity rules for pod scheduling | diff --git a/charts/templates/deployment.yaml b/charts/templates/deployment.yaml index 4f40512b..819a14ef 100644 --- a/charts/templates/deployment.yaml +++ b/charts/templates/deployment.yaml @@ -9,6 +9,10 @@ spec: {{- if not .Values.autoscaling.enabled }} replicas: {{ .Values.replicaCount }} {{- end }} + {{- with .Values.strategy }} + strategy: + {{- toYaml . | nindent 4 }} + {{- end }} selector: matchLabels: {{- include "hyperfleet-api.selectorLabels" . | nindent 6 }} @@ -44,6 +48,9 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} serviceAccountName: {{ include "hyperfleet-api.serviceAccountName" . }} + {{- if not (kindIs "invalid" .Values.terminationGracePeriodSeconds) }} + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }} + {{- end }} securityContext: {{- toYaml .Values.podSecurityContext | nindent 8 }} {{- if and .Values.nativeSidecars (not (semverCompare ">=1.28.0-0" .Capabilities.KubeVersion.Version)) }} @@ -151,6 +158,10 @@ spec: failureThreshold: 3 resources: {{- toYaml .Values.resources | nindent 10 }} + {{- with .Values.lifecycle }} + lifecycle: + {{- toYaml . | nindent 10 }} + {{- end }} volumeMounts: # ConfigMap mount - generated from values or existingConfigMap - name: config diff --git a/charts/values.yaml b/charts/values.yaml index 45b53c95..d8bd2e4b 100644 --- a/charts/values.yaml +++ b/charts/values.yaml @@ -267,6 +267,31 @@ resources: cpu: 100m memory: 128Mi +# -- Container lifecycle hooks. Use `preStop` to delay SIGTERM during +# rolling updates, giving the LoadBalancer time to drain the old pod. +# See HYPERFLEET-1306. +lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - sleep 5 + +# -- Deployment rollout strategy. 
`maxUnavailable: 0` ensures zero-downtime +# during rolling updates — the old pod stays until the new one is Ready. +# @default -- RollingUpdate (maxSurge 1, maxUnavailable 0) +strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + type: RollingUpdate + +# -- Total seconds Kubernetes allows for pod termination, including the preStop hook, before SIGKILL. +# Must be > preStop sleep (5s) + API server shutdown (10s) + buffer. +# The health server uses a separate 20s timeout for OTel cleanup. +terminationGracePeriodSeconds: 30 + # -- Node selector constraints for pod scheduling nodeSelector: {}