devantler-tech · devantler · Jun 10, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
@@ -9,3 +9,4 @@ resources:
   - httproute.yaml
   - http-scaled-object.yaml
   - networkpolicy.yaml
+  - pod-disruption-budget.yaml
@@ -0,0 +1,18 @@
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: actual-budget
+  namespace: actual-budget
+spec:
+  # maxUnavailable: 1 is the platform-wide drain-safe PDB pattern (issue #1880).
+  # The community chart exposes no PDB value, so the PDB is declared here.
+  # actual-budget is a KEDA scale-to-zero app (0/1 replicas) on an RWO SQLite
+  # PVC; at 0 replicas the PDB is inert, and at 1 it still permits the eviction
+  # (the budget is computed from the workload's desired replica count), so
+  # drains are never deadlocked. The selector mirrors the labels already used
+  # by the topologySpreadConstraints patch in the HelmRelease.
+  maxUnavailable: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: actualbudget
+      app.kubernetes.io/instance: actual-budget
@@ -18,4 +18,5 @@ resources:
   # canary.yaml (Gateway API weighted canary).
   - canary.yaml
   - networkpolicy.yaml
+  - pod-disruption-budget.yaml
   - provision-cronjob.yaml
@@ -0,0 +1,21 @@
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: umami
+  namespace: umami
+spec:
+  # maxUnavailable: 1 is the platform-wide drain-safe PDB pattern (issue #1880).
+  # Umami is a Flagger target like homepage: the selector keys on
+  # app.kubernetes.io/instance (NOT /name) because Flagger's default
+  # --selector-labels rewrites app.kubernetes.io/name to "umami-umami-primary"
+  # on the primary Deployment, while instance=umami stays intact on the running
+  # pods (see canary.yaml). The disruption controller sums the scales of every
+  # controller that owns a matched pod (umami-umami-primary, plus the canary
+  # Deployment while it is scaled up), so maxUnavailable: 1 permits exactly one
+  # eviction at the steady state and stays 1-at-a-time during a rollout. The
+  # provision-tenants CronJob pods carry only app=umami-provision-tenants, so
+  # they are not matched by this selector.
+  maxUnavailable: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/instance: umami
@@ -24,6 +24,17 @@ spec:
   values:
     installCRDs: true
     replicaCount: ${external_secrets_replicas:=1}
+    # Drain-safe PDB pattern (#1880/#1882): maxUnavailable: 1 never deadlocks a
+    # node drain regardless of replica count. The chart defaults the PDB shape
+    # to minAvailable: 1 (the shape validate-pdb-drain-safe flags); `null`
+    # removes that key at Helm value coalescing so the template renders only
+    # maxUnavailable. Same for the webhook and cert-controller below -- the
+    # webhook serves the CRD conversion + validation path, so losing every
+    # replica at once breaks ExternalSecret admission cluster-wide.
+    podDisruptionBudget:
+      enabled: true
+      minAvailable: null
+      maxUnavailable: 1
     topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
@@ -34,6 +45,10 @@ spec:
             app.kubernetes.io/instance: external-secrets
     webhook:
       replicaCount: ${external_secrets_replicas:=1}
+      podDisruptionBudget:
+        enabled: true
+        minAvailable: null
+        maxUnavailable: 1
       topologySpreadConstraints:
         - maxSkew: 1
           topologyKey: kubernetes.io/hostname
@@ -43,6 +58,10 @@ spec:
               app.kubernetes.io/name: external-secrets-webhook
     certController:
       replicaCount: ${external_secrets_replicas:=1}
+      podDisruptionBudget:
+        enabled: true
+        minAvailable: null
+        maxUnavailable: 1
       topologySpreadConstraints:
         - maxSkew: 1
           topologyKey: kubernetes.io/hostname

@@ -7,3 +7,4 @@ resources:
   - helm-release.yaml
   - loadtester-helm-release.yaml
   - networkpolicy.yaml
+  - pod-disruption-budget.yaml
@@ -0,0 +1,17 @@
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: flagger
+  namespace: flagger-system
+spec:
+  # maxUnavailable: 1 is the platform-wide drain-safe PDB pattern (issue #1880).
+  # The chart's own podDisruptionBudget value only offers minAvailable (the
+  # shape validate-pdb-drain-safe flags), so the PDB is declared here instead.
+  # Flagger runs leader-elected (flagger_replicas: 3 in prod; the non-leaders
+  # are warm standbys), so evicting one pod at a time leaves a replica to hold
+  # or take over leadership while a canary analysis is in flight.
+  maxUnavailable: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: flagger
+      app.kubernetes.io/instance: flagger
@@ -4,3 +4,4 @@ kind: Kustomization
 resources:
   - helm-release.yaml
   - reference-grant.yaml
+  - pod-disruption-budget.yaml
@@ -0,0 +1,17 @@
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: keda-add-ons-http-external-scaler
+  namespace: keda
+spec:
+  # maxUnavailable: 1 is the platform-wide drain-safe PDB pattern (issue #1880).
+  # The chart ships a PDB for the interceptor (interceptor.pdb, enabled by
+  # default) but exposes no PDB knob for the external scaler, which sits on the
+  # scale-from-zero decision path for every KEDA HTTP app: with all scaler pods
+  # gone, KEDA can't compute desired replicas and cold starts stall. The
+  # selector mirrors the scaler Deployment's stable component/instance labels.
+  maxUnavailable: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/component: scaler
+      app.kubernetes.io/instance: keda-http-add-on
@@ -25,3 +25,14 @@ spec:
       replicaCount: ${keda_operator_replicas:=1}
     webhooks:
       replicaCount: ${keda_webhooks_replicas:=1}
+    # Drain-safe PDB pattern (#1880/#1882): maxUnavailable: 1 never deadlocks a
+    # node drain regardless of replica count. The webhooks PDB matters most:
+    # keda-admission-webhooks validates every ScaledObject/TriggerAuthentication
+    # admission. The operator, in turn, drives all scale-from-zero decisions.
+    podDisruptionBudget:
+      operator:
+        maxUnavailable: 1
+      metricServer:
+        maxUnavailable: 1
+      webhooks:
+        maxUnavailable: 1
@@ -81,6 +81,18 @@ spec:
               app.kubernetes.io/instance: kyverno
     backgroundController:
       replicas: ${kyverno_background_replicas:=1}
+      # The chart auto-enables this controller's PDB whenever replicas > 1 and
+      # defaults it to minAvailable: 1 -- the exact shape validate-pdb-drain-safe
+      # flags. Pin the drain-safe house pattern (#1880/#1882) instead. The
+      # explicit `minAvailable: null` is REQUIRED: Helm deep-merges these values
+      # onto the chart defaults, so setting only maxUnavailable leaves the
+      # default minAvailable: 1 in place and the chart's kyverno.pdb.spec helper
+      # hard-fails the whole upgrade ("Cannot set both .minAvailable and
+      # .maxUnavailable"); null deletes the default key at value coalescing.
+      # At the local/CI default of 1 replica the chart renders no PDB at all.
+      podDisruptionBudget:
+        minAvailable: null
+        maxUnavailable: 1
       podSecurityContext:
         runAsNonRoot: true
         seccompProfile:

@@ -4,3 +4,4 @@ kind: Kustomization
 resources:
   - helm-repository.yaml
   - helm-release.yaml
+  - pod-disruption-budget.yaml
@@ -0,0 +1,16 @@
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: snapshot-controller
+  namespace: kube-system
+spec:
+  # maxUnavailable: 1 is the platform-wide drain-safe PDB pattern (issue #1880).
+  # The piraeus chart exposes no PDB value, so the PDB is declared here. The
+  # snapshot-controller is leader-elected (snapshot_controller_replicas: 3 in
+  # prod); evicting one pod at a time keeps a replica available to lead, so a
+  # drain mid-backup does not hang Velero's nightly CSI snapshot backups.
+  maxUnavailable: 1
+  selector:
+    matchLabels:
+      app.kubernetes.io/name: snapshot-controller
+      app.kubernetes.io/instance: snapshot-controller