From bd67e65b16d7f63104a9bbcddd3a048422fc6e1d Mon Sep 17 00:00:00 2001 From: Nikolai Emil Damm Date: Thu, 11 Jun 2026 18:58:07 +0200 Subject: [PATCH] fix(cluster-policies): stop flagging Longhorn's operator-managed PDBs as drain-unsafe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit longhorn-manager creates a minAvailable: 1 PDB per instance-manager that still hosts replicas or engines — and deletes it once the node is safe to drain — plus PDBs for csi-attacher/csi-provisioner. That minAvailable shape IS the operator's eviction interlock, not a chart value anyone can flip, yet validate-pdb-drain-safe Audit-flags all 15 of them on prod, permanently. The policy header even promises 'zero violations' at steady state. Exclude them the same way CNPG-managed PDBs already are; Longhorn's PDBs carry no labels or ownerReferences, so the exclusion keys on the longhorn-system namespace plus the operator's fixed names. Co-Authored-By: Claude Fable 5 --- .../best-practices/validate-pdb-drain-safe.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/k8s/bases/infrastructure/cluster-policies/best-practices/validate-pdb-drain-safe.yaml b/k8s/bases/infrastructure/cluster-policies/best-practices/validate-pdb-drain-safe.yaml index 5ab782ea8..3c0f06201 100644 --- a/k8s/bases/infrastructure/cluster-policies/best-practices/validate-pdb-drain-safe.yaml +++ b/k8s/bases/infrastructure/cluster-policies/best-practices/validate-pdb-drain-safe.yaml @@ -72,6 +72,21 @@ spec: # keeps minAvailable: 1; its drain-safety comes from the HA PR's # replica bump to 3, not maxUnavailable. - hcloud-csi-controller + # Longhorn manages these PDBs itself: longhorn-manager creates a + # minAvailable: 1 PDB per instance-manager that still hosts replicas + # or engines (and deletes it once the node is safe to drain), plus + # PDBs for its csi-attacher/csi-provisioner deployments. 
The + # minAvailable shape IS the operator's eviction interlock — not a + # chart value anyone can flip — so flagging them is permanent Audit + # noise (15 objects on prod). They carry no labels or ownerReferences, + # so exclude by namespace + the operator's fixed naming scheme. + - resources: + namespaces: + - longhorn-system + names: + - instance-manager-?* + - csi-attacher + - csi-provisioner validate: message: >- PodDisruptionBudget '{{ request.object.metadata.name }}' sets