Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions test/e2e/pkg/e2eutils/backup/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1"
"github.com/cloudnative-pg/cloudnative-pg/tests/utils/envsubst"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/yaml"

Expand Down Expand Up @@ -205,6 +206,73 @@ func WaitForCompleted(ctx context.Context, c client.Client, ns, name string, tim
}
}

// MarkExpired patches status.expiredAt on the Backup CR via the
// status subresource so the operator's reconcile loop treats it as
// expired and garbage-collects it. Used by the expired-backup
// cleanup spec to fast-forward expiration without waiting out the
// real retentionDays window.
//
// Trigger semantics: the status patch fires a watch event that the
// Backup controller picks up within seconds, so deletion is
// event-driven rather than bound to the 1-minute periodic requeue in
// backup_controller.go.
//
// Implementation detail: this must use c.Status().Patch — patching
// the same field through c.Patch is a silent no-op because Backup
// declares status as a subresource (see operator/src/api/preview/
// backup_types.go +kubebuilder:subresource:status).
//
// A fresh Get is performed inside the helper so callers can pass a
// stable (ns, name) without juggling resourceVersion conflicts with
// the controller's own status writers.
func MarkExpired(ctx context.Context, c client.Client, ns, name string, when time.Time) error {
if c == nil {
return errors.New("backup.MarkExpired: client must not be nil")
}
var current previewv1.Backup
if err := c.Get(ctx, client.ObjectKey{Namespace: ns, Name: name}, &current); err != nil {
return fmt.Errorf("get Backup %s/%s: %w", ns, name, err)
}
patch := client.MergeFrom(current.DeepCopy())
expired := metav1.NewTime(when)
current.Status.ExpiredAt = &expired
if err := c.Status().Patch(ctx, &current, patch); err != nil {
return fmt.Errorf("patch status.expiredAt on Backup %s/%s: %w", ns, name, err)
}
return nil
}

// WaitForBackupDeleted polls until the named Backup no longer exists
// in ns or timeout elapses. Mirrors WaitForPVCDeleted in shape and
// is used by the expired-backup cleanup spec to assert the operator
// removed the Backup CR after MarkExpired flipped status.expiredAt
// into the past.
func WaitForBackupDeleted(ctx context.Context, c client.Client, ns, name string, timeout time.Duration) error {
if c == nil {
return errors.New("backup.WaitForBackupDeleted: client must not be nil")
}
deadline := time.Now().Add(timeout)
for {
var b previewv1.Backup
err := c.Get(ctx, client.ObjectKey{Namespace: ns, Name: name}, &b)
if apierrors.IsNotFound(err) {
return nil
}
if err != nil {
return fmt.Errorf("polling Backup %s/%s: %w", ns, name, err)
}
if time.Now().After(deadline) {
return fmt.Errorf("timed out after %s waiting for Backup %s/%s to be deleted (last phase=%q)",
timeout, ns, name, b.Status.Phase)
}
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(DefaultPollInterval):
}
}
}

// Delete issues a foreground delete on the Backup CR and returns when
// the object is gone or timeout elapses. Safe to call on a missing
// object (IsNotFound is treated as success).
Expand Down
70 changes: 70 additions & 0 deletions test/e2e/pkg/e2eutils/backup/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,76 @@ func TestPVBelongsToClusterMatchesLabelsOrClaimPrefix(t *testing.T) {
}
}

func TestMarkExpiredPatchesStatusViaSubresource(t *testing.T) {
t.Parallel()
s := newScheme(t)
bkp := &previewv1.Backup{
ObjectMeta: metav1.ObjectMeta{Name: "b", Namespace: "ns"},
Spec: previewv1.BackupSpec{Cluster: cnpgv1.LocalObjectReference{Name: "c"}},
}
c := fakeclient.NewClientBuilder().
WithScheme(s).
WithObjects(bkp).
WithStatusSubresource(&previewv1.Backup{}).
Build()

when := time.Date(2000, 1, 2, 3, 4, 5, 0, time.UTC)
if err := MarkExpired(context.Background(), c, "ns", "b", when); err != nil {
t.Fatalf("MarkExpired: %v", err)
}

var got previewv1.Backup
if err := c.Get(context.Background(), client.ObjectKey{Namespace: "ns", Name: "b"}, &got); err != nil {
t.Fatalf("Get after MarkExpired: %v", err)
}
if got.Status.ExpiredAt == nil {
t.Fatalf("expected status.expiredAt to be set; got nil")
}
if !got.Status.ExpiredAt.Time.Equal(when) {
t.Fatalf("status.expiredAt=%v want %v", got.Status.ExpiredAt.Time, when)
}
}

func TestMarkExpiredErrorsOnNotFound(t *testing.T) {
t.Parallel()
s := newScheme(t)
c := fakeclient.NewClientBuilder().
WithScheme(s).
WithStatusSubresource(&previewv1.Backup{}).
Build()

err := MarkExpired(context.Background(), c, "ns", "missing", time.Unix(0, 0))
if err == nil {
t.Fatal("expected error for missing Backup, got nil")
}
if !strings.Contains(err.Error(), "missing") {
t.Fatalf("expected error to mention name; got %v", err)
}
}

func TestWaitForBackupDeletedReturnsWhenMissing(t *testing.T) {
t.Parallel()
s := newScheme(t)
c := fakeclient.NewClientBuilder().WithScheme(s).Build()
if err := WaitForBackupDeleted(context.Background(), c, "ns", "nope", 500*time.Millisecond); err != nil {
t.Fatalf("expected nil for missing Backup, got %v", err)
}
}

func TestWaitForBackupDeletedTimesOutWhenPresent(t *testing.T) {
t.Parallel()
s := newScheme(t)
bkp := &previewv1.Backup{
ObjectMeta: metav1.ObjectMeta{Name: "b", Namespace: "ns"},
Status: previewv1.BackupStatus{Phase: cnpgv1.BackupPhaseCompleted},
}
c := fakeclient.NewClientBuilder().WithScheme(s).WithObjects(bkp).Build()
err := WaitForBackupDeleted(context.Background(), c, "ns", "b", 500*time.Millisecond)
if err == nil || !strings.Contains(err.Error(), "timed out") {
t.Fatalf("expected timeout error, got %v", err)
}
}

func TestWaitForPVCDeletedReturnsWhenMissing(t *testing.T) {
t.Parallel()
s := newScheme(t)
Expand Down
101 changes: 101 additions & 0 deletions test/e2e/tests/backup/backup_expiration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package backup

import (
"context"
"time"

. "github.com/onsi/ginkgo/v2" //nolint:revive
. "github.com/onsi/gomega" //nolint:revive

"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/documentdb/documentdb-operator/test/e2e"
"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/assertions"
bkp "github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/backup"
"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/documentdb"
"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/namespaces"
"github.com/documentdb/documentdb-operator/test/e2e/pkg/e2eutils/timeouts"
)

var _ = Describe("DocumentDB backup — expired backup cleanup",
Label(e2e.BackupLabel, e2e.NeedsCSISnapshotsLabel), e2e.MediumLevelLabel,
func() {
const (
clusterName = "backup-expire"
backupName = "backup-expire-1"
)
var (
ctx context.Context
ns string
c client.Client
)

BeforeEach(func() {
e2e.SkipUnlessLevel(e2e.Medium)
ctx = context.Background()
c = e2e.SuiteEnv().Client
skipUnlessCSISnapshotsUsable(ctx, c)
ns = namespaces.NamespaceForSpec(e2e.BackupLabel)
createNamespace(ctx, c, ns)
createCredentialSecret(ctx, c, ns)
})

It("deletes the Backup CR once status.expiredAt is in the past", func() {
dd, err := documentdb.Create(ctx, c, ns, clusterName, documentdb.CreateOptions{
Base: "documentdb",
Vars: baseVars(clusterName, ns, "2Gi"),
ManifestsRoot: manifestsRoot(),
})
Expect(err).NotTo(HaveOccurred())
DeferCleanup(func(ctx SpecContext) {
_ = documentdb.Delete(ctx, c, dd, 3*time.Minute)
})

// 1. Source cluster healthy so the backup can actually run.
key := types.NamespacedName{Namespace: ns, Name: clusterName}
Eventually(assertions.AssertDocumentDBReady(ctx, c, key),
timeouts.For(timeouts.DocumentDBReady),
timeouts.PollInterval(timeouts.DocumentDBReady),
).Should(Succeed())

// 2. Create an on-demand Backup CR and wait for Completed
// before fast-forwarding expiration. Marking an in-flight
// backup expired would race the snapshot controller and
// leak CNPG state, and the retentionDays contract is
// defined against completed backups anyway.
_, err = bkp.Create(ctx, c, bkp.BackupVars{
Name: backupName,
Namespace: ns,
ClusterName: clusterName,
RetentionDays: 1,
})
Expect(err).NotTo(HaveOccurred(), "create Backup CR %s/%s", ns, backupName)
// The expired-cleanup path makes the Backup disappear on
// its own; bkp.Delete already swallows IsNotFound so this
// cleanup stays correct even after the spec succeeds.
DeferCleanup(func(ctx SpecContext) {
_ = bkp.Delete(ctx, c, ns, backupName, 1*time.Minute)
})
_, err = bkp.WaitForCompleted(ctx, c, ns, backupName,
timeouts.For(timeouts.BackupComplete))
Expect(err).NotTo(HaveOccurred(),
"Backup %s/%s did not reach Completed", ns, backupName)

// 3. Fast-forward expiration via the status subresource.
// The next reconcile sees IsExpired() == true and deletes
// the CR — this is the behaviour the retired
// test-backup-and-restore workflow used to cover.
Expect(bkp.MarkExpired(ctx, c, ns, backupName, time.Now().Add(-1*time.Hour))).
To(Succeed(), "patch status.expiredAt on Backup %s/%s", ns, backupName)

// 4. Operator must garbage-collect the Backup CR. The
// status patch from MarkExpired fires a watch event that
// the controller picks up within seconds, so 3 minutes is
// comfortable headroom even on a loaded CI worker — and
// well clear of the 1-minute periodic requeue cadence in
// backup_controller.go.
Expect(bkp.WaitForBackupDeleted(ctx, c, ns, backupName, 3*time.Minute)).
To(Succeed(), "Backup %s/%s was not garbage-collected after expiration", ns, backupName)
})
})
Loading