Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions controllers/export_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,11 @@ var (
InstantiateTemplateValues = instantiateTemplateValues
FetchClusterObjects = fetchClusterObjects

IsCluterSummaryProvisioned = isCluterSummaryProvisioned
IsNamespaced = isNamespaced
StringifyMap = stringifyMap
ParseMapFromString = parseMapFromString
IsCluterSummaryProvisioned = isCluterSummaryProvisioned
IsNamespaced = isNamespaced
RemoveStaleDriftDetectionResources = removeStaleDriftDetectionResources
StringifyMap = stringifyMap
ParseMapFromString = parseMapFromString

GetTemplateResourceRefHash = getTemplateResourceRefHash
)
Expand Down
65 changes: 42 additions & 23 deletions controllers/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,32 +351,41 @@ func removeStaleDriftDetectionResources(ctx context.Context, logger logr.Logger)
},
}

for {
time.Sleep(time.Minute)

c := getManagementClusterClient()
driftDetectionDeployments := &appsv1.DeploymentList{}
err := c.List(ctx, driftDetectionDeployments, listOptions...)
if err != nil {
logger.V(logs.LogInfo).Info(fmt.Sprintf("failed to collect driftDetection deployment: %v", err))
continue
}
ticker := time.NewTicker(time.Minute)
defer ticker.Stop()

for i := range driftDetectionDeployments.Items {
depl := &driftDetectionDeployments.Items[i]
for {
select {
case <-ctx.Done():
logger.V(logs.LogInfo).Info("stopping stale drift detection resource cleanup: context canceled")
return
case <-ticker.C:
c := getManagementClusterClient()
driftDetectionDeployments := &appsv1.DeploymentList{}
err := c.List(ctx, driftDetectionDeployments, listOptions...)
if err != nil {
logger.V(logs.LogInfo).Info(fmt.Sprintf("failed to collect driftDetection deployment: %v", err))
continue
}

exist, clusterNs, clusterName, clusterType := deplAssociatedClusterExist(ctx, c, depl, logger)
if !exist {
// find resourceSummaries from this cluster and remove those.
// Remove deployment only after this one succeed
err = removeStaleResourceSummary(ctx, clusterNs, clusterName, clusterType, logger)
if err != nil {
continue
for i := range driftDetectionDeployments.Items {
depl := &driftDetectionDeployments.Items[i]

exist, clusterNs, clusterName, clusterType := deplAssociatedClusterExist(ctx, c, depl, logger)
if !exist {
// find resourceSummaries from this cluster and remove those.
// Remove deployment only after this one succeed
// Do not change the order. First delete ResourceSummary instances. Till ResourceSummary
// instances are found, do not delete the drift-detection-manager deployment
err = removeStaleResourceSummary(ctx, clusterNs, clusterName, clusterType, logger)
if err != nil {
continue
}

logger.V(logs.LogInfo).Info(fmt.Sprintf("deleting driftDetection deployment %s/%s",
depl.Namespace, depl.Name))
_ = c.Delete(ctx, depl)
}

logger.V(logs.LogInfo).Info(fmt.Sprintf("deleting driftDetection deployment %s/%s",
depl.Namespace, depl.Name))
_ = c.Delete(ctx, depl)
}
}
}
Expand Down Expand Up @@ -421,6 +430,16 @@ func removeStaleResourceSummary(ctx context.Context, clusterNamespace, clusterNa
}
}

if len(resourceSummaries.Items) > 0 {
// The drift-detection-manager adds a finalizer to ResourceSummary instances and removes
// it only after a successful deletion. If ResourceSummary instances still exist, we return
// an error to prevent the drift-detection-manager deployment from being deleted, ensuring
// it has sufficient time to complete the cleanup. Otherwise, we risk leaving behind stale
// ResourceSummaries or, worse, deleted instances stuck with a finalizer that have no manager
// left to clean them up.
return fmt.Errorf("resourceSummary instances still present")
}

return nil
}

Expand Down
121 changes: 117 additions & 4 deletions controllers/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ import (
"github.com/projectsveltos/addon-controller/lib/clusterops"
libsveltosv1beta1 "github.com/projectsveltos/libsveltos/api/v1beta1"
"github.com/projectsveltos/libsveltos/lib/k8s_utils"
"github.com/projectsveltos/libsveltos/lib/sveltos_upgrade"
)

const (
Expand All @@ -55,7 +56,6 @@ const (

const (
upstreamClusterNamePrefix = "upstream-cluster"
upstreamMachineNamePrefix = "upstream-machine"
clusterProfileNamePrefix = "cluster-profile"
)

Expand Down Expand Up @@ -500,14 +500,127 @@ metadata:

currentRS := &libsveltosv1beta1.ResourceSummary{}

Expect(controllers.RemoveStaleResourceSummary(context.TODO(), clusterNamespace, clusterName,
clusterType, logger)).To(Succeed())
err = controllers.RemoveStaleResourceSummary(context.TODO(), clusterNamespace, clusterName,
clusterType, logger)
Expect(err).ToNot(BeNil())
Expect(err.Error()).To(ContainSubstring("resourceSummary instances still present"))

Eventually(func() bool {
err = testEnv.Get(context.TODO(),
types.NamespacedName{Namespace: u.GetNamespace(), Name: u.GetName()}, currentRS)
if err == nil {
return false
return !currentRS.DeletionTimestamp.IsZero()
}
return apierrors.IsNotFound(err)
}, timeout, pollingInterval).Should(BeTrue())
})

It("removeStaleDriftDetectionResources deletes drift-detection-manager after all ResourceSummaries are gone",
func(ctx SpecContext) {
namespace := randomString()
clusterName := randomString()

ns := &corev1.Namespace{
ObjectMeta: metav1.ObjectMeta{
Name: namespace,
},
}
Expect(testEnv.Create(context.TODO(), ns)).To(Succeed())
Expect(waitForObject(context.TODO(), testEnv, ns)).To(Succeed())

resourceSummary := &libsveltosv1beta1.ResourceSummary{
ObjectMeta: metav1.ObjectMeta{
Name: randomString(),
Namespace: namespace,
Labels: map[string]string{
sveltos_upgrade.ClusterNameLabel: clusterName,
sveltos_upgrade.ClusterTypeLabel: strings.ToLower(string(libsveltosv1beta1.ClusterTypeSveltos)),
},
},
}

Expect(testEnv.Create(context.TODO(), resourceSummary)).To(Succeed())
Expect(waitForObject(context.TODO(), testEnv, resourceSummary)).To(Succeed())

lbls := map[string]string{
"cluster-namespace": namespace,
"cluster-name": clusterName,
"cluster-type": strings.ToLower(string(libsveltosv1beta1.ClusterTypeSveltos)),
"feature": "drift-detection",
}

var replicas int32 = 1
driftDetectionManager := &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{
Name: randomString(),
Namespace: "projectsveltos",
Labels: lbls,
},
Spec: appsv1.DeploymentSpec{
Selector: &metav1.LabelSelector{
MatchLabels: lbls,
},
Replicas: &replicas,
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Labels: lbls,
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: randomString(),
Image: randomString(),
ImagePullPolicy: corev1.PullAlways,
},
},
ServiceAccountName: randomString(),
},
},
},
}
Expect(testEnv.Create(context.TODO(), driftDetectionManager)).To(Succeed())
Expect(waitForObject(context.TODO(), testEnv, driftDetectionManager)).To(Succeed())

logger := textlogger.NewLogger(textlogger.NewConfig())
go controllers.RemoveStaleDriftDetectionResources(ctx, logger)

// RemoveStaleDriftDetectionResources sleeps for a minute
time.Sleep(time.Minute)

// Because ResourceSummary exists, deployment is not deleted
Consistently(func() bool {
currentDeployment := &appsv1.Deployment{}
err := testEnv.Get(context.TODO(),
types.NamespacedName{Namespace: driftDetectionManager.GetNamespace(), Name: driftDetectionManager.GetName()},
currentDeployment)
return err == nil
}, timeout, pollingInterval).Should(BeTrue())

currentResourceSummary := &libsveltosv1beta1.ResourceSummary{}
Expect(testEnv.Get(context.TODO(), types.NamespacedName{Namespace: resourceSummary.Namespace, Name: resourceSummary.Name},
currentResourceSummary)).To(Succeed())
currentResourceSummary.Finalizers = nil
Expect(testEnv.Delete(context.TODO(), currentResourceSummary)).To(Succeed())

Eventually(func() bool {
err := testEnv.Get(context.TODO(),
types.NamespacedName{Namespace: resourceSummary.Namespace, Name: resourceSummary.Name},
currentResourceSummary)
return err != nil &&
apierrors.IsNotFound(err)
}, timeout, pollingInterval).Should(BeTrue())

// RemoveStaleDriftDetectionResources sleeps for a minute
time.Sleep(time.Minute)

// Because ResourceSummary is gone, deployment is deleted
Eventually(func() bool {
currentDeployment := &appsv1.Deployment{}
err := testEnv.Get(context.TODO(),
types.NamespacedName{Namespace: driftDetectionManager.GetNamespace(), Name: driftDetectionManager.GetName()},
currentDeployment)
if err == nil {
return !currentDeployment.DeletionTimestamp.IsZero()
}
return apierrors.IsNotFound(err)
}, timeout, pollingInterval).Should(BeTrue())
Expand Down