From 095fe915a8e20ffb8a16384c5b755d9092e74f62 Mon Sep 17 00:00:00 2001 From: "avi@robusta.dev" Date: Thu, 23 Jan 2025 15:05:49 +0200 Subject: [PATCH 1/5] discovery workload improved --- src/robusta/core/discovery/top_service_resolver.py | 10 +++++++++- src/robusta/core/reporting/base.py | 1 + src/robusta/core/sinks/slack/slack_sink.py | 12 +++--------- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/robusta/core/discovery/top_service_resolver.py b/src/robusta/core/discovery/top_service_resolver.py index f073958f4..68b062788 100644 --- a/src/robusta/core/discovery/top_service_resolver.py +++ b/src/robusta/core/discovery/top_service_resolver.py @@ -1,7 +1,7 @@ import threading import time from collections import defaultdict -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional from pydantic.main import BaseModel @@ -67,6 +67,14 @@ def guess_cached_resource(cls, name: str, namespace: str) -> Optional[TopLevelRe return cached_resource return None + @classmethod + def guess_workload_from_labels(cls, labels: Dict[Any, Any] = None) -> Optional[TopLevelResource]: + relevant_label_keys = ["job_name", "deployment", "statefulset", "daemonset", "pod"] + for label in relevant_label_keys: + if label in labels: + return labels[label] + return None + @classmethod def add_cached_resource(cls, resource: TopLevelResource): cls.__namespace_to_resource[resource.namespace].append(resource) diff --git a/src/robusta/core/reporting/base.py b/src/robusta/core/reporting/base.py index bfb6d9517..cff0f8b88 100644 --- a/src/robusta/core/reporting/base.py +++ b/src/robusta/core/reporting/base.py @@ -282,6 +282,7 @@ def __init__( self.enrichments: List[Enrichment] = [] self.links: List[Link] = [] self.service = TopServiceResolver.guess_cached_resource(name=subject.name, namespace=subject.namespace) + self.backup_workload_name = TopServiceResolver.guess_workload_from_labels(labels=silence_labels) self.service_key = 
self.service.get_resource_key() if self.service else "" uri_path = f"services/{self.service_key}?tab=grouped" if self.service_key else "graphs" self.investigate_uri = f"{ROBUSTA_UI_DOMAIN}/{uri_path}" diff --git a/src/robusta/core/sinks/slack/slack_sink.py b/src/robusta/core/sinks/slack/slack_sink.py index 1f7aa7989..9e66024d4 100644 --- a/src/robusta/core/sinks/slack/slack_sink.py +++ b/src/robusta/core/sinks/slack/slack_sink.py @@ -42,7 +42,7 @@ def handle_notification_grouping(self, finding: Finding, platform_enabled: bool) investigate_uri = self.get_timeline_uri(self.account_id, self.cluster_name) finding_data = finding.attribute_map # The top level entity name (the owner of the pod etc) - finding_data["workload"] = finding.service.name if finding.service else None + finding_data["workload"] = finding.service.name if finding.service else finding.backup_workload_name finding_data["cluster"] = self.cluster_name resolved = finding.title.startswith("[RESOLVED]") @@ -112,19 +112,13 @@ def __replace_callback_with_string(self, slack_message, block_id, message_string blocks[i] = { "type": "section", "block_id": block_id, - "text": { - "type": "mrkdwn", - "text": message_string - } + "text": {"type": "mrkdwn", "text": message_string}, } break # Call the shorter update function return self.slack_sender.update_slack_message( - channel=channel_id, - ts=message_ts, - blocks=blocks, - text=message_string + channel=channel_id, ts=message_ts, blocks=blocks, text=message_string ) except Exception as e: From 59a97356eb32e7c863aa781f1287dc49e2cc509c Mon Sep 17 00:00:00 2001 From: "avi@robusta.dev" Date: Sun, 26 Jan 2025 12:08:26 +0200 Subject: [PATCH 2/5] fixed label guess --- src/robusta/core/discovery/top_service_resolver.py | 9 +++++++-- src/robusta/core/reporting/base.py | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/robusta/core/discovery/top_service_resolver.py b/src/robusta/core/discovery/top_service_resolver.py index 68b062788..1566ac8ec 100644 
--- a/src/robusta/core/discovery/top_service_resolver.py +++ b/src/robusta/core/discovery/top_service_resolver.py @@ -69,10 +69,15 @@ def guess_cached_resource(cls, name: str, namespace: str) -> Optional[TopLevelRe @classmethod def guess_workload_from_labels(cls, labels: Dict[Any, Any] = None) -> Optional[TopLevelResource]: + if not labels or "namespace" not in labels: + return None + namespace = labels["namespace"] relevant_label_keys = ["job_name", "deployment", "statefulset", "daemonset", "pod"] for label in relevant_label_keys: - if label in labels: - return labels[label] + if not label in labels: + continue + cls.guess_cached_resource(name=labels[label], namespace=namespace) + return labels[label] return None @classmethod diff --git a/src/robusta/core/reporting/base.py b/src/robusta/core/reporting/base.py index cff0f8b88..b98748a5c 100644 --- a/src/robusta/core/reporting/base.py +++ b/src/robusta/core/reporting/base.py @@ -282,6 +282,7 @@ def __init__( self.enrichments: List[Enrichment] = [] self.links: List[Link] = [] self.service = TopServiceResolver.guess_cached_resource(name=subject.name, namespace=subject.namespace) + # this is only used by the slack sink in the case that self.service is None, like if the recourse is deleted self.backup_workload_name = TopServiceResolver.guess_workload_from_labels(labels=silence_labels) self.service_key = self.service.get_resource_key() if self.service else "" uri_path = f"services/{self.service_key}?tab=grouped" if self.service_key else "graphs" From e0461c335b2a10a02c0643539f1887170bfde51d Mon Sep 17 00:00:00 2001 From: "avi@robusta.dev" Date: Sun, 2 Feb 2025 13:43:50 +0200 Subject: [PATCH 3/5] fix TopServiceResolver for deleted workloads --- src/robusta/core/discovery/top_service_resolver.py | 3 ++- src/robusta/core/reporting/base.py | 3 ++- src/robusta/core/sinks/slack/slack_sink.py | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/robusta/core/discovery/top_service_resolver.py 
b/src/robusta/core/discovery/top_service_resolver.py index 1566ac8ec..0e36a9ad9 100644 --- a/src/robusta/core/discovery/top_service_resolver.py +++ b/src/robusta/core/discovery/top_service_resolver.py @@ -76,7 +76,8 @@ def guess_workload_from_labels(cls, labels: Dict[Any, Any] = None) -> Optional[T for label in relevant_label_keys: if not label in labels: continue - cls.guess_cached_resource(name=labels[label], namespace=namespace) + resource_type = label.capitalize() if label != "job_name" else "Job" + TopLevelResource(name=labels[label], namespace=namespace, resource_type=resource_type) return labels[label] return None diff --git a/src/robusta/core/reporting/base.py b/src/robusta/core/reporting/base.py index b98748a5c..2d523ddaa 100644 --- a/src/robusta/core/reporting/base.py +++ b/src/robusta/core/reporting/base.py @@ -283,7 +283,8 @@ def __init__( self.links: List[Link] = [] self.service = TopServiceResolver.guess_cached_resource(name=subject.name, namespace=subject.namespace) # this is only used by the slack sink in the case that self.service is None, like if the recourse is deleted - self.backup_workload_name = TopServiceResolver.guess_workload_from_labels(labels=silence_labels) + if self.service is None and silence_labels: + self.service = TopServiceResolver.guess_workload_from_labels(labels=silence_labels) self.service_key = self.service.get_resource_key() if self.service else "" uri_path = f"services/{self.service_key}?tab=grouped" if self.service_key else "graphs" self.investigate_uri = f"{ROBUSTA_UI_DOMAIN}/{uri_path}" diff --git a/src/robusta/core/sinks/slack/slack_sink.py b/src/robusta/core/sinks/slack/slack_sink.py index 9e66024d4..e98522cec 100644 --- a/src/robusta/core/sinks/slack/slack_sink.py +++ b/src/robusta/core/sinks/slack/slack_sink.py @@ -42,7 +42,7 @@ def handle_notification_grouping(self, finding: Finding, platform_enabled: bool) investigate_uri = self.get_timeline_uri(self.account_id, self.cluster_name) finding_data = 
finding.attribute_map # The top level entity name (the owner of the pod etc) - finding_data["workload"] = finding.service.name if finding.service else finding.backup_workload_name + finding_data["workload"] = finding.service.name if finding.service else None finding_data["cluster"] = self.cluster_name resolved = finding.title.startswith("[RESOLVED]") From 818489d8c323fc5da9198bde7399f2703dbea653 Mon Sep 17 00:00:00 2001 From: "avi@robusta.dev" Date: Sun, 2 Feb 2025 13:44:56 +0200 Subject: [PATCH 4/5] fix --- src/robusta/core/discovery/top_service_resolver.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/robusta/core/discovery/top_service_resolver.py b/src/robusta/core/discovery/top_service_resolver.py index 0e36a9ad9..a58fb0c11 100644 --- a/src/robusta/core/discovery/top_service_resolver.py +++ b/src/robusta/core/discovery/top_service_resolver.py @@ -77,8 +77,7 @@ def guess_workload_from_labels(cls, labels: Dict[Any, Any] = None) -> Optional[T if not label in labels: continue resource_type = label.capitalize() if label != "job_name" else "Job" - TopLevelResource(name=labels[label], namespace=namespace, resource_type=resource_type) - return labels[label] + return TopLevelResource(name=labels[label], namespace=namespace, resource_type=resource_type) return None @classmethod From 196a01bd3b2e40610aee7440e81a8769437a7ac0 Mon Sep 17 00:00:00 2001 From: "avi@robusta.dev" Date: Sun, 2 Feb 2025 13:50:44 +0200 Subject: [PATCH 5/5] added comment --- src/robusta/core/reporting/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/robusta/core/reporting/base.py b/src/robusta/core/reporting/base.py index 2d523ddaa..daade7c9c 100644 --- a/src/robusta/core/reporting/base.py +++ b/src/robusta/core/reporting/base.py @@ -282,7 +282,7 @@ def __init__( self.enrichments: List[Enrichment] = [] self.links: List[Link] = [] self.service = TopServiceResolver.guess_cached_resource(name=subject.name, namespace=subject.namespace) - # this is 
only used by the slack sink in the case that self.service is None, like if the recourse is deleted + # An alert can be fired on a deleted resource that we can't resolve if self.service is None and silence_labels: self.service = TopServiceResolver.guess_workload_from_labels(labels=silence_labels) self.service_key = self.service.get_resource_key() if self.service else ""