stackrox · janisz · Jan 15, 2026 · Jan 16, 2026 · Jan 19, 2026 · mtodor
@@ -16,3 +16,9 @@
 
 # Lint output
 /report.xml
+
+# E2E tests
+/e2e-tests/.env
+/e2e-tests/mcp-reports/
+/e2e-tests/bin/
+/e2e-tests/**/*-out.json
@@ -57,6 +57,10 @@ helm-lint: ## Run helm lint for Helm chart
 test: ## Run unit tests
 	$(GOTEST) -v ./...
 
+.PHONY: e2e-test
+e2e-test: ## Run E2E tests
+	@cd e2e-tests && ./scripts/run-tests.sh
+
 .PHONY: test-coverage-and-junit
 test-coverage-and-junit: ## Run unit tests with coverage and junit output
 	go install github.com/jstemmer/go-junit-report/[email protected]

@@ -0,0 +1,92 @@
+# StackRox MCP E2E Testing
+
+End-to-end tests for the StackRox MCP server using [gevals](https://github.com/genmcp/gevals).
+
+## Prerequisites
+
+- Go 1.25+
+- OpenAI API Key (for AI agent and LLM judge)
+- StackRox API Token
+
+## Setup
+
+### 1. Build gevals
+
+```bash
+cd e2e-tests
+./scripts/build-gevals.sh
+```
+
+### 2. Configure Environment
+
+Create a `.env` file in the `e2e-tests` directory:
+
+```bash
+OPENAI_API_KEY=<OpenAI Key>
+STACKROX_MCP__CENTRAL__API_TOKEN=<StackRox API Token>
+```
+
+## Running Tests
+
+From the `e2e-tests` directory (or run `make e2e-test` from the repository root):
+
+```bash
+./scripts/run-tests.sh
+```
+
+Results are saved to `gevals-stackrox-mcp-e2e-out.json`.
+
+### View Results
+
+```bash
+# Summary
+jq '.tasks[] | {name, passed}' gevals-stackrox-mcp-e2e-out.json
+
+# Tool calls
+jq '.tasks[].callHistory[] | {toolName, arguments}' gevals-stackrox-mcp-e2e-out.json
+```
+
+## Test Cases
+
+| Test | Description | Tool |
+|------|-------------|------|
+| `list-clusters` | List all clusters | `list_clusters` |
+| `cve-detected-workloads` | CVE detected in deployments | `get_deployments_for_cve` |
+| `cve-detected-clusters` | CVE detected in clusters | `get_clusters_with_orchestrator_cve` |
+| `cve-nonexistent` | Handle non-existent CVE | `get_clusters_with_orchestrator_cve` |
+| `cve-cluster-does-exist` | CVE with cluster filter (cluster exists) | `list_clusters`, `get_clusters_with_orchestrator_cve` |
+| `cve-cluster-does-not-exist` | CVE with cluster filter (cluster does not exist) | `list_clusters` |
+| `cve-clusters-general` | General CVE query | `get_clusters_with_orchestrator_cve` |
+| `cve-cluster-list` | CVE across clusters | `get_clusters_with_orchestrator_cve` |
+
+## Configuration
+
+- **`gevals/eval.yaml`**: Main test configuration, agent settings, assertions
+- **`gevals/mcp-config.yaml`**: MCP server configuration
+- **`gevals/tasks/*.yaml`**: Individual test task definitions
+
+## How It Works
+
+Gevals uses a proxy architecture to intercept MCP tool calls:
+
+1. AI agent receives task prompt
+2. Agent calls MCP tool
+3. Gevals proxy intercepts and records the call
+4. Call forwarded to StackRox MCP server
+5. Server executes and returns result
+6. Gevals validates assertions and response quality
+
+## Troubleshooting
+
+**Tests fail - no tools called**
+- Verify StackRox Central is accessible
+- Check API token permissions
+
+**Build errors**
+```bash
+go mod tidy
+./scripts/build-gevals.sh
+```
+
+## Further Reading
+
+- [Gevals Documentation](https://github.com/genmcp/gevals)
+- [StackRox MCP Server](../README.md)
@@ -0,0 +1,101 @@
+kind: Eval
+metadata:
+  name: "stackrox-mcp-e2e"
+config:
+  agent:
+    type: "builtin.openai-agent"
+    model: "gpt-4o"
+  llmJudge:
+    env:
+      baseUrlKey: JUDGE_BASE_URL
+      apiKeyKey: JUDGE_API_KEY
+      modelNameKey: JUDGE_MODEL_NAME
+  mcpConfigFile: mcp-config.yaml
+  taskSets:
+    # Test 1: List clusters
+    - path: tasks/list-clusters.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: "list_clusters"
+        minToolCalls: 1
+        maxToolCalls: 1
+
+    # Test 2: CVE detected in workloads
+    - path: tasks/cve-detected-workloads.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: "get_deployments_for_cve"
+            argumentsMatch:
+              cveName: "CVE-2021-31805"
+        minToolCalls: 1
+        maxToolCalls: 1
+
+    # Test 3: CVE detected in clusters - basic
+    - path: tasks/cve-detected-clusters.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: "get_clusters_with_orchestrator_cve"
+            argumentsMatch:
+              cveName: "CVE-2016-1000031"
+        minToolCalls: 1
+        maxToolCalls: 3
+
+    # Test 4: Non-existent CVE
+    # Allows up to 3 calls because "Is CVE detected in my clusters?" can trigger a comprehensive check
+    # (orchestrator, deployments, nodes). The LLM cannot know beforehand whether the CVE exists.
+    - path: tasks/cve-nonexistent.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: "get_clusters_with_orchestrator_cve"
+            argumentsMatch:
+              cveName: "CVE-2099-00001"
+        minToolCalls: 1
+        maxToolCalls: 3
+
+    # Test 5: CVE with specific cluster filter (does exist)
+    - path: tasks/cve-cluster-does-exist.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: "list_clusters"
+          - server: stackrox-mcp
+            toolPattern: "get_clusters_with_orchestrator_cve"
+            argumentsMatch:
+              cveName: "CVE-2016-1000031"
+        minToolCalls: 1
+        maxToolCalls: 2
+
+    # Test 6: CVE with specific cluster filter (does not exist)
+    # Only list_clusters is asserted, and exactly one tool call is allowed
+    # (minToolCalls and maxToolCalls are both 1).
+    - path: tasks/cve-cluster-does-not-exist.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: "list_clusters"
+        minToolCalls: 1
+        maxToolCalls: 1
+
+    # Test 7: CVE detected in clusters - general
+    - path: tasks/cve-clusters-general.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: "get_clusters_with_orchestrator_cve"
+            argumentsMatch:
+              cveName: "CVE-2021-31805"
+        minToolCalls: 1
+        maxToolCalls: 5
+
+    # Test 8: CVE check with cluster list reference
+    - path: tasks/cve-cluster-list.yaml
+      assertions:
+        toolsUsed:
+          - server: stackrox-mcp
+            toolPattern: "get_clusters_with_orchestrator_cve"
+            argumentsMatch:
+              cveName: "CVE-2024-52577"
+        minToolCalls: 1
+        maxToolCalls: 5