diff --git a/.gitleaks.toml b/.gitleaks.toml index 0d67954e..242c3538 100644 --- a/.gitleaks.toml +++ b/.gitleaks.toml @@ -19,6 +19,17 @@ stopwords = ["wat-opaque-123"] description = "Test fixture signing secret in Slack verification unit test (not a real credential)." stopwords = ["test-signing-secret-abc123"] +[[allowlists]] +# #247: the orchestration idempotencyKey fixtures (`orch__`) are +# made-up test values, but generic-api-key flags the key-ish assignment. Scope +# the exemption to the orchestration test files (no real credentials there). +description = "Orchestration idempotencyKey test fixtures (not real credentials)." +targetRules = ["generic-api-key"] +paths = [ + "^cdk/test/handlers/shared/orchestration-release\\.test\\.ts$", + "^cdk/test/handlers/orchestration-reconciler\\.test\\.ts$", +] + # Catch bare 12-digit AWS account IDs. The default ruleset does not flag these, # which is how a real account ID reached a committed comment in the #236 integ # work. RE2 (Go) has no lookarounds, so the non-digit neighbours are captured in diff --git a/agent/src/config.py b/agent/src/config.py index bf4ad57c..c6fd7b35 100644 --- a/agent/src/config.py +++ b/agent/src/config.py @@ -17,14 +17,22 @@ # id whose ``requires_repo`` is false. Used by the load-failure fallback to # decide repo-optionality without loading the file. REPO_LESS_DEFAULT_WORKFLOW_ID = "default/agent-v1" -# First-party workflow ids that operate on an existing pull request. -PR_WORKFLOW_IDS = frozenset(("coding/pr-iteration-v1", "coding/pr-review-v1")) +# First-party workflow ids that operate on an existing pull request — they +# check out the existing PR branch instead of creating a fresh one. restack-v1 +# (#305 A6) re-merges a changed predecessor into an existing stacked-child PR. +PR_WORKFLOW_IDS = frozenset(("coding/pr-iteration-v1", "coding/pr-review-v1", "coding/restack-v1")) # First-party workflow ids that are writeable (NOT read-only). 
Used only by the # load-failure fallback to bias an unrecognised id toward read-only (fail closed # on the write-deny invariant). pr-review-v1 is intentionally excluded (it is # read-only); default/agent-v1 is excluded because its conservative posture # should fail closed too. -_KNOWN_WRITEABLE_WORKFLOW_IDS = frozenset(("coding/new-task-v1", "coding/pr-iteration-v1")) +_KNOWN_WRITEABLE_WORKFLOW_IDS = frozenset( + ( + "coding/new-task-v1", + "coding/pr-iteration-v1", + "coding/restack-v1", + ) +) def resolve_github_token() -> str: @@ -459,9 +467,13 @@ def build_config( dry_run: bool = False, task_id: str = "", system_prompt_overrides: str = "", + build_command: str = "", + lint_command: str = "", resolved_workflow: dict | None = None, branch_name: str = "", pr_number: str = "", + base_branch: str | None = None, + merge_branches: list[str] | None = None, channel_source: str = "", channel_metadata: dict[str, str] | None = None, trace: bool = False, @@ -565,6 +577,8 @@ def build_config( max_turns=max_turns, max_budget_usd=max_budget_usd, system_prompt_overrides=system_prompt_overrides, + build_command=build_command, + lint_command=lint_command, resolved_workflow=workflow, policy_principal=policy_principal, read_only=workflow_read_only, @@ -573,6 +587,8 @@ def build_config( is_pr_workflow=is_pr_workflow, branch_name=branch_name, pr_number=pr_number, + base_branch=base_branch, + merge_branches=merge_branches or [], task_id=task_id or uuid.uuid4().hex[:12], channel_source=channel_source, channel_metadata=channel_metadata or {}, diff --git a/agent/src/models.py b/agent/src/models.py index 0befafda..b605bb57 100644 --- a/agent/src/models.py +++ b/agent/src/models.py @@ -158,6 +158,13 @@ class TaskConfig(BaseModel): max_turns: int = 10 max_budget_usd: float | None = None system_prompt_overrides: str = "" + # Per-repo build/lint verification commands (#1 build-gate fix). 
When set + # (from the blueprint, via the payload), the agent runs these instead of + # the hardcoded ``mise run build`` / ``mise run lint`` to gate build/lint + # regressions. Empty → default to mise. Set for non-mise repos (e.g. + # ``npm run build``) so gating actually runs the repo's real command. + build_command: str = "" + lint_command: str = "" # The pinned workflow this task runs ({"id", "version"}), resolved at the # create-task boundary and threaded through the payload (#248). None on # local/batch runs, where the pipeline defaults to coding/new-task-v1. @@ -237,6 +244,11 @@ class TaskConfig(BaseModel): approval_gate_cap: int | None = None issue: GitHubIssue | None = None base_branch: str | None = None + # #247 A4: predecessor branches to merge into this child's branch + # before work, for a diamond child (2+ predecessors) that branches off + # main but must see all predecessors' code. Empty for root + linear + # children (linear children stack via ``base_branch`` instead). + merge_branches: list[str] = Field(default_factory=list) # Attachments from the orchestrator payload (Phase 3). Validated as # AttachmentConfig models. Empty list for tasks without attachments. attachments: list[AttachmentConfig] = Field(default_factory=list) @@ -294,6 +306,13 @@ class RepoSetup(BaseModel): build_before: bool = True lint_before: bool = True default_branch: str = "main" + # #1: True when the build verification command is INERT — it could not run + # at all (no build task / command not found) AND no explicit build_command + # was configured. In that state build-regression gating is effectively OFF + # (a change that breaks the build still reports success), so the agent + # surfaces a one-time warning on the PR. Distinct from a genuinely red build + # (command ran, exited non-zero), which IS meaningful gating signal. 
+ build_gate_inert: bool = False class TokenUsage(BaseModel): diff --git a/agent/src/pipeline.py b/agent/src/pipeline.py index 7718cb48..6723379f 100644 --- a/agent/src/pipeline.py +++ b/agent/src/pipeline.py @@ -574,11 +574,15 @@ def run_task( task_id: str = "", hydrated_context: dict | None = None, system_prompt_overrides: str = "", + build_command: str = "", + lint_command: str = "", prompt_version: str = "", memory_id: str = "", resolved_workflow: dict | None = None, branch_name: str = "", pr_number: str = "", + base_branch: str | None = None, + merge_branches: list[str] | None = None, cedar_policies: list[str] | None = None, approval_timeout_s: int | None = None, initial_approvals: list[str] | None = None, @@ -616,9 +620,13 @@ def run_task( aws_region=aws_region, task_id=task_id, system_prompt_overrides=system_prompt_overrides, + build_command=build_command, + lint_command=lint_command, resolved_workflow=resolved_workflow, branch_name=branch_name, pr_number=pr_number, + base_branch=base_branch, + merge_branches=merge_branches, channel_source=channel_source, channel_metadata=channel_metadata, trace=trace, @@ -1012,8 +1020,8 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None: safety_committed = False if workflow_read_only else ensure_committed(setup.repo_dir) post_span.set_attribute("safety_net.committed", safety_committed) - build_passed = verify_build(setup.repo_dir) - lint_passed = verify_lint(setup.repo_dir) + build_passed = verify_build(setup.repo_dir, config.build_command) + lint_passed = verify_lint(setup.repo_dir, config.lint_command) pr_url = ensure_pr( config, setup, diff --git a/agent/src/post_hooks.py b/agent/src/post_hooks.py index 058a1e4c..f7f3dede 100644 --- a/agent/src/post_hooks.py +++ b/agent/src/post_hooks.py @@ -3,6 +3,7 @@ from __future__ import annotations import re +import shlex import subprocess from typing import TYPE_CHECKING @@ -11,14 +12,64 @@ if TYPE_CHECKING: from models import AgentResult, RepoSetup, TaskConfig 
+# Default verification commands (#1 build-gate fix). A repo that uses mise gets +# these for free; a non-mise repo sets ``pipeline.buildCommand`` / +# ``lintCommand`` in its blueprint (threaded to the agent as build_command / +# lint_command) so gating runs the repo's real command. +DEFAULT_BUILD_COMMAND = "mise run build" +DEFAULT_LINT_COMMAND = "mise run lint" -def verify_build(repo_dir: str) -> bool: - """Run mise run build after agent completion to verify the build.""" - log("POST", "Running post-agent build verification (mise run build)...") +# POSIX shell exit code for "command not found" — an inert build signal (the +# configured verify command isn't installed), not a genuine build failure. +SHELL_COMMAND_NOT_FOUND = 127 + + +def is_verify_command_inert(returncode: int, stderr: str) -> bool: + """True when a verify command did not actually RUN (vs ran-and-failed). + + Distinguishes the #1 inert-gate state — the build/lint command isn't + runnable in this repo, so gating is effectively OFF — from a genuine red + build (command executed, exited non-zero), which IS meaningful signal. + + Heuristics (conservative — only the unambiguous "couldn't run" signals): + - exit 127: shell "command not found" (e.g. ``gradle`` not installed). + - mise "no tasks defined" / "no task named" / "not found": the configured + (or default ``mise run build``) task does not exist in the repo. + A repo that genuinely fails its build returns some other non-zero code with + real compiler/test output, which this does NOT flag. + """ + if returncode == SHELL_COMMAND_NOT_FOUND: + return True + s = (stderr or "").lower() + return ( + "no tasks defined" in s + or "no task named" in s + or ("mise" in s and "not found" in s) + or "command not found" in s + ) + + +def resolve_verify_argv(command: str | None, default: str) -> list[str]: + """Split a configured verify command into argv, falling back to the default. + + Empty/whitespace/None ``command`` → the default (mise). 
Parsed with ``shlex`` so + a configured ``'npm run build && npm test'`` would need a shell — we keep it + simple argv here; chained shell commands should be wrapped in a mise/npm + task by the repo. A single command with args (``npm run build``) splits + cleanly. + """ + cmd = (command or "").strip() or default + return shlex.split(cmd) + + +def verify_build(repo_dir: str, command: str = "") -> bool: + """Run the configured build command (default ``mise run build``) to verify the build.""" + argv = resolve_verify_argv(command, DEFAULT_BUILD_COMMAND) + log("POST", f"Running post-agent build verification ({' '.join(argv)})...") try: result = run_cmd( - ["mise", "run", "build"], - label="mise-run-build-post", + argv, + label="verify-build-post", cwd=repo_dir, check=False, ) @@ -32,13 +83,14 @@ def verify_build(repo_dir: str) -> bool: return True -def verify_lint(repo_dir: str) -> bool: - """Run mise run lint after agent completion to verify lint passes.""" - log("POST", "Running post-agent lint verification (mise run lint)...") +def verify_lint(repo_dir: str, command: str = "") -> bool: + """Run the configured lint command (default ``mise run lint``) to verify lint passes.""" + argv = resolve_verify_argv(command, DEFAULT_LINT_COMMAND) + log("POST", f"Running post-agent lint verification ({' '.join(argv)})...") try: result = run_cmd( - ["mise", "run", "lint"], - label="mise-run-lint-post", + argv, + label="verify-lint-post", cwd=repo_dir, check=False, ) @@ -343,19 +395,35 @@ def ensure_pr( build_status = "PASS" if build_passed else "FAIL" lint_status = "PASS" if lint_passed else "FAIL" + # #1: show the actual commands run (default mise), not a hardcoded label. 
+ build_label = (config.build_command or DEFAULT_BUILD_COMMAND).strip() + lint_label = (config.lint_command or DEFAULT_LINT_COMMAND).strip() cost_line = "" if agent_result and agent_result.cost_usd is not None: cost_line = f"- Agent cost: **${agent_result.cost_usd:.4f}**\n" + # #1: when build-regression gating is inert (no runnable build command, none + # configured), say so plainly — otherwise a green "build: PASS" misleads: + # nothing was actually verified. + gate_warning = "" + if getattr(setup, "build_gate_inert", False): + gate_warning = ( + "> ⚠️ **Build-regression gating is OFF for this repo.** No runnable " + f"`{DEFAULT_BUILD_COMMAND}` task was found and no build command is configured, " + "so a change that breaks the build still reports success. To enable gating, set " + "`pipeline.buildCommand` in this repo's ABCA blueprint (e.g. `npm run build`).\n\n" + ) + pr_body = ( f"## Summary\n\n" f"{task_source}" f"### Commits\n\n" f"```\n{commits}\n```\n\n" f"## Verification\n\n" - f"- `mise run build` (post-agent): **{build_status}**\n" - f"- `mise run lint` (post-agent): **{lint_status}**\n" + f"{gate_warning}" + f"- `{build_label}` (post-agent): **{build_status}**\n" + f"- `{lint_label}` (post-agent): **{lint_status}**\n" f"{cost_line}\n" f"---\n\n" f"By submitting this pull request, I confirm that you can use, modify, copy, " diff --git a/agent/src/prompt_builder.py b/agent/src/prompt_builder.py index dd9a7ca9..f7f0feef 100644 --- a/agent/src/prompt_builder.py +++ b/agent/src/prompt_builder.py @@ -144,28 +144,62 @@ def _channel_prompt_addendum(config: TaskConfig) -> str: """ if config.channel_source != "linear": return "" + # #247 UX.16: a synthetic orchestration integration node has NO real Linear + # sub-issue — `linear_issue_id` is intentionally omitted from its + # channel_metadata (see orchestration-release.ts). 
Without a target issue + # the agent would grope via the MCP and post its "Starting"/"PR opened" + # comments onto the PARENT epic, cluttering the maturing panel (which + # already shows the integration row + combined PR + preview). Skip the + # progress addendum entirely for these nodes — the panel is the surface. + if not config.channel_metadata.get("linear_issue_id"): + return "" issue_identifier = config.channel_metadata.get("linear_issue_identifier") or "" issue_ref = f" (`{issue_identifier}`)" if issue_identifier else "" + issue_id = config.channel_metadata.get("linear_issue_id") or "" + project_id = config.channel_metadata.get("linear_project_id") or "" return ( "\n\n## Linear issue progress updates (REQUIRED)\n\n" f"This task was submitted from Linear issue{issue_ref}. The Linear MCP " "server is loaded. You MUST perform these updates; they are part of " "the task contract, not optional:\n\n" + "**State transitions — important.** Different Linear teams configure " + "different workflow states. Many teams do NOT have an `In Review` " + "state at all (e.g. only Backlog/Todo/In Progress/Done). When you " + "pass a state name that doesn't exist on the team's workflow, " + "`mcp__linear-server__save_issue` silently no-ops — it returns 200 " + "with the issue body unchanged, so it LOOKS like it worked but the " + "state never moves. To avoid this:\n" + " - Call `mcp__linear-server__list_issue_statuses` once at the start " + "of the task and cache the names you got back.\n" + " - Before each transition, check whether the target name is in the " + "cached list. If not, pick the closest available state per the " + "fallbacks below.\n" + " - After each `save_issue`, look at the returned `state.name` field " + "in the response — if it's not what you asked for, the transition " + "didn't happen and you should NOT claim it did.\n\n" + "**Comment image rendering — important.** Do NOT embed " + "`uploads.linear.app/...` URLs in `save_comment` bodies. 
Linear's CDN " + "signed URLs work in the original poster's context but render as a " + "broken-image icon when re-embedded in a comment from a different " + "author. If you need to reference an image the user attached, link to " + "it in the GitHub PR (where GitHub's image proxy caches the bytes) or " + "describe it in words. Other URL hosts (imgur, github user-content) " + "are fine to embed.\n\n" "1. **At start** — call `mcp__linear-server__save_comment` with a short " '"🤖 Starting on this issue…" message, then call ' - "`mcp__linear-server__save_issue` to transition the issue state. Use " - "`mcp__linear-server__list_issue_statuses` first if you don't already " - "know the state ids; pick the one named `In Progress` (fall back to " - "`Todo` if that state doesn't exist). If the issue is already in " - "`In Progress` or any later state (`In Review`, `Done`), skip the " - "transition. If neither exists, skip — the comment alone is enough. " - "Do not invent state names or loop on `list_issue_statuses`.\n" + "`mcp__linear-server__list_issue_statuses` once to get the state map, " + "then call `mcp__linear-server__save_issue` to transition to " + "`In Progress` (fall back to `Todo` if that state doesn't exist). If " + "the issue is already in `In Progress` or any later state (`In Review`, " + "`Done`), skip the transition. If neither exists, skip — the comment " + "alone is enough. Do not invent state names.\n" "2. **When you open the PR** — call `mcp__linear-server__save_comment` " "with the PR URL, then call `mcp__linear-server__save_issue` to " - "transition the issue state to `In Review` (fall back to `In Progress` " - "if that state doesn't exist). If neither exists, skip the state " - "transition — the PR comment alone is enough. Do not invent state " - "names or loop on `list_issue_statuses`.\n\n" + "transition to `In Review`. Use the cached state map from step 1. 
If " + "the team has no `In Review` state, fall back to leaving it at " + "`In Progress` — DO NOT silently fail by claiming you transitioned " + "when the response shows the state didn't change. Acknowledge in the " + "PR comment that the team workflow has no In-Review-equivalent.\n\n" "**Do NOT post a final 'task completed' or 'task failed' comment.** " "The platform fan-out plane (issue #239) posts a structured " "✅/⚠️/❌ summary on terminal events with cost / turns / duration / " @@ -173,7 +207,47 @@ def _channel_prompt_addendum(config: TaskConfig) -> str: "agent-side completion comment would just stack two near-identical " "comments on the issue.\n\n" "Keep the start + PR-opened comments concise. Do not mirror the full " - "agent transcript back to Linear." + "agent transcript back to Linear.\n\n" + "## Linear context discovery (on demand)\n\n" + "The same Linear MCP exposes tools for fetching extra context on the " + "issue when you need it. Use them sparingly — only when the task " + "description references material you don't have, when the description " + "is ambiguous and project-level context would clarify, or when a " + "decision point benefits from a fresh look at the issue thread. Do " + "NOT call these on every task; the issue title + description are " + "usually sufficient.\n\n" + f"- **Issue + paperclip attachments.** Call `mcp__linear-server__get_issue` " + f'with `id: "{issue_id}"` to fetch the full issue, including its ' + "`attachments` connection (paperclip-icon files like PDFs, logs, " + "spec docs that aren't embedded as markdown images). Read the " + "attachment titles first; for each one that looks relevant, call " + "`mcp__linear-server__get_attachment` with that attachment id. Skip " + "ones that look unrelated (e.g. screenshots from prior debugging " + "sessions).\n" + "- **Embedded images.** Description and comment images that look " + "like `![alt](https://uploads.linear.app/…)` may have stale signed " + "URLs by the time you run. 
If you need to actually look at one, call " + "`mcp__linear-server__extract_images` to get fresh signed URLs, then " + "use the built-in `WebFetch` tool to download. (The screened " + "description-image path runs at task-creation time and is separate " + "from this — you don't need to re-screen.)\n" + "- **Project documents.** When the issue belongs to a project and " + "the task is ambiguous enough that project-level context (specs, " + "design docs, RFCs) would help, call " + f"`mcp__linear-server__list_documents` filtered to " + f'`projectId: "{project_id}"` (skip if the issue has no project). ' + "Read the titles. For documents that clearly relate to your task, " + "call `mcp__linear-server__get_document` to read the body. Don't " + "fetch every document.\n" + "- **Comments posted after task start.** Comments left while you're " + "running (e.g. clarifications, approve/deny signals from the " + "requester) are not in your task description. Before opening the PR, " + f"and again before merging if asked, call `mcp__linear-server__list_comments` " + f'with `issueId: "{issue_id}"` and look for new comments since ' + "task start. Respect any clear approve / deny / block / hold signals " + "from the original requester (the issue creator or the person who " + "applied the trigger label) — if they say stop, stop and post a " + "comment explaining why." 
"""Workflow section for restack (#305 A6 — re-merge a changed predecessor).

A stacked child's predecessor PR was edited after the child already merged the
predecessor's code in, so the child is stale. The platform re-runs the child on
its EXISTING branch with the updated predecessor branch(es) merged into the
working tree before the agent starts (same mechanism as the initial A4 diamond
merge). The agent's job is narrow: reconcile, verify, push to the same branch —
NOT new feature work.
"""

# Placeholders in curly braces ({branch_name}, {pr_number}, {repo_url}) are the
# only substitution points in this template. Text in angle brackets is a
# literal instruction to the agent ("fill this in"), not a substitution point.
RESTACK_WORKFLOW = """\
## Workflow

You are RE-STACKING an existing pull request branch (`{branch_name}`). A
predecessor branch this work was built on has changed, and its updated code has
already been merged into your working tree before you started. Your only job is
to reconcile that update — do NOT add features or change scope.

Follow these steps in order:

1. **Assess the merged-in predecessor changes**
   The setup notes above record which predecessor branch(es) were merged in and
   whether the merge was clean or left conflicts. Read them first.
   - If a merge was aborted due to conflicts, the predecessor branch is fetched
     as `origin/<predecessor-branch>`; merge it now and resolve the conflicts so
     your branch contains both your original work AND the updated predecessor
     code.
   - If the merge was clean, just verify your original changes still apply on top
     of the updated predecessor code (the predecessor may have moved code you
     depended on).

2. **Reconcile — keep BOTH sides**
   The goal is a branch that has your sub-issue's changes correctly layered on
   the predecessor's NEW code. Do not drop your work, and do not revert the
   predecessor's update. Resolve conflicts by integrating both intents.

3. **Test your changes (MANDATORY)**
   - Run the project build: `mise run build` (or the repo's own build command
     when it does not use mise).
   - Run linters/type-checkers if available.
   - Run tests if the project has them (`npm test`, `pytest`, `make test`).
   - The combined result must build — a re-stack that doesn't build is worse
     than the stale state it replaced.

4. **Commit and push to `{branch_name}` (the SAME branch — do not create a new one)**
   ```
   git add <resolved-and-changed-files>
   git commit -m "chore(restack): re-merge updated predecessor into {branch_name}"
   git push origin {branch_name}
   ```
   Pushing to the existing branch updates the existing PR in place — the
   platform does NOT open a new PR for a re-stack.

5. **Post a brief summary comment on the PR**
   ```
   gh pr comment {pr_number} --repo {repo_url} --body "<summary>"
   ```
   Note which predecessor change was absorbed, any conflicts resolved, and the
   build/test result. Keep it concise — this is a maintenance update, not a new
   review.\
"""
+ for pred_branch in config.merge_branches: + _merge_predecessor_branch(repo_dir, pred_branch, notes) + elif config.base_branch: + # #247 A4: stacked child. Branch from the predecessor's branch + # (linear) or from main (diamond) so the child sees predecessor + # code without waiting for a human merge. fetch the base first — + # it is an unmerged sibling branch that the fresh clone may not + # have locally. + log("SETUP", f"Creating branch {branch} from base {config.base_branch}") + fetch_res = run_cmd( + ["git", "fetch", "origin", config.base_branch], + label="fetch-base-branch", + cwd=repo_dir, + check=False, + ) + if fetch_res.returncode == 0: + run_cmd( + ["git", "checkout", "-b", branch, f"origin/{config.base_branch}"], + label="create-branch-from-base", + cwd=repo_dir, + ) + else: + # Base branch not found on origin (e.g. predecessor PR already + # merged + branch deleted, or a transient fetch error). Fall + # back to a normal branch off the current HEAD so the child + # still runs rather than failing setup; the predecessor's code + # is likely in the default branch by now anyway. + notes.append( + f"base branch '{config.base_branch}' not fetchable; branched off default instead" + ) + log("SETUP", f"Base branch not found; creating {branch} off HEAD") + run_cmd(["git", "checkout", "-b", branch], label="create-branch", cwd=repo_dir) + + # Diamond: merge each predecessor branch into this child's branch + # so it sees ALL predecessors' code (the base only gave it one). 
+ for pred_branch in config.merge_branches: + _merge_predecessor_branch(repo_dir, pred_branch, notes) else: log("SETUP", f"Creating branch: {branch}") run_cmd(["git", "checkout", "-b", branch], label="create-branch", cwd=repo_dir) @@ -108,44 +162,71 @@ def setup_repo(config: TaskConfig) -> RepoSetup: else: notes.append("mise install: OK") - # Initial build (record whether the project builds before agent changes) - log("SETUP", "Running initial build (mise run build)...") + # Initial build (record whether the project builds before agent changes). + # #1: use the repo's configured build command (default mise run build). + from post_hooks import ( + DEFAULT_BUILD_COMMAND, + DEFAULT_LINT_COMMAND, + is_verify_command_inert, + resolve_verify_argv, + ) + + build_gate_inert = False + build_argv = resolve_verify_argv(config.build_command, DEFAULT_BUILD_COMMAND) + build_cmd_str = " ".join(build_argv) + log("SETUP", f"Running initial build ({build_cmd_str})...") result = run_cmd( - ["mise", "run", "build"], - label="mise-run-build-pre", + build_argv, + label="verify-build-pre", cwd=repo_dir, check=False, ) if result.returncode != 0: - note = "Initial build (mise run build) FAILED before agent changes" + note = f"Initial build ({build_cmd_str}) FAILED before agent changes" notes.append(note) build_before = False + # #1: if the build command could not RUN (no task / not found) AND no + # explicit build_command was configured, build-regression gating is + # INERT — flag it so the agent warns on the PR rather than silently + # passing every task. A configured command that fails to run is the + # operator's typo, not the silent-default trap, so only flag the + # unconfigured (mise-default) case. + if not config.build_command and is_verify_command_inert(result.returncode, result.stderr): + build_gate_inert = True + notes.append( + "⚠️ Build-regression gating is INERT: no runnable `mise run build` task in this " + "repo and no build command configured. 
A change that breaks the build will still " + "report success. Set pipeline.buildCommand in the repo's blueprint (e.g. " + "'npm run build') to enable gating." + ) else: - notes.append("Initial build (mise run build): OK") + notes.append(f"Initial build ({build_cmd_str}): OK") build_before = True # Initial lint baseline (record whether lint passes before agent changes) - log("SETUP", "Running initial lint (mise run lint)...") + lint_argv = resolve_verify_argv(config.lint_command, DEFAULT_LINT_COMMAND) + lint_cmd_str = " ".join(lint_argv) + log("SETUP", f"Running initial lint ({lint_cmd_str})...") result = run_cmd( - ["mise", "run", "lint"], - label="mise-run-lint-pre", + lint_argv, + label="verify-lint-pre", cwd=repo_dir, check=False, ) if result.returncode != 0: - note = "Initial lint (mise run lint) FAILED before agent changes" + note = f"Initial lint ({lint_cmd_str}) FAILED before agent changes" notes.append(note) lint_before = False else: - notes.append("Initial lint (mise run lint): OK") + notes.append(f"Initial lint ({lint_cmd_str}): OK") lint_before = True - # Detect default branch - # For PR tasks (pr_iteration, pr_review): use base_branch from orchestrator if available - if config.is_pr_workflow and config.base_branch: - default_branch = config.base_branch - else: - default_branch = detect_default_branch(config.repo_url, repo_dir) + # Detect default branch (used as the PR base + the commit-diff range). + # - PR tasks: base_branch from the orchestrator (the PR's real base). + # - #247 A4 stacked children: base_branch is the predecessor's branch + # (linear) or main (diamond) — the child's PR targets it. + # - Otherwise: detect the repo default (main/master). 
def _merge_predecessor_branch(repo_dir: str, pred_branch: str, notes: list[str]) -> None:
    """Merge a predecessor branch into the current child branch (#247 A4 diamond).

    Fetches ``pred_branch`` from ``origin`` and merges ``origin/<pred_branch>``
    into whatever branch is checked out in ``repo_dir``. Every outcome is
    recorded by appending one entry to ``notes``:

    - invalid name (blank, or starting with ``-``): skipped, nothing is run;
    - fetch failed: skipped;
    - merge succeeded: recorded as merged;
    - merge failed: the merge is aborted so the working tree stays clean, and
      the note asks the agent to integrate ``origin/<pred_branch>`` itself.

    The repo is deliberately never left half-merged: the agent runs AFTER setup
    and a conflicted tree would break its build/lint baseline. The predecessor
    stays fetched so conflict resolution remains agent-driven.

    Args:
        repo_dir: Path of the checked-out repository.
        pred_branch: Name of the predecessor branch on ``origin``.
        notes: Setup notes list; mutated in place.
    """
    # pred_branch originates from the invocation payload and is passed to
    # ``git fetch`` as a bare argument. A value starting with "-" would be
    # parsed by git as an OPTION (e.g. ``--upload-pack=...``) instead of a
    # refspec, so refuse it — and blank names — before running anything.
    if not pred_branch.strip() or pred_branch.startswith("-"):
        notes.append(
            f"predecessor branch '{pred_branch}' is not a valid branch name; skipped merge"
        )
        log("SETUP", f"Invalid predecessor branch name, skipping merge: {pred_branch!r}")
        return

    fetch_res = run_cmd(
        ["git", "fetch", "origin", pred_branch],
        label="fetch-predecessor",
        cwd=repo_dir,
        check=False,
    )
    if fetch_res.returncode != 0:
        notes.append(f"predecessor branch '{pred_branch}' not fetchable; skipped merge")
        log("SETUP", f"Predecessor branch not found, skipping merge: {pred_branch}")
        return

    merge_res = run_cmd(
        ["git", "merge", "--no-edit", f"origin/{pred_branch}"],
        label="merge-predecessor",
        cwd=repo_dir,
        check=False,
    )
    if merge_res.returncode == 0:
        log("SETUP", f"Merged predecessor branch: {pred_branch}")
        notes.append(f"merged predecessor branch '{pred_branch}'")
        return

    # Conflict (or other merge failure): abort to keep the tree clean.
    # check=False because the abort itself fails when no merge is in progress.
    run_cmd(["git", "merge", "--abort"], label="merge-abort", cwd=repo_dir, check=False)
    notes.append(
        f"predecessor branch '{pred_branch}' conflicts with this branch; "
        f"merge aborted — integrate origin/{pred_branch} as part of the task"
    )
    log("SETUP", f"Predecessor merge conflicted, aborted: {pred_branch}")
+ build_command = inp.get("build_command", "") + lint_command = inp.get("lint_command", "") max_turns = int(inp.get("max_turns", 0)) or int(os.environ.get("MAX_TURNS", "100")) max_budget_usd = float(inp.get("max_budget_usd", 0)) or None aws_region = inp.get("aws_region") or os.environ.get("AWS_REGION", "") @@ -532,6 +543,12 @@ def _extract_invocation_params(inp: dict, request: Request) -> dict: resolved_workflow = inp.get("resolved_workflow") branch_name = inp.get("branch_name", "") pr_number = str(inp.get("pr_number", "")) + # #247 A4: stacked-child base branch + (diamond) predecessor branches + # to merge in. The orchestrator sets these from the orchestration row; + # absent for ordinary tasks (agent branches off main as today). + base_branch = inp.get("base_branch") or None + merge_branches_raw = inp.get("merge_branches") or [] + merge_branches = [b for b in merge_branches_raw if isinstance(b, str)] cedar_policies = inp.get("cedar_policies") or [] # Cedar HITL (§7.3) — per-task approval defaults + seeded allowlist. # Both are forwarded verbatim to the pipeline; the engine @@ -633,11 +650,15 @@ def _extract_invocation_params(inp: dict, request: Request) -> dict: "session_id": session_id, "hydrated_context": hydrated_context, "system_prompt_overrides": system_prompt_overrides, + "build_command": build_command, + "lint_command": lint_command, "prompt_version": prompt_version, "memory_id": memory_id, "resolved_workflow": resolved_workflow, "branch_name": branch_name, "pr_number": pr_number, + "base_branch": base_branch, + "merge_branches": merge_branches, "cedar_policies": cedar_policies, "approval_timeout_s": approval_timeout_s, "initial_approvals": initial_approvals, @@ -659,7 +680,8 @@ def _validate_required_params(params: dict) -> list[str]: workflow requires ``repo_url``; a repo-less workflow (``requires_repo:false``, #248 Phase 3) does not. 
All non-PR workflows need either an ``issue_number`` or ``task_description``; PR workflows (``coding/pr-iteration-v1`` / - ``coding/pr-review-v1``) additionally require ``pr_number``. + ``coding/pr-review-v1`` / ``coding/restack-v1``) require ``pr_number`` + instead and carry no description. """ missing: list[str] = [] workflow_id = (params.get("resolved_workflow") or {}).get("id", "coding/new-task-v1") @@ -684,7 +706,7 @@ def _validate_required_params(params: dict) -> list[str]: if requires_repo and not params.get("repo_url"): missing.append("repo_url") - if workflow_id in ("coding/pr-iteration-v1", "coding/pr-review-v1"): + if workflow_id in ("coding/pr-iteration-v1", "coding/pr-review-v1", "coding/restack-v1"): if not params.get("pr_number"): missing.append("pr_number") else: diff --git a/agent/src/workflow/runner.py b/agent/src/workflow/runner.py index 4f818689..18e3a92d 100644 --- a/agent/src/workflow/runner.py +++ b/agent/src/workflow/runner.py @@ -515,11 +515,11 @@ def gate_status( def _handle_verify_build(step: Step, ctx: StepContext) -> StepOutcome: - """Run ``mise run build``. Gating vs informational is the step's ``gate``.""" + """Run the repo's build command (default ``mise run build``); gating is the step's ``gate``.""" from post_hooks import verify_build repo_dir = ctx.setup.repo_dir if ctx.setup else "" - passed = verify_build(repo_dir) + passed = verify_build(repo_dir, ctx.config.build_command) # was_passing_before defaults True (assume green-before, so a post-agent # failure IS a regression) — the same conservative default pipeline.py uses. 
was_passing_before = ctx.setup.build_before if ctx.setup else True @@ -539,11 +539,11 @@ def _handle_verify_build(step: Step, ctx: StepContext) -> StepOutcome: def _handle_verify_lint(step: Step, ctx: StepContext) -> StepOutcome: - """Run ``mise run lint`` (typically an advisory ``on_failure: continue`` gate).""" + """Run the repo's lint command (default ``mise run lint``; usually an advisory gate).""" from post_hooks import verify_lint repo_dir = ctx.setup.repo_dir if ctx.setup else "" - passed = verify_lint(repo_dir) + passed = verify_lint(repo_dir, ctx.config.lint_command) was_passing_before = ctx.setup.lint_before if ctx.setup else True status = gate_status( passed=passed, diff --git a/agent/tests/test_entrypoint.py b/agent/tests/test_entrypoint.py index c2574000..5aff6ca9 100644 --- a/agent/tests/test_entrypoint.py +++ b/agent/tests/test_entrypoint.py @@ -500,3 +500,148 @@ def test_selects_pr_review_prompt(self): assert "READ-ONLY" in prompt assert "must NOT modify" in prompt assert "55" in prompt + + +# --------------------------------------------------------------------------- +# _build_system_prompt — Linear channel addendum +# --------------------------------------------------------------------------- + + +class TestBuildSystemPromptLinearChannel: + """The Linear-channel addendum is appended only for channel_source=='linear'.""" + + def _setup(self) -> RepoSetup: + return RepoSetup( + repo_dir="/workspace/t1", + branch="b", + default_branch="main", + notes=[], + ) + + def test_no_addendum_when_channel_is_blank(self): + config = TaskConfig( + repo_url="o/r", + task_id="t1", + max_turns=10, + github_token="ghp_test", + aws_region="us-east-1", + ) + prompt = _build_system_prompt(config, self._setup(), None, "") + assert "Linear issue progress updates" not in prompt + assert "Linear context discovery" not in prompt + + def test_no_addendum_for_slack_channel(self): + config = TaskConfig( + repo_url="o/r", + task_id="t1", + max_turns=10, + 
github_token="ghp_test", + aws_region="us-east-1", + channel_source="slack", + ) + prompt = _build_system_prompt(config, self._setup(), None, "") + assert "Linear issue progress updates" not in prompt + assert "Linear context discovery" not in prompt + + def test_addendum_present_for_linear_channel(self): + config = TaskConfig( + repo_url="o/r", + task_id="t1", + max_turns=10, + github_token="ghp_test", + aws_region="us-east-1", + channel_source="linear", + channel_metadata={ + "linear_issue_id": "issue-uuid-1", + "linear_issue_identifier": "ABC-42", + "linear_project_id": "project-uuid-1", + }, + ) + prompt = _build_system_prompt(config, self._setup(), None, "") + assert "Linear issue progress updates" in prompt + assert "Linear context discovery" in prompt + assert "ABC-42" in prompt + + def test_linear_addendum_names_attachment_tools(self): + # The agent must know the exact MCP tool names — vague references + # would cause it to grope. Lock these in so a rename triggers the test. + config = TaskConfig( + repo_url="o/r", + task_id="t1", + max_turns=10, + github_token="ghp_test", + aws_region="us-east-1", + channel_source="linear", + channel_metadata={"linear_issue_id": "issue-uuid-1"}, + ) + prompt = _build_system_prompt(config, self._setup(), None, "") + for tool in ( + "mcp__linear-server__get_issue", + "mcp__linear-server__get_attachment", + "mcp__linear-server__extract_images", + "mcp__linear-server__list_documents", + "mcp__linear-server__get_document", + "mcp__linear-server__list_comments", + ): + assert tool in prompt, f"expected {tool} to be named in the Linear addendum" + + def test_linear_addendum_inlines_issue_id_and_project_id(self): + config = TaskConfig( + repo_url="o/r", + task_id="t1", + max_turns=10, + github_token="ghp_test", + aws_region="us-east-1", + channel_source="linear", + channel_metadata={ + "linear_issue_id": "issue-uuid-deadbeef", + "linear_project_id": "project-uuid-cafebabe", + }, + ) + prompt = _build_system_prompt(config, 
self._setup(), None, "") + # The agent shouldn't have to guess the ids — they're in the metadata, + # so we surface them directly in the prompt. + assert "issue-uuid-deadbeef" in prompt + assert "project-uuid-cafebabe" in prompt + + def test_linear_addendum_warns_save_issue_no_ops_on_unknown_state(self): + # Regression-guard: many Linear teams do NOT have an `In Review` + # state. When the agent passes a state name that doesn't exist, + # save_issue silently no-ops — the response shows the unchanged + # state, but the agent claimed success on DEM-9 (2026-05-27). + # The prompt must (a) tell the agent to cache list_issue_statuses, + # (b) check the cached map before each transition, and (c) verify + # the response state.name matches what was asked. + config = TaskConfig( + repo_url="o/r", + task_id="t1", + max_turns=10, + github_token="ghp_test", + aws_region="us-east-1", + channel_source="linear", + channel_metadata={"linear_issue_id": "i"}, + ) + prompt = _build_system_prompt(config, self._setup(), None, "") + assert "no-op" in prompt or "no op" in prompt + assert "cache" in prompt.lower() + # Must explicitly call out post-transition response verification. + assert "state.name" in prompt or "returned" in prompt.lower() + + def test_linear_addendum_warns_against_embedding_uploads_linear_app_in_comments(self): + # Regression-guard: Linear's CDN signed URLs render fine in the + # original poster's context but show a broken-image icon when + # re-embedded by the bot in a comment. Hit on DEM-9 2026-05-27. + config = TaskConfig( + repo_url="o/r", + task_id="t1", + max_turns=10, + github_token="ghp_test", + aws_region="us-east-1", + channel_source="linear", + channel_metadata={"linear_issue_id": "i"}, + ) + prompt = _build_system_prompt(config, self._setup(), None, "") + assert "uploads.linear.app" in prompt + # The phrasing must be a prohibition for save_comment specifically, + # not just a passing mention — make sure we're forbidding the embed. 
+ assert "Do NOT embed" in prompt or "do not embed" in prompt.lower() diff --git a/agent/tests/test_models.py b/agent/tests/test_models.py index 49cbd93a..2d7dfc35 100644 --- a/agent/tests/test_models.py +++ b/agent/tests/test_models.py @@ -282,6 +282,21 @@ def test_required_fields(self): assert config.is_pr_workflow is False assert config.cedar_policies == [] assert config.issue is None + # #247 A4: defaults for stacked-child fields. + assert config.base_branch is None + assert config.merge_branches == [] + + def test_a4_stacked_child_fields(self): + # Diamond child: base off main + predecessor branches to merge in. + config = TaskConfig( + repo_url="owner/repo", + github_token="ghp_test", + aws_region="us-east-1", + base_branch="main", + merge_branches=["bgagent/taskB/b", "bgagent/taskC/c"], + ) + assert config.base_branch == "main" + assert config.merge_branches == ["bgagent/taskB/b", "bgagent/taskC/c"] def test_mutable_assignment(self): config = TaskConfig( diff --git a/agent/tests/test_prompts.py b/agent/tests/test_prompts.py index b26e13aa..a399988d 100644 --- a/agent/tests/test_prompts.py +++ b/agent/tests/test_prompts.py @@ -35,13 +35,34 @@ def test_linear_channel_includes_linear_tools(self): addendum = _channel_prompt_addendum( _config( channel_source="linear", - channel_metadata={"linear_issue_identifier": "ABC-42"}, + channel_metadata={ + "linear_issue_id": "issue-uuid-1", + "linear_issue_identifier": "ABC-42", + }, ) ) assert "Linear issue progress updates" in addendum assert "mcp__linear-server__save_comment" in addendum assert "ABC-42" in addendum + def test_linear_integration_node_gets_no_addendum(self): + # #247 UX.16: the synthetic orchestration integration node is a Linear + # task but has NO real sub-issue — channel_metadata omits + # linear_issue_id. Without a target issue the agent would grope via the + # MCP and post its "Starting"/"PR opened" comments onto the PARENT epic, + # cluttering the maturing panel. No issue id → no progress addendum. 
+ addendum = _channel_prompt_addendum( + _config( + channel_source="linear", + channel_metadata={ + "orchestration_id": "orch_abc", + "orchestration_sub_issue_id": "orch_abc__integration", + "parent_linear_issue_id": "parent-uuid", + }, + ) + ) + assert addendum == "" + def test_jira_channel_gets_no_addendum(self): # Jira comments are posted out-of-band by jira_reactions (REST shim); # the Atlassian MCP can't load in a headless agent, so instructing the @@ -84,8 +105,27 @@ def test_pr_review_returns_prompt_with_review_workflow(self): assert "Write and Edit are not available" in prompt assert "{workflow}" not in prompt + def test_restack_returns_prompt_with_remerge_workflow(self): + prompt = get_system_prompt("coding/restack-v1") + assert "RE-STACKING" in prompt + assert "predecessor" in prompt + assert ( + "do NOT add features" in prompt + or "NOT new feature work" in prompt + or "not new feature" in prompt.lower() + ) + assert "{branch_name}" in prompt # pushes to the SAME existing branch + assert "{pr_number}" in prompt + assert "{repo_url}" in prompt + assert "{workflow}" not in prompt + def test_all_workflows_contain_shared_base_sections(self): - for workflow_id in ("coding/new-task-v1", "coding/pr-iteration-v1", "coding/pr-review-v1"): + for workflow_id in ( + "coding/new-task-v1", + "coding/pr-iteration-v1", + "coding/pr-review-v1", + "coding/restack-v1", + ): prompt = get_system_prompt(workflow_id) assert "## Environment" in prompt, f"Missing Environment in {workflow_id}" has_rules = "## Rules" in prompt or "## Rules override" in prompt diff --git a/agent/tests/test_repo.py b/agent/tests/test_repo.py index 8d1b8c66..6dca7b77 100644 --- a/agent/tests/test_repo.py +++ b/agent/tests/test_repo.py @@ -137,3 +137,53 @@ def fake_run(*args, **kwargs): monkeypatch.setattr(subprocess, "run", fake_run) assert repo.detect_default_branch("owner/repo", "/tmp/x") == "main" + + +class TestPlatformBranchNameVerbatim: + """The agent MUST use the platform-provided 
``config.branch_name`` verbatim + when present, for EVERY workflow — never re-deriving its own slug. A + re-derived slug diverges from the platform's (shell.py slugify strips + dots / truncates at 40; gateway.ts uses dashes / truncates at 50), which + silently breaks #247 A4 stacking: a stacked child fetches the + predecessor's platform-named branch, the agent pushed a differently-named + one, the fetch 404s, and the child falls back to main (#14).""" + + def test_uses_platform_branch_name_verbatim_for_new_task(self, monkeypatch): + # new_task (is_pr_workflow=False) with a platform branch_name carrying a + # dotted/dashed slug. The agent must NOT re-slugify it. + fake = _fake_run_cmd() + _patch_common(monkeypatch, fake) + monkeypatch.setattr(repo, "detect_default_branch", lambda url, d: "main") + setup = repo.setup_repo( + _config( + is_pr_workflow=False, + branch_name="bgagent/01TESTTASKID/abca-166-add-seville-guide-html", + task_description="ABCA-166: Add seville-guide.html", + ) + ) + assert setup.branch == "bgagent/01TESTTASKID/abca-166-add-seville-guide-html" + + def test_uses_platform_branch_name_verbatim_for_pr_workflow(self, monkeypatch): + fake = _fake_run_cmd() + _patch_common(monkeypatch, fake) + setup = repo.setup_repo( + _config( + is_pr_workflow=True, + branch_name="bgagent/01TESTTASKID/abca-167-stacked-child", + base_branch="bgagent/01PREDTASK/abca-166-predecessor", + ) + ) + assert setup.branch == "bgagent/01TESTTASKID/abca-167-stacked-child" + + def test_falls_back_to_derived_slug_only_when_no_branch_name(self, monkeypatch): + # No platform branch_name → the agent derives its own slug (legacy path). 
+ fake = _fake_run_cmd() + _patch_common(monkeypatch, fake) + monkeypatch.setattr(repo, "detect_default_branch", lambda url, d: "main") + setup = repo.setup_repo( + _config( + is_pr_workflow=False, + task_description="ABCA-168: derive me", + ) + ) + assert setup.branch.startswith("bgagent/") diff --git a/agent/tests/test_server.py b/agent/tests/test_server.py index 144f9514..4bfda789 100644 --- a/agent/tests/test_server.py +++ b/agent/tests/test_server.py @@ -308,6 +308,26 @@ def test_validate_required_params_pr_workflows_require_pr_number(): ) assert missing == [] + # #305 A6: restack is a PR workflow — pr_number suffices, NO description + # required (regression: it previously fell into the non-PR branch and + # 400'd on missing issue_number_or_task_description). + missing = server._validate_required_params( + { + "repo_url": "o/r", + "resolved_workflow": {"id": "coding/restack-v1", "version": "1.0.0"}, + "pr_number": "113", + } + ) + assert missing == [] + missing = server._validate_required_params( + { + "repo_url": "o/r", + "resolved_workflow": {"id": "coding/restack-v1", "version": "1.0.0"}, + "pr_number": "", + } + ) + assert missing == ["pr_number"] + # A non-PR workflow needs issue OR description. missing = server._validate_required_params( { @@ -774,3 +794,75 @@ def test_none_stays_none(self): self._fake_req(), ) assert params["approval_gate_cap"] is None + + +class TestInvocationParamContract: + """The invocation boundary is wired as: + + params = _extract_invocation_params(inp, request) # a dict + _run_task_background(**params) # kwargs unpack + + The ONLY thing keeping these in sync is that every dict key is a valid + parameter name of ``_run_task_background`` (and vice-versa for required + fields). A mismatch is invisible until runtime and crashes EVERY task + with a ``NameError`` / ``TypeError`` — exactly the #247 A4 regression + where ``base_branch`` was passed to ``run_task`` but never extracted + into the params dict. 
These tests lock that contract structurally so + the next field added on one side but not the other fails in CI. + """ + + def _fake_req(self) -> Any: + return _FakeRequest() + + def _payload(self, **extra): + return {"repo_url": "org/repo", "task_description": "x", "task_id": "t-1", **extra} + + def test_every_extracted_key_is_a_valid_background_param(self): + import inspect + + params = server._extract_invocation_params(self._payload(), self._fake_req()) + sig = inspect.signature(server._run_task_background) + bg_param_names = set(sig.parameters) + + unknown = set(params) - bg_param_names + assert not unknown, ( + f"_extract_invocation_params returns keys that _run_task_background " + f"does not accept (would crash on **kwargs unpack): {sorted(unknown)}" + ) + + def test_extracted_params_unpack_into_background_signature(self): + # Binding the extracted dict against the real signature is exactly + # what `_run_task_background(**params)` does — this raises TypeError + # if a key is unknown OR a required (no-default) param is missing. + import inspect + + params = server._extract_invocation_params(self._payload(), self._fake_req()) + sig = inspect.signature(server._run_task_background) + # Should not raise. + sig.bind(**params) + + def test_a4_base_branch_and_merge_branches_extracted_and_accepted(self): + # The specific A4 fields whose omission caused the regression. + import inspect + + params = server._extract_invocation_params( + self._payload(base_branch="bgagent/taskA/a", merge_branches=["b1", "b2"]), + self._fake_req(), + ) + assert params["base_branch"] == "bgagent/taskA/a" + assert params["merge_branches"] == ["b1", "b2"] + # And they are real parameters of the background runner. 
+ bg = set(inspect.signature(server._run_task_background).parameters) + assert {"base_branch", "merge_branches"} <= bg + + def test_a4_fields_default_safely_when_absent(self): + params = server._extract_invocation_params(self._payload(), self._fake_req()) + assert params["base_branch"] is None + assert params["merge_branches"] == [] + + def test_merge_branches_non_string_entries_filtered(self): + params = server._extract_invocation_params( + self._payload(merge_branches=["ok", 123, None, "ok2"]), + self._fake_req(), + ) + assert params["merge_branches"] == ["ok", "ok2"] diff --git a/agent/tests/test_verify_commands.py b/agent/tests/test_verify_commands.py new file mode 100644 index 00000000..48349019 --- /dev/null +++ b/agent/tests/test_verify_commands.py @@ -0,0 +1,92 @@ +"""Tests for the configurable build/lint verification command (#1 build-gate fix).""" + +from __future__ import annotations + +import subprocess +from types import SimpleNamespace + +import post_hooks +from post_hooks import ( + DEFAULT_BUILD_COMMAND, + DEFAULT_LINT_COMMAND, + is_verify_command_inert, + resolve_verify_argv, + verify_build, + verify_lint, +) + + +class TestResolveVerifyArgv: + def test_empty_falls_back_to_default(self): + assert resolve_verify_argv("", DEFAULT_BUILD_COMMAND) == ["mise", "run", "build"] + assert resolve_verify_argv(" ", DEFAULT_LINT_COMMAND) == ["mise", "run", "lint"] + + def test_none_falls_back_to_default(self): + assert resolve_verify_argv(None, DEFAULT_BUILD_COMMAND) == ["mise", "run", "build"] + + def test_configured_command_splits_to_argv(self): + assert resolve_verify_argv("npm run build", "") == ["npm", "run", "build"] + assert resolve_verify_argv("gradle build", "") == ["gradle", "build"] + + def test_quoted_args_preserved(self): + assert resolve_verify_argv('make "target with spaces"', DEFAULT_BUILD_COMMAND) == [ + "make", + "target with spaces", + ] + + +class TestVerifyBuildHonorsCommand: + def _capture_argv(self, monkeypatch): + seen = {} + + def 
fake_run_cmd(argv, **kw): + seen["argv"] = argv + return SimpleNamespace(returncode=0) + + monkeypatch.setattr(post_hooks, "run_cmd", fake_run_cmd) + return seen + + def test_build_defaults_to_mise(self, monkeypatch): + seen = self._capture_argv(monkeypatch) + assert verify_build("/repo") is True + assert seen["argv"] == ["mise", "run", "build"] + + def test_build_uses_configured_command(self, monkeypatch): + seen = self._capture_argv(monkeypatch) + assert verify_build("/repo", "npm run build") is True + assert seen["argv"] == ["npm", "run", "build"] + + def test_lint_uses_configured_command(self, monkeypatch): + seen = self._capture_argv(monkeypatch) + assert verify_lint("/repo", "ruff check .") is True + assert seen["argv"] == ["ruff", "check", "."] + + def test_nonzero_returncode_is_failure(self, monkeypatch): + monkeypatch.setattr(post_hooks, "run_cmd", lambda argv, **kw: SimpleNamespace(returncode=1)) + assert verify_build("/repo", "npm run build") is False + + def test_timeout_is_failure(self, monkeypatch): + def boom(argv, **kw): + raise subprocess.TimeoutExpired(cmd=argv, timeout=1) + + monkeypatch.setattr(post_hooks, "run_cmd", boom) + assert verify_build("/repo", "npm run build") is False + + +class TestIsVerifyCommandInert: + def test_mise_no_tasks_defined_is_inert(self): + assert is_verify_command_inert(1, "mise ERROR no tasks defined in /repo") is True + + def test_command_not_found_exit_127_is_inert(self): + assert is_verify_command_inert(127, "gradle: command not found") is True + + def test_no_task_named_is_inert(self): + assert is_verify_command_inert(1, "mise ERROR: no task named 'build'") is True + + def test_genuine_build_failure_is_NOT_inert(self): + # Real compiler/test output, exited non-zero → meaningful gating signal. 
+ real_failure = "TypeError: cannot read property 'x'\n1 test failed" + assert is_verify_command_inert(2, real_failure) is False + + def test_clean_exit_is_not_inert(self): + assert is_verify_command_inert(0, "") is False diff --git a/agent/tests/test_workflow_runner.py b/agent/tests/test_workflow_runner.py index 6ca4d0d8..7844e5e3 100644 --- a/agent/tests/test_workflow_runner.py +++ b/agent/tests/test_workflow_runner.py @@ -492,7 +492,7 @@ def test_verify_build_regression_only_passes_when_broken_before(self, monkeypatc from workflow.runner import _handle_verify_build # build red after, but it was already red before → not a regression. - monkeypatch.setattr("post_hooks.verify_build", lambda _d: False) + monkeypatch.setattr("post_hooks.verify_build", lambda _d, _c="": False) wf = _workflow( [ {"kind": "verify_build", "name": "build", "gate": "regression_only"}, @@ -509,7 +509,7 @@ def test_verify_build_regression_only_fails_on_regression(self, monkeypatch): from models import RepoSetup from workflow.runner import _handle_verify_build - monkeypatch.setattr("post_hooks.verify_build", lambda _d: False) + monkeypatch.setattr("post_hooks.verify_build", lambda _d, _c="": False) wf = _workflow( [ {"kind": "verify_build", "name": "build", "gate": "regression_only"}, @@ -525,7 +525,7 @@ def test_verify_lint_read_only_is_informational(self, monkeypatch): from workflow.runner import _handle_verify_lint # read_only workflow: a lint failure must not gate (symmetry with build). 
- monkeypatch.setattr("post_hooks.verify_lint", lambda _d: False) + monkeypatch.setattr("post_hooks.verify_lint", lambda _d, _c="": False) wf = _workflow( [ {"kind": "clone_repo"}, diff --git a/agent/workflows/coding/restack-v1.yaml b/agent/workflows/coding/restack-v1.yaml new file mode 100644 index 00000000..f0115598 --- /dev/null +++ b/agent/workflows/coding/restack-v1.yaml @@ -0,0 +1,53 @@ +# A6 re-stack (#305): re-merge a CHANGED predecessor branch into an existing +# stacked-child PR so the child is no longer stale. Like pr-iteration it +# operates on an existing PR branch (push_resolve — no new PR), but it also +# receives the updated predecessor branch(es) as merge_branches, which repo.py +# merges into the working tree before the agent runs. The agent reconciles +# conflicts, verifies the build, and pushes the same branch. +# +# Triggered by the platform (the A6 re-stack handler off a pull_request +# webhook), not by a user. Writeable; Cedar principal "new_task" (the +# id→legacy map has no restack entry, so it falls to new_task — a writeable +# coding identity, correct here). +id: coding/restack-v1 +version: 1.0.0 +domain: coding +description: Re-merge a changed predecessor branch into an existing stacked-child PR (#305 A6). 
+requires_repo: true +read_only: false +prompt: + template: registry://prompt/coding-restack-workflow + placeholders: + - repo_url + - task_id + - workspace + - branch_name + - default_branch + - max_turns + - setup_notes + - memory_context + - pr_number +hydration: + sources: [pull_request, memory, task_description] +agent_config: + tier: standard + allowed_tools: [Bash, Read, Write, Edit, Glob, Grep, WebFetch] + cedar_policy_modules: [builtin/hard_deny, builtin/soft_deny] +repo_config: + provider: github + discover: true +required_inputs: + all_of: [pr_number] +steps: + - { kind: clone_repo, name: setup } + - { kind: hydrate_context, name: context } + - { kind: run_agent, name: restack } + - { kind: verify_build, name: build, gate: regression_only } + - { kind: ensure_pr, name: resolve_pr, strategy: push_resolve } +terminal_outcomes: + primary: pr_url +limits: + max_turns: 100 +promotion_gate: + requires: [tests:agent/restack] +status: production diff --git a/cdk/eslint.config.mjs b/cdk/eslint.config.mjs index 6ff27c2b..db991648 100644 --- a/cdk/eslint.config.mjs +++ b/cdk/eslint.config.mjs @@ -242,11 +242,18 @@ export default [ }, }, - // Override: tests legitimately use inline literals (fixtures, assertions) + // Override: tests legitimately use inline literals (fixtures, assertions), + // long fixture/assertion lines, and reuse small helper names (``row``, + // ``makeDdb``) across sibling describe blocks. Relax the stylistic rules that + // only add noise in test code; correctness rules stay on. 
{ files: ['test/**/*.ts'], rules: { '@typescript-eslint/no-magic-numbers': 'off', + '@typescript-eslint/no-shadow': 'off', + 'no-shadow': 'off', + '@stylistic/max-len': 'off', + 'max-len': 'off', }, }, ]; diff --git a/cdk/mise.toml b/cdk/mise.toml index 60332012..f493a454 100644 --- a/cdk/mise.toml +++ b/cdk/mise.toml @@ -27,6 +27,15 @@ description = "Jest tests" depends = [":compile"] run = ["mkdir -p $TMPDIR", "yarn test"] +# Focused, low-footprint test run for iterating on one file/pattern: +# mise //cdk:testf -- orchestration-release +# Skips coverage (the heaviest phase) and runs a single worker, so it +# won't spawn the worker fleet that OOMs the Mac on the 1240-test +# stack-synth suite. Use //cdk:test for the full coverage run (CI parity). +[tasks.testf] +description = "Focused jest run (no coverage, single worker)" +run = "yarn jest --coverage=false --runInBand" + [tasks.synth] description = "cdk synth" run = ["mkdir -p $TMPDIR", "yarn synth"] diff --git a/cdk/package.json b/cdk/package.json index a1097dda..b79c55ed 100644 --- a/cdk/package.json +++ b/cdk/package.json @@ -77,6 +77,8 @@ "/@(src|test)/**/*(*.)@(spec|test).ts?(x)", "/@(src|test)/**/__tests__/**/*.ts?(x)" ], + "maxWorkers": "50%", + "workerIdleMemoryLimit": "1536MB", "clearMocks": true, "collectCoverage": true, "coverageReporters": [ diff --git a/cdk/src/constructs/blueprint.ts b/cdk/src/constructs/blueprint.ts index 5ac64ac1..35463442 100644 --- a/cdk/src/constructs/blueprint.ts +++ b/cdk/src/constructs/blueprint.ts @@ -113,6 +113,27 @@ export interface BlueprintProps { * Override the default poll interval (ms) for awaiting agent completion. */ readonly pollIntervalMs?: number; + + /** + * Command the agent runs to BUILD/verify the repo before opening a PR + * (and as the pre-change baseline). Drives build-regression gating: if + * the repo built green before the agent's change and fails after, the + * task fails. Defaults to ``mise run build`` when unset. 
+ * + * Set this for repos that do NOT use mise (e.g. ``'npm run build'``, + * ``'gradle build'``, ``'make'``). Without a runnable build command, + * build-regression gating is INERT — a change that breaks the build + * still reports success (the agent emits a one-time warning on the PR). + * Runs in the agent's cloud container against the cloned repo; this is a + * compile/test verification, NOT a deployment. + */ + readonly buildCommand?: string; + + /** + * Command the agent runs to LINT the repo (advisory gate). Defaults to + * ``mise run lint`` when unset. Same semantics as ``buildCommand``. + */ + readonly lintCommand?: string; }; /** @@ -239,6 +260,12 @@ export class Blueprint extends Construct { if (props.pipeline?.pollIntervalMs !== undefined) { item.poll_interval_ms = { N: String(props.pipeline.pollIntervalMs) }; } + if (props.pipeline?.buildCommand) { + item.build_command = { S: props.pipeline.buildCommand }; + } + if (props.pipeline?.lintCommand) { + item.lint_command = { S: props.pipeline.lintCommand }; + } if (this.egressAllowlist.length > 0) { item.egress_allowlist = { L: this.egressAllowlist.map(d => ({ S: d })) }; } @@ -317,6 +344,8 @@ export class Blueprint extends Construct { if (props.agent?.systemPromptOverrides) fields.push(', #system_prompt_overrides = :system_prompt_overrides'); if (props.credentials?.githubTokenSecretArn) fields.push(', #github_token_secret_arn = :github_token_secret_arn'); if (props.pipeline?.pollIntervalMs !== undefined) fields.push(', #poll_interval_ms = :poll_interval_ms'); + if (props.pipeline?.buildCommand) fields.push(', #build_command = :build_command'); + if (props.pipeline?.lintCommand) fields.push(', #lint_command = :lint_command'); if (this.egressAllowlist.length > 0) fields.push(', #egress_allowlist = :egress_allowlist'); if (this.cedarPolicies.length > 0) fields.push(', #cedar_policies = :cedar_policies'); if (this.approvalGateCap !== undefined) fields.push(', #approval_gate_cap = :approval_gate_cap'); @@ 
-332,6 +361,8 @@ export class Blueprint extends Construct { if (props.agent?.systemPromptOverrides) names['#system_prompt_overrides'] = 'system_prompt_overrides'; if (props.credentials?.githubTokenSecretArn) names['#github_token_secret_arn'] = 'github_token_secret_arn'; if (props.pipeline?.pollIntervalMs !== undefined) names['#poll_interval_ms'] = 'poll_interval_ms'; + if (props.pipeline?.buildCommand) names['#build_command'] = 'build_command'; + if (props.pipeline?.lintCommand) names['#lint_command'] = 'lint_command'; if (this.egressAllowlist.length > 0) names['#egress_allowlist'] = 'egress_allowlist'; if (this.cedarPolicies.length > 0) names['#cedar_policies'] = 'cedar_policies'; if (this.approvalGateCap !== undefined) names['#approval_gate_cap'] = 'approval_gate_cap'; @@ -347,6 +378,8 @@ export class Blueprint extends Construct { if (props.agent?.systemPromptOverrides) values[':system_prompt_overrides'] = { S: props.agent.systemPromptOverrides }; if (props.credentials?.githubTokenSecretArn) values[':github_token_secret_arn'] = { S: props.credentials.githubTokenSecretArn }; if (props.pipeline?.pollIntervalMs !== undefined) values[':poll_interval_ms'] = { N: String(props.pipeline.pollIntervalMs) }; + if (props.pipeline?.buildCommand) values[':build_command'] = { S: props.pipeline.buildCommand }; + if (props.pipeline?.lintCommand) values[':lint_command'] = { S: props.pipeline.lintCommand }; if (this.egressAllowlist.length > 0) values[':egress_allowlist'] = { L: this.egressAllowlist.map(d => ({ S: d })) }; if (this.cedarPolicies.length > 0) values[':cedar_policies'] = { L: this.cedarPolicies.map(p => ({ S: p })) }; if (this.approvalGateCap !== undefined) values[':approval_gate_cap'] = { N: String(this.approvalGateCap) }; diff --git a/cdk/src/constructs/github-screenshot-integration.ts b/cdk/src/constructs/github-screenshot-integration.ts index b48c7086..b0256270 100644 --- a/cdk/src/constructs/github-screenshot-integration.ts +++ 
b/cdk/src/constructs/github-screenshot-integration.ts @@ -67,6 +67,15 @@ export interface GitHubScreenshotIntegrationProps { */ readonly linearWorkspaceRegistryTable?: dynamodb.ITable; + /** + * Optional — when provided, the processor persists the captured + * screenshot's public URL onto the deploy task's TaskRecord (keyed by the + * taskId in the deploy branch), so the #247 orchestration reconciler can + * embed the integration node's combined preview in the parent epic panel. + * Unset → persistence is skipped (the PR + Linear comments still post). + */ + readonly taskTable?: dynamodb.ITable; + /** * Removal policy for the dedup table + screenshot bucket. Defaults * to DESTROY so dev stacks don't accumulate orphans on `cdk destroy`. @@ -192,6 +201,9 @@ export class GitHubScreenshotIntegration extends Construct { ...(props.linearWorkspaceRegistryTable && { LINEAR_WORKSPACE_REGISTRY_TABLE_NAME: props.linearWorkspaceRegistryTable.tableName, }), + ...(props.taskTable && { + TASK_TABLE_NAME: props.taskTable.tableName, + }), }, bundling: commonBundling, }); @@ -247,6 +259,13 @@ export class GitHubScreenshotIntegration extends Construct { })); } + // #247: write access so the processor can persist screenshot_url onto the + // deploy task's TaskRecord (conditional UpdateItem). grantWriteData covers + // the UpdateItem; the handler's update is guarded by attribute_exists. + if (props.taskTable) { + props.taskTable.grantWriteData(this.webhookProcessorFn); + } + // AgentCore Browser session lifecycle + automation-stream connect. 
// Action set scoped to the three calls the handler actually makes; // resource is `*` because Browser sessions are ephemeral and the diff --git a/cdk/src/constructs/linear-integration.ts b/cdk/src/constructs/linear-integration.ts index d51e043b..94141d70 100644 --- a/cdk/src/constructs/linear-integration.ts +++ b/cdk/src/constructs/linear-integration.ts @@ -25,6 +25,7 @@ import * as dynamodb from 'aws-cdk-lib/aws-dynamodb'; import * as iam from 'aws-cdk-lib/aws-iam'; import { Runtime, Architecture } from 'aws-cdk-lib/aws-lambda'; import * as lambda from 'aws-cdk-lib/aws-lambda-nodejs'; +import * as s3 from 'aws-cdk-lib/aws-s3'; import * as secretsmanager from 'aws-cdk-lib/aws-secretsmanager'; import { NagSuppressions } from 'cdk-nag'; import { Construct } from 'constructs'; @@ -60,15 +61,47 @@ export interface LinearIntegrationProps { /** The DynamoDB repo config table (optional — for repo onboarding checks). */ readonly repoTable?: dynamodb.ITable; + /** + * OrchestrationTable for #247 Mode A parent/sub-issue orchestration. + * When provided, the webhook processor probes labeled parent issues for + * a sub-issue graph (seeds the DAG + releases root children). When + * omitted, the orchestration path is dormant (ORCHESTRATION_TABLE_NAME + * unset) and the processor behaves as one-issue → one-task. + */ + readonly orchestrationTable?: dynamodb.ITable; + /** Orchestrator Lambda function ARN for async task invocation. */ readonly orchestratorFunctionArn?: string; + /** + * User concurrency counter table (#331). When provided alongside + * ``orchestrationTable``, the webhook processor throttles the seed-time + * ROOT release to the user's free concurrency budget so a wide-root epic + * (many independent sub-issues, no shared foundation) doesn't over-release + * roots that admission then hard-fails. A failed root is UNRECOVERABLE + * (the sweep can only re-release a child whose predecessor still shows + * succeeded — a root has none), so throttling here matters most. 
Omitted + * → release all roots (back-compat; admission still gates). + */ + readonly userConcurrencyTable?: dynamodb.ITable; + + /** Per-user concurrency cap, shared with the orchestrator (#331). Default 10. */ + readonly maxConcurrentTasksPerUser?: number; + /** Bedrock Guardrail ID for input screening. */ readonly guardrailId?: string; /** Bedrock Guardrail version for input screening. */ readonly guardrailVersion?: string; + /** + * S3 bucket for attachment storage. Required to support image attachments + * extracted from issue descriptions (markdown `![alt](https://…)` images). + * When omitted, Linear-triggered tasks with image attachments fail at + * `createTaskCore` with "Attachment storage is not configured." + */ + readonly attachmentsBucket?: s3.IBucket; + /** Task retention in days for TTL computation. */ readonly taskRetentionDays?: number; @@ -168,6 +201,9 @@ export class LinearIntegration extends Construct { createTaskEnv.GUARDRAIL_ID = props.guardrailId; createTaskEnv.GUARDRAIL_VERSION = props.guardrailVersion; } + if (props.attachmentsBucket) { + createTaskEnv.ATTACHMENTS_BUCKET_NAME = props.attachmentsBucket.bucketName; + } // --- Cognito Authorizer (for /linear/link) --- const cognitoAuthorizer = new apigw.CognitoUserPoolsAuthorizer(this, 'LinearCognitoAuthorizer', { @@ -203,12 +239,31 @@ export class LinearIntegration extends Construct { LINEAR_PROJECT_MAPPING_TABLE_NAME: this.projectMappingTable.tableName, LINEAR_USER_MAPPING_TABLE_NAME: this.userMappingTable.tableName, LINEAR_WORKSPACE_REGISTRY_TABLE_NAME: this.workspaceRegistryTable.tableName, + // #247 Mode A: when set, enables parent/sub-issue orchestration + // (seed DAG + release roots). Unset → orchestration path dormant. + ...(props.orchestrationTable && { + ORCHESTRATION_TABLE_NAME: props.orchestrationTable.tableName, + }), + // #331: throttle the seed-time root release to the free concurrency + // budget (see prop doc). Only wired when both tables are present. 
+ ...(props.orchestrationTable && props.userConcurrencyTable && { + USER_CONCURRENCY_TABLE_NAME: props.userConcurrencyTable.tableName, + MAX_CONCURRENT_TASKS_PER_USER: String(props.maxConcurrentTasksPerUser ?? 10), + }), }, bundling: commonBundling, }); this.projectMappingTable.grantReadData(webhookProcessorFn); this.userMappingTable.grantReadData(webhookProcessorFn); this.workspaceRegistryTable.grantReadData(webhookProcessorFn); + // #247: seed the orchestration DAG + release root children. + if (props.orchestrationTable) { + props.orchestrationTable.grantReadWriteData(webhookProcessorFn); + } + // #331: read the user concurrency counter to throttle the root release. + if (props.orchestrationTable && props.userConcurrencyTable) { + props.userConcurrencyTable.grantReadData(webhookProcessorFn); + } // Phase 2.0b-O2: per-workspace OAuth token secrets are created by the // CLI at setup time (`bgagent-linear-oauth-`), not by CDK. Grant // the webhook processor Get + Put on the prefix so it can read tokens @@ -248,6 +303,15 @@ export class LinearIntegration extends Construct { ], })); } + // Issue descriptions can carry markdown `![alt](https://…)` images, which + // `extractImageUrlAttachments` (linear-webhook-processor.ts) turns into + // URL attachments. `createTaskCore` then uploads the screened bytes to + // `ATTACHMENTS_BUCKET_NAME`, mirroring the TaskApi/Slack paths. Without + // grantPut + grantDelete here, that upload fails closed with 503. 
+ if (props.attachmentsBucket) { + props.attachmentsBucket.grantPut(webhookProcessorFn); + props.attachmentsBucket.grantDelete(webhookProcessorFn); + } // --- Webhook receiver (verifies HMAC, dedups, invokes processor) --- const webhookFn = new lambda.NodejsFunction(this, 'WebhookFn', { diff --git a/cdk/src/constructs/orchestration-reconciler.ts b/cdk/src/constructs/orchestration-reconciler.ts new file mode 100644 index 00000000..481f24be --- /dev/null +++ b/cdk/src/constructs/orchestration-reconciler.ts @@ -0,0 +1,150 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
/**
 * Properties for OrchestrationReconciler construct.
 */
export interface OrchestrationReconcilerProps {
  /**
   * TaskTable — MUST have a stream enabled (NEW_IMAGE). This construct is
   * the table's stream consumer; the reconciler reacts to child tasks
   * reaching terminal status. The construct also grants the reconciler
   * read/write access to this table and exports its name as
   * ``TASK_TABLE_NAME``.
   */
  readonly taskTable: dynamodb.ITable;

  /** OrchestrationTable — the reconciler reads the DAG + writes child statuses. */
  readonly orchestrationTable: dynamodb.ITable;

  /**
   * TaskTable (for createTaskCore writes when releasing children).
   *
   * NOTE: this prop is currently NOT read by the construct — ``taskTable``
   * already receives read/write access and is the table exported as
   * ``TASK_TABLE_NAME``. It is kept so existing callers that pass it keep
   * compiling; passing it has no effect.
   */
  readonly taskTableForWrites?: dynamodb.ITable;

  /**
   * Orchestrator function ARN — releaseChild → createTaskCore invokes it.
   * When set it is exported to the handler as ``ORCHESTRATOR_FUNCTION_ARN``;
   * when omitted the variable is not defined at all.
   */
  readonly orchestratorFunctionArn?: string;

  /** Forwarded so released child tasks land in the right tables. */
  readonly taskEventsTable: dynamodb.ITable;
}

/**
 * DLQ message retention (days) — long enough for an operator to inspect a
 * poison stream record before it ages out.
 */
const DLQ_RETENTION_DAYS = 14;

/**
 * TaskTable-stream consumer that drives Linear parent/sub-issue
 * orchestration (issue #247, Mode A). On each child task reaching a
 * terminal status it releases newly-unblocked children in dependency
 * order (see `handlers/orchestration-reconciler.ts`).
 *
 * Stream-source rationale: TaskEventsTable's stream is at its 2-consumer
 * limit (FanOutConsumer + ApprovalMetricsPublisher); TaskTable had no
 * stream, so the reconciler is its first and only consumer — zero
 * contention with the fan-out plane.
 */
export class OrchestrationReconciler extends Construct {
  /** The reconciler Lambda subscribed to the TaskTable stream. */
  public readonly fn: lambda.NodejsFunction;

  /** Dead-letter queue that receives stream records the reconciler keeps failing on. */
  public readonly dlq: sqs.Queue;

  /**
   * @param scope - parent construct
   * @param id - construct id
   * @param props - tables the reconciler reads/writes and the optional orchestrator ARN
   */
  constructor(scope: Construct, id: string, props: OrchestrationReconcilerProps) {
    super(scope, id);

    const handlersDir = path.join(__dirname, '..', 'handlers');

    this.fn = new lambda.NodejsFunction(this, 'ReconcilerFn', {
      entry: path.join(handlersDir, 'orchestration-reconciler.ts'),
      handler: 'handler',
      runtime: Runtime.NODEJS_24_X,
      architecture: Architecture.ARM_64,
      timeout: Duration.minutes(2),
      // 512 MB (not 256): the reconciler bundles createTaskCore, which
      // pulls in the Bedrock guardrail + S3 attachment-screening SDK
      // stack. At 256 MB it OOMs during init on every stream event
      // (Max Memory Used 255/256 MB) and never releases children. The
      // LinearIntegration webhook processor runs the same code at 512 MB.
      memorySize: 512,
      environment: {
        ORCHESTRATION_TABLE_NAME: props.orchestrationTable.tableName,
        TASK_TABLE_NAME: props.taskTable.tableName,
        TASK_EVENTS_TABLE_NAME: props.taskEventsTable.tableName,
        // NOTE(review): only the ARN is exported here; this construct attaches
        // no lambda:InvokeFunction permission for it — confirm the owning
        // stack grants invoke on the reconciler's role.
        ...(props.orchestratorFunctionArn && {
          ORCHESTRATOR_FUNCTION_ARN: props.orchestratorFunctionArn,
        }),
      },
      bundling: {
        externalModules: ['@aws-sdk/*'],
      },
    });

    // DLQ for poison stream records (a record that repeatedly fails the
    // reconcile). Fan-out uses the same pattern; without it a bad record
    // would block the shard.
    this.dlq = new sqs.Queue(this, 'ReconcilerDlq', {
      retentionPeriod: Duration.days(DLQ_RETENTION_DAYS),
      enforceSSL: true,
    });

    // Orchestration child creation/gating reads + writes the DAG table,
    // reads/writes TaskTable (createTaskCore), and writes task events.
    props.orchestrationTable.grantReadWriteData(this.fn);
    props.taskTable.grantReadWriteData(this.fn);
    props.taskEventsTable.grantReadWriteData(this.fn);

    // Subscribe to the TaskTable stream. LATEST: we only care about
    // tasks transitioning to terminal from here on. bisectBatchOnError +
    // DLQ so one poison record can't wedge the shard.
    this.fn.addEventSource(new DynamoEventSource(props.taskTable, {
      startingPosition: StartingPosition.LATEST,
      batchSize: 10,
      retryAttempts: 3,
      bisectBatchOnError: true,
      onFailure: new SqsDlq(this.dlq),
    }));

    NagSuppressions.addResourceSuppressions(this.fn, [
      {
        id: 'AwsSolutions-IAM4',
        reason: 'AWSLambdaBasicExecutionRole is required for CloudWatch Logs access',
      },
      {
        id: 'AwsSolutions-IAM5',
        reason:
          'DynamoDB index/* + stream ARN wildcards generated by CDK grantReadWriteData ' +
          '(ChildTaskIndex query) and the DynamoEventSource read access',
      },
    ], true);

    NagSuppressions.addResourceSuppressions(this.dlq, [
      {
        id: 'AwsSolutions-SQS3',
        reason:
          'This queue IS the DLQ for the reconciler stream consumer — having its own DLQ would be infinite recursion',
      },
    ]);
  }
}
/**
 * Properties for OrchestrationTable construct.
 */
export interface OrchestrationTableProps {
  /**
   * Optional table name override.
   * @default - auto-generated by CloudFormation
   */
  readonly tableName?: string;

  /**
   * Removal policy for the table.
   * @default RemovalPolicy.DESTROY
   */
  readonly removalPolicy?: RemovalPolicy;

  /**
   * Whether to enable point-in-time recovery.
   * @default true
   */
  readonly pointInTimeRecovery?: boolean;
}

/**
 * DynamoDB table holding the parent/sub-issue dependency graph (DAG)
 * for Linear orchestration (issue #247, Mode A executor).
 *
 * One orchestration = one labeled Linear parent issue with sub-issues;
 * every child sub-issue is one row. The reconciler (PR A3) walks the rows
 * looking for children whose predecessors are all terminal-success and
 * releases them via ``createTaskCore``.
 *
 * Keys: orchestration_id (PK), sub_issue_id (SK). TTL attribute: ``ttl``.
 *
 * Per-child row fields (written by graph discovery, PR A2):
 *   - linear_sub_issue_id — the Linear sub-issue UUID this row tracks
 *   - child_task_id — the ABCA task_id created for this child (absent
 *     until the child is released by the reconciler)
 *   - depends_on — list of ``sub_issue_id``s that must reach
 *     terminal-success before this child may start
 *   - child_status — orchestration-local lifecycle marker (e.g.
 *     ``blocked`` | ``released`` | ``succeeded`` | ``failed`` | ``skipped``)
 *   - base_branch — the predecessor branch this child stacks on (ADR-001
 *     stacked PRs); ``main`` for root children
 *   - parent_linear_issue_id, linear_workspace_id, repo — provenance
 *
 * Global secondary indexes (both sparse, both project ALL attributes):
 *   - ChildTaskIndex (PK: child_task_id) — maps a child terminal-state
 *     event keyed by ``task_id`` back to its orchestration + child row.
 *     Only rows whose child has been released carry ``child_task_id``.
 *   - ChildBranchIndex (PK: child_branch_name) — the A6 re-stack path
 *     (#305) maps a GitHub ``pull_request`` head branch back to the
 *     orchestration child that opened it, so the child's dependents can be
 *     re-stacked when that branch changes. Only released children carry
 *     ``child_branch_name``.
 *
 * NOTE (PR A1): the construct was introduced before its consumers — graph
 * discovery (A2) and the reconciler (A3) wire it in.
 * NOTE(review): confirm whether the "not yet instantiated in any stack"
 * statement from the original header still holds.
 */
export class OrchestrationTable extends Construct {
  /**
   * Index that resolves a child ``task_id`` to its orchestration +
   * sub-issue row. PK: child_task_id. Sparse — released children only.
   */
  public static readonly CHILD_TASK_INDEX = 'ChildTaskIndex';

  /**
   * Index that resolves a child's head branch to its orchestration +
   * sub-issue row (A6 re-stack, #305). PK: child_branch_name. Sparse —
   * released children only.
   */
  public static readonly CHILD_BRANCH_INDEX = 'ChildBranchIndex';

  /**
   * The underlying DynamoDB table. Use this to grant access or read the table name.
   */
  public readonly table: dynamodb.Table;

  constructor(scope: Construct, id: string, props: OrchestrationTableProps = {}) {
    super(scope, id);

    // Resolve the optional props up front so the table definition reads flat.
    const pitrEnabled = props.pointInTimeRecovery ?? true;
    const removal = props.removalPolicy ?? RemovalPolicy.DESTROY;

    const table = new dynamodb.Table(this, 'Table', {
      tableName: props.tableName,
      partitionKey: { name: 'orchestration_id', type: dynamodb.AttributeType.STRING },
      sortKey: { name: 'sub_issue_id', type: dynamodb.AttributeType.STRING },
      billingMode: dynamodb.BillingMode.PAY_PER_REQUEST,
      timeToLiveAttribute: 'ttl',
      pointInTimeRecoverySpecification: { pointInTimeRecoveryEnabled: pitrEnabled },
      removalPolicy: removal,
    });

    // Both lookups are sparse GSIs keyed on an attribute that only exists once
    // the child has been released; rows without it are simply not projected.
    // Order matters only for synth stability: task index first, branch second.
    const sparseIndexes: Array<[indexName: string, partitionKeyName: string]> = [
      [OrchestrationTable.CHILD_TASK_INDEX, 'child_task_id'],
      [OrchestrationTable.CHILD_BRANCH_INDEX, 'child_branch_name'],
    ];
    for (const [indexName, partitionKeyName] of sparseIndexes) {
      table.addGlobalSecondaryIndex({
        indexName,
        partitionKey: { name: partitionKeyName, type: dynamodb.AttributeType.STRING },
        projectionType: dynamodb.ProjectionType.ALL,
      });
    }

    this.table = table;
  }
}
/**
 * Properties for StrandedOrchestrationReconciler construct.
 */
export interface StrandedOrchestrationReconcilerProps {
  /** OrchestrationTable — read DAG state, write recovered child statuses. */
  readonly orchestrationTable: dynamodb.ITable;
  /** TaskTable — read released children's task status (terminal? built?) + createTaskCore writes. */
  readonly taskTable: dynamodb.ITable;
  /** TaskEventsTable — createTaskCore writes task_created events. */
  readonly taskEventsTable: dynamodb.ITable;
  /**
   * Orchestrator function ARN — releaseChild → createTaskCore async-invokes it.
   * Exported as ``ORCHESTRATOR_FUNCTION_ARN`` only when provided.
   */
  readonly orchestratorFunctionArn?: string;
  /**
   * Sweep cadence. Long enough to amortise the scan; short enough to
   * clear a lost-event stall in a reasonable user-facing time.
   * @default Duration.minutes(10)
   */
  readonly schedule?: Duration;
}

/**
 * Sweep Lambda timeout (minutes). Budget for the createTaskCore +
 * Bedrock/S3 SDK bundle cold start plus the sweep's release work.
 */
const SWEEP_TIMEOUT_MINUTES = 5;

/** Default sweep cadence (minutes), used when ``props.schedule`` is not set. */
const DEFAULT_SWEEP_INTERVAL_MINUTES = 10;

/**
 * Scheduled backstop for Linear orchestration (#247, gap #303).
 *
 * The live ``OrchestrationReconciler`` reacts to TaskTable-stream terminal
 * events to release dependency-unblocked children. If it is unavailable
 * when an event fires (deploy/throttle/OOM/DLQ-parked record) that event
 * is lost and the orchestration stalls. This scheduled sweep re-derives
 * gating truth from persisted state and recovers stranded children
 * (see ``handlers/reconcile-stranded-orchestrations.ts``).
 *
 * Mirrors ``StrandedTaskReconciler``. Grants match the live reconciler
 * because it runs the same ``createTaskCore`` release path in-process.
 */
export class StrandedOrchestrationReconciler extends Construct {
  /** The sweep Lambda targeted by the schedule rule. */
  public readonly fn: lambda.NodejsFunction;

  /**
   * @param scope - parent construct
   * @param id - construct id
   * @param props - tables the sweep reads/writes, optional orchestrator ARN and cadence
   */
  constructor(scope: Construct, id: string, props: StrandedOrchestrationReconcilerProps) {
    super(scope, id);

    const handlersDir = path.join(__dirname, '..', 'handlers');

    this.fn = new lambda.NodejsFunction(this, 'ReconcilerFn', {
      entry: path.join(handlersDir, 'reconcile-stranded-orchestrations.ts'),
      handler: 'handler',
      runtime: Runtime.NODEJS_24_X,
      architecture: Architecture.ARM_64,
      timeout: Duration.minutes(SWEEP_TIMEOUT_MINUTES),
      // 512 MB to match the live reconciler — same createTaskCore +
      // Bedrock/S3 SDK bundle (see OrchestrationReconciler memory note).
      memorySize: 512,
      environment: {
        ORCHESTRATION_TABLE_NAME: props.orchestrationTable.tableName,
        TASK_TABLE_NAME: props.taskTable.tableName,
        TASK_EVENTS_TABLE_NAME: props.taskEventsTable.tableName,
        ...(props.orchestratorFunctionArn && {
          ORCHESTRATOR_FUNCTION_ARN: props.orchestratorFunctionArn,
        }),
      },
      bundling: {
        externalModules: ['@aws-sdk/*'],
      },
    });

    // Same data-plane access as the live reconciler: DAG rows, task
    // records, and task events are all read and written by the release path.
    props.orchestrationTable.grantReadWriteData(this.fn);
    props.taskTable.grantReadWriteData(this.fn);
    props.taskEventsTable.grantReadWriteData(this.fn);

    // Fixed-rate EventBridge rule; a caller-supplied cadence wins over the default.
    const schedule = props.schedule ?? Duration.minutes(DEFAULT_SWEEP_INTERVAL_MINUTES);
    const rule = new events.Rule(this, 'SweepSchedule', {
      schedule: events.Schedule.rate(schedule),
    });
    rule.addTarget(new targets.LambdaFunction(this.fn));

    NagSuppressions.addResourceSuppressions(this.fn, [
      {
        id: 'AwsSolutions-IAM4',
        reason: 'AWSLambdaBasicExecutionRole is required for CloudWatch Logs access',
      },
      {
        id: 'AwsSolutions-IAM5',
        reason:
          'DynamoDB index/* wildcards generated by CDK grantReadWriteData for the ' +
          'orchestration scan + child-task lookups + createTaskCore write path',
      },
    ], true);
  }
}
Linear issue → its newest ABCA task + PR (#247 + * UX.3). PK: linear_issue_id, SK: created_at (newest task wins). Sparse — + * only Linear-origin tasks (which write the top-level ``linear_issue_id`` + * attribute) are projected; GitHub/Slack/API tasks are absent. Powers the + * standalone ``@bgagent`` comment trigger on a plain (non-orchestration) + * issue, where no orchestration row records the issue→PR link. + */ + public static readonly LINEAR_ISSUE_INDEX = 'LinearIssueIndex'; + /** * The underlying DynamoDB table. Use this to grant access or read the table name. */ @@ -93,6 +106,17 @@ export class TaskTable extends Construct { pointInTimeRecoverySpecification: { pointInTimeRecoveryEnabled: props.pointInTimeRecovery ?? true, }, + // NEW_IMAGE stream feeds the #247 orchestration reconciler + // (`OrchestrationReconciler`), which reacts to child tasks reaching + // terminal status to release dependency-unblocked children. This is + // the table's FIRST and only stream consumer — deliberately on + // TaskTable rather than TaskEventsTable, whose stream is already at + // its 2-consumer limit (FanOutConsumer + ApprovalMetricsPublisher; + // see TaskEventsTable). NEW_IMAGE suffices — the reconciler reads + // status/build_passed/orchestration_id off the new record image. + // Enabling a stream on an existing table is an in-place CFN update + // (no table replacement). + stream: dynamodb.StreamViewType.NEW_IMAGE, removalPolicy: props.removalPolicy ?? RemovalPolicy.DESTROY, }); @@ -118,5 +142,17 @@ export class TaskTable extends Construct { partitionKey: { name: 'idempotency_key', type: dynamodb.AttributeType.STRING }, projectionType: dynamodb.ProjectionType.KEYS_ONLY, }); + + // GSI: Linear issue → newest ABCA task + PR (sparse — only Linear-origin + // tasks carry the top-level linear_issue_id). #247 UX.3 standalone + // comment trigger. INCLUDE-projects just the fields the trigger reads, so + // the index stays lean (no full-item copy on every task write). 
+ this.table.addGlobalSecondaryIndex({ + indexName: TaskTable.LINEAR_ISSUE_INDEX, + partitionKey: { name: 'linear_issue_id', type: dynamodb.AttributeType.STRING }, + sortKey: { name: 'created_at', type: dynamodb.AttributeType.STRING }, + projectionType: dynamodb.ProjectionType.INCLUDE, + nonKeyAttributes: ['pr_url', 'pr_number', 'status', 'repo', 'user_id', 'channel_metadata'], + }); } } diff --git a/cdk/src/handlers/fanout-task-events.ts b/cdk/src/handlers/fanout-task-events.ts index 3eabd8c4..81785684 100644 --- a/cdk/src/handlers/fanout-task-events.ts +++ b/cdk/src/handlers/fanout-task-events.ts @@ -47,13 +47,15 @@ import type { } from 'aws-lambda'; import { clearTokenCache, resolveGitHubToken } from './shared/context-hydration'; import { classifyError } from './shared/error-classifier'; +import { renderFailureReply } from './shared/failure-reply'; import { renderCommentBody, upsertTaskComment } from './shared/github-comment'; -import { postIssueComment } from './shared/linear-feedback'; +import { postIssueComment, replyToComment } from './shared/linear-feedback'; import { logger } from './shared/logger'; import { coerceNumericOrNull } from './shared/numeric'; import { loadRepoConfig } from './shared/repo-config'; import type { ChannelConfig, TaskNotificationsConfig, TaskRecord } from './shared/types'; import { dispatchSlackEvent, SlackApiError } from './slack-notify'; +import { TaskStatus } from '../constructs/task-status'; // Re-export the shared types so existing test imports (and any future // caller that only imports from the handler module) continue to work. @@ -1099,6 +1101,78 @@ async function dispatchToLinear(event: FanOutEvent): Promise { ); } } + + // #247 UX.3: a STANDALONE comment-triggered iteration (carries + // trigger_comment_id but NOT orchestration_iteration — those get the + // reconciler's reply) closes the human's @bgagent conversation with a + // THREADED ✅/❌ reply beneath their comment, on top of the metrics comment + // above. 
Orchestration iterations are skipped here to avoid a double-reply. + await replyToStandaloneTrigger(event, task, registryTableName, workspaceId, issueId); +} + +/** + * #247 UX.3 — post the threaded ✅/❌ reply for a standalone comment-triggered + * iteration. Idempotent: claims the one reply by conditionally stamping + * ``ack_replied_at`` on the task record, so a redelivered terminal stream + * record never double-replies (mirrors the reconciler's orchestration-iteration + * ack). Best-effort — never throws into the dispatcher. + */ +async function replyToStandaloneTrigger( + event: FanOutEvent, + task: TaskRecord, + registryTableName: string, + workspaceId: string, + issueId: string, +): Promise { + const cm = task.channel_metadata; + const triggerCommentId = cm?.trigger_comment_id; + // Only standalone iterations: must have a trigger comment AND must NOT be an + // orchestration iteration (the reconciler owns that reply). + if (!triggerCommentId || cm?.orchestration_iteration === 'true') return; + + const tableName = process.env.TASK_TABLE_NAME; + if (!tableName) return; + + // Claim the single reply for this task (dedup redelivered terminal events). + try { + await ddb.send(new UpdateCommand({ + TableName: tableName, + Key: { task_id: task.task_id }, + UpdateExpression: 'SET ack_replied_at = :now', + ConditionExpression: 'attribute_not_exists(ack_replied_at)', + ExpressionAttributeValues: { ':now': event.timestamp }, + })); + } catch (err) { + if ((err as { name?: string })?.name !== 'ConditionalCheckFailedException') { + logger.warn('[fanout/linear] UX.3 ack claim failed — skipping reply', { + task_id: task.task_id, + error: err instanceof Error ? err.message : String(err), + }); + } + return; // lost the claim (replay) or errored → don't double-reply + } + + // A clean success = completed AND the build/tests passed. 
A completed task + // whose build is red is NOT a clean ack — it gets the build/test failure + // reply (consistent with the reconciler's success gate). + const completed = event.event_type === 'task_completed'; + const succeeded = completed && task.build_passed !== false; + const prNumber = typeof task.pr_number === 'number' + ? task.pr_number + : (typeof task.pr_url === 'string' ? Number(task.pr_url.match(/\/pull\/(\d+)\b/)?.[1]) || null : null); + const body = succeeded + ? (prNumber !== null ? `✅ Updated — PR #${prNumber}.` : '✅ Updated.') + : renderFailureReply({ + // Preserve COMPLETED (so a completed-but-build-failed task reads as a + // build/test failure, not an agent crash); a non-completed terminal + // (failed/cancelled/stranded/timed_out) is an agent-itself failure. + status: completed ? TaskStatus.COMPLETED : TaskStatus.FAILED, + buildPassed: typeof task.build_passed === 'boolean' ? task.build_passed : null, + ...(typeof task.error_message === 'string' && { errorMessage: task.error_message }), + taskId: task.task_id, + }); + + await replyToComment({ linearWorkspaceId: workspaceId, registryTableName }, issueId, triggerCommentId, body); } /** Exposed for testing: the per-channel dispatcher callable by the diff --git a/cdk/src/handlers/github-webhook-processor.ts b/cdk/src/handlers/github-webhook-processor.ts index a205a051..fe1b526f 100644 --- a/cdk/src/handlers/github-webhook-processor.ts +++ b/cdk/src/handlers/github-webhook-processor.ts @@ -17,7 +17,9 @@ * SOFTWARE. 
*/ +import { DynamoDBClient } from '@aws-sdk/client-dynamodb'; import { PutObjectCommand, S3Client } from '@aws-sdk/client-s3'; +import { DynamoDBDocumentClient, GetCommand, UpdateCommand } from '@aws-sdk/lib-dynamodb'; import { captureScreenshot } from './shared/agentcore-browser'; import { resolveGitHubToken } from './shared/context-hydration'; import { upsertTaskComment } from './shared/github-comment'; @@ -26,11 +28,23 @@ import { validateDeploymentStatusPayload, } from './shared/github-deployment-status'; import { postIssueComment } from './shared/linear-feedback'; -import { extractLinearIdentifier, findLinearIssueByIdentifier } from './shared/linear-issue-lookup'; +import { + extractLinearIdentifier, + extractLinearIdentifierFromBranch, + findLinearIssueByIdentifier, +} from './shared/linear-issue-lookup'; import { logger } from './shared/logger'; -import { buildScreenshotKey, encodeMarkdownUrl, isAllowedScreenshotUrl } from './shared/screenshot-url'; +import { isIntegrationNode } from './shared/orchestration-integration-node'; +import { buildScreenshotKey, encodeMarkdownUrl, extractTaskIdFromBranch, isAllowedScreenshotUrl } from './shared/screenshot-url'; const s3 = new S3Client({}); +const ddb = DynamoDBDocumentClient.from(new DynamoDBClient({})); +// Optional — when set, the processor persists the screenshot's public URL onto +// the deploy task's TaskRecord (keyed by the taskId in the deploy branch) so +// the #247 orchestration reconciler can embed the integration node's combined +// preview in the parent epic panel. Unset → persistence is skipped (the PR + +// Linear comments still post). +const TASK_TABLE = process.env.TASK_TABLE_NAME; const SCREENSHOT_BUCKET = process.env.SCREENSHOT_BUCKET_NAME!; // CloudFront distribution domain — `.cloudfront.net`. 
Used as @@ -249,6 +263,19 @@ export async function handler(event: ProcessorEvent): Promise { const publicUrl = `https://${SCREENSHOT_PUBLIC_HOST}/${key}`; const commentBody = renderCommentBody(publicUrl, previewUrl); + // #247: persist the screenshot + preview URLs on the deploy task's record + // (keyed by the taskId in the branch) so the orchestration reconciler can + // embed the integration node's combined preview in the parent epic panel. + // Best-effort, before the comment posts so a comment-post failure doesn't + // skip it. The return tells us whether this is the synthetic integration + // node — whose screenshot belongs in the panel only, never as a standalone + // Linear comment on the parent epic (#247 UX.16). + const { isIntegrationNode: isIntegrationDeploy } = await persistScreenshotUrl( + pr.headRefName, + publicUrl, + previewUrl, + ); + try { const result = await upsertTaskComment({ repo, @@ -285,9 +312,25 @@ export async function handler(event: ProcessorEvent): Promise { // Best-effort Linear comment. The GitHub PR comment above is the // load-bearing artifact; the Linear comment is bonus surface for // reviewers who live in Linear. Only fires when the registry table - // is configured AND the PR title/body carries a Linear identifier. - if (LINEAR_WORKSPACE_REGISTRY_TABLE) { - const identifier = extractLinearIdentifier(pr.title) ?? extractLinearIdentifier(pr.body); + // is configured AND the PR carries a Linear identifier. + // + // #247 UX.16: the synthetic integration node has no Linear sub-issue of its + // own, so a Linear post here would resolve the parent-epic identifier from + // the PR title and land a "🖼️ Preview screenshot" comment ON THE PARENT — + // cluttering the maturing panel (which already embeds the combined preview + // via the persisted screenshot_url). Skip the Linear post for the integration + // node; the panel is the only Linear surface for the combined result. 
+ if (LINEAR_WORKSPACE_REGISTRY_TABLE && !isIntegrationDeploy) { + // Branch-name first — it deterministically encodes this PR's own + // issue (`bgagent/{taskId}/abca-151-...`). Title/body are ambiguous + // fallbacks: in a stacked #247 orchestration the body often names a + // predecessor issue before the one the PR closes, and + // `extractLinearIdentifier` returns the first match in document + // order — which would misroute the screenshot to the predecessor. + const identifier = + extractLinearIdentifierFromBranch(pr.headRefName) + ?? extractLinearIdentifier(pr.title) + ?? extractLinearIdentifier(pr.body); if (identifier) { const linearIssue = await findLinearIssueByIdentifier(identifier, LINEAR_WORKSPACE_REGISTRY_TABLE); if (linearIssue) { @@ -332,6 +375,12 @@ interface OpenPr { readonly number: number; readonly title: string; readonly body: string; + /** + * Head branch ref (e.g. `bgagent/{taskId}/abca-151-...`). The + * authoritative source for the linked Linear issue — see + * `extractLinearIdentifierFromBranch`. + */ + readonly headRefName: string; } /** @@ -382,9 +431,9 @@ async function findPullRequestForShaWithRetry( * "List pull requests associated with a commit" GitHub API * (https://docs.github.com/rest/commits/commits#list-pull-requests-associated-with-a-commit). * - * Returns the first OPEN PR (with title/body), or null if none. - * Closed/merged PRs are filtered out — v1 only screenshots active - * reviews. + * Returns the OPEN PR that the deploy is *for* (head SHA == `sha`), or + * the first open PR as a fallback, or null if none. Closed/merged PRs + * are filtered out — v1 only screenshots active reviews. 
*/ async function findPullRequestForSha( repo: string, @@ -454,17 +503,78 @@ async function findPullRequestForSha( state?: string; title?: string; body?: string | null; + head?: { ref?: string; sha?: string } | null; }>; - const open = pulls.find((p) => p.state === 'open' && typeof p.number === 'number'); - if (!open) return null; + const openPulls = pulls.filter((p) => p.state === 'open' && typeof p.number === 'number'); + if (openPulls.length === 0) return null; + // Prefer the PR whose own head is this SHA — the PR that introduced the + // commit. For a stacked #247 chain the commit-pulls API also lists every + // PR stacked on top (their history contains the commit); routing reads + // the selected PR's branch, so we must pick its true owner. Fall back to + // the first open PR for non-head SHAs (e.g. a merge/base commit). + const owner = openPulls.find((p) => p.head?.sha === sha) ?? openPulls[0]; return { - number: open.number!, - title: open.title ?? '', - body: open.body ?? '', + number: owner.number!, + title: owner.title ?? '', + body: owner.body ?? '', + headRefName: owner.head?.ref ?? '', }; } /** Render the PR comment body. */ +/** + * #247: persist the captured screenshot's public URL onto the deploy task's + * TaskRecord, so the orchestration reconciler can embed the integration node's + * combined preview in the parent epic panel. Keyed by the taskId encoded in + * the deploy branch (``bgagent/{taskId}/…``). Best-effort and never throws — + * a non-ABCA branch (no taskId), an unset table, or a vanished record (TTL) + * just skips persistence; the PR + Linear comments are the load-bearing + * artifacts. Conditional on ``attribute_exists`` so we never resurrect a + * TTL-reaped row. 
+ */ +async function persistScreenshotUrl( + branchName: string, + publicUrl: string, + previewUrl: string, +): Promise<{ isIntegrationNode: boolean }> { + const result = { isIntegrationNode: false }; + if (!TASK_TABLE) return result; + const taskId = extractTaskIdFromBranch(branchName); + if (!taskId) return result; + try { + // Persist BOTH the captured image URL and the live preview-deploy URL so + // the reconciler can render a clickable combined-preview deep-link in the + // panel (#247 UX.17). ReturnValues=ALL_NEW so we learn whether this deploy task + // is a synthetic integration node WITHOUT a second Get (#247 UX.16): the + // integration node's screenshot belongs in the PANEL only — it must NOT + // also post a standalone Linear comment on the parent epic. + const upd = await ddb.send(new UpdateCommand({ + TableName: TASK_TABLE, + Key: { task_id: taskId }, + UpdateExpression: 'SET screenshot_url = :u, screenshot_preview_url = :p', + ConditionExpression: 'attribute_exists(task_id)', + ExpressionAttributeValues: { ':u': publicUrl, ':p': previewUrl }, + ReturnValues: 'ALL_NEW', + })); + const subIssueId = upd.Attributes?.channel_metadata?.orchestration_sub_issue_id; + result.isIntegrationNode = typeof subIssueId === 'string' && isIntegrationNode(subIssueId); + logger.info('Persisted screenshot_url on task record', { + task_id: taskId, + public_url: publicUrl, + is_integration_node: result.isIntegrationNode, + }); + } catch (err) { + // ConditionalCheckFailed = the task row is gone (TTL); anything else is a + // transient DDB error. Either way the comments still post afterwards — log + move on. + logger.warn('Failed to persist screenshot_url (non-fatal)', { + event: 'screenshot.persist_failed', + task_id: taskId, + error: err instanceof Error ? 
err.message : String(err), + }); + } + return result; +} + function renderCommentBody(publicUrl: string, previewUrl: string): string { // previewUrl is payload-derived; percent-encode its parens so a crafted // path can't break out of the markdown link and inject content into a diff --git a/cdk/src/handlers/linear-webhook-processor.ts b/cdk/src/handlers/linear-webhook-processor.ts index 15d2b4b3..13267e95 100644 --- a/cdk/src/handlers/linear-webhook-processor.ts +++ b/cdk/src/handlers/linear-webhook-processor.ts @@ -21,9 +21,21 @@ import * as crypto from 'crypto'; import { DynamoDBClient } from '@aws-sdk/client-dynamodb'; import { DynamoDBDocumentClient, GetCommand } from '@aws-sdk/lib-dynamodb'; import { createTaskCore } from './shared/create-task-core'; -import { reportIssueFailure } from './shared/linear-feedback'; +import { reactToComment, replyToComment, reportIssueFailure, EMOJI_STARTED } from './shared/linear-feedback'; +import { + probeLinearIssueContext, + renderIssueContextHint, +} from './shared/linear-issue-context-probe'; import { resolveLinearOauthToken } from './shared/linear-oauth-resolver'; +import { fetchIssueParentId } from './shared/linear-subissue-fetch'; +import { resolveTaskByLinearIssue, prNumberFromTask } from './shared/linear-task-by-issue'; import { logger } from './shared/logger'; +import { buildIterationInstruction, parseCommentTrigger, type CommentTrigger } from './shared/orchestration-comment-trigger'; +import { discoverOrchestration } from './shared/orchestration-discovery'; +import { parseParentNodeReference, renderParentDisambiguationReply, suggestClosestNode } from './shared/orchestration-parent-comment'; +import { readConcurrencyBudget, releaseReadyChildren } from './shared/orchestration-release'; +import { upsertEpicPanel } from './shared/orchestration-rollup'; +import { claimCommentAck, deriveOrchestrationId, loadOrchestration, setStatusCommentId, type OrchestrationReleaseContext } from './shared/orchestration-store'; import 
type { Attachment } from './shared/types'; const ddb = DynamoDBDocumentClient.from(new DynamoDBClient({})); @@ -31,7 +43,24 @@ const ddb = DynamoDBDocumentClient.from(new DynamoDBClient({})); const PROJECT_MAPPING_TABLE = process.env.LINEAR_PROJECT_MAPPING_TABLE_NAME!; const USER_MAPPING_TABLE = process.env.LINEAR_USER_MAPPING_TABLE_NAME!; const WORKSPACE_REGISTRY_TABLE = process.env.LINEAR_WORKSPACE_REGISTRY_TABLE_NAME; +// #247 Mode A: name of OrchestrationTable. Unset until PR A3 wires the +// orchestration stack — while unset, the parent/sub-issue path is fully +// dormant and the handler behaves exactly as one-issue → one-task. +const ORCHESTRATION_TABLE = process.env.ORCHESTRATION_TABLE_NAME; const DEFAULT_LABEL_FILTER = 'bgagent'; +// #331: throttle the seed-time root release to the user's free concurrency +// budget. Unset → release all roots (back-compat; admission still gates). +const USER_CONCURRENCY_TABLE = process.env.USER_CONCURRENCY_TABLE_NAME; +const MAX_CONCURRENT = Number(process.env.MAX_CONCURRENT_TASKS_PER_USER ?? '10'); +// createTaskCore rejects idempotency keys longer than this; synthesized keys +// are sliced to fit the validated /^[A-Za-z0-9_-]{1,128}$/ pattern. +const MAX_IDEMPOTENCY_KEY_LENGTH = 128; +/** + * TTL (seconds) for the per-comment ack-claim marker (#247 UX.20). Only needs + * to outlive Linear's webhook redelivery window (minutes), but we keep a day of + * slack so a delayed redelivery still dedups; the row self-expires after. + */ +const ACK_CLAIM_TTL_SECONDS = 86_400; /** * Post a Linear comment + ❌ reaction without ever propagating an error. @@ -112,6 +141,32 @@ interface LinearIssueEvent { readonly webhookId?: string; } +/** Shape of a Linear `Comment` webhook (#247 A6 trigger). 
*/ +interface LinearCommentEvent { + readonly action: 'create' | 'update' | 'remove' | string; + readonly type: 'Comment'; + readonly data: { + readonly id: string; + readonly body?: string; + /** The issue the comment is on (the sub-issue, for A6). */ + readonly issueId?: string; + readonly issue?: { readonly id?: string }; + readonly userId?: string; + /** + * Set when this comment is a REPLY within a thread — the id of the thread + * ROOT (top-level) comment. Linear threads are one level deep, and + * commentCreate rejects a reply whose parentId is itself a reply ("Parent + * comment must be a top level comment"). So the ✅/❌ ack must reply to the + * ROOT, not to this comment when it's a reply (#247 — live-caught: a + * thread-reply @bgagent trigger had its ack silently dropped). + */ + readonly parentId?: string; + readonly [key: string]: unknown; + }; + readonly actor?: { readonly id?: string; readonly name?: string }; + readonly organizationId?: string; +} + interface ProcessorEvent { readonly raw_body: string; } @@ -133,9 +188,9 @@ export async function handler(event: ProcessorEvent): Promise { return; } - let payload: LinearIssueEvent; + let payload: LinearIssueEvent | LinearCommentEvent; try { - payload = JSON.parse(event.raw_body) as LinearIssueEvent; + payload = JSON.parse(event.raw_body) as LinearIssueEvent | LinearCommentEvent; } catch (err) { logger.error('Linear webhook processor could not parse raw_body', { error: err instanceof Error ? err.message : String(err), @@ -143,12 +198,20 @@ export async function handler(event: ProcessorEvent): Promise { return; } - if (payload.type !== 'Issue') { - logger.info('Linear processor skipping non-Issue payload', { type: payload.type }); + // #247 A6: a Comment with an @bgagent mention on an orchestrated sub-issue + // re-iterates that sub-issue's PR (the reconciler then cascades the + // re-stack). Handled on a separate path from Issue → task creation. 
+ if (payload.type === 'Comment') { + await handleCommentTrigger(payload as LinearCommentEvent); return; } - const issue = payload.data; + if ((payload as { type?: string }).type !== 'Issue') { + logger.info('Linear processor skipping unrecognized payload', { type: (payload as { type?: string }).type }); + return; + } + + const issue = (payload as LinearIssueEvent).data; const projectId = issue.projectId; // Resolve the per-project label override (if any) BEFORE the label gate so @@ -252,8 +315,6 @@ export async function handler(event: ProcessorEvent): Promise { return; } - const taskDescription = buildTaskDescription(issue); - const channelMetadata: Record = { linear_issue_id: issue.id, linear_workspace_id: workspaceId, @@ -278,6 +339,13 @@ export async function handler(event: ProcessorEvent): Promise { // skip, the user mapping lookup would fail, and we'd burn agent // quota for no observable result. Drop the event explicitly here // rather than rely on downstream lookups to incidentally block it. + // + // #247: also capture the access token — the orchestration path below + // needs it to fetch the sub-issue graph. Past this block ``resolved`` + // is guaranteed present (we return otherwise), so the token is set + // whenever the registry table is configured. + let resolvedAccessToken: string | undefined; + let contextHint = ''; if (WORKSPACE_REGISTRY_TABLE) { const resolved = await resolveLinearOauthToken(workspaceId, WORKSPACE_REGISTRY_TABLE); if (!resolved) { @@ -289,8 +357,237 @@ export async function handler(event: ProcessorEvent): Promise { } channelMetadata.linear_oauth_secret_arn = resolved.oauthSecretArn; channelMetadata.linear_workspace_slug = resolved.workspaceSlug; + resolvedAccessToken = resolved.accessToken; + // Best-effort presence probe: ask Linear once whether the issue has + // paperclip attachments or sits in a project with documents. 
The agent + // will fetch the actual content via the Linear MCP at runtime — this + // step only flags that there's something worth fetching. + const probe = await probeLinearIssueContext(resolved.accessToken, issue.id); + contextHint = renderIssueContextHint(probe); + } + + // #247 Mode A — parent/sub-issue orchestration. Env-var gated: until + // the orchestration stack (PR A3) sets ORCHESTRATION_TABLE_NAME this + // whole branch is dormant and the handler behaves exactly as before + // (one issue → one task). When enabled AND we have a workspace token, + // probe the labeled issue for a sub-issue dependency graph: + // - has sub-issues → seed the DAG and hand off to the reconciler + // (A3) which creates children in dependency order. The parent + // issue itself does NOT spawn a task here (no special label + // needed: a human-authored graph is implicit consent to execute). + // - no sub-issues → fall through to the single-task path below. + // - invalid graph (cycle/dangling) → terminal ❌ comment, no task. + // - transient Linear error → terminal comment; do NOT silently + // degrade to a single task (that would drop the epic structure). + if (ORCHESTRATION_TABLE && resolvedAccessToken) { + const releaseContext: OrchestrationReleaseContext = { + platform_user_id: platformUserId, + // This orchestration was seeded by the Linear trigger; stamp the + // channel on the meta row so downstream release + rollup follow it + // (#247 trigger-agnostic seam). Defaults to 'linear' if ever omitted. 
+ channel_source: 'linear', + ...(channelMetadata.linear_oauth_secret_arn && { + linear_oauth_secret_arn: channelMetadata.linear_oauth_secret_arn, + }), + ...(channelMetadata.linear_workspace_slug && { + linear_workspace_slug: channelMetadata.linear_workspace_slug, + }), + linear_project_id: projectId, + }; + + const discovery = await discoverOrchestration({ + ddb, + tableName: ORCHESTRATION_TABLE, + accessToken: resolvedAccessToken, + parentLinearIssueId: issue.id, + linearWorkspaceId: workspaceId, + repo, + now: new Date().toISOString(), + releaseContext, + }); + + if (discovery.kind === 'rejected') { + logger.info('Linear orchestration graph rejected — not creating tasks', { + issue_id: issue.id, + reason: discovery.reason, + }); + await safeReportIssueFailure(issue.id, workspaceId, `❌ ${discovery.message}`); + return; + } + if (discovery.kind === 'error') { + await safeReportIssueFailure( + issue.id, + workspaceId, + `❌ ABCA couldn't read this issue's sub-issues: ${discovery.message}`, + ); + return; + } + if (discovery.kind === 'seeded') { + // Release the ROOT children (layer 0) now — the reconciler only + // fires on a child's terminal event, so nothing would start the + // graph otherwise. Downstream children are released by the + // reconciler as predecessors succeed. On idempotent replay + // (alreadyExisted) the roots were released on the first pass and + // releaseChild's idempotency key makes a re-release a no-op, so we + // still load + release defensively (cheap, and recovers a crash + // between seed and root-release on the first pass). + const snapshot = await loadOrchestration(ddb, ORCHESTRATION_TABLE, discovery.orchestrationId); + let releasedRoots = 0; + if (snapshot) { + // #331: throttle the root release to the user's free concurrency + // budget. 
A wide-root epic (many independent sub-issues, no shared + // foundation) would otherwise release >cap roots at once; the + // overflow gets hard-failed by admission — and a failed ROOT is + // UNRECOVERABLE (the sweep re-releases a child from its succeeded + // predecessor; a root has none). Leftover roots stay ``ready`` and + // the #303 sweep releases them as slots free. Unset table → release + // all (back-compat; admission still gates). + const budget = USER_CONCURRENCY_TABLE + ? await readConcurrencyBudget( + ddb, USER_CONCURRENCY_TABLE, snapshot.meta.release_context.platform_user_id, MAX_CONCURRENT) + : undefined; + const results = await releaseReadyChildren( + ddb, + ORCHESTRATION_TABLE, + snapshot.children, + snapshot.meta.release_context, + createTaskCore, + new Date().toISOString(), + // full child set for A4 base selection (roots have no preds → off-main) + snapshot.children, + 'main', + budget, + ); + releasedRoots = results.filter((r) => r.kind === 'released').length; + } + logger.info('Linear orchestration seeded — root children released', { + issue_id: issue.id, + orchestration_id: discovery.orchestrationId, + child_count: discovery.childCount, + root_count: discovery.rootSubIssueIds.length, + released_roots: releasedRoots, + already_existed: discovery.alreadyExisted, + }); + // #247 UX.2: post the initial epic panel + mirror the parent start + // signal (👀 reaction + In Progress) in one upsertEpicPanel call. The + // reconciler edits this same panel on every later event and advances the + // parent to In Review on completion. Only on the first seed — a replay + // (alreadyExisted) routes to the 'extended' branch instead. Best-effort; + // gated on the registry table like every other feedback. 
+ if (WORKSPACE_REGISTRY_TABLE && !discovery.alreadyExisted) { + const parentCtx = { linearWorkspaceId: workspaceId, registryTableName: WORKSPACE_REGISTRY_TABLE }; + // #247 UX.2: post the initial maturing panel (in-progress) and mirror + // the parent start signal (👀 + In Progress) in one call. Re-load + // post-release so roots show 'running'. Stamp the comment id so the + // reconciler edits this same panel on every later event. Best-effort. + try { + const postReleaseSnapshot = await loadOrchestration(ddb, ORCHESTRATION_TABLE, discovery.orchestrationId); + if (postReleaseSnapshot) { + const commentId = await upsertEpicPanel({ + ctx: parentCtx, + parentLinearIssueId: issue.id, + children: postReleaseSnapshot.children, + inProgress: true, + mirrorParentState: true, + }); + if (commentId) { + await setStatusCommentId(ddb, ORCHESTRATION_TABLE, discovery.orchestrationId, commentId); + } + } + } catch (err) { + logger.warn('Failed to post orchestration panel at seed (non-fatal)', { + issue_id: issue.id, + orchestration_id: discovery.orchestrationId, + error: err instanceof Error ? err.message : String(err), + }); + } + } + // The parent issue itself spawns no task; the reconciler (off the + // TaskTable stream) releases downstream children as roots succeed. + return; + } + if (discovery.kind === 'extended') { + // Orchestration-extend: sub-issues were added to an already-seeded epic. + // Release the newly-added nodes whose predecessors are ALREADY done (the + // store marked them 'ready'); the rest are 'blocked' and the reconciler + // releases them as predecessors finish. A re-trigger with no new nodes + // returns empty → nothing to do. 
+ if (discovery.addedSubIssueIds.length === 0) { + logger.info('Linear orchestration re-trigger — no new sub-issues to add', { + issue_id: issue.id, orchestration_id: discovery.orchestrationId, + }); + return; + } + const snapshot = await loadOrchestration(ddb, ORCHESTRATION_TABLE, discovery.orchestrationId); + let releasedAdded = 0; + if (snapshot) { + // Release only the newly-added 'ready' nodes. Pass the FULL child set + // as allChildren so A4 base-branch selection sees finished + // predecessors' branches (a new node stacks on its done predecessor). + const releasableRows = snapshot.children.filter( + (c) => discovery.releasableSubIssueIds.includes(c.sub_issue_id) && c.child_status === 'ready', + ); + if (releasableRows.length > 0) { + const budget = USER_CONCURRENCY_TABLE + ? await readConcurrencyBudget( + ddb, USER_CONCURRENCY_TABLE, snapshot.meta.release_context.platform_user_id, MAX_CONCURRENT) + : undefined; + const results = await releaseReadyChildren( + ddb, + ORCHESTRATION_TABLE, + releasableRows, + snapshot.meta.release_context, + createTaskCore, + new Date().toISOString(), + snapshot.children, // full set → A4 base branch off finished predecessors + 'main', + budget, + ); + releasedAdded = results.filter((r) => r.kind === 'released').length; + } + } + logger.info('Linear orchestration extended — added sub-issues', { + issue_id: issue.id, + orchestration_id: discovery.orchestrationId, + added: discovery.addedSubIssueIds.length, + released_now: releasedAdded, + }); + // #247 UX.2: no standalone '➕ Added' comment — the new row appearing in + // the maturing panel IS the signal (the user just added the sub-issue in + // Linear, so they don't need a ping). Refresh the panel so it shows the + // new row(s) + reverts the header to in-progress. Re-load post-release so + // a just-released added node shows 'running'. Best-effort. 
+ if (WORKSPACE_REGISTRY_TABLE && snapshot) { + try { + const fresh = await loadOrchestration(ddb, ORCHESTRATION_TABLE, discovery.orchestrationId); + const children = fresh?.children ?? snapshot.children; + const meta = (fresh ?? snapshot).meta; + const newId = await upsertEpicPanel({ + ctx: { linearWorkspaceId: workspaceId, registryTableName: WORKSPACE_REGISTRY_TABLE }, + parentLinearIssueId: issue.id, + ...(meta.status_comment_id !== undefined && { statusCommentId: meta.status_comment_id }), + children, + inProgress: true, // the extend re-opened the epic + }); + if (newId && meta.status_comment_id === undefined) { + await setStatusCommentId(ddb, ORCHESTRATION_TABLE, discovery.orchestrationId, newId); + } + } catch (err) { + logger.warn('Failed to refresh panel on extend (non-fatal)', { + issue_id: issue.id, + orchestration_id: discovery.orchestrationId, + error: err instanceof Error ? err.message : String(err), + }); + } + } + return; + } + // discovery.kind === 'single_task' → fall through to the single-task + // path below (issue had no sub-issues). } + const taskDescription = buildTaskDescription(issue, contextHint); + // Extract embedded image URLs from the issue description markdown. // These become URL attachments that are fetched and screened during context hydration. const attachments = extractImageUrlAttachments(issue.description); @@ -332,6 +629,415 @@ export async function handler(event: ProcessorEvent): Promise { }); } +/** + * #247 A6 comment trigger. A Linear comment with an ``@bgagent`` mention on an + * orchestrated sub-issue runs a ``coding/pr-iteration-v1`` task on that + * sub-issue's PR; the comment text is the instruction. When that task + * completes, the reconciler cascades the re-stack to dependents (A6.2). + * + * Resolution: comment.issueId (the sub-issue) → its parent (Linear fetch) → + * deriveOrchestrationId(parent) → loadOrchestration → the child row for the + * sub-issue → its PR number (from the child's task record). 
All best-effort; + * a non-orchestration comment, a missing mention, or an un-started sub-issue is + * a clean no-op (no failure comment — comments are conversational). + */ +async function handleCommentTrigger(payload: LinearCommentEvent): Promise { + // Orchestration must be enabled + a workspace token resolvable. + if (!ORCHESTRATION_TABLE || !WORKSPACE_REGISTRY_TABLE) { + return; + } + const body = payload.data?.body; + const trigger = parseCommentTrigger(body); + if (!trigger.triggered) { + // Ordinary human discussion or the agent's own progress comment — ignore. + return; + } + const subIssueId = payload.data?.issueId ?? payload.data?.issue?.id; + const workspaceId = payload.organizationId ?? ''; + if (!subIssueId || !workspaceId) { + logger.info('A6 comment: missing issueId/workspace — ignoring', { has_issue: Boolean(subIssueId) }); + return; + } + + const resolved = await resolveLinearOauthToken(workspaceId, WORKSPACE_REGISTRY_TABLE); + if (!resolved) { + logger.info('A6 comment: workspace not resolvable — ignoring', { linear_workspace_id: workspaceId }); + return; + } + + const commentedIssueId = subIssueId; + const commentId = payload.data.id; + // The ✅/❌ ack must reply to the thread ROOT — Linear rejects a reply whose + // parentId is itself a reply. When the trigger is a thread-reply, data.parentId + // is the root; otherwise the comment IS the root. The 👀 still goes on the + // actual comment the human wrote (reactions work at any thread depth). + const replyTargetId = payload.data.parentId ?? commentId; + + // #247 UX.18: is the commented issue itself a PARENT epic? deriveOrchestrationId + // is a pure hash of the issue id, so the parent's own id maps to ITS + // orchestration; a sub-issue's id hashes to nothing. The maturing panel lives + // on the parent, so reviewers comment THERE ("@bgagent for the footer, …") — + // route that to the sub-issue it names. 
(Was a silent drop: the parent has no + // PR, so it fell to the standalone GSI path → miss → ignored.) + const ownOrchestrationId = deriveOrchestrationId(commentedIssueId); + const parentSnapshot = await loadOrchestration(ddb, ORCHESTRATION_TABLE, ownOrchestrationId); + if (parentSnapshot && parentSnapshot.meta.parent_linear_issue_id === commentedIssueId) { + await handleParentEpicCommentTrigger({ + orchestrationId: ownOrchestrationId, + snapshot: parentSnapshot, + workspaceId, + commentId, + replyTargetId, + trigger, + resolved, + registryTableName: WORKSPACE_REGISTRY_TABLE, + }); + return; + } + + // Sub-issue → parent → orchestration. When ANY of these don't hold (no + // parent, parent isn't an orchestration, or this isn't a STARTED child), + // the issue may still be a plain (non-orchestration) issue that ABCA opened + // a PR for — fall through to the standalone path (#247 UX.3), which iterates + // on that PR with the same 👀/reply ack but no dependency cascade. + const parentId = await fetchIssueParentId(resolved.accessToken, commentedIssueId); + const orchestrationId = parentId ? deriveOrchestrationId(parentId) : null; + const snapshot = orchestrationId + ? await loadOrchestration(ddb, ORCHESTRATION_TABLE, orchestrationId) + : null; + const child = snapshot?.children.find((c) => c.sub_issue_id === commentedIssueId); + if (!snapshot || !child || !child.child_task_id) { + await handleStandaloneCommentTrigger({ + subIssueId: commentedIssueId, + workspaceId, + commentId, + replyTargetId, + trigger, + resolved, + registryTableName: WORKSPACE_REGISTRY_TABLE, + }); + return; + } + + await iterateOrchestrationChild({ + orchestrationId: orchestrationId!, + snapshot, + child, + workspaceId, + commentId, + replyTargetId, + trigger, + resolved, + registryTableName: WORKSPACE_REGISTRY_TABLE, + }); +} + +/** + * #247 UX.18 — an ``@bgagent`` comment left on the PARENT epic. The maturing + * panel lives on the parent, so a reviewer's natural move is to comment there. 
+ * The parent has no PR of its own, so we route the request to the sub-issue it + * names (by identifier or title keyword) and iterate THAT sub-issue's PR. When + * the comment names no single sub-issue, we 👀 + post a "which one?" reply + * (with a best-effort suggestion + the create-a-sub-issue path) — NEVER a + * silent drop, and NEVER auto-creating new work (user's call). + */ +async function handleParentEpicCommentTrigger(args: { + orchestrationId: string; + snapshot: NonNullable>>; + workspaceId: string; + commentId: string; + replyTargetId: string; + trigger: CommentTrigger; + resolved: { accessToken: string; oauthSecretArn: string; workspaceSlug: string }; + registryTableName: string; +}): Promise { + const { orchestrationId, snapshot, workspaceId, commentId, replyTargetId, trigger, resolved, registryTableName } = args; + const feedbackCtx = { linearWorkspaceId: workspaceId, registryTableName }; + + // #247 UX.20: claim-once BEFORE any side-effect. Linear redelivers a comment + // webhook when the handler exceeds its ~5s ack window (this path does several + // Linear API calls and can run >5s), and EACH redelivery would otherwise + // re-react + re-post the disambiguation reply — live-caught spamming 50+ + // duplicate replies. The conditional claim (keyed on this comment id) lets + // only the FIRST delivery proceed; redeliveries no-op here. The marker + // self-expires via the table TTL. (The iterate path also has its own + // createTaskCore idempotency key — this is the outer guard that also covers + // the 👀 + the ask-reply, which have no other dedup.) 
+ const ttlEpochSeconds = Math.floor(Date.now() / 1000) + ACK_CLAIM_TTL_SECONDS; + const won = await claimCommentAck( + ddb, ORCHESTRATION_TABLE!, orchestrationId, commentId, new Date().toISOString(), ttlEpochSeconds, + ); + if (!won) { + logger.info('A6 comment (parent epic): redelivery — already handled this comment, skipping', { + orchestration_id: orchestrationId, comment_id: commentId, + }); + return; + } + + // ACK immediately — a parent comment is never silently dropped again. + await reactToComment(feedbackCtx, commentId, EMOJI_STARTED); + + // Only STARTED children with a task are iterable candidates; match against all + // real nodes for the disambiguation list, but iterate only a started one. + const match = parseParentNodeReference(trigger.instruction, snapshot.children); + const target = match.reason === null ? match.matches[0] : null; + + if (!target || !target.child_task_id) { + // No confident single match (or matched a not-yet-started node) → ask. + const reason = match.reason === 'ambiguous' ? 'ambiguous' : 'none'; + const suggestion = reason === 'none' ? 
suggestClosestNode(trigger.instruction, snapshot.children) : null; + const body = renderParentDisambiguationReply(reason, snapshot.children, suggestion); + await replyToComment(feedbackCtx, snapshot.meta.parent_linear_issue_id, replyTargetId, body); + logger.info('A6 comment (parent epic): no single iterable sub-issue matched — asked', { + orchestration_id: orchestrationId, reason, match_count: match.matches.length, + }); + return; + } + + const prNumber = await resolveChildPrNumber(target.child_task_id); + if (prNumber === null) { + const body = renderParentDisambiguationReply('none', snapshot.children, target); + await replyToComment(feedbackCtx, snapshot.meta.parent_linear_issue_id, replyTargetId, body); + logger.info('A6 comment (parent epic): matched sub-issue has no PR yet — asked', { + orchestration_id: orchestrationId, sub_issue_id: target.sub_issue_id, + }); + return; + } + + // Resolve the FULL child row (the matcher returns a trimmed view without + // ``repo``) so the iteration carries the sub-issue's repo. + const childRow = snapshot.children.find((c) => c.sub_issue_id === target.sub_issue_id)!; + + // Route to the matched sub-issue exactly as if the human had commented there. + // The 👀 is already on the parent comment; the ✅/❌ reply threads back to it. + await iterateOrchestrationChild({ + orchestrationId, + snapshot, + child: childRow, + workspaceId, + commentId, + replyTargetId, + trigger, + resolved, + registryTableName, + // #247 UX.19: the trigger comment lives on the PARENT epic, not the + // sub-issue — the reconciler must reply with the parent issue id. + triggerCommentIssueId: snapshot.meta.parent_linear_issue_id, + // Already acked on the parent comment above. 
+ skipAck: true, + prNumber, + }); + logger.info('A6 comment (parent epic): routed to sub-issue', { + orchestration_id: orchestrationId, sub_issue_id: target.sub_issue_id, pr_number: prNumber, + }); +} + +/** + * Spawn a ``coding/pr-iteration-v1`` task for one orchestration sub-issue from + * an ``@bgagent`` comment (#247 A6 + UX.18). Shared by the direct sub-issue + * path (comment on the sub-issue) and the parent-epic path (comment on the + * epic, routed here). Acks the trigger comment with 👀 (unless already acked), + * marks the task as a cascade SOURCE so the reconciler re-stacks dependents, + * and threads ✅/❌ back to ``replyTargetId`` on completion. + */ +async function iterateOrchestrationChild(args: { + orchestrationId: string; + snapshot: NonNullable>>; + child: { sub_issue_id: string; repo: string; child_task_id?: string }; + workspaceId: string; + commentId: string; + replyTargetId: string; + /** + * The Linear ISSUE the trigger comment lives on — the sub-issue for a direct + * comment, the PARENT epic for a UX.18 parent-routed comment. The reconciler + * replies ✅/❌ using THIS as commentCreate's issueId (#247 UX.19). Defaults to + * the sub-issue id. + */ + triggerCommentIssueId?: string; + trigger: CommentTrigger; + resolved: { oauthSecretArn: string; workspaceSlug: string }; + registryTableName: string; + skipAck?: boolean; + prNumber?: number; +}): Promise { + const { + orchestrationId, snapshot, child, workspaceId, commentId, replyTargetId, + trigger, resolved, registryTableName, + } = args; + const subIssueId = child.sub_issue_id; + const triggerCommentIssueId = args.triggerCommentIssueId ?? subIssueId; + + const prNumber = args.prNumber ?? (child.child_task_id ? 
await resolveChildPrNumber(child.child_task_id) : null); + if (prNumber === null || prNumber === undefined) { + logger.warn('A6 comment: sub-issue has no resolvable PR — cannot iterate', { + orchestration_id: orchestrationId, sub_issue_id: subIssueId, child_task_id: child.child_task_id, + }); + return; + } + + // Attribute to the orchestration's release user (the comment author may not + // be a linked platform user; the orchestration already ran under this id). + const platformUserId = snapshot.meta.release_context.platform_user_id; + + // #247 UX.3: ACK the request the instant we commit to acting on it. 👀 on the + // TRIGGERING comment is the zero-clutter "on it" signal. The parent-epic path + // already acked, so it passes skipAck. + if (!args.skipAck) { + await reactToComment({ linearWorkspaceId: workspaceId, registryTableName }, commentId, EMOJI_STARTED); + } + + // Idempotency: one iteration per (sub-issue, comment). The comment id is + // unique per comment, so a webhook retry of the same comment dedups. + const idempotencyKey = `iterate_${subIssueId}_${commentId}`.replace(/[^A-Za-z0-9_-]/g, '').slice(0, MAX_IDEMPOTENCY_KEY_LENGTH); + + const channelMetadata: Record = { + orchestration_id: orchestrationId, + orchestration_sub_issue_id: subIssueId, + // Mark this as a cascade SOURCE so the reconciler re-stacks dependents + // when the iteration completes (A6.2 reads this flag). + orchestration_iteration: 'true', + // #247 UX.3: the reconciler replies ✅/❌ to the thread ROOT when the + // iteration lands (threaded ack — closes the conversation the human opened). + trigger_comment_id: replyTargetId, + // #247 UX.19: the issue that comment lives on, so the reconciler's reply + // uses the right commentCreate issueId (parent epic for a routed comment; + // the sub-issue for a direct comment). 
+ trigger_comment_issue_id: triggerCommentIssueId, + linear_workspace_id: workspaceId, + linear_oauth_secret_arn: resolved.oauthSecretArn, + linear_workspace_slug: resolved.workspaceSlug, + // The agent addresses the real sub-issue (reactions/comments). + linear_issue_id: subIssueId, + }; + + try { + const result = await createTaskCore( + { + repo: child.repo, + workflow_ref: 'coding/pr-iteration-v1', + pr_number: prNumber, + task_description: buildIterationInstruction(trigger), + }, + { userId: platformUserId, channelSource: 'linear', channelMetadata, idempotencyKey }, + idempotencyKey, + ); + logger.info('A6 comment: iteration task created for sub-issue PR', { + orchestration_id: orchestrationId, sub_issue_id: subIssueId, pr_number: prNumber, status_code: result.statusCode, + }); + } catch (err) { + logger.error('A6 comment: createTaskCore threw for iteration', { + orchestration_id: orchestrationId, + sub_issue_id: subIssueId, + error: err instanceof Error ? err.message : String(err), + }); + } +} + +/** + * #247 UX.3 — the GENERALIZED comment trigger. An ``@bgagent`` comment on a + * PLAIN Linear issue (no orchestration epic) that ABCA already opened a PR for + * runs a ``coding/pr-iteration-v1`` task on that PR, with the same 👀-on-receipt + * / threaded-reply-on-completion ack as the orchestration path — but NO + * dependency cascade (there are no dependents). The issue → newest-task → PR + * link comes from the ``LinearIssueIndex`` GSI (orchestration sub-issues use + * the orchestration table instead; this is the everything-else case). + * + * The completion reply is posted by the fanout dispatcher (``dispatchToLinear``) + * — a standalone iteration carries ``trigger_comment_id`` but NO + * ``orchestration_iteration`` marker, so the reconciler ignores it and fanout + * owns the ✅/❌ reply. A clean no-op when the issue was never run by ABCA + * (GSI miss) or its task opened no PR. 
+ */ +async function handleStandaloneCommentTrigger(args: { + subIssueId: string; + workspaceId: string; + commentId: string; + /** Thread ROOT to reply to (= parentId when the trigger is a reply, else commentId). */ + replyTargetId: string; + trigger: CommentTrigger; + resolved: { accessToken: string; oauthSecretArn: string; workspaceSlug: string }; + registryTableName: string; +}): Promise { + const { subIssueId: issueId, workspaceId, commentId, replyTargetId, trigger, resolved, registryTableName } = args; + + const task = await resolveTaskByLinearIssue(ddb, process.env.TASK_TABLE_NAME!, issueId); + if (!task) { + logger.info('A6 comment (standalone): issue has no ABCA task — ignoring', { linear_issue_id: issueId }); + return; + } + const prNumber = prNumberFromTask(task); + if (prNumber === null || !task.repo) { + logger.info('A6 comment (standalone): ABCA task has no resolvable PR/repo — cannot iterate', { + linear_issue_id: issueId, task_id: task.task_id, has_repo: Boolean(task.repo), + }); + return; + } + if (!task.user_id) { + logger.warn('A6 comment (standalone): task missing user_id — cannot attribute iteration', { + linear_issue_id: issueId, task_id: task.task_id, + }); + return; + } + + // ACK the instant we commit (same as the orchestration path). + const feedbackCtx = { linearWorkspaceId: workspaceId, registryTableName }; + await reactToComment(feedbackCtx, commentId, EMOJI_STARTED); + + const idempotencyKey = `iterate_${issueId}_${commentId}`.replace(/[^A-Za-z0-9_-]/g, '').slice(0, MAX_IDEMPOTENCY_KEY_LENGTH); + const channelMetadata: Record = { + // NO orchestration_id / orchestration_iteration — the reconciler skips + // this; the fanout dispatcher posts the ✅/❌ reply on terminal. Reply to + // the thread ROOT (replyTargetId), never to a reply. 
+ trigger_comment_id: replyTargetId, + linear_issue_id: issueId, + linear_workspace_id: workspaceId, + linear_oauth_secret_arn: resolved.oauthSecretArn, + linear_workspace_slug: resolved.workspaceSlug, + }; + + try { + const result = await createTaskCore( + { + repo: task.repo, + workflow_ref: 'coding/pr-iteration-v1', + pr_number: prNumber, + task_description: buildIterationInstruction(trigger), + }, + { userId: task.user_id, channelSource: 'linear', channelMetadata, idempotencyKey }, + idempotencyKey, + ); + logger.info('A6 comment (standalone): iteration task created for issue PR', { + linear_issue_id: issueId, pr_number: prNumber, status_code: result.statusCode, + }); + } catch (err) { + logger.error('A6 comment (standalone): createTaskCore threw for iteration', { + linear_issue_id: issueId, + error: err instanceof Error ? err.message : String(err), + }); + } +} + +/** Read a child task's PR number (numeric pr_number, else parse pr_url). Null if neither. */ +async function resolveChildPrNumber(taskId: string): Promise { + try { + const res = await ddb.send(new GetCommand({ TableName: process.env.TASK_TABLE_NAME!, Key: { task_id: taskId } })); + const pr = res.Item?.pr_number; + if (typeof pr === 'number') return pr; + const url = res.Item?.pr_url; + if (typeof url === 'string') { + const m = url.match(/\/pull\/(\d+)\b/); + if (m) return Number(m[1]); + } + return null; + } catch (err) { + logger.warn('A6 comment: failed to read sub-issue task record for PR number', { + task_id: taskId, + error: err instanceof Error ? err.message : String(err), + }); + return null; + } +} + /** * Decide whether a Linear Issue event should trigger a task. * @@ -403,13 +1109,17 @@ function buildCreateTaskFailureMessage(statusCode: number, rawBody: string): str return `❌ ABCA couldn't create this task (status ${statusCode}). 
Check the ABCA admin logs for details.`; } -function buildTaskDescription(issue: LinearIssueEvent['data']): string { +function buildTaskDescription(issue: LinearIssueEvent['data'], contextHint: string = ''): string { const parts: string[] = []; if (issue.identifier && issue.title) { parts.push(`${issue.identifier}: ${issue.title}`); } else if (issue.title) { parts.push(issue.title); } + if (contextHint) { + parts.push(''); + parts.push(contextHint); + } if (issue.description && issue.description.trim()) { parts.push(''); parts.push(issue.description.trim()); @@ -423,29 +1133,58 @@ function buildTaskDescription(issue: LinearIssueEvent['data']): string { * Scans for standard markdown image references: `![alt](url)`. * Only HTTPS URLs are included (security: no HTTP, no data: URIs). * Capped at 10 images per issue to stay within attachment limits. + * + * Linear-hosted upload URLs (`uploads.linear.app`) are SKIPPED because + * they require the workspace's OAuth token to fetch — the orchestrator's + * URL-resolver runs unauthenticated and would fail closed with 401, + * killing the task before the agent ever starts. The agent picks these + * up at runtime via `mcp__linear-server__extract_images` (which mints + * fresh signed URLs) per the on-demand prompt addendum, so dropping + * them from the pre-fetch path doesn't lose coverage — it just shifts + * the fetch from "Lambda with no auth" to "agent with the OAuth token." + * + * Trade-off: Linear-hosted images skip the Bedrock Guardrail screening + * pass that runs at task-creation time. The description text itself is + * still screened via the input guardrail; the bytes are not. Acceptable + * for now — the agent treats those images as untrusted input anyway. 
*/ function extractImageUrlAttachments(description: string | undefined): Attachment[] { if (!description) return []; const imagePattern = /!\[[^\]]*\]\((https:\/\/[^)]+)\)/g; const attachments: Attachment[] = []; + let skippedLinearUploads = 0; let match: RegExpExecArray | null; while ((match = imagePattern.exec(description)) !== null) { if (attachments.length >= 10) break; const url = match[1]; + if (isLinearUploadsUrl(url)) { + skippedLinearUploads += 1; + continue; + } attachments.push({ type: 'url', url }); } - if (attachments.length > 0) { + if (attachments.length > 0 || skippedLinearUploads > 0) { logger.info('Extracted image URL attachments from Linear issue description', { count: attachments.length, + skipped_linear_uploads: skippedLinearUploads, }); } return attachments; } +function isLinearUploadsUrl(url: string): boolean { + try { + const host = new URL(url).hostname.toLowerCase(); + return host === 'uploads.linear.app' || host.endsWith('.uploads.linear.app'); + } catch { + return false; + } +} + async function lookupPlatformUser(workspaceId: string, userId: string): Promise { const key = `${workspaceId}#${userId}`; const result = await ddb.send(new GetCommand({ diff --git a/cdk/src/handlers/linear-webhook.ts b/cdk/src/handlers/linear-webhook.ts index 33f870ba..8ef61245 100644 --- a/cdk/src/handlers/linear-webhook.ts +++ b/cdk/src/handlers/linear-webhook.ts @@ -162,19 +162,29 @@ export async function handler(event: APIGatewayProxyEvent): Promise = new Set([ + TaskStatus.COMPLETED, + TaskStatus.FAILED, + TaskStatus.CANCELLED, + TaskStatus.TIMED_OUT, +]); + +/** A terminal task event extracted from a TaskTable stream record. */ +interface TerminalTaskEvent { + readonly taskId: string; + readonly status: TaskStatusType; + readonly buildPassed?: boolean; + /** Raw agent error_message, if any — drives the UX.5 failure-reply detail. 
*/ + readonly errorMessage?: string; + readonly orchestrationId?: string; + /** + * A6 cascade (#247 redesign): set when this terminal task is an + * ITERATION or RESTACK on an orchestration node (carries + * ``orchestration_sub_issue_id`` in channel_metadata but is NOT itself a + * child-row task — its task_id isn't a ``child_task_id``). On COMPLETED we + * re-stack that node's DIRECT dependents. The marker is set by the comment + * trigger (pr-iteration) and by restack tasks themselves (so a restack's + * completion cascades the next hop). + */ + readonly cascadeSubIssueId?: string; + /** + * True when the cascade source was an ITERATION (a human @bgagent comment), + * vs a restack (a predecessor-change ripple). Drives the panel's "updating + * per 's comment" vs "updating to include 's change" phrasing. + */ + readonly cascadeIsIteration?: boolean; + /** + * #247 UX.3: the Linear comment id that triggered this iteration (set only + * for iterations — a human @bgagent comment). When the iteration task lands, + * the reconciler posts a threaded ✅/❌ reply BENEATH this comment, closing + * the conversation the human opened. Absent on restack cascades (no human + * comment to reply to). + */ + readonly triggerCommentId?: string; + /** + * #247 UX.19: the Linear ISSUE the trigger comment lives on. Usually the + * iterated sub-issue, but for a comment left on the PARENT epic (routed to a + * sub-issue via UX.18) it's the PARENT issue id. The threaded ✅/❌ reply must + * use THIS as commentCreate's issueId — Linear rejects a reply whose parentId + * belongs to a different issue. Absent on older tasks → reply falls back to + * the sub-issue id (the prior behavior). + */ + readonly triggerCommentIssueId?: string; +} + +/** + * Extract a terminal-task event from a TaskTable stream record. Returns + * null for records we don't act on (REMOVE events, non-terminal + * INSERT/MODIFY images, non-orchestration tasks, malformed images).
+ */ +export function parseTerminalTaskRecord(record: DynamoDBRecord): TerminalTaskEvent | null { + if (record.eventName !== 'MODIFY' && record.eventName !== 'INSERT') return null; + const img = record.dynamodb?.NewImage; + if (!img) return null; + + const taskId = img.task_id?.S; + const status = img.status?.S as TaskStatusType | undefined; + if (!taskId || !status) return null; + if (!TERMINAL.has(status)) return null; + + // Only orchestration children carry orchestration_id. Non-orchestration + // tasks stream through here too (single consumer on the whole table) — + // skip them cheaply. + // + // createTaskCore persists channel metadata as a nested ``channel_metadata`` + // MAP, NOT as a top-level attribute — so read orchestration_id from there. + // (A top-level ``orchestration_id`` exists on the TaskRecord type for + // future use, but createTaskCore doesn't populate it from channel context; + // releaseChild threads the id via channelMetadata.orchestration_id.) + const orchestrationId = + img.orchestration_id?.S + ?? img.channel_metadata?.M?.orchestration_id?.S; + if (!orchestrationId) return null; + + const buildPassed = img.build_passed?.BOOL; + const errorMessage = img.error_message?.S; + + // A6 cascade marker: an iteration/restack task names the node it acted on + // via channel_metadata. A restack task also carries + // ``restack_predecessor_sub_issue_id`` — its presence (or the explicit + // ``orchestration_iteration`` flag the comment trigger sets) marks this as + // a cascade SOURCE rather than a normal child task. We resolve the acted-on + // node from ``orchestration_sub_issue_id`` and confirm "is this a child row?" + // in the handler (a child-row task drives normal gating; a non-child-row + // task with this marker drives the cascade). 
+ const cm = img.channel_metadata?.M; + const isIteration = cm?.orchestration_iteration?.S === 'true'; + const isCascadeSource = + cm?.restack_predecessor_sub_issue_id?.S !== undefined || isIteration; + const cascadeSubIssueId = isCascadeSource ? cm?.orchestration_sub_issue_id?.S : undefined; + // #247 UX.3: the human comment that triggered this iteration, if any. + const triggerCommentId = isIteration ? cm?.trigger_comment_id?.S : undefined; + // #247 UX.19: the issue that comment lives on (parent epic for a UX.18 + // parent-routed comment; the sub-issue for a direct comment). + const triggerCommentIssueId = isIteration ? cm?.trigger_comment_issue_id?.S : undefined; + + return { + taskId, + status, + ...(buildPassed !== undefined && { buildPassed }), + ...(errorMessage !== undefined && { errorMessage }), + orchestrationId, + ...(cascadeSubIssueId !== undefined && { cascadeSubIssueId }), + ...(cascadeSubIssueId !== undefined && { cascadeIsIteration: isIteration }), + ...(triggerCommentId !== undefined && { triggerCommentId }), + ...(triggerCommentIssueId !== undefined && { triggerCommentIssueId }), + }; +} + +/** + * Resolve the sub_issue_id for a terminal task within its orchestration. + * Prefers the ChildTaskIndex GSI (task_id → row); the orchestration_id on + * the task record is the authoritative grouping. + */ +async function resolveSubIssueId(taskId: string): Promise { + const res = await ddb.send(new QueryCommand({ + TableName: ORCHESTRATION_TABLE, + IndexName: OrchestrationTable.CHILD_TASK_INDEX, + KeyConditionExpression: 'child_task_id = :tid', + ExpressionAttributeValues: { ':tid': taskId }, + Limit: 1, + })); + const item = res.Items?.[0] as OrchestrationChildRow | undefined; + return item?.sub_issue_id ?? null; +} + +/** + * Batch-read each child's PR url from the TaskTable for the final rollup + * (#323). 
pr_url lands on the TaskRecord in a separate write from the + * status transition, so it is not on the orchestration row — but by the + * time the orchestration is all-terminal the PRs have settled, so a read + * here is reliable. Best-effort: a failed/partial read just yields fewer + * links (never throws out of the reconcile). Returns ``sub_issue_id → pr_url``. + */ +async function resolveChildPrUrls( + children: readonly OrchestrationChildRow[], +): Promise> { + const withTask = children.filter((c) => c.child_task_id); + if (withTask.length === 0) return {}; + const taskToSub = new Map(withTask.map((c) => [c.child_task_id!, c.sub_issue_id])); + const keys = [...taskToSub.keys()].map((task_id) => ({ task_id })); + const out: Record = {}; + try { + // BatchGet caps at 100 keys/request; an orchestration is far smaller, + // but chunk defensively so a large epic never throws on the limit. + for (let i = 0; i < keys.length; i += 100) { + const chunk = keys.slice(i, i + 100); + const res = await ddb.send(new BatchGetCommand({ + RequestItems: { [TASK_TABLE]: { Keys: chunk, ProjectionExpression: 'task_id, pr_url' } }, + })); + for (const rec of res.Responses?.[TASK_TABLE] ?? []) { + const taskId = rec.task_id as string | undefined; + const prUrl = rec.pr_url as string | undefined; + const sub = taskId ? taskToSub.get(taskId) : undefined; + if (sub && prUrl) out[sub] = prUrl; + } + } + } catch (err) { + logger.warn('Rollup pr_url batch-read failed (non-fatal) — rollup posts without links', { + error: err instanceof Error ? err.message : String(err), + }); + } + return out; +} + +/** + * #247: read the integration node's deploy-preview screenshot URL from its + * TaskRecord (persisted by the screenshot pipeline) so the parent panel can + * embed the combined preview. Best-effort — null when the node has no task, + * no preview deployed yet, or the read fails. Only the integration node is + * read (one Get), since that's the only node whose preview is "combined". 
+ */ +async function resolveCombinedScreenshotUrl( + taskId?: string, +): Promise<{ url: string; previewUrl?: string } | null> { + if (!taskId) return null; + try { + const res = await ddb.send(new GetCommand({ + TableName: TASK_TABLE, + Key: { task_id: taskId }, + ProjectionExpression: 'screenshot_url, screenshot_preview_url', + })); + const url = res.Item?.screenshot_url; + if (typeof url !== 'string' || url.length === 0) return null; + const previewUrl = res.Item?.screenshot_preview_url; + // #247 UX.17: the live preview-deploy URL makes the panel's combined + // preview a clickable deep-link to the running combined site. + return { + url, + ...(typeof previewUrl === 'string' && previewUrl.length > 0 && { previewUrl }), + }; + } catch (err) { + logger.warn('Combined screenshot read failed (non-fatal) — panel posts without it', { + task_id: taskId, error: err instanceof Error ? err.message : String(err), + }); + return null; + } +} + +/** Apply one terminal child's reconcile plan. */ +async function reconcileTerminalChild(evt: TerminalTaskEvent): Promise { + const orchestrationId = evt.orchestrationId!; + + const subIssueId = await resolveSubIssueId(evt.taskId); + if (!subIssueId) { + logger.warn('Reconciler could not resolve sub_issue_id for terminal task', { + task_id: evt.taskId, + orchestration_id: orchestrationId, + }); + return; + } + + const snapshot = await loadOrchestration(ddb, ORCHESTRATION_TABLE, orchestrationId); + if (!snapshot) { + logger.warn('Reconciler found no orchestration snapshot (TTL-reaped?)', { + orchestration_id: orchestrationId, + task_id: evt.taskId, + }); + return; + } + + const children: ReconcileChild[] = snapshot.children.map((c) => ({ + sub_issue_id: c.sub_issue_id, + depends_on: c.depends_on, + child_status: c.child_status, + })); + + const outcome: TerminalOutcome = { + sub_issue_id: subIssueId, + status: evt.status as TerminalOutcome['status'], + ...(evt.buildPassed !== undefined && { build_passed: evt.buildPassed }), + }; + + 
const plan = computeReconcilePlan(outcome, children); + const now = new Date().toISOString(); + + // 1. Persist status updates (terminal child + any skips). Each is + // conditional on the row not already being in the target state so a + // replayed event is a no-op. + for (const update of plan.statusUpdates) { + // ``toRelease`` rows are handled by releaseChild below (which flips + // them to released conditionally); skip them here to avoid a + // double-write race. + if (plan.toRelease.includes(update.sub_issue_id)) continue; + try { + await ddb.send(new UpdateCommand({ + TableName: ORCHESTRATION_TABLE, + Key: { orchestration_id: orchestrationId, sub_issue_id: update.sub_issue_id }, + UpdateExpression: 'SET child_status = :s, updated_at = :now', + ConditionExpression: 'child_status <> :s', + ExpressionAttributeValues: { ':s': update.child_status, ':now': now }, + })); + } catch (err) { + if (isConditionalCheckFailed(err)) continue; // already in target state + throw err; + } + } + + // 2. Re-evaluate releasability against a FRESH read, not the initial + // snapshot. + // + // Concurrency (failure-matrix row 3): when two predecessors of the + // same child D finish simultaneously, each reconciler invocation + // loads its own snapshot, persists only ITS child as succeeded, and + // — working from its stale snapshot — sees D's OTHER predecessor not + // yet succeeded, so neither releases D and it strands ``blocked``. + // The plan's ``toRelease`` (computed from the initial snapshot) is + // therefore unreliable under concurrency. Reloading after the + // status write means whichever invocation reads last sees BOTH + // predecessors succeeded and releases D; the conditional + // ready→released flip in releaseChild dedups if both happen to see it. + const fresh = await loadOrchestration(ddb, ORCHESTRATION_TABLE, orchestrationId); + const freshChildren = fresh?.children ?? 
snapshot.children; + const succeeded = new Set( + freshChildren.filter((c) => c.child_status === 'succeeded').map((c) => c.sub_issue_id), + ); + const releasableRows = freshChildren + .filter((c) => c.child_status === 'blocked' && c.depends_on.every((d) => succeeded.has(d))) + .map((c) => ({ ...c, child_status: 'ready' as const })); + + if (releasableRows.length > 0) { + const releaseCtx = (fresh ?? snapshot).meta.release_context; + // #331: throttle this pass to the user's free concurrency budget so a + // wide fan-out doesn't over-release children that admission then + // hard-fails (the cap is a throttle, not a guillotine). Leftover ready + // children are released by the next reconcile (a sibling completing + // re-fires this handler) or the #303 sweep, as slots free. Unset table + // → release all (back-compat; admission still gates). + const budget = USER_CONCURRENCY_TABLE + ? await readConcurrencyBudget(ddb, USER_CONCURRENCY_TABLE, releaseCtx.platform_user_id, MAX_CONCURRENT) + : undefined; + const results = await releaseReadyChildren( + ddb, + ORCHESTRATION_TABLE, + releasableRows, + releaseCtx, + createTaskCore, + now, + // #247 A4: pass the full child set so each releasable child's base + // branch can be derived from its predecessors' persisted branches. + freshChildren, + 'main', + budget, + ); + logger.info('Reconciler released children', { + orchestration_id: orchestrationId, + trigger_sub_issue_id: subIssueId, + released: results.filter((r) => r.kind === 'released').length, + requested: releasableRows.length, + ...(budget !== undefined && { concurrency_budget: budget }), + }); + } + + // Refresh the panel + settle the parent state against the fresh view. + await refreshPanelAndSettle(orchestrationId, freshChildren, (fresh ?? snapshot).meta, now); +} + +/** + * #247 UX.2: maintain the SINGLE maturing epic panel — one comment, edited in + * place — and settle the parent state when the epic reaches all-terminal. 
+ * Shared by the normal child-gating path (``reconcileTerminalChild``) AND the + * cascade path (``cascadeRestack``): a re-stack/iteration task completing must + * ALSO clear its node's ``🔄 updating`` row and re-run the completion check, or + * an epic whose only remaining activity is a cascade hangs forever at + * "🔄 N/M" with a stale updating row (live-caught under the UX.6 stress test — + * a re-stack of a no-dependents node returned early and never refreshed). + * + * Best-effort; only when the workspace registry is configured. The panel BODY + * edit is idempotent (same body = no-op), so it always runs; the parent-STATE + * mirror is claimed once via ``claimRollup`` on the first all-terminal caller. + */ +async function refreshPanelAndSettle( + orchestrationId: string, + children: readonly OrchestrationChildRow[], + meta: { linear_workspace_id: string; parent_linear_issue_id: string; status_comment_id?: string; release_context: { channel_source?: string } }, + now: string, +): Promise { + if (!WORKSPACE_REGISTRY_TABLE) return; + + // Completion check: every child terminal (succeeded/failed/skipped — + // released is NOT terminal). + const allTerminal = children.every((c) => + c.child_status === 'succeeded' || c.child_status === 'failed' || c.child_status === 'skipped', + ); + + const prUrls = await resolveChildPrUrls(children); + const integration = children.find((c) => isIntegrationNode(c.sub_issue_id)); + const combinedPrUrl = integration ? prUrls[integration.sub_issue_id] : undefined; + // #247 (task #57): embed the integration node's combined deploy preview in + // the panel when the epic is complete. Only read it on the all-terminal + // settle (the integration node has deployed by then); skip the extra Get on + // every in-flight edit. + const combinedScreenshot = (allTerminal && integration) + ? 
await resolveCombinedScreenshotUrl(integration.child_task_id) + : null; + + if (allTerminal) { + logger.info('Orchestration complete', { + event: ORCH_LOG.orchestrationComplete, + orchestration_id: orchestrationId, + parent_linear_issue_id: meta.parent_linear_issue_id, + succeeded: children.filter((c) => c.child_status === 'succeeded').length, + failed: children.filter((c) => c.child_status === 'failed').length, + skipped: children.filter((c) => c.child_status === 'skipped').length, + }); + } + + // Idempotency for the PARENT-STATE mirror: the orchestration can reach "all + // terminal" on more than one stream event. Mirror only once, on the first + // all-terminal caller. The panel BODY edit is naturally idempotent. + const won = !allTerminal || await claimRollup(ddb, ORCHESTRATION_TABLE, orchestrationId, now); + + const newId = await upsertEpicPanel({ + ctx: { linearWorkspaceId: meta.linear_workspace_id, registryTableName: WORKSPACE_REGISTRY_TABLE }, + parentLinearIssueId: meta.parent_linear_issue_id, + ...(meta.status_comment_id !== undefined && { statusCommentId: meta.status_comment_id }), + children, + prUrls, + ...(combinedPrUrl !== undefined && { combinedPrUrl }), + ...(combinedScreenshot !== null && { combinedScreenshotUrl: combinedScreenshot.url }), + ...(combinedScreenshot?.previewUrl !== undefined && { combinedPreviewUrl: combinedScreenshot.previewUrl }), + inProgress: !allTerminal, + mirrorParentState: allTerminal ? won : false, + ...(meta.release_context.channel_source !== undefined && { + channelSource: meta.release_context.channel_source as ChannelSource, + }), + }); + // Persist a freshly-created panel comment id so later edits reuse it. + if (newId && !meta.status_comment_id) { + try { + await setStatusCommentId(ddb, ORCHESTRATION_TABLE, orchestrationId, newId); + } catch (err) { + logger.warn('Failed to persist panel comment id (non-fatal)', { + orchestration_id: orchestrationId, error: err instanceof Error ? 
err.message : String(err), + }); + } + } +} + +/** + * A6 cascade (#247 redesign). A terminal ITERATION or RESTACK task on node X + * just completed — re-stack X's DIRECT dependents so they pick up X's new + * branch. Each dependent's own restack completion re-fires this handler and + * cascades the next hop (see ``planDirectRestack``). Only on COMPLETED — a + * failed iteration leaves dependents on the prior (still-valid) base. + * + * Idempotent: the per-dependent task's idempotency key includes the SOURCE + * task id, so the same completion never spawns a dependent's restack twice; + * a different source (the next real change) gets a new key. Best-effort — + * a failure to spawn one dependent does not block the others. + */ +async function cascadeRestack(evt: TerminalTaskEvent): Promise { + const orchestrationId = evt.orchestrationId!; + const changedSubIssueId = evt.cascadeSubIssueId!; + const succeeded = evt.status === TaskStatus.COMPLETED && evt.buildPassed !== false; + const now = new Date().toISOString(); + + // #247 UX.3: an ITERATION carries the human comment that triggered it. When + // it lands — success OR failure — reply ✅/❌ in a thread beneath that + // comment, closing the conversation the human opened. This runs regardless + // of whether there are dependents to re-stack (a leaf node has none) and + // before the success-gate below (a failed iteration still gets its ❌ reply). + if (evt.triggerCommentId) { + await replyToIterationComment(evt, changedSubIssueId, succeeded); + } + + // Only a successful change should cascade onto dependents. 
+ if (!succeeded) { + logger.info('A6 cascade: source task not successful — not cascading', { + orchestration_id: orchestrationId, + changed_sub_issue_id: changedSubIssueId, + status: evt.status, + }); + return; + } + + const snapshot = await loadOrchestration(ddb, ORCHESTRATION_TABLE, orchestrationId); + if (!snapshot) { + logger.warn('A6 cascade: orchestration snapshot not found', { orchestration_id: orchestrationId }); + return; + } + + const steps = planDirectRestack(snapshot.children, changedSubIssueId); + if (steps.length === 0) { + logger.info('A6 cascade: no started direct dependents to re-stack', { + orchestration_id: orchestrationId, + changed_sub_issue_id: changedSubIssueId, + }); + // The cascade source (this re-stack/iteration) itself just completed and + // carried a '🔄 updating' row on the panel. With no dependents to ripple + // to, NOTHING else will fire for this node — so we MUST refresh here to + // clear its updating row and re-run the completion check. Without this, an + // epic whose only remaining activity is a leaf-node re-stack hangs forever + // at "🔄 N/M" with a stale updating row (live-caught, UX.6 stress test). + // Re-load so the panel reflects this node's freshly-persisted terminal + // status, then settle. + const fresh = await loadOrchestration(ddb, ORCHESTRATION_TABLE, orchestrationId); + await refreshPanelAndSettle(orchestrationId, (fresh ?? snapshot).children, (fresh ?? snapshot).meta, now); + return; + } + + logger.info('A6 cascade: re-stacking direct dependents', { + orchestration_id: orchestrationId, + changed_sub_issue_id: changedSubIssueId, + source_task_id: evt.taskId, + dependent_count: steps.length, + }); + + // Human-readable label for the changed node (the predecessor that was + // revised), used in the surfacing comments. Prefer its Linear identifier. 
+ const meta = snapshot.meta; + const changedRow = snapshot.children.find((c) => c.sub_issue_id === changedSubIssueId); + // Friendly short name — for the integration node this is "the integration", + // NOT its raw synthetic title (which read clumsily in the possessive cascade + // reason "…'s change"; live-caught under the UX.6 stress test). + const changedLabel = cascadeNodeLabel(changedSubIssueId, changedRow?.linear_identifier, changedRow?.title); + + const feedbackCtx = WORKSPACE_REGISTRY_TABLE + ? { linearWorkspaceId: meta.linear_workspace_id, registryTableName: WORKSPACE_REGISTRY_TABLE } + : undefined; + + const updatingIds: string[] = []; + for (const step of steps) { + const created = await spawnRestackTask(step, meta.release_context.platform_user_id, evt.taskId, changedSubIssueId); + // Surface ONLY on a genuinely NEW restack task (201). A 200 means an + // idempotent replay (the cascade source's stream record is redelivered + // multiple times — observed 3× live), so don't re-mark. 'failed' = skip. + if (created !== 'created') continue; + updatingIds.push(step.child.sub_issue_id); + } + + // #247 UX.2: instead of standalone '🔄 Re-stacked' / 'revised' comments, + // refresh the SINGLE epic panel so the impacted rows show '🔄 updating per + // ' and the header reverts to in-progress. The dependent's own + // sub-issue gets the react/reply ack (UX.3), not a status comment here. The + // 'updating' rows settle back to ✅ when their restack tasks complete — those + // completions route to cascadeRestack (NOT reconcileTerminalChild) and clear + // the row via refreshPanelAndSettle (the no-dependents path), per UX.15. + if (feedbackCtx && updatingIds.length > 0) { + // A cascade re-opened an epic that may have ALREADY completed (a comment on + // a finished epic). 
Release the once-only rollup claim so the parent state + // can re-settle (👀→✅) when the re-stacks finish — else claimRollup stays + // failed forever and the reaction never re-mirrors (#247 UX.15 stress-caught). + await clearRollupClaim(ddb, ORCHESTRATION_TABLE, orchestrationId, now); + const reason = evt.cascadeIsIteration + ? `per ${changedLabel}'s comment` + : `to include ${changedLabel}'s change`; + const updating: Record = {}; + for (const id of updatingIds) updating[id] = reason; + const prUrls = await resolveChildPrUrls(snapshot.children); + const integration = snapshot.children.find((c) => isIntegrationNode(c.sub_issue_id)); + await upsertEpicPanel({ + ctx: feedbackCtx, + parentLinearIssueId: meta.parent_linear_issue_id, + ...(meta.status_comment_id !== undefined && { statusCommentId: meta.status_comment_id }), + children: snapshot.children, + prUrls, + updating, + ...(integration && prUrls[integration.sub_issue_id] !== undefined + && { combinedPrUrl: prUrls[integration.sub_issue_id] }), + inProgress: true, // a cascade re-opened the epic + ...(meta.release_context.channel_source !== undefined + && { channelSource: meta.release_context.channel_source as ChannelSource }), + }); + } +} + +/** + * #247 UX.3: post the threaded ✅/❌ reply beneath the human ``@bgagent`` + * comment that triggered this iteration. The 👀 reaction already landed (the + * processor's instant ack); this reply closes the loop when the work lands. + * + * Idempotent: the cascade source's stream record is redelivered multiple times + * (observed 3× live), so we claim the right to reply exactly once by + * conditionally stamping ``ack_replied_at`` on the iteration task's own + * TaskTable record (its ``task_id`` is the per-iteration unit). The first + * caller wins and posts; redeliveries lose the conditional write and skip. + * Best-effort throughout — a Linear or DDB hiccup never blocks the cascade. 
+ */
+async function replyToIterationComment(
+  evt: TerminalTaskEvent,
+  changedSubIssueId: string,
+  succeeded: boolean,
+): Promise<void> {
+  if (!WORKSPACE_REGISTRY_TABLE) return;
+  const commentId = evt.triggerCommentId!;
+
+  // Resolve the workspace for the reply. The iteration task carries it in
+  // channel_metadata; rather than re-read the record, load the orchestration
+  // meta (already cached-cheap) for the workspace id.
+  const snapshot = await loadOrchestration(ddb, ORCHESTRATION_TABLE, evt.orchestrationId!);
+  if (!snapshot) return;
+  const ctx = {
+    linearWorkspaceId: snapshot.meta.linear_workspace_id,
+    registryTableName: WORKSPACE_REGISTRY_TABLE,
+  };
+
+  // Claim the one reply for this iteration task. The conditional write only
+  // succeeds while ``ack_replied_at`` is absent, so reaching the code after
+  // this try block means this invocation won the claim.
+  try {
+    await ddb.send(new UpdateCommand({
+      TableName: TASK_TABLE,
+      Key: { task_id: evt.taskId },
+      UpdateExpression: 'SET ack_replied_at = :now',
+      ConditionExpression: 'attribute_not_exists(ack_replied_at)',
+      ExpressionAttributeValues: { ':now': new Date().toISOString() },
+    }));
+  } catch (err) {
+    if (!isConditionalCheckFailed(err)) {
+      logger.warn('UX.3 ack: claim write failed (skipping reply)', {
+        task_id: evt.taskId,
+        error: err instanceof Error ? err.message : String(err),
+      });
+    }
+    return; // lost the claim (replay) or errored → don't double-reply
+  }
+
+  // Everything past the claim is best-effort. Letting a failure propagate
+  // would only abort the caller's cascade: the claim is already stamped, so a
+  // redelivered record skips the reply anyway and nothing here is retried.
+  try {
+    const body = succeeded
+      ? await buildIterationAckSuccess(evt)
+      : renderFailureReply({
+        status: evt.status,
+        buildPassed: evt.buildPassed,
+        ...(evt.errorMessage !== undefined && { errorMessage: evt.errorMessage }),
+        taskId: evt.taskId,
+      });
+    // The reply's issueId MUST be the issue the trigger comment lives on —
+    // Linear rejects a threaded reply whose parentId belongs to a different
+    // issue. For a comment left on the PARENT epic (UX.18 routing) that's the
+    // parent issue, NOT changedSubIssueId. Fall back to the sub-issue id for
+    // tasks created before UX.19 (no triggerCommentIssueId persisted).
+    const replyIssueId = evt.triggerCommentIssueId ?? changedSubIssueId;
+    await replyToComment(ctx, replyIssueId, commentId, body);
+
+    // #247 UX.21: settle the comment + sub-issue so all three views agree (panel
+    // row, sub-issue state, comment reaction) — the platform owns this, not the
+    // agent (whose prompt-driven state-setting flapped In Progress/In Review).
+    //   - swap the TRIGGER comment's 👀 → ✅ (success) / ❌ (failure), so the
+    //     comment itself reads done at a glance, not just the threaded reply.
+    //   - on success, advance the SUB-ISSUE to In Review (its PR is updated &
+    //     open, awaiting human merge — same convention the epic uses). On
+    //     failure, leave the state (the ❌ + reply convey it). Never demote.
+    // Best-effort + idempotent (the ack_replied_at claim above already gates this
+    // to once per iteration; swapCommentReaction/transition re-converge anyway).
+    await swapCommentReaction(ctx, commentId, succeeded ? EMOJI_SUCCESS : EMOJI_FAILURE);
+    if (succeeded) {
+      await transitionIssueState(ctx, changedSubIssueId, 'started', ['In Review']);
+    }
+  } catch (err) {
+    logger.warn('UX.3 ack: reply/settle failed (non-fatal)', {
+      task_id: evt.taskId,
+      comment_id: commentId,
+      error: err instanceof Error ? err.message : String(err),
+    });
+  }
+}
+
+/** Build the ✅ ack reply, linking the (re-pushed) PR when resolvable. */
+async function buildIterationAckSuccess(evt: TerminalTaskEvent): Promise<string> {
+  const prNumber = await resolvePrNumber(evt.taskId);
+  return prNumber !== null
+    ? `✅ Updated — PR #${prNumber}.`
+    : '✅ Updated.';
+}
+
+/**
+ * Spawn one coding/restack-v1 task for a direct dependent. Best-effort.
+ * Returns ``'created'`` for a genuinely new task (201), ``'exists'`` for an
+ * idempotent replay (200 — the source event was redelivered), or ``'failed'``.
+ * The caller surfaces the re-stack to the user ONLY on ``'created'`` so
+ * redelivered stream records don't post duplicate comments.
+ */ +async function spawnRestackTask( + step: RestackStep, + platformUserId: string, + sourceTaskId: string, + changedSubIssueId: string, +): Promise<'created' | 'exists' | 'failed'> { + const child = step.child; + const prNumber = await resolvePrNumber(child.child_task_id); + if (prNumber === null) { + logger.warn('A6 cascade: dependent has no resolvable PR number — skipping', { + orchestration_id: child.orchestration_id, + sub_issue_id: child.sub_issue_id, + child_task_id: child.child_task_id, + }); + return 'failed'; + } + + // Idempotency keyed on the SOURCE task id: this exact completion re-stacks + // a given dependent at most once. Within [A-Za-z0-9_-], ≤128 chars. + const idempotencyKey = `restack_${child.sub_issue_id}_${sourceTaskId}`.replace(/[^A-Za-z0-9_-]/g, '').slice(0, MAX_IDEMPOTENCY_KEY_LENGTH); + + try { + const result = await createTaskCore( + { + repo: child.repo, + workflow_ref: 'coding/restack-v1', + pr_number: prNumber, + }, + { + userId: platformUserId, + channelSource: 'webhook', + channelMetadata: { + orchestration_id: child.orchestration_id, + // This dependent is the next cascade SOURCE: when its restack + // completes, parseTerminalTaskRecord sees restack_predecessor_* + // and cascades to ITS dependents. + orchestration_sub_issue_id: child.sub_issue_id, + restack_predecessor_sub_issue_id: changedSubIssueId, + // repo.py merges these updated predecessor branches into the + // dependent's existing branch before the agent runs. + orchestration_merge_branches: JSON.stringify(step.mergeBranches), + }, + idempotencyKey, + }, + idempotencyKey, + ); + logger.info('A6 cascade: created restack task for dependent', { + orchestration_id: child.orchestration_id, + sub_issue_id: child.sub_issue_id, + pr_number: prNumber, + status_code: result.statusCode, + }); + // 201 = newly created, 200 = idempotent replay (task already existed from a + // prior delivery of this same source event). 
Only 201 should surface a
+    // user-facing comment; 200 means we already did. Other codes = not created.
+    if (result.statusCode === 201) return 'created';
+    if (result.statusCode === 200) return 'exists';
+    return 'failed';
+  } catch (err) {
+    logger.error('A6 cascade: createTaskCore threw for dependent', {
+      orchestration_id: child.orchestration_id,
+      sub_issue_id: child.sub_issue_id,
+      error: err instanceof Error ? err.message : String(err),
+    });
+    return 'failed';
+  }
+}
+
+/**
+ * Read a dependent's PR number from its TaskRecord. Prefers numeric
+ * ``pr_number``; orchestration child tasks commonly persist only ``pr_url``
+ * (``.../pull/N``) with ``pr_number`` null — fall back to parsing it.
+ *
+ * Returns ``null`` when no task id is given, when neither attribute yields a
+ * number, or when the read fails (logged as a warning, never thrown).
+ */
+async function resolvePrNumber(taskId?: string): Promise<number | null> {
+  if (!taskId) return null;
+  try {
+    // Project only the two attributes read below instead of the whole record.
+    const res = await ddb.send(new GetCommand({
+      TableName: TASK_TABLE,
+      Key: { task_id: taskId },
+      ProjectionExpression: 'pr_number, pr_url',
+    }));
+    const pr = res.Item?.pr_number;
+    if (typeof pr === 'number') return pr;
+    const url = res.Item?.pr_url;
+    if (typeof url === 'string') {
+      // Take the digits that follow "/pull/" in the PR URL.
+      const m = url.match(/\/pull\/(\d+)\b/);
+      if (m) return Number(m[1]);
+    }
+    return null;
+  } catch (err) {
+    logger.warn('A6 cascade: failed to read dependent TaskRecord for PR number', {
+      task_id: taskId,
+      error: err instanceof Error ? err.message : String(err),
+    });
+    return null;
+  }
+}
+
+/**
+ * Lambda entry point — TaskTable stream handler.
+ *
+ * Processes records sequentially; a failure on one record throws so the
+ * stream retries the batch (idempotent replay is safe). Non-terminal /
+ * non-orchestration records are skipped cheaply.
+ */
+export async function handler(event: DynamoDBStreamEvent): Promise {
+  let processed = 0;
+  for (const record of event.Records) {
+    const evt = parseTerminalTaskRecord(record);
+    if (!evt) continue;
+    // A6 cascade: an iteration/restack task on a node X (NOT a child-row task)
+    // re-stacks X's direct dependents. Routed here, not through child gating.
+ if (evt.cascadeSubIssueId) { + await cascadeRestack(evt); + } else { + await reconcileTerminalChild(evt); + } + processed += 1; + } + logger.info('Orchestration reconciler batch processed', { + records: event.Records.length, + reconciled: processed, + }); +} + +function isConditionalCheckFailed(err: unknown): boolean { + return ( + typeof err === 'object' + && err !== null + && 'name' in err + && (err as { name?: string }).name === 'ConditionalCheckFailedException' + ); +} diff --git a/cdk/src/handlers/reconcile-stranded-orchestrations.ts b/cdk/src/handlers/reconcile-stranded-orchestrations.ts new file mode 100644 index 00000000..a550372d --- /dev/null +++ b/cdk/src/handlers/reconcile-stranded-orchestrations.ts @@ -0,0 +1,257 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * Scheduled backstop for Linear orchestration (#247 A3, gap #303). + * + * The live reconciler (``orchestration-reconciler``) releases + * dependency-unblocked children by reacting to TaskTable-stream terminal + * events. 
If the reconciler is unavailable when a child reaches terminal + * state (deploy window, throttle, OOM, a poison-record batch parked in + * the DLQ), **that stream event is lost and never reprocessed** — the + * dependent children never get released and the orchestration stalls + * forever with no recovery. + * + * Observed live on dev (2026-06-09): a child reached COMPLETED during a + * reconciler OOM window; after the fix deployed, the completion event was + * gone, so its dependent stayed ``blocked`` until a manual nudge. + * + * This scheduled sweep is the recovery path. It also fixes the + * crash-after-flip hole that the F2 fix relies on (a child stuck + * ``released`` whose task is long-terminal, or a ``ready`` child whose + * release never created a task — see + * ``docs/research/orchestration-reconciler-correctness.md``). + * + * For each active orchestration it re-derives the gating truth from + * persisted state and: + * - releases any ``blocked``/``ready`` child whose predecessors are all + * ``succeeded`` (lost release-event recovery), and + * - re-evaluates children whose own task already reached terminal but + * whose row never advanced (lost terminal-event recovery), advancing + * the row + cascading skips/releases accordingly. + * + * Idempotent: ``releaseChild`` is idempotency-keyed and the row flips are + * conditional, so re-running the sweep (or racing the live reconciler) is + * safe. 
+ */ + +import { DynamoDBClient } from '@aws-sdk/client-dynamodb'; +import { + DynamoDBDocumentClient, + ScanCommand, + GetCommand, + UpdateCommand, +} from '@aws-sdk/lib-dynamodb'; +import { createTaskCore } from './shared/create-task-core'; +import { logger } from './shared/logger'; +import { readConcurrencyBudget, releaseReadyChildren } from './shared/orchestration-release'; +import { + loadOrchestration, + ORCHESTRATION_META_SK, + type OrchestrationChildRow, +} from './shared/orchestration-store'; +import { TaskStatus, type TaskStatusType } from '../constructs/task-status'; + +const ddb = DynamoDBDocumentClient.from(new DynamoDBClient({})); +const ORCHESTRATION_TABLE = process.env.ORCHESTRATION_TABLE_NAME!; +const TASK_TABLE = process.env.TASK_TABLE_NAME!; +// #331: throttle the sweep's releases to the user's free concurrency budget +// too (it is the drain path for children left ``ready`` by the live +// reconciler's throttle). Unset → release-all (back-compat; admission gates). +const USER_CONCURRENCY_TABLE = process.env.USER_CONCURRENCY_TABLE_NAME; +const MAX_CONCURRENT = Number(process.env.MAX_CONCURRENT_TASKS_PER_USER ?? '10'); + +/** Terminal child-statuses (orchestration-local). */ +const TERMINAL_CHILD = new Set(['succeeded', 'failed', 'skipped']); + +/** A task is success for gating iff COMPLETED with build not-failed. */ +function taskIsSuccess(rec: Record | undefined): boolean { + return rec?.status === TaskStatus.COMPLETED && rec?.build_passed !== false; +} +function taskIsTerminal(status: TaskStatusType | undefined): boolean { + return status === TaskStatus.COMPLETED || status === TaskStatus.FAILED + || status === TaskStatus.CANCELLED || status === TaskStatus.TIMED_OUT; +} + +/** Scan the table for parent-meta rows → one per orchestration. 
*/
+async function findOrchestrationIds(): Promise<string[]> {
+  const ids: string[] = [];
+  // Scan pagination cursor; undefined on the first page and after the last.
+  let lastKey: Record<string, unknown> | undefined;
+  do {
+    const resp = await ddb.send(new ScanCommand({
+      TableName: ORCHESTRATION_TABLE,
+      FilterExpression: 'sub_issue_id = :meta',
+      ExpressionAttributeValues: { ':meta': ORCHESTRATION_META_SK },
+      ProjectionExpression: 'orchestration_id',
+      ExclusiveStartKey: lastKey as Record<string, unknown> | undefined,
+    }));
+    for (const item of resp.Items ?? []) {
+      if (item.orchestration_id) ids.push(item.orchestration_id as string);
+    }
+    lastKey = resp.LastEvaluatedKey as Record<string, unknown> | undefined;
+  } while (lastKey);
+  return ids;
+}
+
+/** Fetch a released child's task record (status + build_passed). */
+async function getTaskRecord(taskId: string): Promise<Record<string, unknown> | undefined> {
+  const res = await ddb.send(new GetCommand({ TableName: TASK_TABLE, Key: { task_id: taskId } }));
+  return res.Item;
+}
+
+/**
+ * Reconcile one orchestration from persisted truth. Returns the number of
+ * children released by this pass (for logging/metrics).
+ *
+ * Returns 0 when the orchestration cannot be loaded, when every child is
+ * already terminal, or when nothing is releasable. DynamoDB errors other than
+ * a failed condition check propagate to the caller.
+ */
+async function reconcileOrchestration(orchestrationId: string): Promise<number> {
+  const snap = await loadOrchestration(ddb, ORCHESTRATION_TABLE, orchestrationId);
+  if (!snap) return 0;
+
+  // Skip orchestrations already fully terminal — nothing to recover.
+  const allTerminal = snap.children.every((c) => TERMINAL_CHILD.has(c.child_status));
+  if (allTerminal) return 0;
+
+  const now = new Date().toISOString();
+
+  // 1. Recover LOST TERMINAL events: a ``released`` child whose task has
+  //    already reached terminal but whose row never advanced. Advance the
+  //    row to succeeded/failed so step 2 can gate dependents correctly.
+  for (const child of snap.children) {
+    if (child.child_status !== 'released' || !child.child_task_id) continue;
+    const rec = await getTaskRecord(child.child_task_id);
+    if (!taskIsTerminal(rec?.status as TaskStatusType | undefined)) continue; // still running
+    const newStatus = taskIsSuccess(rec) ? 'succeeded' : 'failed';
+    await advanceChildStatus(orchestrationId, child.sub_issue_id, newStatus, now);
+  }
+
+  // 2. Re-load (statuses may have advanced) and release any blocked/ready
+  //    child whose predecessors are all succeeded, plus skip children with
+  //    a failed predecessor. Derive everything from the fresh persisted
+  //    state — the same truth the live reconciler uses.
+  const fresh = await loadOrchestration(ddb, ORCHESTRATION_TABLE, orchestrationId);
+  if (!fresh) return 0;
+
+  // Working copy of each child's status; updated in place as skips cascade.
+  const statusOf = new Map(fresh.children.map((c) => [c.sub_issue_id, c.child_status]));
+
+  // Cascade skips: any child with a failed/skipped predecessor → skipped.
+  // Repeats until a full pass changes nothing, so a skip reaches dependents
+  // of dependents.
+  let changed = true;
+  while (changed) {
+    changed = false;
+    for (const c of fresh.children) {
+      if (statusOf.get(c.sub_issue_id) !== 'blocked' && statusOf.get(c.sub_issue_id) !== 'ready') continue;
+      const deadDep = c.depends_on.some((d) => {
+        const s = statusOf.get(d);
+        return s === 'failed' || s === 'skipped';
+      });
+      if (deadDep) {
+        await advanceChildStatus(orchestrationId, c.sub_issue_id, 'skipped', now);
+        statusOf.set(c.sub_issue_id, 'skipped');
+        changed = true;
+      }
+    }
+  }
+
+  // Releasable: children whose deps are ALL succeeded and that have NOT yet
+  // started a task. Includes:
+  //   - ``blocked`` children (lost release-event recovery), and
+  //   - ``ready`` children with no ``child_task_id`` — left un-started by the
+  //     live reconciler's #331 concurrency throttle (or a prior create_failed).
+  // A ``ready`` child that already has a task was genuinely released; re-
+  // releasing is idempotent, but we skip it to keep the budget for new work.
+  const releasableRows: OrchestrationChildRow[] = fresh.children
+    .filter((c) => {
+      const s = statusOf.get(c.sub_issue_id);
+      const depsReady = c.depends_on.every((d) => statusOf.get(d) === 'succeeded');
+      if (!depsReady) return false;
+      if (s === 'blocked') return true;
+      if (s === 'ready' && !c.child_task_id) return true; // throttle-deferred
+      return false;
+    })
+    .map((c) => ({ ...c, child_status: 'ready' as const }));
+
+  if (releasableRows.length === 0) return 0;
+
+  // #331: throttle the sweep's releases to the free budget too.
+  const budget = USER_CONCURRENCY_TABLE
+    ? await readConcurrencyBudget(ddb, USER_CONCURRENCY_TABLE, fresh.meta.release_context.platform_user_id, MAX_CONCURRENT)
+    : undefined;
+  const results = await releaseReadyChildren(
+    ddb, ORCHESTRATION_TABLE, releasableRows, fresh.meta.release_context, createTaskCore, now,
+    // #247 A4: full child set for predecessor-branch-derived base selection.
+    fresh.children,
+    'main',
+    budget,
+  );
+  const released = results.filter((r) => r.kind === 'released').length;
+  if (released > 0) {
+    logger.warn('Stranded orchestration recovered — released children the live reconciler missed', {
+      orchestration_id: orchestrationId,
+      released,
+      candidates: releasableRows.length,
+    });
+  }
+  return released;
+}
+
+/**
+ * Conditionally advance a child row's status (no-op if already there).
+ *
+ * The write is conditioned on the row not already holding ``status``; a
+ * failed condition check is swallowed, any other error is re-thrown.
+ */
+async function advanceChildStatus(
+  orchestrationId: string,
+  subIssueId: string,
+  status: string,
+  now: string,
+): Promise<void> {
+  try {
+    await ddb.send(new UpdateCommand({
+      TableName: ORCHESTRATION_TABLE,
+      Key: { orchestration_id: orchestrationId, sub_issue_id: subIssueId },
+      UpdateExpression: 'SET child_status = :s, updated_at = :now',
+      ConditionExpression: 'child_status <> :s',
+      ExpressionAttributeValues: { ':s': status, ':now': now },
+    }));
+  } catch (err) {
+    if ((err as { name?: string })?.name === 'ConditionalCheckFailedException') return;
+    throw err;
+  }
+}
+
+/**
+ * Scheduled entry point.
Sweeps every active orchestration. A failure on + * one orchestration is logged and does not abort the rest. + */ +export async function handler(): Promise<void> { + const ids = await findOrchestrationIds(); + let totalReleased = 0; + let swept = 0; + for (const id of ids) { + try { + totalReleased += await reconcileOrchestration(id); + swept += 1; + } catch (err) { + logger.error('Stranded-orchestration sweep failed for one orchestration (continuing)', { + orchestration_id: id, + error: err instanceof Error ? err.message : String(err), + }); + } + } + logger.info('Stranded-orchestration sweep complete', { + orchestrations_swept: swept, + orchestrations_found: ids.length, + children_released: totalReleased, + }); +} diff --git a/cdk/src/handlers/shared/create-task-core.ts b/cdk/src/handlers/shared/create-task-core.ts index 3db2ebaa..d9eb9279 100644 --- a/cdk/src/handlers/shared/create-task-core.ts +++ b/cdk/src/handlers/shared/create-task-core.ts @@ -116,7 +116,11 @@ export async function createTaskCore( if (!isValidWorkflowRef(body.workflow_ref)) { return errorResponse(400, ErrorCode.VALIDATION_ERROR, 'Invalid workflow_ref. Expected "/-vN[@]".', requestId); } - const resolvedWorkflow = resolveWorkflowRef(body.workflow_ref); + // Pass whether the request carries a repo: a repo-bound task with no + // explicit workflow_ref resolves to coding/new-task-v1 (the disciplined + // coding workflow), not the repo-less default/agent-v1 (#296 regression — + // see resolveWorkflowRef rung 3a).
+ const resolvedWorkflow = resolveWorkflowRef(body.workflow_ref, Boolean(body.repo)); if (resolvedWorkflow === null) { // Distinguish an unknown id from an unsatisfiable @version pin so the caller // learns which it is (#296 finding #6 — a bad pin no longer silently runs @@ -620,6 +624,12 @@ export async function createTaskCore( ...(context.idempotencyKey && { idempotency_key: context.idempotencyKey }), channel_source: context.channelSource, channel_metadata: context.channelMetadata, + // #247 UX.3: hoist linear_issue_id to the top level so the sparse + // LinearIssueIndex GSI can resolve an issue → its newest task + PR (a GSI + // cannot key off the nested channel_metadata map). Linear-origin only. + ...(context.channelMetadata?.linear_issue_id && { + linear_issue_id: context.channelMetadata.linear_issue_id, + }), ...(attachmentRecords.length > 0 && { attachments: attachmentRecords }), status_created_at: `${initialStatus}#${now}`, created_at: now, diff --git a/cdk/src/handlers/shared/failure-reply.ts b/cdk/src/handlers/shared/failure-reply.ts new file mode 100644 index 00000000..23c34356 --- /dev/null +++ b/cdk/src/handlers/shared/failure-reply.ts @@ -0,0 +1,115 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * #247 UX.5 — "failure is a conversation". Renders the threaded ❌ reply the + * agent posts beneath a human's ``@bgagent`` comment when the requested + * iteration does not land cleanly. Two distinct shapes, per the user's design: + * + * - BUILD/TEST failure (the agent ran and opened/updated a PR, but the build + * or tests are red): a sanitized ONE-LINE reason pointing at the PR's + * checks. We deliberately do NOT dump the raw build output — it's untrusted + * repo code and the per-test detail isn't persisted platform-side; the PR's + * checks tab is the authoritative, safe place to read it. + * + * - AGENT-ITSELF failure (the agent crashed / timed out / hit a cap before a + * clean terminal): the classified one-line title + a TRUNCATED excerpt of + * the raw error, plus a pointer to the full CloudWatch logs by task id. + * + * Both always end by inviting a reply — the failure reply is answerable, so + * the user replies ``@bgagent `` and the comment trigger re-runs the + * iteration on the same PR (UX.3). Pure + deterministic; no I/O. + */ + +import { classifyError } from './error-classifier'; +import type { TaskStatusType } from '../../constructs/task-status'; + +/** Max chars of the raw agent error surfaced inline (the rest is in CloudWatch). */ +const EXCERPT_MAX = 200; + +export interface FailureReplyInput { + /** Terminal task status. */ + readonly status: TaskStatusType | string; + /** Whether the post-change build/tests passed. false ⇒ build/test failure. */ + readonly buildPassed?: boolean | null; + /** Raw agent error_message, if any (drives the agent-failure classification). 
*/ + readonly errorMessage?: string | null; + /** Task id — surfaced so the user can find the run in CloudWatch. */ + readonly taskId: string; +} + +/** + * The agent pipeline's signature for "the AGENT finished fine, but the build + * verification GATE failed" (a build/test regression). Live-verified + * (2026-06-16): the pipeline gates this to ``status=FAILED`` with + * ``error_message="Task did not succeed (agent_status='success', build_ok=False)"`` + * and leaves the separate ``build_passed`` attribute null — so the previous + * ``COMPLETED && build_passed===false`` check NEVER matched a real regression + * and every build failure fell through to the (wrong) agent-crash copy. We + * key off the real persisted signal instead. See + * ``agent/src/pipeline.py`` ``_resolve_overall_task_status`` / + * ``_apply_post_hook_gates``. + */ +const BUILD_GATE_FAILED_RE = /agent_status=['"]?(success|end_turn)['"]?.*build_ok\s*=\s*False/i; + +/** + * True when the failure is a BUILD/TEST failure (the agent completed and a PR + * exists, but the verification gate is red) vs an agent-itself failure + * (crash / cap / timeout). Two shapes are accepted: + * - the live gating shape: ``error_message`` says ``agent_status='success' … + * build_ok=False`` (the agent succeeded; only the build gate failed); OR + * - the explicit field shape: a terminal task with ``build_passed === false`` + * and no crash error_message (defensive — e.g. an informational-gate path + * that surfaces build_passed directly). + */ +function isBuildFailure(input: FailureReplyInput): boolean { + if (input.errorMessage && BUILD_GATE_FAILED_RE.test(input.errorMessage)) { + return true; + } + return input.buildPassed === false && !input.errorMessage; +} + +/** Collapse whitespace + clip to EXCERPT_MAX chars with an ellipsis. */ +function excerpt(raw: string): string { + const oneLine = raw.replace(/\s+/g, ' ').trim(); + return oneLine.length > EXCERPT_MAX ? 
`${oneLine.slice(0, EXCERPT_MAX)}…` : oneLine; +} + +/** + * Render the ❌ failure reply body. Best-effort, never throws. + */ +export function renderFailureReply(input: FailureReplyInput): string { + if (isBuildFailure(input)) { + // Build/test failure — one line, point at the PR's checks (the safe, + // authoritative detail surface). No raw output dump. + return ( + "❌ I made the change, but the build/tests didn't pass — see the PR's " + + 'checks for details. Reply with guidance and I\'ll try again.' + ); + } + + // Agent-itself failure: classified title + truncated excerpt + CloudWatch. + const classification = classifyError(input.errorMessage); + const title = classification?.title ?? "the task didn't complete"; + const detail = input.errorMessage ? ` ${excerpt(input.errorMessage)}` : ''; + return ( + `❌ ${title} —${detail} see CloudWatch for task \`${input.taskId}\`. ` + + 'Reply with guidance and I\'ll try again.' + ); +} diff --git a/cdk/src/handlers/shared/linear-feedback.ts b/cdk/src/handlers/shared/linear-feedback.ts index 6597d912..0cbb83dd 100644 --- a/cdk/src/handlers/shared/linear-feedback.ts +++ b/cdk/src/handlers/shared/linear-feedback.ts @@ -35,8 +35,14 @@ const LINEAR_GRAPHQL_URL = 'https://api.linear.app/graphql'; const REQUEST_TIMEOUT_MS = 5000; -/** Reaction emoji short-code for the failure marker. Matches `EMOJI_FAILURE` in `agent/src/linear_reactions.py`. */ -const EMOJI_FAILURE = 'x'; +/** + * Reaction emoji short-codes. Match the agent-side child markers in + * ``agent/src/linear_reactions.py`` so the PARENT epic shows the same + * status signal as its sub-issues: 👀 at start, ✅/❌ at completion. + */ +export const EMOJI_STARTED = 'eyes'; +export const EMOJI_SUCCESS = 'white_check_mark'; +export const EMOJI_FAILURE = 'x'; const COMMENT_CREATE_MUTATION = ` mutation CreateComment($issueId: String!, $body: String!) { @@ -46,6 +52,46 @@ mutation CreateComment($issueId: String!, $body: String!) 
{ } `.trim(); +/** Create a comment and return its id (for later edit-in-place). */ +const COMMENT_CREATE_RETURNING_ID_MUTATION = ` +mutation CreateCommentReturningId($issueId: String!, $body: String!) { + commentCreate(input: { issueId: $issueId, body: $body }) { + success + comment { id } + } +} +`.trim(); + +/** Edit an existing comment in place (#247 #3 live status block). */ +const COMMENT_UPDATE_MUTATION = ` +mutation UpdateComment($id: String!, $body: String!) { + commentUpdate(id: $id, input: { body: $body }) { + success + } +} +`.trim(); + +/** + * Post a THREADED REPLY beneath an existing comment (#247 UX.3 ack trail). + * ``parentId`` is the comment being replied to; the reply notifies and reads + * as a conversation turn under it. Returns the new reply's id (for a possible + * later edit), distinct from a top-level comment. + * + * IMPORTANT (live-verified 2026-06-16): Linear's ``commentCreate`` requires + * ``issueId`` to be present EVEN for a threaded reply — ``parentId`` alone + * fails ``commentCreate`` argument validation ("Exactly one of …issueId must + * be defined"). So the reply carries BOTH the parent comment id and its + * issue id. + */ +const COMMENT_REPLY_RETURNING_ID_MUTATION = ` +mutation ReplyToComment($issueId: String!, $parentId: String!, $body: String!) { + commentCreate(input: { issueId: $issueId, parentId: $parentId, body: $body }) { + success + comment { id } + } +} +`.trim(); + const REACTION_CREATE_MUTATION = ` mutation ReactIssue($issueId: String!, $emoji: String!) { reactionCreate(input: { issueId: $issueId, emoji: $emoji }) { @@ -54,6 +100,112 @@ mutation ReactIssue($issueId: String!, $emoji: String!) { } `.trim(); +/** + * React to a specific COMMENT (not the issue) — the instant "on it" ack on an + * ``@bgagent`` comment (#247 UX.3). (Verified: ``reactionCreate`` input accepts + * ``commentId``.) + */ +const REACTION_CREATE_ON_COMMENT_MUTATION = ` +mutation ReactComment($commentId: String!, $emoji: String!) 
{ + reactionCreate(input: { commentId: $commentId, emoji: $emoji }) { + success + } +} +`.trim(); + +const REACTION_DELETE_MUTATION = ` +mutation UnreactIssue($id: String!) { + reactionDelete(id: $id) { success } +} +`.trim(); + +/** Read an issue's reactions (id + emoji) — to swap one bgagent marker for another. */ +const ISSUE_REACTIONS_QUERY = ` +query IssueReactions($issueId: String!) { + issue(id: $issueId) { reactions { id emoji } } +} +`.trim(); + +/** Read a COMMENT's reactions (id + emoji) — to swap the comment's bgagent marker (#247 UX.21). */ +const COMMENT_REACTIONS_QUERY = ` +query CommentReactions($commentId: String!) { + comment(id: $commentId) { reactions { id emoji } } +} +`.trim(); + +/** + * The bgagent status-marker emojis we manage on the PARENT epic. Mirrors + * ``_BGAGENT_EMOJIS`` in ``agent/src/linear_reactions.py``. Only these are + * ever deleted by {@link swapIssueReaction} / {@link swapCommentReaction} — a human's reaction is never + * touched. + */ +const BGAGENT_EMOJIS: ReadonlySet<string> = new Set([EMOJI_STARTED, EMOJI_SUCCESS, EMOJI_FAILURE]); + +/** + * Fetch the workflow states for the TEAM that owns ``issueId``, so we can + * resolve a target state by its semantic ``type`` (Linear state IDs are + * per-team UUIDs, not knowable a priori). ``type`` values: + * ``backlog`` | ``unstarted`` (Todo) | ``started`` (In Progress / In Review) | + * ``completed`` (Done) | ``canceled``. + */ +const ISSUE_TEAM_STATES_QUERY = ` +query IssueTeamStates($issueId: String!) { + issue(id: $issueId) { + state { id type name position } + team { states { nodes { id type name position } } } + } +} +`.trim(); + +const ISSUE_SET_STATE_MUTATION = ` +mutation SetIssueState($issueId: String!, $stateId: String!)
{ + issueUpdate(id: $issueId, input: { stateId: $stateId }) { + success + } +} +`.trim(); + +interface TeamState { + readonly id: string; + readonly type: string; + readonly name: string; + readonly position: number; +} + +async function graphqlData( + accessToken: string, + query: string, + variables: Record<string, unknown>, +): Promise<Record<string, unknown> | null> { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS); + try { + const resp = await fetch(LINEAR_GRAPHQL_URL, { + method: 'POST', + headers: { 'Authorization': `Bearer ${accessToken}`, 'Content-Type': 'application/json' }, + body: JSON.stringify({ query, variables }), + signal: controller.signal, + }); + if (!resp.ok) { + logger.warn('Linear feedback GraphQL non-2xx', { status: resp.status }); + return null; + } + const body = (await resp.json()) as { data?: Record<string, unknown>; errors?: unknown }; + if (body.errors) { + logger.warn('Linear feedback GraphQL errors', { errors: body.errors }); + return null; + } + return body.data ?? null; + } catch (err) { + logger.warn('Linear feedback request failed', { + error: err instanceof Error ? err.message : String(err), + }); + return null; + } finally { + clearTimeout(timer); + } +} + /** * Outcome of a Linear API call. ``retryable`` distinguishes transient * failures (network error, request timeout, HTTP 5xx/429) — where a @@ -165,6 +317,35 @@ export async function postIssueComment( return graphqlRequest(token, COMMENT_CREATE_MUTATION, { issueId, body }); } +/** + * Upsert the orchestration live status block (#247 #3): if + * ``existingCommentId`` is given, EDIT that comment in place; otherwise + * CREATE a fresh comment and return its id so the caller can persist it and + * edit on the next transition. Returns the comment id on success (the + * existing id on update, the new id on create), or null on any failure. + * Best-effort — never throws; the status block is advisory.
+ */ +export async function upsertStatusComment( + ctx: LinearFeedbackContext, + issueId: string, + body: string, + existingCommentId?: string, +): Promise<string | null> { + const token = await resolveToken(ctx); + if (!token) return null; + + if (existingCommentId) { + // graphqlRequest now returns a LinearPostResult — read .ok (an object is + // always truthy, so a bare `ok ?` would wrongly report success). + const ok = (await graphqlRequest(token, COMMENT_UPDATE_MUTATION, { id: existingCommentId, body })).ok; + return ok ? existingCommentId : null; + } + + const data = await graphqlData(token, COMMENT_CREATE_RETURNING_ID_MUTATION, { issueId, body }); + const created = data?.commentCreate as { success?: boolean; comment?: { id?: string } } | undefined; + return created?.success && created.comment?.id ? created.comment.id : null; +} + /** * Add an emoji reaction onto a Linear issue. Defaults to ❌ — the failure marker * the agent uses on the success/failure side. Same result contract as @@ -180,6 +361,121 @@ export async function addIssueReaction( return graphqlRequest(token, REACTION_CREATE_MUTATION, { issueId, emoji }); } +/** + * React to a specific Linear COMMENT (#247 UX.3 ack model). Used as the + * instant "on it" acknowledgement when a human ``@bgagent``s a comment — + * 👀 ({@link EMOJI_STARTED}) lands immediately, before the iteration task is + * even created, so the human knows the agent saw their request with zero + * comment clutter. Best-effort; returns true on success. + */ +export async function reactToComment( + ctx: LinearFeedbackContext, + commentId: string, + emoji: string = EMOJI_STARTED, +): Promise<boolean> { + const token = await resolveToken(ctx); + if (!token) return false; + // graphqlRequest returns a LinearPostResult (upstream #311/#332); this + // best-effort helper just needs the success bool.
+ return (await graphqlRequest(token, REACTION_CREATE_ON_COMMENT_MUTATION, { commentId, emoji })).ok; +} + +/** + * Post a THREADED REPLY beneath a Linear comment (#247 UX.3 ack model). Used + * when the agent's work on an ``@bgagent`` comment lands ("✅ Updated — PR #178") + * or fails ("❌ …"). Unlike an edit, a reply NOTIFIES and reads as a + * conversation turn under the original request, keeping the thread contextual. + * Returns the new reply's comment id (for a possible later edit) or null on any + * failure. Best-effort — never throws. + * + * ``issueId`` is the issue the parent comment lives on — Linear requires it on + * ``commentCreate`` even for a reply (see {@link COMMENT_REPLY_RETURNING_ID_MUTATION}). + */ +export async function replyToComment( + ctx: LinearFeedbackContext, + issueId: string, + parentCommentId: string, + body: string, +): Promise<string | null> { + const token = await resolveToken(ctx); + if (!token) return null; + const data = await graphqlData(token, COMMENT_REPLY_RETURNING_ID_MUTATION, { + issueId, parentId: parentCommentId, body, + }); + const created = data?.commentCreate as { success?: boolean; comment?: { id?: string } } | undefined; + return created?.success && created.comment?.id ? created.comment.id : null; +} + +/** + * Swap the PARENT epic's bgagent status marker so only ONE is shown at a + * time (👀 → ✅/❌), mirroring the children's reaction behaviour. The + * children capture the reaction id in-process and delete it; the parent's + * markers are added across SEPARATE lambda invocations (👀 at seed, ✅/❌ at + * completion), so we instead query the issue's reactions, delete every + * bgagent marker EXCEPT the target, then add the target if absent. Only + * bgagent emojis (👀/✅/❌) are ever removed — a human's reaction is left + * untouched. Best-effort; returns true if the target marker is present + * afterwards.
+ */ +export async function swapIssueReaction( + ctx: LinearFeedbackContext, + issueId: string, + emoji: string, +): Promise<boolean> { + const token = await resolveToken(ctx); + if (!token) return false; + + const data = await graphqlData(token, ISSUE_REACTIONS_QUERY, { issueId }); + const reactions = ((data?.issue as { reactions?: Array<{ id: string; emoji: string }> } | undefined)?.reactions) ?? []; + + // Delete our stale markers (any bgagent emoji that isn't the target). + let targetPresent = false; + for (const r of reactions) { + if (r.emoji === emoji) { targetPresent = true; continue; } + if (BGAGENT_EMOJIS.has(r.emoji)) { + await graphqlRequest(token, REACTION_DELETE_MUTATION, { id: r.id }); + } + } + + if (targetPresent) return true; // already the only marker after the deletes above + return (await graphqlRequest(token, REACTION_CREATE_MUTATION, { issueId, emoji })).ok; +} + +/** + * Swap the bgagent status marker on a COMMENT (👀 → ✅/❌), so the trigger + * comment shows ONE marker reflecting the outcome — mirrors + * {@link swapIssueReaction} but on a comment (#247 UX.21). The 👀 lands at + * receipt ({@link reactToComment}); when the iteration settles we swap it for + * ✅ (success) / ❌ (failure) so the comment itself reads done at a glance, not + * just the threaded reply. Queries the comment's reactions, deletes every + * bgagent marker except the target, adds the target if absent. Only bgagent + * emojis (👀/✅/❌) are removed — a human's reaction is never touched. + * Idempotent (a reconciler redelivery re-converges to the same single marker). + * Best-effort; returns true if the target marker is present afterwards.
+ */ +export async function swapCommentReaction( + ctx: LinearFeedbackContext, + commentId: string, + emoji: string, +): Promise<boolean> { + const token = await resolveToken(ctx); + if (!token) return false; + + const data = await graphqlData(token, COMMENT_REACTIONS_QUERY, { commentId }); + const reactions = ((data?.comment as { reactions?: Array<{ id: string; emoji: string }> } | undefined)?.reactions) ?? []; + + let targetPresent = false; + for (const r of reactions) { + if (r.emoji === emoji) { targetPresent = true; continue; } + if (BGAGENT_EMOJIS.has(r.emoji)) { + await graphqlRequest(token, REACTION_DELETE_MUTATION, { id: r.id }); + } + } + + if (targetPresent) return true; + return (await graphqlRequest(token, REACTION_CREATE_ON_COMMENT_MUTATION, { commentId, emoji })).ok; +} + /** * Convenience: post a feedback comment **and** drop a ❌ reaction in one call. * Both calls run in parallel; both are best-effort. Returns void — callers @@ -195,3 +491,103 @@ export async function reportIssueFailure( addIssueReaction(ctx, issueId, EMOJI_FAILURE), ]); } + +/** + * Pick the target workflow state by semantic preference. ``preferredNames`` + * (case-insensitive) is tried first so e.g. "In Review" wins over "In + * Progress" when both share Linear ``type: started``; falls back to the + * lowest-``position`` state of ``type``. Returns null if the team has no + * state of that type. + */ +function pickState( + states: readonly TeamState[], + type: string, + preferredNames: readonly string[], +): TeamState | null { + const ofType = states.filter((s) => s.type === type); + if (ofType.length === 0) return null; + for (const name of preferredNames) { + const hit = ofType.find((s) => s.name.toLowerCase() === name.toLowerCase()); + if (hit) return hit; + } + return [...ofType].sort((a, b) => a.position - b.position)[0]; +} + +/** + * Transition a Linear issue to a workflow state chosen by semantic ``type`` + * (+ optional name preference).
Used by the #247 reconciler to move the + * PARENT epic through its lifecycle — ``In Progress`` when the orchestration + * seeds, ``In Review`` when all children succeed — since the parent spawns no + * task and Linear's GitHub automation (which moves the children on PR-open) + * never touches it. + * + * Best-effort, like the rest of this module: resolves the team's states, + * picks the target, and issues ``issueUpdate``. Returns true only on a + * confirmed transition. Skips (returns false) if the issue is already in the + * target state or moving backward (we never demote, e.g. a human already + * pushed the epic to Done). Never throws. + */ +export async function transitionIssueState( + ctx: LinearFeedbackContext, + issueId: string, + targetType: 'started' | 'completed', + preferredNames: readonly string[] = [], +): Promise<boolean> { + const token = await resolveToken(ctx); + if (!token) return false; + + const data = await graphqlData(token, ISSUE_TEAM_STATES_QUERY, { issueId }); + const issue = data?.issue as + | { state?: TeamState; team?: { states?: { nodes?: TeamState[] } } } + | undefined; + const states = issue?.team?.states?.nodes ?? []; + if (states.length === 0) { + logger.warn('Linear state transition: no team states resolved', { issue_id: issueId }); + return false; + } + + const target = pickState(states, targetType, preferredNames); + if (!target) { + logger.warn('Linear state transition: no state of target type', { issue_id: issueId, target_type: targetType }); + return false; + } + + const current = issue?.state; + if (current?.id === target.id) { + // Already there — idempotent no-op (e.g. reconciler re-fires). + return false; + } + // Never move backward. Order by state TYPE first (the lifecycle: + // backlog → unstarted → started → completed/canceled), then by position + // within the same type. Raw position is NOT lifecycle order — e.g.
Done + // (completed, position 3) sorts numerically before In Review (started, + // position 1002), so a position-only guard would wrongly demote a + // human-completed epic back to In Review. We never demote across types + // (a human/automation advanced it) nor backward within a type. + if (current) { + const TYPE_RANK: Record<string, number> = { + backlog: 0, unstarted: 1, started: 2, completed: 3, canceled: 3, triage: 0, + }; + const curRank = TYPE_RANK[current.type] ?? 0; + const tgtRank = TYPE_RANK[target.type] ?? 0; + const backward = curRank > tgtRank || (curRank === tgtRank && current.position >= target.position); + if (backward) { + logger.info('Linear state transition: skipping backward move', { + issue_id: issueId, + current_state: current.name, + target_state: target.name, + }); + return false; + } + } + + const ok = (await graphqlRequest(token, ISSUE_SET_STATE_MUTATION, { issueId, stateId: target.id })).ok; + if (ok) { + logger.info('Linear issue state transitioned', { + issue_id: issueId, + from: current?.name, + to: target.name, + }); + } + return ok; +} diff --git a/cdk/src/handlers/shared/linear-issue-context-probe.ts b/cdk/src/handlers/shared/linear-issue-context-probe.ts new file mode 100644 index 00000000..5c613d7e --- /dev/null +++ b/cdk/src/handlers/shared/linear-issue-context-probe.ts @@ -0,0 +1,172 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import { logger } from './logger'; + +/** + * Best-effort probe for additional Linear context attached to an issue — + * paperclip attachments and project documents — that the agent should + * fetch on demand via the Linear MCP at runtime. + * + * The webhook payload itself does NOT carry attachments or + * project.documents, so we ask Linear's GraphQL API once at task-creation + * time. The result is a tiny presence signal (titles + counts) that lets + * the webhook processor prepend a hint to the task description; it does + * NOT pre-fetch bodies, screen content, or upload to S3 — that path is + * still owned by `extractImageUrlAttachments` for description-embedded + * markdown images. + */ + +const LINEAR_GRAPHQL_URL = 'https://api.linear.app/graphql'; +const REQUEST_TIMEOUT_MS = 5000; + +/** + * Cap on attachment titles listed inline in the task-description hint; any + * beyond this are summarized as "(+N more)" so the prepended hint stays short. + */ +const MAX_HINTED_ATTACHMENT_TITLES = 5; + +const ISSUE_CONTEXT_QUERY = ` +query IssueContext($id: String!) { + issue(id: $id) { + id + attachments(first: 25) { + nodes { + id + title + } + } + project { + id + name + documents(first: 1) { + nodes { id } + } + } + } +} +`.trim(); + +export interface LinearIssueContextProbe { + /** Paperclip attachment titles surfaced on the issue, if any. */ + readonly attachmentTitles: readonly string[]; + /** Project name (only present when the issue belongs to a project). */ + readonly projectName: string | null; + /** True when the issue's project has at least one document attached. 
*/ + readonly projectHasDocuments: boolean; +} + +const EMPTY: LinearIssueContextProbe = { + attachmentTitles: [], + projectName: null, + projectHasDocuments: false, +}; + +/** + * Issue the GraphQL query. Returns an empty probe on any failure + * (network, auth, GraphQL errors). Never throws — the caller treats + * absence of context the same as no extra context being available. + */ +export async function probeLinearIssueContext( + accessToken: string, + issueId: string, +): Promise<LinearIssueContextProbe> { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS); + try { + const resp = await fetch(LINEAR_GRAPHQL_URL, { + method: 'POST', + headers: { + 'Authorization': `Bearer ${accessToken}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ + query: ISSUE_CONTEXT_QUERY, + variables: { id: issueId }, + }), + signal: controller.signal, + }); + if (!resp.ok) { + logger.warn('Linear issue context probe non-2xx', { status: resp.status, issue_id: issueId }); + return EMPTY; + } + const body = (await resp.json()) as { + data?: { + issue?: { + attachments?: { nodes?: Array<{ id?: string; title?: string }> }; + project?: { + id?: string; + name?: string; + documents?: { nodes?: Array<{ id?: string }> }; + } | null; + }; + }; + errors?: unknown; + }; + if (body.errors) { + logger.warn('Linear issue context probe graphql errors', { issue_id: issueId, errors: body.errors }); + return EMPTY; + } + const issue = body.data?.issue; + if (!issue) return EMPTY; + const attachmentTitles = (issue.attachments?.nodes ?? []) + .map((a) => (typeof a?.title === 'string' ? a.title.trim() : '')) + .filter((t): t is string => t.length > 0); + const project = issue.project ?? null; + const projectName = typeof project?.name === 'string' && project.name.trim() ? project.name.trim() : null; + const projectHasDocuments = (project?.documents?.nodes ??
[]).length > 0; + return { attachmentTitles, projectName, projectHasDocuments }; + } catch (err) { + logger.warn('Linear issue context probe request failed', { + issue_id: issueId, + error: err instanceof Error ? err.message : String(err), + }); + return EMPTY; + } finally { + clearTimeout(timer); + } +} + +/** + * Render a one-paragraph hint the webhook processor prepends to the task + * description when the probe surfaced anything worth flagging. Returns + * an empty string when there's nothing to hint about — the processor + * skips the prepend in that case. + * + * The wording deliberately points at MCP tool names so the agent's + * channel-prompt addendum reinforces (and is reinforced by) the same + * vocabulary. + */ +export function renderIssueContextHint(probe: LinearIssueContextProbe): string { + const bits: string[] = []; + if (probe.attachmentTitles.length > 0) { + const titles = probe.attachmentTitles + .slice(0, MAX_HINTED_ATTACHMENT_TITLES).map((t) => `"${t}"`).join(', '); + const more = probe.attachmentTitles.length > MAX_HINTED_ATTACHMENT_TITLES + ? 
` (+${probe.attachmentTitles.length - MAX_HINTED_ATTACHMENT_TITLES} more)` : ''; + bits.push(`paperclip attachments — ${titles}${more} (fetch via \`mcp__linear-server__get_issue\` then \`mcp__linear-server__get_attachment\`)`); + } + if (probe.projectHasDocuments && probe.projectName) { + bits.push(`project "${probe.projectName}" has wiki documents (browse with \`mcp__linear-server__list_documents\` if the task is ambiguous)`); + } else if (probe.projectHasDocuments) { + bits.push('the project has wiki documents (browse with `mcp__linear-server__list_documents` if the task is ambiguous)'); + } + if (bits.length === 0) return ''; + return `Linear may have additional context for this issue: ${bits.join('; ')}.`; +} diff --git a/cdk/src/handlers/shared/linear-issue-lookup.ts b/cdk/src/handlers/shared/linear-issue-lookup.ts index b2373887..f3ce1485 100644 --- a/cdk/src/handlers/shared/linear-issue-lookup.ts +++ b/cdk/src/handlers/shared/linear-issue-lookup.ts @@ -52,6 +52,35 @@ export function extractLinearIdentifier(text: string | null | undefined): string return match ? `${match[1]}-${match[2]}` : null; } +/** + * Pull the Linear identifier out of an ABCA-generated git branch name. + * + * This is the *authoritative* identifier source for the screenshot + * router, and it must be tried before PR title/body. ABCA derives every + * task branch as `bgagent/{taskId}/{slug}` where the slug is + * `slugify("ABCA-151: <title>")` — so the identifier is ALWAYS the + * leading slug segment (see `generateBranchName` / `slugify` in + * `gateway.ts`, and the `${identifier}: ${title}` description built in + * `linear-webhook-processor.ts` / `orchestration-release.ts`). + * + * Why branch-first matters (issue #247): in a stacked sub-issue + * orchestration, an agent's PR *body* commonly narrates the predecessor + * issue ("cherry-picked from ABCA-151 … Closes ABCA-152") before the + * issue the PR actually closes.
/**
 * Pull the Linear identifier out of a git branch name.
 *
 * The branch name is upper-cased and then handed to
 * ``extractLinearIdentifier``. Upper-casing is needed because the branch slug
 * is lower-case while the identifier matcher expects an upper-case key.
 *
 * Per the routing notes for issue #247, callers should try the branch name
 * before the PR title/body: the body may mention a predecessor issue first,
 * whereas the branch encodes only the PR's own issue.
 *
 * @param branchName Branch name, or null/undefined/empty when unknown.
 * @returns The identifier found by ``extractLinearIdentifier``, or ``null``
 *          when no branch name was supplied.
 */
export function extractLinearIdentifierFromBranch(branchName: string | null | undefined): string | null {
  return branchName ? extractLinearIdentifier(branchName.toUpperCase()) : null;
}
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * Fetch a Linear parent issue's sub-issue dependency graph (issue #247, + * Mode A — PR A2). Reads ``children`` (sub-issues) and, per child, its + * ``inverseRelations`` of type ``blocks`` (the issues that block it) to + * build ``depends_on`` edges, then hands the result to + * ``orchestration-dag.ts::validateDag``. + * + * Direct GraphQL against Linear, Bearer-authenticated with the + * per-workspace OAuth token resolved by ``resolveLinearOauthToken``. + * Mirrors the request shape proven in ``linear-feedback.ts``. + * + * Unlike the best-effort feedback path, discovery is load-bearing: a + * fetch failure must be distinguishable from "this issue genuinely has + * no sub-issues" so the caller (the webhook processor) can decide + * whether to fall back to a single task or surface an error. Hence the + * discriminated ``FetchSubIssueGraphResult`` rather than a bare array. + */ + +import { logger } from './logger'; +import type { DagNode } from './orchestration-dag'; + +const LINEAR_GRAPHQL_URL = 'https://api.linear.app/graphql'; + +const REQUEST_TIMEOUT_MS = 8000; + +/** Linear `IssueRelation.type` value meaning "source blocks target". */ +const RELATION_TYPE_BLOCKS = 'blocks'; + +/** + * Page size for the children / relations connections. Bounded by + * ``max_sub_issues`` policy downstream; a parent with more children + * than this is over-cap and will be rejected before execution, so a + * single page is sufficient for the MVP (no cursor pagination). + */ +const CONNECTION_PAGE_SIZE = 100; + +/** + * GraphQL: fetch a parent issue's children and each child's blockers. 
+ * + * For child C, ``inverseRelations`` of type ``blocks`` are relations + * whose *source* issue blocks C — i.e. C's predecessors. We take the + * related issue id from each as a ``depends_on`` edge. + */ +const SUB_ISSUE_GRAPH_QUERY = ` +query SubIssueGraph($issueId: String!, $first: Int!) { + issue(id: $issueId) { + id + identifier + children(first: $first) { + nodes { + id + identifier + title + inverseRelations(first: $first) { + nodes { + type + issue { id } + } + } + } + } + } +} +`.trim(); + +/** One sub-issue plus the metadata the orchestration row needs. */ +export interface SubIssueNode extends DagNode { + /** Linear sub-issue UUID (same as ``id``). */ + readonly id: string; + /** Human-readable identifier (e.g. ``ENG-42``) for comments/logs. */ + readonly identifier?: string; + /** Sub-issue title for the task description. */ + readonly title?: string; + /** Sub-issue ids that block this one (intra-epic predecessors). */ + readonly depends_on: readonly string[]; +} + +export type FetchSubIssueGraphResult = + | { readonly kind: 'ok'; readonly parentIssueId: string; readonly children: readonly SubIssueNode[] } + | { readonly kind: 'no_children'; readonly parentIssueId: string } + | { readonly kind: 'error'; readonly message: string }; + +interface RawRelationNode { + readonly type?: string; + readonly issue?: { readonly id?: string } | null; +} + +interface RawChildNode { + readonly id?: string; + readonly identifier?: string; + readonly title?: string; + readonly inverseRelations?: { readonly nodes?: readonly RawRelationNode[] } | null; +} + +interface RawSubIssueGraph { + readonly data?: { + readonly issue?: { + readonly id?: string; + readonly children?: { readonly nodes?: readonly RawChildNode[] } | null; + } | null; + }; + readonly errors?: unknown; +} + +export interface FetchSubIssueGraphOptions { + /** Override fetch for tests. */ + readonly fetchImpl?: typeof fetch; +} + +/** + * Fetch + shape a parent issue's sub-issue dependency graph. 
/**
 * Fetch + shape a parent issue's sub-issue dependency graph.
 *
 * Returns:
 *   - ``ok`` — the response listed at least one child node. Each child's
 *     ``depends_on`` holds the ids of SIBLING children that block it: ``blocks``
 *     relations whose source lies outside this child set (or is the child
 *     itself) are dropped here, and duplicate edges are collapsed. Child nodes
 *     without an id are skipped.
 *   - ``no_children`` — the issue exists but has no sub-issues (caller falls
 *     back to a single task).
 *   - ``error`` — non-2xx status, network / timeout / JSON-parse failure, a
 *     body that is not a JSON object, GraphQL ``errors``, or a missing issue.
 *     The caller surfaces a retryable error and must NOT treat this as
 *     "no children".
 *
 * Never throws: every failure path, including a malformed (``null`` /
 * non-object) response body, is mapped to ``{ kind: 'error' }``.
 *
 * @param accessToken   Per-workspace Linear OAuth token (sent as a Bearer token).
 * @param parentIssueId Linear id of the parent issue.
 * @param options       Optional ``fetchImpl`` override for tests.
 */
export async function fetchSubIssueGraph(
  accessToken: string,
  parentIssueId: string,
  options: FetchSubIssueGraphOptions = {},
): Promise<FetchSubIssueGraphResult> {
  const fetchImpl = options.fetchImpl ?? fetch;
  const controller = new AbortController();
  // Abort the request if Linear does not answer within the timeout.
  const timer = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);

  let raw: RawSubIssueGraph | null;
  try {
    const resp = await fetchImpl(LINEAR_GRAPHQL_URL, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${accessToken}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        query: SUB_ISSUE_GRAPH_QUERY,
        variables: { issueId: parentIssueId, first: CONNECTION_PAGE_SIZE },
      }),
      signal: controller.signal,
    });
    if (!resp.ok) {
      logger.warn('Linear sub-issue fetch non-2xx', { status: resp.status, parent_issue_id: parentIssueId });
      return { kind: 'error', message: `Linear API returned status ${resp.status}.` };
    }
    raw = (await resp.json()) as RawSubIssueGraph | null;
  } catch (err) {
    logger.warn('Linear sub-issue fetch failed', {
      parent_issue_id: parentIssueId,
      error: err instanceof Error ? err.message : String(err),
    });
    return { kind: 'error', message: 'Could not reach the Linear API to read sub-issues.' };
  } finally {
    clearTimeout(timer);
  }

  // A syntactically valid JSON body can still be `null` or a scalar; reading
  // properties off it below would throw outside the try/catch and break the
  // "never throws" contract, so reject it explicitly.
  if (raw === null || typeof raw !== 'object') {
    logger.warn('Linear sub-issue fetch returned a non-object body', { parent_issue_id: parentIssueId });
    return { kind: 'error', message: 'Linear API returned an unexpected response reading sub-issues.' };
  }

  if (raw.errors) {
    logger.warn('Linear sub-issue fetch GraphQL errors', { parent_issue_id: parentIssueId, errors: raw.errors });
    return { kind: 'error', message: 'Linear API reported an error reading sub-issues.' };
  }

  const issue = raw.data?.issue;
  if (!issue || !issue.id) {
    return { kind: 'error', message: 'Linear issue not found or not accessible with the workspace token.' };
  }

  const childNodes = issue.children?.nodes ?? [];
  if (childNodes.length === 0) {
    return { kind: 'no_children', parentIssueId: issue.id };
  }

  // Restrict depends_on edges to ids that are themselves children of this
  // parent — a "blocks" relation pointing at an issue outside the epic is not
  // an intra-epic ordering constraint, and filtering here keeps the persisted
  // graph clean.
  const childIds = new Set(
    childNodes.map((c) => c?.id).filter((id): id is string => typeof id === 'string'),
  );

  const children: SubIssueNode[] = [];
  for (const c of childNodes) {
    // Skip null entries and nodes without an id — neither can be a graph node.
    if (!c?.id) continue;
    const blockers = (c.inverseRelations?.nodes ?? [])
      .filter((r) => r?.type === RELATION_TYPE_BLOCKS)
      .map((r) => r.issue?.id)
      .filter((id): id is string => typeof id === 'string' && id !== c.id && childIds.has(id));
    children.push({
      id: c.id,
      ...(c.identifier !== undefined && { identifier: c.identifier }),
      ...(c.title !== undefined && { title: c.title }),
      // Dedup edges (Linear can surface a relation from both directions).
      depends_on: [...new Set(blockers)],
    });
  }

  return { kind: 'ok', parentIssueId: issue.id, children };
}
/**
 * Look up the parent issue id of a Linear issue (#247 A6 comment trigger).
 *
 * A comment names the issue it sits on (the sub-issue), while the
 * orchestration is derived from the PARENT, so the trigger needs this hop.
 *
 * @param accessToken Linear OAuth token, sent as a Bearer token.
 * @param issueId     Id of the issue whose parent is wanted.
 * @param options     Optional ``fetchImpl`` override for tests.
 * @returns The parent id, or ``null`` when the issue has no parent or when
 *          the request fails in any way (non-2xx, GraphQL errors, network /
 *          timeout / parse failure). Failures are logged as warnings; nothing
 *          is thrown.
 */
export async function fetchIssueParentId(
  accessToken: string,
  issueId: string,
  options: FetchSubIssueGraphOptions = {},
): Promise<string | null> {
  const doFetch = options.fetchImpl ?? fetch;
  const abort = new AbortController();
  // Give up on the request once the shared timeout elapses.
  const timeoutHandle = setTimeout(() => abort.abort(), REQUEST_TIMEOUT_MS);

  try {
    const requestBody = JSON.stringify({ query: ISSUE_PARENT_QUERY, variables: { issueId } });
    const response = await doFetch(LINEAR_GRAPHQL_URL, {
      method: 'POST',
      headers: { 'Authorization': `Bearer ${accessToken}`, 'Content-Type': 'application/json' },
      body: requestBody,
      signal: abort.signal,
    });

    if (!response.ok) {
      logger.warn('Linear issue-parent fetch non-2xx', { status: response.status, issue_id: issueId });
      return null;
    }

    const payload = (await response.json()) as {
      data?: { issue?: { parent?: { id?: string } } };
      errors?: unknown;
    };
    if (payload.errors) {
      logger.warn('Linear issue-parent fetch GraphQL errors', { issue_id: issueId, errors: payload.errors });
      return null;
    }

    // A top-level issue has no parent; that is reported as null, not an error.
    const parentId = payload.data?.issue?.parent?.id;
    return parentId ?? null;
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : String(failure);
    logger.warn('Linear issue-parent fetch failed', {
      issue_id: issueId,
      error: reason,
    });
    return null;
  } finally {
    clearTimeout(timeoutHandle);
  }
}
/**
 * Resolve a Linear issue UUID to the NEWEST task recorded for it, using the
 * ``LinearIssueIndex`` GSI (#247 UX.3).
 *
 * The index is queried on ``linear_issue_id`` in descending order with a limit
 * of one, so a re-run issue resolves to its latest task.
 *
 * @param ddb           DynamoDB document client.
 * @param taskTableName Name of the task table.
 * @param linearIssueId Linear issue UUID to look up.
 * @returns The projected task fields, or ``null`` when the index holds no row
 *          for the issue or the query fails (the failure is logged as a
 *          warning). Best-effort: never throws.
 */
export async function resolveTaskByLinearIssue(
  ddb: DynamoDBDocumentClient,
  taskTableName: string,
  linearIssueId: string,
): Promise<LinearIssueTask | null> {
  try {
    const newestFirstQuery = new QueryCommand({
      TableName: taskTableName,
      IndexName: TaskTable.LINEAR_ISSUE_INDEX,
      KeyConditionExpression: 'linear_issue_id = :iid',
      ExpressionAttributeValues: { ':iid': linearIssueId },
      ScanIndexForward: false, // newest created_at first
      Limit: 1,
    });
    const response = await ddb.send(newestFirstQuery);

    const row = response.Items?.[0];
    if (!row) return null;

    // Copy only the attributes that are present, so absent optional fields
    // stay absent on the result rather than becoming explicit `undefined`.
    const task: { -readonly [K in keyof LinearIssueTask]: LinearIssueTask[K] } = {
      task_id: row.task_id as string,
    };
    if (row.user_id !== undefined) task.user_id = row.user_id as string;
    if (row.repo !== undefined) task.repo = row.repo as string;
    if (row.pr_url !== undefined) task.pr_url = row.pr_url as string;
    if (row.pr_number !== undefined) task.pr_number = row.pr_number as number;
    if (row.status !== undefined) task.status = row.status as string;
    return task;
  } catch (failure) {
    const reason = failure instanceof Error ? failure.message : String(failure);
    logger.warn('UX.3 standalone: LinearIssueIndex query failed — treating issue as non-ABCA', {
      linear_issue_id: linearIssueId,
      error: reason,
    });
    return null;
  }
}
/**
 * Determine the pull-request number for a task.
 *
 * The stored ``pr_number`` wins when it is a number; otherwise the number is
 * parsed from a ``/pull/<n>`` segment of ``pr_url``.
 *
 * @param task Task fields projected from the Linear issue index.
 * @returns The PR number, or ``null`` when neither field yields one.
 */
export function prNumberFromTask(task: LinearIssueTask): number | null {
  const { pr_number: storedNumber, pr_url: url } = task;

  if (typeof storedNumber === 'number') return storedNumber;
  if (typeof url !== 'string') return null;

  // The trailing \b ensures the digits are a complete path segment.
  const found = /\/pull\/(\d+)\b/.exec(url);
  return found ? Number(found[1]) : null;
}
/** A predecessor task whose branch a released child can stack on or merge in. */
export interface PredecessorBranch {
  readonly sub_issue_id: string;
  /** Head branch of the predecessor task (its persisted branch_name). */
  readonly branch_name: string;
}

/** Outcome of the base-branch decision for one child. */
export interface BaseBranchSelection {
  /** Branch the child is cut from; also the target of its PR. */
  readonly base_branch: string;
  /**
   * Predecessor branches to merge into the child's branch before work starts.
   * Populated only for the multi-predecessor case; empty otherwise.
   */
  readonly merge_branches: readonly string[];
  /** Which case applied — kept for logging/observability. */
  readonly shape: 'root' | 'linear' | 'diamond';
}

/** Inputs to {@link selectBaseBranch}. */
export interface SelectBaseBranchParams {
  /** Predecessors of the child being released (already terminal-success). */
  readonly predecessors: readonly PredecessorBranch[];
  /** Repo default branch, used for the root and diamond cases. Defaults to 'main'. */
  readonly defaultBranch?: string;
}

/**
 * Pick the base branch for a child and any predecessor branches to merge in.
 *
 * The decision is driven by the number of DISTINCT, non-empty predecessor
 * branch names:
 *   - none  → ``root``: cut from the default branch, nothing to merge;
 *   - one   → ``linear``: stack directly on that predecessor branch;
 *   - many  → ``diamond``: cut from the default branch and merge every
 *             predecessor branch (sorted) into the child.
 *
 * Predecessors without a usable ``branch_name`` are ignored, so a child whose
 * only predecessor has no branch degrades to the ``root`` case. Pure: no I/O.
 *
 * @param params Predecessor branches plus an optional default branch.
 * @returns The base branch, the merge list and the shape that applied.
 */
export function selectBaseBranch(params: SelectBaseBranchParams): BaseBranchSelection {
  const fallbackBase = params.defaultBranch ?? 'main';

  // Collapse duplicates first: two predecessors sharing one branch are a
  // single stack target, not a diamond.
  const distinct = new Set<string>();
  for (const { branch_name: name } of params.predecessors) {
    if (typeof name === 'string' && name.length > 0) distinct.add(name);
  }
  const stackTargets = Array.from(distinct).sort();

  switch (stackTargets.length) {
    case 0:
      return { base_branch: fallbackBase, merge_branches: [], shape: 'root' };
    case 1:
      return { base_branch: stackTargets[0], merge_branches: [], shape: 'linear' };
    default:
      // Several distinct branches: start from the default branch and merge them all.
      return { base_branch: fallbackBase, merge_branches: stackTargets, shape: 'diamond' };
  }
}
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * Pure logic for the A6 comment trigger (#247 redesign). A reviewer who wants + * a sub-issue's PR changed mentions ``@bgagent`` in a Linear comment on that + * sub-issue; the platform runs a ``coding/pr-iteration-v1`` task on the + * sub-issue's PR (and the reconciler then cascades the re-stack to dependents). + * + * This module decides — from a comment body alone — whether the comment is an + * instruction for the agent and what the instruction text is. Kept pure (no + * I/O, no Linear/AWS types) so the mention parsing is unit-testable and reused + * regardless of how the comment arrives. The processor does the I/O (resolve + * sub-issue → orchestration → PR, spawn the task). + */ + +/** The mention token that turns a Linear comment into an agent instruction. */ +export const MENTION_TOKEN = '@bgagent'; + +export interface CommentTrigger { + /** True when the comment is an explicit instruction for the agent. */ + readonly triggered: boolean; + /** + * The instruction text with the mention token stripped, trimmed. Empty when + * not triggered, or when the mention had no accompanying text (the caller + * treats an empty instruction as "address the latest review" — still valid). + */ + readonly instruction: string; +} + +/** + * Decide whether a comment body is an ``@bgagent`` instruction, and extract + * the instruction text. + * + * Rules (deliberately strict to avoid false-positives on human discussion and, + * critically, on the agent's OWN progress comments which never contain the + * mention token): + * - Must contain ``@bgagent`` (case-insensitive), as a token boundary so + * ``@bgagentx`` / an email-like ``foo@bgagent.io`` do NOT trigger. 
/**
 * Decide whether a comment body is an ``@bgagent`` instruction, and extract
 * the instruction text.
 *
 * Rules (deliberately strict to avoid false positives on human discussion
 * and on the bot's own rendered comments):
 *   - A body that starts with one of the bot's own comment markers never
 *     triggers (self-comment loop guard, #247 UX.20).
 *   - The body must contain ``@bgagent`` (case-insensitive) ending at a token
 *     boundary: it must not be followed by a word character (``@bgagentx``)
 *     nor by a dot that continues into a word (``foo@bgagent.io``).
 *     Sentence punctuation after the mention — ``"please rebase, @bgagent."``
 *     — does NOT suppress the trigger.
 *   - The instruction is the body with every mention replaced by a space,
 *     whitespace collapsed, and trimmed. A bare ``@bgagent`` still triggers
 *     with an empty instruction.
 *
 * @param body Raw comment body; null/undefined/empty never triggers.
 * @returns Whether the comment triggered, plus the extracted instruction.
 */
export function parseCommentTrigger(body: string | undefined | null): CommentTrigger {
  if (!body) return { triggered: false, instruction: '' };

  // SELF-COMMENT GUARD: the bot's own rendered comments (which may embed a
  // literal "@bgagent …" example) must never re-trigger it, or it ends up
  // replying to itself indefinitely.
  if (isBotAuthoredComment(body)) return { triggered: false, instruction: '' };

  // Token-boundary match. The lookahead rejects "@bgagentbot" (word char) and
  // "@bgagent.io" (dot + word char) but allows a sentence-ending "@bgagent.".
  const mention = /@bgagent(?!\w|\.\w)/gi;

  // `search` ignores the regex's lastIndex, so the same global pattern can be
  // reused safely for the replacement below.
  if (body.search(mention) === -1) return { triggered: false, instruction: '' };

  const instruction = body.replace(mention, ' ').replace(/\s+/g, ' ').trim();
  return { triggered: true, instruction };
}

/**
 * Markers that begin a comment the BOT itself rendered (panel, acks,
 * disambiguation reply, agent progress). A comment starting with any of these
 * is never a human instruction — used to break self-trigger loops (#247 UX.20).
 * Keep this list in sync with the rendered comment prefixes.
 */
const BOT_COMMENT_PREFIXES = [
  '👋', // disambiguation "which sub-issue?" reply
  '✅', // "✅ Updated — PR #…" ack / "✅ **ABCA orchestration complete**" panel
  '❌', // failure reply
  '⚠️', // "finished with failures" panel
  '🔄', // in-progress panel
  '🤖', // agent progress ("🤖 Starting…")
  '🖼️', // preview screenshot comment
  '🔗', // "PR opened" / combined-PR
] as const;

/**
 * True when ``body`` is one of the bot's own rendered comments (loop guard).
 * Leading whitespace is ignored before the prefix comparison.
 */
export function isBotAuthoredComment(body: string): boolean {
  const trimmed = body.trimStart();
  return BOT_COMMENT_PREFIXES.some((p) => trimmed.startsWith(p));
}

/**
 * Build the task description handed to ``coding/pr-iteration-v1`` from the
 * comment instruction. Explicit reviewer text IS the instruction; a bare
 * mention falls back to a generic "address the latest review feedback"
 * directive so the agent always has something to act on.
 *
 * @param trigger Parsed comment trigger.
 * @returns The instruction text to use as the task description.
 */
export function buildIterationInstruction(trigger: CommentTrigger): string {
  if (trigger.instruction.length > 0) return trigger.instruction;
  return 'Address the latest review feedback on this pull request.';
}
/** One node of the dependency graph (a single Linear sub-issue). */
export interface DagNode {
  /** Stable identifier — the Linear sub-issue id (the orchestration SK). */
  readonly id: string;
  /** Ids that block this node; all must reach terminal-success first. */
  readonly depends_on: readonly string[];
}

/** Reason a graph was rejected. Surfaced to the user as a terminal comment. */
export type DagRejectionReason = 'cycle' | 'dangling_edge' | 'duplicate_id';

/** Successful validation: the graph's topological layering. */
export interface DagValidationOk {
  readonly ok: true;
  /**
   * Topological layers. ``layers[0]`` holds the roots; every node in
   * ``layers[n]`` depends only on nodes in earlier layers. Flattening the
   * layers yields a valid topological order.
   */
  readonly layers: readonly (readonly string[])[];
}

/** Failed validation: why, which nodes, and a user-facing explanation. */
export interface DagValidationError {
  readonly ok: false;
  readonly reason: DagRejectionReason;
  /**
   * Node ids implicated in the rejection — duplicated ids, nodes carrying a
   * dangling edge, or nodes that could not be ordered. Sorted for stable output.
   */
  readonly offendingIds: readonly string[];
  /** Human-readable, user-facing explanation (used verbatim in the Linear comment). */
  readonly message: string;
}

export type DagValidationResult = DagValidationOk | DagValidationError;

/**
 * Validate a dependency graph and, when it is sound, return its topological
 * layering.
 *
 * Checks run in this order and the first failure is returned (fail-closed):
 *   1. ``duplicate_id``  — two nodes share an id.
 *   2. ``dangling_edge`` — a ``depends_on`` entry names an id not in the node set.
 *   3. ``cycle``         — Kahn's algorithm could not order every node; the
 *      reported ids are the nodes left with unresolved predecessors (cycle
 *      members and anything downstream of them).
 *
 * Each layer is sorted so the output is deterministic.
 *
 * @param nodes Graph nodes with their ``depends_on`` edges.
 * @returns ``{ ok: true, layers }`` or a rejection with reason, ids and message.
 */
export function validateDag(nodes: readonly DagNode[]): DagValidationResult {
  // ── 1. Duplicate ids ─────────────────────────────────────────────
  const known = new Set<string>();
  const repeated = new Set<string>();
  for (const { id } of nodes) {
    if (known.has(id)) {
      repeated.add(id);
    } else {
      known.add(id);
    }
  }
  if (repeated.size > 0) {
    const duplicateIds = Array.from(repeated).sort();
    return {
      ok: false,
      reason: 'duplicate_id',
      offendingIds: duplicateIds,
      message: `Duplicate sub-issue id(s) in the dependency graph: ${duplicateIds.join(', ')}. Each sub-issue must appear once.`,
    };
  }

  // ── 2. Dangling edges (depends_on → unknown id) ──────────────────
  // Ids are unique at this point, so filtering nodes yields unique owners.
  const danglingOwners = nodes
    .filter((node) => node.depends_on.some((dep) => !known.has(dep)))
    .map((node) => node.id)
    .sort();
  if (danglingOwners.length > 0) {
    return {
      ok: false,
      reason: 'dangling_edge',
      offendingIds: danglingOwners,
      message: `Sub-issue(s) ${danglingOwners.join(', ')} depend on an issue that isn't part of this parent's sub-issue set. Blocking relations must stay within the epic.`,
    };
  }

  // ── 3. Kahn's algorithm: peel zero-in-degree nodes, layer by layer ──
  // pending[id]    = number of distinct predecessors not yet resolved.
  // successors[id] = nodes that list `id` as a (distinct) predecessor.
  const pending = new Map<string, number>();
  const successors = new Map<string, string[]>();
  for (const node of nodes) {
    const distinctDeps = new Set(node.depends_on);
    pending.set(node.id, distinctDeps.size);
    distinctDeps.forEach((dep) => {
      const followers = successors.get(dep);
      if (followers) {
        followers.push(node.id);
      } else {
        successors.set(dep, [node.id]);
      }
    });
  }

  const layers: string[][] = [];
  let placed = 0;
  let ready = nodes.filter((node) => pending.get(node.id) === 0).map((node) => node.id);

  while (ready.length > 0) {
    const layer = [...ready].sort();
    layers.push(layer);
    placed += layer.length;

    ready = [];
    for (const doneId of layer) {
      for (const followerId of successors.get(doneId) ?? []) {
        const left = pending.get(followerId)! - 1;
        pending.set(followerId, left);
        // A follower becomes ready exactly when its last predecessor resolves.
        if (left === 0) ready.push(followerId);
      }
    }
  }

  if (placed < nodes.length) {
    // Anything still waiting on a predecessor is in, or downstream of, a cycle.
    const stuck = nodes
      .filter((node) => pending.get(node.id)! > 0)
      .map((node) => node.id)
      .sort();
    return {
      ok: false,
      reason: 'cycle',
      offendingIds: stuck,
      message: `The sub-issue blocking relations form a cycle (involving: ${stuck.join(', ')}), so there is no valid order to start them. Remove the circular \`blocked by\` relation and re-apply the trigger.`,
    };
  }

  return { ok: true, layers };
}

/**
 * Convenience: the flat topological order (roots first).
 *
 * @param nodes Graph nodes with their ``depends_on`` edges.
 * @returns The ids in a valid topological order.
 * @throws Error when ``validateDag`` rejects the graph, so a caller cannot
 *         accidentally order an invalid graph.
 */
export function topologicalOrder(nodes: readonly DagNode[]): readonly string[] {
  const verdict = validateDag(nodes);
  if (verdict.ok) return verdict.layers.flat();
  throw new Error(`Cannot order an invalid dependency graph: ${verdict.reason}`);
}
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * Orchestration discovery composer (issue #247, Mode A — PR A2). + * + * Ties together the three A2 primitives in one decision function the + * webhook processor calls when a parent issue is labeled: + * + * fetchSubIssueGraph → validateDag → seedOrchestration + * + * and returns a single discriminated outcome the caller acts on: + * + * - ``single_task`` — the issue has no sub-issues; the caller should + * fall through to today's one-issue→one-task path (NOT an error). + * - ``seeded`` — a valid DAG was persisted; the reconciler (A3) + * will release children. Carries the orchestration id + initial + * ready (root) set so the caller / A3 can start them. + * - ``extended`` — the orchestration was already seeded; sub-issues + * added to the epic since the first seed were appended. Carries the + * added node ids + which of them are immediately releasable. + * - ``rejected`` — the graph is invalid (cycle / dangling / dup). + * Carries a user-facing message for the terminal Linear comment; + * nothing is persisted. + * - ``error`` — transient failure reaching Linear or persisting the + * graph; the caller surfaces a retryable message and does NOT fall + * back to a single task (that would silently drop the epic structure). + * + * The DAG validation + persistence are pure/injected, so this composer + * is fully unit-testable with a mock fetch + mock ddb. 
+ */ + +import type { DynamoDBDocumentClient } from '@aws-sdk/lib-dynamodb'; +import type { FetchSubIssueGraphOptions } from './linear-subissue-fetch'; +import { logger } from './logger'; +import { validateDag } from './orchestration-dag'; +import { + linearGraphSource, + type OrchestrationGraphSource, +} from './orchestration-graph-source'; +import { withIntegrationNode } from './orchestration-integration-node'; +import { deriveOrchestrationId, extendOrchestration, seedOrchestration, type OrchestrationReleaseContext } from './orchestration-store'; + +export interface DiscoverOrchestrationParams { + readonly ddb: DynamoDBDocumentClient; + readonly tableName: string; + /** + * Resolved per-workspace OAuth access token (from resolveLinearOauthToken). + * Used to build the default Linear graph source when ``graphSource`` is + * not supplied. Ignored when ``graphSource`` is given. + */ + readonly accessToken: string; + readonly parentLinearIssueId: string; + readonly linearWorkspaceId: string; + readonly repo: string; + /** ISO timestamp injected for testability. */ + readonly now: string; + /** Optional TTL epoch seconds for the persisted rows. */ + readonly ttl?: number; + /** Release context stamped on the meta row for the reconciler. */ + readonly releaseContext: OrchestrationReleaseContext; + /** Test seam for the (default) Linear fetch. Ignored when ``graphSource`` is set. */ + readonly fetchOptions?: FetchSubIssueGraphOptions; + /** + * #247/#299 trigger-agnostic seam. The producer of the orchestration DAG. + * When omitted, defaults to {@link linearGraphSource} over + * ``accessToken`` + ``parentLinearIssueId`` (Mode A behaviour). A + * declarative caller (CLI/API) or #299 Mode B planner passes its own + * source so the SAME validate→seed→reconcile→rollup pipeline runs over a + * graph produced any way. 
+ */ + readonly graphSource?: OrchestrationGraphSource; +} + +export type DiscoverOrchestrationResult = + | { readonly kind: 'single_task'; readonly parentLinearIssueId: string } + | { + readonly kind: 'seeded'; + readonly orchestrationId: string; + readonly childCount: number; + readonly rootSubIssueIds: readonly string[]; + readonly alreadyExisted: boolean; + } + | { + // An already-seeded orchestration that was EXTENDED with sub-issues + // added to the epic after the first seed (orchestration-extend). Carries + // the new node ids + which are immediately releasable. + readonly kind: 'extended'; + readonly orchestrationId: string; + readonly addedSubIssueIds: readonly string[]; + readonly releasableSubIssueIds: readonly string[]; + } + | { readonly kind: 'rejected'; readonly reason: string; readonly message: string } + | { readonly kind: 'error'; readonly message: string }; + +/** + * Discover, validate, and persist a parent issue's sub-issue DAG. + * Never throws — all failure modes are returned as discriminated + * results so the webhook processor can map each to the right + * user-facing behaviour. + */ +export async function discoverOrchestration( + params: DiscoverOrchestrationParams, +): Promise<DiscoverOrchestrationResult> { + const { ddb, tableName, accessToken, parentLinearIssueId, linearWorkspaceId, repo, now, ttl, releaseContext, fetchOptions, graphSource } = params; + + // ── 1. Produce the orchestration graph ─────────────────────────── + // Default to the Linear native source (Mode A); a declarative / planner + // caller (#299) supplies its own graphSource. The downstream pipeline is + // identical regardless of where the graph came from. + const source = graphSource ?? 
linearGraphSource(accessToken, parentLinearIssueId, fetchOptions); + // A caller-supplied graphSource may reject (or throw) instead of returning + // a ``kind: 'error'`` result; catch it here so the documented "never + // throws" contract holds for any source. + let fetched; + try { + fetched = await source(); + } catch (err) { + logger.error('Orchestration graph source threw', { + parent_linear_issue_id: parentLinearIssueId, + error: err instanceof Error ? err.message : String(err), + }); + return { kind: 'error', message: 'Could not read the orchestration graph. Please re-apply the trigger.' }; + } + if (fetched.kind === 'error') { + return { kind: 'error', message: fetched.message }; + } + if (fetched.kind === 'no_children') { + logger.info('No orchestration graph — falling back to single task', { + parent_linear_issue_id: parentLinearIssueId, + }); + return { kind: 'single_task', parentLinearIssueId }; + } + + // ── 2. Validate the DAG (cycle / dangling / duplicate rejection) ─ + const validation = validateDag(fetched.children); + if (!validation.ok) { + logger.warn('Orchestration DAG rejected', { + parent_linear_issue_id: parentLinearIssueId, + reason: validation.reason, + offending_ids: validation.offendingIds, + }); + return { kind: 'rejected', reason: validation.reason, message: validation.message }; + } + + // ── 2b. #16: auto-integration node for fan-out. If the validated DAG has + // >1 leaf, append a synthetic node depending on all leaves so a pure + // fan-out still produces ONE combined result (the node is a diamond + // fan-in, reusing A4's merge). No-op for linear chains / explicit + // diamonds (≤1 leaf). The orchestration id is derived deterministically + // from the parent issue, so we can name the synthetic node before seeding. 
+ const orchestrationId = deriveOrchestrationId(parentLinearIssueId); + const augmented = withIntegrationNode(fetched.children, orchestrationId); + let childrenToSeed = augmented.nodes; + if (augmented.added) { + // Re-validate defensively — appending a fan-in over leaves cannot + // introduce a cycle/dangle/dup, but seeding an invalid graph would be + // worse than skipping the synthetic node, so fail-safe to the + // un-augmented graph if it ever does. 
+ const reValidation = validateDag(childrenToSeed); + if (!reValidation.ok) { + logger.error('Integration node produced an invalid DAG — seeding without it', { + parent_linear_issue_id: parentLinearIssueId, + reason: reValidation.reason, + }); + childrenToSeed = fetched.children; + } else { + logger.info('Orchestration fan-out detected — added integration node', { + parent_linear_issue_id: parentLinearIssueId, + orchestration_id: orchestrationId, + // the synthetic node is last; its predecessors are the leaves it merges + leaf_count: childrenToSeed[childrenToSeed.length - 1].depends_on.length, + }); + } + } + + // ── 3. Persist (idempotent on replay) ──────────────────────────── + let seedResult; + try { + seedResult = await seedOrchestration({ + ddb, + tableName, + parentLinearIssueId, + linearWorkspaceId, + repo, + children: childrenToSeed, + now, + releaseContext, + ...(ttl !== undefined && { ttl }), + }); + } catch (err) { + logger.error('Failed to persist orchestration graph', { + parent_linear_issue_id: parentLinearIssueId, + error: err instanceof Error ? err.message : String(err), + }); + return { kind: 'error', message: 'Could not persist the orchestration graph. Please re-apply the trigger.' }; + } + + // ── 3b. Already-seeded → EXTEND with any sub-issues added since the first + // seed (orchestration-extend). seedOrchestration is frozen-at-first-seed, so + // a re-trigger of an existing epic lands here; diff the current graph against + // the persisted children and add genuinely-new nodes. A re-trigger with no + // new nodes is a clean no-op (addedSubIssueIds empty). 
+ if (seedResult.alreadyExisted) { + let extendResult; + try { + extendResult = await extendOrchestration({ + ddb, + tableName, + parentLinearIssueId, + linearWorkspaceId, + repo, + graph: childrenToSeed, + now, + ...(ttl !== undefined && { ttl }), + }); + } catch (err) { + logger.error('Failed to extend orchestration graph', { + parent_linear_issue_id: parentLinearIssueId, + error: err instanceof Error ? err.message : String(err), + }); + return { kind: 'error', message: 'Could not extend the orchestration graph. Please re-apply the trigger.' }; + } + if (extendResult.rejected) { + return { kind: 'rejected', reason: extendResult.rejected.reason, message: extendResult.rejected.message }; + } + return { + kind: 'extended', + orchestrationId: extendResult.orchestrationId, + addedSubIssueIds: extendResult.addedSubIssueIds, + releasableSubIssueIds: extendResult.releasableSubIssueIds, + }; + } + + // Roots = layer 0 of the validated topological layering. The + // reconciler (A3) releases these first. + const rootSubIssueIds = validation.layers[0] ?? []; + + return { + kind: 'seeded', + orchestrationId: seedResult.orchestrationId, + childCount: childrenToSeed.length, + rootSubIssueIds, + alreadyExisted: seedResult.alreadyExisted, + }; +} diff --git a/cdk/src/handlers/shared/orchestration-epic-tip.ts b/cdk/src/handlers/shared/orchestration-epic-tip.ts new file mode 100644 index 00000000..ceb0a909 --- /dev/null +++ b/cdk/src/handlers/shared/orchestration-epic-tip.ts @@ -0,0 +1,94 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * Pure "epic tip" selection for #247 UX.4 — where a NEWLY-ADDED sub-issue with + * NO declared dependency should stack. + * + * The user's rule (confirmed 2026-06-16): a node added to an in-flight epic + * must NOT branch off bare ``main`` — it inherits the epic's accumulated, + * unmerged work by stacking on the epic's TIP (the most-recent leaf the rest + * of the graph already builds on). "Fall back to ``main`` only when the + * predecessor is genuinely merged (branch gone)" is handled downstream by the + * agent's runtime base-branch fetch fallback (``agent/src/repo.py`` — a base + * branch that no longer exists on origin degrades to a branch off default), + * so this layer only needs to NAME the tip; it never has to detect merge. + * + * The tip is the **leaf frontier**: nodes that nothing else depends on. If an + * integration node is on the frontier it alone is the tip; otherwise every + * real (non-integration) leaf is returned (``created_at`` is not consulted). + * This keeps the common "epic was a chain, add one more step" case a clean + * linear stack; a fan-out epic with multiple independent leaves yields a + * multi-predecessor (diamond) implicit dependency so the new node sees ALL + * of the accumulated work. + */ + +import { isIntegrationNode } from './orchestration-integration-node'; + +/** Minimal shape needed to compute the tip — a subset of OrchestrationChildRow. 
*/ +export interface TipCandidate { + readonly sub_issue_id: string; + readonly depends_on: readonly string[]; + readonly created_at: string; +} + +/** + * Resolve the implicit predecessor set for a new unconstrained node added to + * an existing epic. Returns the sub_issue_ids the new node should stack on / + * merge in (its synthetic ``depends_on``), or ``[]`` when the epic has no + * usable tip (e.g. empty epic — degrade to root/main). + * + * Algorithm: + * 1. Consider only the EXISTING nodes (the new node isn't in the graph yet). + * 2. The leaf frontier = nodes that appear in no other node's ``depends_on``. + * 3. If an INTEGRATION node exists, it already depends on every real leaf — + * it IS the single combined tip, so stack on it alone (avoids a redundant + * diamond that re-merges what integration already merged). + * 4. Otherwise return every real leaf. One leaf → a clean linear stack; many + * leaves → a diamond so the new node inherits all parallel branches. + * + * Pure + deterministic (ties broken by sub_issue_id); no I/O. + */ +export function resolveEpicTip(existing: readonly TipCandidate[]): string[] { + if (existing.length === 0) return []; + + // A node is depended-upon if it appears in any other node's depends_on. + const dependedUpon = new Set<string>(); + for (const node of existing) { + for (const dep of node.depends_on) dependedUpon.add(dep); + } + + const leaves = existing.filter((n) => !dependedUpon.has(n.sub_issue_id)); + if (leaves.length === 0) { + // Pathological (every node depended upon ⇒ a cycle, which the DAG + // validator rejects upstream). Degrade to root rather than throw. + return []; + } + + // An integration node already merges all real leaves — it is the combined + // tip. Stack on it alone. + const integration = leaves.find((n) => isIntegrationNode(n.sub_issue_id)); + if (integration) return [integration.sub_issue_id]; + + // Real leaves only (defensive — integration handled above). 
One → linear + // stack; many → diamond. Sorted for deterministic depends_on ordering. + return leaves + .filter((n) => !isIntegrationNode(n.sub_issue_id)) + .map((n) => n.sub_issue_id) + .sort(); +} diff --git a/cdk/src/handlers/shared/orchestration-graph-source.ts b/cdk/src/handlers/shared/orchestration-graph-source.ts new file mode 100644 index 00000000..81516672 --- /dev/null +++ b/cdk/src/handlers/shared/orchestration-graph-source.ts @@ -0,0 +1,102 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * Trigger-agnostic orchestration graph source (#247 / #299 seam). + * + * The #247 executor (validate → seed → reconcile → release → stack → + * rollup → parent lifecycle) is source-agnostic: once a DAG of + * ``{ id, depends_on, title? }`` nodes exists it doesn't care where the + * graph came from. What VARIES per trigger is only how the graph is + * *produced*. This module is that seam. + * + * "Sub-issues" is just one way to express a DAG. Three adapter tiers: + * + * 1. NATIVE graph — the tool already has the structure; the adapter + * READS it. 
Linear: parent → children + ``blocks`` relations + * ({@link linearGraphSource}, wrapping ``fetchSubIssueGraph``). A Jira + * adapter would map epic → stories + issue links the same way. + * + * 2. DECLARATIVE graph — the trigger has no native sub-issues, so the + * caller SUPPLIES the DAG. {@link declarativeGraphSource} takes a + * ready-made node list. This is the slot for: + * - CLI / API: a request body carrying tasks + ``depends_on`` edges. + * - #299 Mode B: a planner agent decomposes ONE task into a phased + * DAG and hands the nodes here — reusing the ENTIRE verified + * executor instead of reimplementing gating/stacking/rollup. + * + * 3. DELEGATE / single — a structureless trigger (e.g. a plain Slack + * message) either stays single-task or references a native epic by id + * (tier 1). No adapter needed here. + * + * A source is a zero-arg async thunk so the caller binds whatever inputs + * it needs (token + issue id for Linear; a node list for declarative) + * before handing ``discoverOrchestration`` a uniform interface. + */ + +import { fetchSubIssueGraph, type FetchSubIssueGraphOptions, type SubIssueNode } from './linear-subissue-fetch'; + +/** + * Channel-neutral graph result. Mirrors ``FetchSubIssueGraphResult`` but + * without Linear's ``parentIssueId`` — the discovery composer already + * holds the parent id separately. + * - ``ok`` — a non-empty DAG to validate + seed. + * - ``no_children`` — no graph; caller falls through to a single task. + * - ``error`` — transient failure; caller surfaces retryable, does + * NOT silently degrade to a single task (that would drop the structure). + */ +export type OrchestrationGraphResult = + | { readonly kind: 'ok'; readonly children: readonly SubIssueNode[] } + | { readonly kind: 'no_children' } + | { readonly kind: 'error'; readonly message: string }; + +/** A bound, zero-arg producer of an orchestration DAG. 
*/ +export type OrchestrationGraphSource = () => Promise<OrchestrationGraphResult>; + +/** + * Tier 1 — Linear native graph. Reads the parent issue's sub-issues + + * blocking relations via the existing ``fetchSubIssueGraph`` and maps the + * result to the channel-neutral shape. + */ +export function linearGraphSource( + accessToken: string, + parentIssueId: string, + fetchOptions?: FetchSubIssueGraphOptions, +): OrchestrationGraphSource { + return async () => { + const fetched = await fetchSubIssueGraph(accessToken, parentIssueId, fetchOptions); + if (fetched.kind === 'error') return { kind: 'error', message: fetched.message }; + if (fetched.kind === 'no_children') return { kind: 'no_children' }; + return { kind: 'ok', children: fetched.children }; + }; +} + +/** + * Tier 2 — declarative graph. The caller already has the node list (a + * CLI/API request, or a #299 planner's decomposition output). An empty + * list means "no graph" → single task. Never errors (the nodes are + * in-memory); DAG validity (cycles/dangling/dupes) is still enforced + * downstream by ``validateDag`` in the discovery composer. + */ +export function declarativeGraphSource(children: readonly SubIssueNode[]): OrchestrationGraphSource { + return async () => { + if (children.length === 0) return { kind: 'no_children' }; + return { kind: 'ok', children }; + }; +} diff --git a/cdk/src/handlers/shared/orchestration-integration-node.ts b/cdk/src/handlers/shared/orchestration-integration-node.ts new file mode 100644 index 00000000..a3dc62ee --- /dev/null +++ b/cdk/src/handlers/shared/orchestration-integration-node.ts @@ -0,0 +1,97 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * Auto-integration node for fan-out orchestrations (#247 #16). + * + * When a validated DAG has MORE THAN ONE leaf (a sub-issue with no + * successors), each leaf is an independent PR and nothing combines them — + * there is no single "see it all together" artifact. We append a synthetic + * integration node that depends on ALL leaves. Because it has multiple + * predecessors it is a diamond fan-in, so the existing A4 multi-predecessor + * path (``selectBaseBranch`` → ``_merge_predecessor_branch``) merges every + * leaf branch into the integration branch with no new merge code — its PR + * is the combined result. + * + * Pure (no I/O), so the leaf computation + node construction is unit-tested + * in isolation. The discovery composer calls this AFTER ``validateDag`` + * (it needs the validated node set to compute leaves) and BEFORE + * ``seedOrchestration``, re-validating the augmented graph. + * + * Cases: + * - 0–1 leaf (linear chain, or an explicit diamond fan-in): nothing added — + * a single leaf already IS the combined result. + * - >1 leaf (pure fan-out): one synthetic node added over all leaves. 
+ */ + +import type { SubIssueNode } from './linear-subissue-fetch'; + +/** + * Suffix marking a synthetic, platform-injected node (not a real Linear + * sub-issue). Uses ``_`` separators, NOT ``#``: the node's ``sub_issue_id`` + * flows into ``releaseChild``'s idempotency key (``${orch}_${sub}``), which + * createTaskCore validates against ``/^[a-zA-Z0-9_-]{1,128}$/`` — a ``#`` + * would 400 the child and it would never start (the meta-row ``#meta`` SK + * can use ``#`` safely only because it never becomes an idempotency key). + */ +export const INTEGRATION_NODE_SUFFIX = '__integration'; + +/** + * True if ``subIssueId`` is a platform-synthesized integration node rather + * than a real Linear sub-issue. Callers that would address a real Linear + * issue (reactions, MCP comments) can guard on this. + */ +export function isIntegrationNode(subIssueId: string): boolean { + return subIssueId.endsWith(INTEGRATION_NODE_SUFFIX); +} + +/** Node ids that no other node depends on — the DAG's leaves. */ +export function computeLeaves(nodes: readonly SubIssueNode[]): readonly string[] { + const hasSuccessor = new Set<string>(); + for (const n of nodes) { + for (const dep of n.depends_on) hasSuccessor.add(dep); + } + return nodes.map((n) => n.id).filter((id) => !hasSuccessor.has(id)); +} + +/** + * Given a validated DAG, return the node list to seed: unchanged when there + * is 0–1 leaf, or with a synthetic integration node appended (depending on + * all leaves) when there is more than one leaf. + * + * ``orchestrationId`` namespaces the synthetic node's id so it is unique + + * recognizable (``<orchestrationId>__integration``). The node carries no + * ``identifier`` (there is no Linear issue) and a fixed ``title`` so the + * status block / rollup render "Integration …" gracefully. 
+ */ +export function withIntegrationNode( + nodes: readonly SubIssueNode[], + orchestrationId: string, +): { readonly nodes: readonly SubIssueNode[]; readonly added: boolean } { + const leaves = computeLeaves(nodes); + if (leaves.length <= 1) { + return { nodes, added: false }; + } + const integration: SubIssueNode = { + id: `${orchestrationId}${INTEGRATION_NODE_SUFFIX}`, + depends_on: leaves, + title: 'Integration — combine sub-issue results', + }; + return { nodes: [...nodes, integration], added: true }; +} diff --git a/cdk/src/handlers/shared/orchestration-log-events.ts b/cdk/src/handlers/shared/orchestration-log-events.ts new file mode 100644 index 00000000..ec68a47c --- /dev/null +++ b/cdk/src/handlers/shared/orchestration-log-events.ts @@ -0,0 +1,96 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * Stable, machine-greppable log-event names for Linear orchestration + * (#247). Emitted as the ``event`` field on structured logs. + * + * WHY A CENTRAL MODULE: these strings are a TEST CONTRACT. 
End-to-end and + * automated dev tests assert on orchestration behavior by grepping + * CloudWatch for these exact event names (the orchestration plane is + * event-driven and has no synchronous API to assert against). Defining + * them in one place means: + * - a test references ``ORCH_LOG.childReleased``, not a copy-pasted + * string that silently drifts when a log line is reworded; + * - renaming an event is a single edit that the type system propagates; + * - this file IS the catalogue of "what to look for in the logs", + * which is exactly the long-term automated-testing question. + * + * Convention: ``orch.<phase>.<outcome>`` so a test can match a whole + * phase with a prefix (``orch.reconcile.*``) or an exact transition. + * Every emit site should also include the structured fields listed in the + * doc comment so log-based assertions can bind to ids, not just names. + */ +export const ORCH_LOG = { + // ── Discovery (webhook → seed) ────────────────────────────────── + /** A labeled parent had a valid sub-issue graph; rows seeded. + * Fields: orchestration_id, parent_linear_issue_id, child_count, root_count. */ + discoverySeeded: 'orch.discovery.seeded', + /** Parent had no sub-issues → fell back to a single task. + * Fields: parent_linear_issue_id. */ + discoverySingleTask: 'orch.discovery.single_task', + /** Graph rejected (cycle / dangling / dup) — no rows, terminal comment. + * Fields: parent_linear_issue_id, reason, offending_ids. */ + discoveryRejected: 'orch.discovery.rejected', + /** Transient Linear error reading sub-issues — terminal comment, no seed. + * Fields: parent_linear_issue_id, message. */ + discoveryError: 'orch.discovery.error', + + // ── Release (root + reconciler) ───────────────────────────────── + /** A child task was created (released). Fields: orchestration_id, + * sub_issue_id, child_task_id, base_branch, merge_branch_count, source + * ('root' | 'reconciler' | 'sweep'). 
*/ + childReleased: 'orch.child.released', + /** A release attempt's createTaskCore returned non-success. Fields: + * orchestration_id, sub_issue_id, status, response_body. */ + childReleaseFailed: 'orch.child.release_failed', + + // ── Reconcile (TaskTable stream → gating) ─────────────────────── + /** A child reached terminal-success; gating re-evaluated. Fields: + * orchestration_id, sub_issue_id, released_count. */ + reconcileSuccess: 'orch.reconcile.success', + /** A child failed/cancelled/timed-out or built-broken; dependents + * skipped. Fields: orchestration_id, sub_issue_id, skipped_ids. */ + reconcileFailurePropagated: 'orch.reconcile.failure_propagated', + + // ── Rollup (parent comment via this plane) ────────────────────── + /** A parent rollup comment was posted. Fields: orchestration_id, + * parent_linear_issue_id, rollup_kind ('progress' | 'complete' | + * 'partial_failure' | 'cancelled'). */ + rollupPosted: 'orch.rollup.posted', + /** Posting the parent rollup comment failed (best-effort). Fields: + * orchestration_id, parent_linear_issue_id, rollup_kind. */ + rollupFailed: 'orch.rollup.failed', + + // ── Completion / cancel ───────────────────────────────────────── + /** Every child reached a terminal orchestration state. Fields: + * orchestration_id, parent_linear_issue_id, succeeded, failed, skipped. */ + orchestrationComplete: 'orch.complete', + /** Parent cancel cascaded to non-terminal children. Fields: + * orchestration_id, parent_linear_issue_id, cancelled_count. */ + cancelCascaded: 'orch.cancel.cascaded', + + // ── Backstop (#303 scheduled sweep) ───────────────────────────── + /** The sweep recovered a child the live reconciler missed. Fields: + * orchestration_id, sub_issue_id, recovery ('lost_release' | + * 'lost_terminal'). 
*/ + sweepRecovered: 'orch.sweep.recovered', +} as const; + +export type OrchLogEvent = (typeof ORCH_LOG)[keyof typeof ORCH_LOG]; diff --git a/cdk/src/handlers/shared/orchestration-parent-comment.ts b/cdk/src/handlers/shared/orchestration-parent-comment.ts new file mode 100644 index 00000000..f5adf2cc --- /dev/null +++ b/cdk/src/handlers/shared/orchestration-parent-comment.ts @@ -0,0 +1,202 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * Pure logic for routing an ``@bgagent`` comment left on the PARENT epic to the + * sub-issue it's about (#247 UX.18). + * + * Background: the maturing epic panel lives on the parent epic, so a reviewer's + * natural instinct is to comment there ("@bgagent for the footer, change X"). + * But the parent epic has no PR of its own — only its sub-issues do — so the + * comment-trigger path can't iterate "the parent". Previously such a comment + * fell through to the standalone path, found no task for the parent issue, and + * was SILENTLY DROPPED (live-caught on ABCA-304). 
This module decides, from the + * instruction text + the orchestration's sub-issue rows, WHICH sub-issue the + * comment targets so the processor can iterate that sub-issue's PR. + * + * Pure (no I/O) so the matching is unit-tested in isolation; the processor does + * the Linear/DDB work (resolve PR, spawn the iteration task, ack). + */ + +import { isIntegrationNode } from './orchestration-integration-node'; + +/** Minimal view of a sub-issue row this matcher needs. */ +export interface ParentCommentNode { + readonly sub_issue_id: string; + readonly linear_identifier?: string; + readonly title?: string; + /** Only a STARTED child (has a task) can be iterated; the matcher reports it but the caller gates on a PR. */ + readonly child_task_id?: string; +} + +export interface ParentNodeMatch { + /** Sub-issues the instruction plausibly targets (excludes the synthetic integration node unless named). */ + readonly matches: readonly ParentCommentNode[]; + /** + * Why the caller can't act on exactly one node: + * - 'none' — no node referenced (generic comment like "@bgagent looks good") + * - 'ambiguous' — the text matched more than one node + * - null — exactly one match (caller iterates it) + */ + readonly reason: 'none' | 'ambiguous' | null; +} + +/** Lowercase, collapse whitespace, strip punctuation that breaks word matching. */ +function normalize(s: string): string { + return s.toLowerCase().replace(/[^a-z0-9\s-]/g, ' ').replace(/\s+/g, ' ').trim(); +} + +/** + * Title "noise" words that carry no routing signal — matching on them would + * make every comment hit every node. We only match a node by its title when a + * SIGNIFICANT (non-noise) word from the title appears in the instruction. 
+ */
+const TITLE_NOISE = new Set([
+  'add', 'a', 'an', 'the', 'to', 'of', 'for', 'and', 'or', 'with', 'new',
+  'page', 'section', 'site', 'wide', 'site-wide', 'update', 'change', 'fix',
+  'create', 'make', 'support', 'feature', 'this', 'that', 'can', 'you', 'please',
+]);
+
+/**
+ * Significant words of a node title: the normalized title split on spaces,
+ * keeping only words longer than two characters that are not in TITLE_NOISE.
+ * Shared by the matcher and the "did you mean" scorer so both agree on what
+ * counts as a routing keyword.
+ */
+function significantTitleWords(title: string): string[] {
+  return normalize(title)
+    .split(' ')
+    .filter((w) => w.length > 2 && !TITLE_NOISE.has(w));
+}
+
+/**
+ * Decide which sub-issue(s) an ``@bgagent`` instruction left on the parent epic
+ * is about.
+ *
+ * Matching, in priority order:
+ *   1. Linear identifier token (``ABCA-305``) — exact, case-insensitive. The
+ *      unambiguous way to target a node; if present it wins outright (a single
+ *      identifier → single match, even if a keyword also matched another node).
+ *   2. Significant title keyword — a non-noise word from a node's title that
+ *      appears in the instruction (``footer`` → "Add a site-wide footer"). All
+ *      nodes whose title contributes a matched keyword are collected.
+ *
+ * The synthetic integration node is excluded from title-keyword matching (its
+ * title "Integration — combine sub-issue results" is generic). It is matched
+ * only when the instruction contains the token "integration" or "combined".
+ * A node without a title never matches in step 2.
+ *
+ * @param instruction raw comment text (normalized internally)
+ * @param nodes the orchestration's sub-issue rows
+ * @returns the matched nodes plus ``reason``; ``reason`` is ``null`` only when
+ *   exactly one node matched, ``'ambiguous'`` for several, ``'none'`` for zero
+ *   (including an empty instruction).
+ */
+export function parseParentNodeReference(
+  instruction: string,
+  nodes: readonly ParentCommentNode[],
+): ParentNodeMatch {
+  const text = normalize(instruction);
+  if (!text) return { matches: [], reason: 'none' };
+  const tokens = new Set(text.split(' '));
+
+  // 1) Identifier match wins outright.
+  const byIdentifier = nodes.filter(
+    (n) => n.linear_identifier && tokens.has(n.linear_identifier.toLowerCase()),
+  );
+  if (byIdentifier.length === 1) return { matches: byIdentifier, reason: null };
+  if (byIdentifier.length > 1) return { matches: byIdentifier, reason: 'ambiguous' };
+
+  // 2) Significant-title-keyword match.
+  const mentionsIntegration = tokens.has('integration') || tokens.has('combined');
+  const byKeyword = nodes.filter((n) => {
+    if (!n.title) return false;
+    // The integration node is decided by the explicit mention alone. Falling
+    // through to the title-word test (as before) meant "combined" could never
+    // match, because the title contains "combine", not "combined".
+    if (isIntegrationNode(n.sub_issue_id)) return mentionsIntegration;
+    return significantTitleWords(n.title).some((w) => tokens.has(w));
+  });
+
+  if (byKeyword.length === 1) return { matches: byKeyword, reason: null };
+  if (byKeyword.length > 1) return { matches: byKeyword, reason: 'ambiguous' };
+  return { matches: [], reason: 'none' };
+}
+
+/**
+ * Best-effort "did you mean …?" suggestion for the disambiguation reply, used
+ * ONLY when {@link parseParentNodeReference} found no confident match. We never
+ * ACT on this (no silent iteration of a guess) — it's a hint in the reply so
+ * the human can confirm with one tap. Scores each real node by how many of its
+ * significant title words appear in the instruction; returns the single best
+ * scorer (the first one on a tie), or null when nothing overlaps at all. The
+ * synthetic integration node and title-less nodes are never suggested.
+ */
+export function suggestClosestNode(
+  instruction: string,
+  nodes: readonly ParentCommentNode[],
+): ParentCommentNode | null {
+  const tokens = new Set(normalize(instruction).split(' ').filter(Boolean));
+  if (tokens.size === 0) return null;
+  let best: ParentCommentNode | null = null;
+  let bestScore = 0;
+  for (const n of nodes) {
+    if (isIntegrationNode(n.sub_issue_id) || !n.title) continue;
+    const score = significantTitleWords(n.title).filter((w) => tokens.has(w)).length;
+    // Strictly greater: an earlier node keeps the lead on equal scores.
+    if (score > bestScore) {
+      bestScore = score;
+      best = n;
+    }
+  }
+  return bestScore > 0 ? best : null;
+}
+
+/** Human-readable label: "IDENT — title", else whichever of identifier/title/id exists. */
+function nodeLabel(n: ParentCommentNode): string {
+  if (n.linear_identifier) return n.title ? `${n.linear_identifier} — ${n.title}` : n.linear_identifier;
+  return n.title ?? n.sub_issue_id;
+}
+
+/**
+ * Render the "which sub-issue?" threaded reply posted on the parent epic when
+ * {@link parseParentNodeReference} can't pin exactly one node. NEVER auto-acts
+ * and NEVER auto-creates an issue (user's call, #247 UX.18): it (a) surfaces a
+ * best-effort "did you mean <X>?" suggestion when one overlaps, (b) lists the
+ * real sub-issues + how to target one, and (c) points at the "create a
+ * sub-issue for NEW work" path. So a parent comment is never silently dropped,
+ * but new work only ever begins when the human explicitly creates a sub-issue.
+ * Pure (string only).
+ *
+ * @param reason why no single node was chosen; selects the lead sentence
+ * @param nodes all sub-issue rows; the synthetic integration node is filtered out of the list
+ * @param suggestion best-effort closest node (from {@link suggestClosestNode}), or null
+ * @returns the reply body, lines joined with '\n'
+ */
+export function renderParentDisambiguationReply(
+  reason: 'none' | 'ambiguous',
+  nodes: readonly ParentCommentNode[],
+  suggestion?: ParentCommentNode | null,
+): string {
+  const real = nodes.filter((n) => !isIntegrationNode(n.sub_issue_id));
+  const lead = reason === 'ambiguous'
+    ? "That could apply to more than one sub-issue, so I didn't want to guess."
+    : "I couldn't tell which sub-issue that's about.";
+  const out: string[] = [`👋 ${lead}`, ''];
+  if (suggestion) {
+    out.push(
+      `Did you mean **${nodeLabel(suggestion)}**? If so, reply `
+        + `\`@bgagent ${suggestion.linear_identifier ?? 'that one'}: <what to change>\`.`,
+      '',
+    );
+  }
+  out.push(
+    'Otherwise, comment on the specific sub-issue, or name it here — e.g. '
+      + '`@bgagent ABCA-123: <what to change>`. The sub-issues are:',
+    '',
+    ...real.map((n) => `- ${nodeLabel(n)}`),
+    '',
+    "If it's **new work** (not a change to one of these), create a new sub-issue "
+      + 'under this epic and add the `abca` label — I\'ll fold it into the orchestration.',
+  );
+  return out.join('\n');
+}
diff --git a/cdk/src/handlers/shared/orchestration-reconcile.ts b/cdk/src/handlers/shared/orchestration-reconcile.ts
new file mode 100644
index 00000000..c0c4c74c
--- /dev/null
+++ b/cdk/src/handlers/shared/orchestration-reconcile.ts
@@ -0,0 +1,184 @@
+/**
+ * MIT No Attribution
+ *
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * Pure gating logic for the orchestration reconciler (issue #247, Mode A
+ * — PR A3). Given a child sub-issue that just reached a terminal state
+ * plus the current orchestration rows, decide:
+ *   - the new ``child_status`` for the terminal child,
+ *   - which blocked children become releasable (all predecessors
+ *     succeeded), and
+ *   - which children must be skipped (a predecessor failed → transitive
+ *     dependents never start).
+ *
+ * No I/O — the reconciler handler applies the returned plan to
+ * DynamoDB + ``createTaskCore``. Keeping this pure makes the 8-case
+ * failure matrix from the design doc directly unit-testable.
+ */
+
+import type { ChildStatus } from './orchestration-store';
+
+/** Minimal view of an orchestration child row the gating logic needs. */
+export interface ReconcileChild {
+  /** Row identity; also the key other rows use in ``depends_on``. */
+  readonly sub_issue_id: string;
+  /** ``sub_issue_id``s of the predecessors this child waits on. */
+  readonly depends_on: readonly string[];
+  /** Currently persisted orchestration status of the child. */
+  readonly child_status: ChildStatus;
+}
+
+/** The terminal outcome of the child that triggered this reconcile. */
+export interface TerminalOutcome {
+  /** The child row the finished task belongs to. */
+  readonly sub_issue_id: string;
+  /** Task terminal status. */
+  readonly status: 'COMPLETED' | 'FAILED' | 'CANCELLED' | 'TIMED_OUT';
+  /**
+   * Whether the agent build passed. A child can be ``COMPLETED`` with
+   * ``build_passed === false`` (PR opened but build failed); we do NOT
+   * release dependents onto broken code. ``undefined`` is treated as
+   * "not known to have failed" → still a success for gating (matches
+   * the TaskRecord field being optional/absent on older records).
+   */
+  readonly build_passed?: boolean;
+}
+
+/** A single child-status mutation the handler must persist. */
+export interface StatusUpdate {
+  /** Row to update. */
+  readonly sub_issue_id: string;
+  /** Status to write for that row. */
+  readonly child_status: ChildStatus;
+}
+
+/** Result of one reconcile computation; contains no side effects itself. */
+export interface ReconcilePlan {
+  /** ``true`` when the terminal child counts as a success for gating. */
+  readonly terminalSucceeded: boolean;
+  /** Status writes to apply (includes the terminal child itself). */
+  readonly statusUpdates: readonly StatusUpdate[];
+  /** Sub-issue ids that are now releasable (create child task, mark released). */
+  readonly toRelease: readonly string[];
+  /** True when every child has reached a terminal orchestration state. */
+  readonly orchestrationComplete: boolean;
+}
+
+/** Orchestration-local terminal child statuses.
*/
+const TERMINAL_CHILD_STATUSES: ReadonlySet<ChildStatus> = new Set<ChildStatus>([
+  'succeeded',
+  'failed',
+  'skipped',
+]);
+
+/**
+ * A child counts as "done successfully" for releasing its dependents:
+ * the task COMPLETED and the build is not known to have failed
+ * (``build_passed`` of ``true`` or ``undefined``).
+ */
+function isSuccess(outcome: TerminalOutcome): boolean {
+  return outcome.status === 'COMPLETED' && outcome.build_passed !== false;
+}
+
+/**
+ * Compute the reconcile plan for one terminal child.
+ *
+ * @param outcome the child that just reached terminal state.
+ * @param children all rows for the orchestration (including the terminal
+ *   child). ``child_status`` reflects current persisted state.
+ * @returns the status writes to persist (terminal child first), the ids to
+ *   release, and whether every child is now in a terminal status.
+ *
+ * Gating rules (design §"Failure semantics"):
+ *   - Success: mark the child ``succeeded``. Any ``blocked`` child whose
+ *     predecessors are ALL succeeded (case 2: diamond needs all, not any)
+ *     becomes ``toRelease``.
+ *   - Failure/cancel/timeout, or COMPLETED-with-failed-build (case 1):
+ *     mark the child ``failed``, and transitively mark every dependent
+ *     (direct + indirect) ``skipped`` — they can never start because a
+ *     predecessor will never succeed.
+ */
+export function computeReconcilePlan(
+  outcome: TerminalOutcome,
+  children: readonly ReconcileChild[],
+): ReconcilePlan {
+  const succeeded = isSuccess(outcome);
+
+  // Working copy of statuses so we can reason about "all predecessors
+  // succeeded" against the post-update world.
+  const statusOf = new Map<string, ChildStatus>(
+    children.map((c) => [c.sub_issue_id, c.child_status]),
+  );
+
+  const updates: StatusUpdate[] = [];
+  const setStatus = (id: string, s: ChildStatus): void => {
+    statusOf.set(id, s);
+    updates.push({ sub_issue_id: id, child_status: s });
+  };
+
+  // 1. The terminal child itself.
+  setStatus(outcome.sub_issue_id, succeeded ? 'succeeded' : 'failed');
+
+  const toRelease: string[] = [];
+
+  if (succeeded) {
+    // 2. Release any blocked child whose predecessors are ALL succeeded.
+    for (const c of children) {
+      if (statusOf.get(c.sub_issue_id) !== 'blocked') continue;
+      const allSucceeded = c.depends_on.every((dep) => statusOf.get(dep) === 'succeeded');
+      if (allSucceeded) {
+        toRelease.push(c.sub_issue_id);
+        // Mark released so a sibling finishing in the same batch doesn't
+        // double-release it.
+        setStatus(c.sub_issue_id, 'released');
+      }
+    }
+  } else {
+    // 3. Transitively skip every dependent of the failed child.
+    //    BFS over the reverse-dependency graph.
+    const dependents = new Map<string, string[]>();
+    for (const c of children) {
+      for (const dep of c.depends_on) {
+        const list = dependents.get(dep) ?? [];
+        list.push(c.sub_issue_id);
+        dependents.set(dep, list);
+      }
+    }
+    // ``visited`` covers EVERY node we have queued, not only the skipped
+    // ones: a non-skippable dependent (e.g. already ``released``) must be
+    // walked through exactly once. Without this, diamonds re-walk shared
+    // sub-graphs and a cyclic ``depends_on`` (bad data) never terminates.
+    const visited = new Set<string>([outcome.sub_issue_id]);
+    const queue = [outcome.sub_issue_id];
+    while (queue.length > 0) {
+      const cur = queue.shift()!;
+      for (const dependentId of dependents.get(cur) ?? []) {
+        if (visited.has(dependentId)) continue;
+        visited.add(dependentId);
+        const curStatus = statusOf.get(dependentId);
+        // Only skip children that haven't already started/finished.
+        // A child already ``released``/``succeeded``/``failed`` is left
+        // as-is (its own terminal event reconciles it).
+        if (curStatus === 'blocked' || curStatus === 'ready') {
+          setStatus(dependentId, 'skipped');
+        }
+        // Keep walking even through a non-skipped node so its own
+        // not-yet-started dependents are still reached.
+        queue.push(dependentId);
+      }
+    }
+  }
+
+  // 4. Is the whole orchestration now terminal? Every child either was
+  //    already terminal or just transitioned to one. ``released`` is NOT
+  //    terminal (the released child's own task is still running).
+  const orchestrationComplete = children.every((c) => {
+    const s = statusOf.get(c.sub_issue_id)!;
+    return TERMINAL_CHILD_STATUSES.has(s);
+  });
+
+  return {
+    terminalSucceeded: succeeded,
+    statusUpdates: updates,
+    toRelease,
+    orchestrationComplete,
+  };
+}
diff --git a/cdk/src/handlers/shared/orchestration-release.ts b/cdk/src/handlers/shared/orchestration-release.ts
new file mode 100644
index 00000000..6c8b6488
--- /dev/null
+++ b/cdk/src/handlers/shared/orchestration-release.ts
@@ -0,0 +1,425 @@
+/**
+ * MIT No Attribution
+ *
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * Child-task release for orchestration (issue #247, Mode A — PR A3).
+ *
+ * The single path that turns an orchestration child row into a running
+ * ABCA task. Used in two places:
+ *   - seed time (the webhook processor / discovery): release the root
+ *     children (layer 0) so the graph starts.
+ *   - reconcile time (the TaskTable-stream reconciler): release children
+ *     whose predecessors just all succeeded.
+ *
+ * Each release:
+ *   1. createTaskCore(...)
with channelSource 'linear' + orchestration
+ *      metadata, idempotency-keyed on ``<orchestration_id>_<sub_issue_id>``
+ *      so a duplicate stream event / webhook replay never double-creates.
+ *   2. on 201 (created) or 200 (idempotent replay), conditionally flip the
+ *      row child_status blocked|ready → released and stamp child_task_id
+ *      (the GSI then resolves the task back to its row on the child's
+ *      terminal event).
+ *
+ * The conditional update (``child_status IN (blocked, ready)``) is the
+ * second idempotency guard: if two reconcile invocations race the same
+ * release, only one wins the status flip; createTaskCore's own
+ * idempotency key means the loser doesn't create a second task either.
+ */
+
+import {
+  type DynamoDBDocumentClient,
+  GetCommand,
+  UpdateCommand,
+} from '@aws-sdk/lib-dynamodb';
+import type { createTaskCore as CreateTaskCoreFn } from './create-task-core';
+import { logger } from './logger';
+import { selectBaseBranch } from './orchestration-base-branch';
+import { isIntegrationNode } from './orchestration-integration-node';
+import type {
+  OrchestrationChildRow,
+  OrchestrationReleaseContext,
+} from './orchestration-store';
+import type { ChannelSource } from './types';
+
+/**
+ * The trigger channel an orchestration runs under. Defaults to ``'linear'``
+ * everywhere (the only wired trigger today + back-compat for meta rows
+ * seeded before the field existed). #247 trigger-agnostic seam.
+ */
+const DEFAULT_ORCHESTRATION_CHANNEL: ChannelSource = 'linear';
+
+/**
+ * #331: read a user's free concurrency budget (``cap - active_count``) so a
+ * release pass throttles to it instead of over-releasing children that admission
+ * control would then hard-fail. Best-effort: on any read error returns the full
+ * ``cap`` (degrade to today's release-all behavior rather than stall the
+ * orchestration — admission control is still the backstop). Never negative.
+ *
+ * NOTE this is an INSTANTANEOUS snapshot — between this read and the child task's
+ * own admission attempt, other tasks may start. That race is fine: admission
+ * control remains the hard ceiling; throttling here just keeps the common case
+ * (a wide fan-out releasing into an empty/quiet user) from mass-failing. A child
+ * that still loses a tighter race is left ``ready`` and retried (it is not over the
+ * cap-as-guillotine path because the throttle keeps the batch small).
+ *
+ * @param ddb document client used for the single GetItem
+ * @param concurrencyTableName table keyed by ``user_id`` holding ``active_count``
+ * @param userId the user whose budget is read
+ * @param maxConcurrent the cap; also the value returned when the read throws
+ * @returns ``max(0, maxConcurrent - active_count)``; a missing item or a
+ *   non-finite ``active_count`` counts as zero active tasks.
+ */
+export async function readConcurrencyBudget(
+  ddb: DynamoDBDocumentClient,
+  concurrencyTableName: string,
+  userId: string,
+  maxConcurrent: number,
+): Promise<number> {
+  try {
+    const res = await ddb.send(new GetCommand({
+      TableName: concurrencyTableName,
+      Key: { user_id: userId },
+      ProjectionExpression: 'active_count',
+    }));
+    const active = Number(res.Item?.active_count ?? 0);
+    return Math.max(0, maxConcurrent - (Number.isFinite(active) ? active : 0));
+  } catch (err) {
+    logger.warn('Concurrency-budget read failed — releasing without throttle (admission still gates)', {
+      user_id: userId,
+      error: err instanceof Error ? err.message : String(err),
+    });
+    return maxConcurrent;
+  }
+}
+
+export interface ReleaseChildParams {
+  readonly ddb: DynamoDBDocumentClient;
+  readonly tableName: string;
+  /** The orchestration child row to release. */
+  readonly row: OrchestrationChildRow;
+  /** Platform user the child task is attributed to (parent's submitter). */
+  readonly platformUserId: string;
+  /** Linear OAuth secret ARN + slug for the agent's outbound Linear MCP. */
+  readonly linearOauthSecretArn?: string;
+  readonly linearWorkspaceSlug?: string;
+  readonly linearProjectId?: string;
+  /** The base branch this child stacks on (#247 A4). Absent → root (off main). */
+  readonly baseBranch?: string;
+  /**
+   * Predecessor branches to merge into the child's branch before work
+   * (#247 A4 diamond case). Absent/empty for root + linear children.
+   */
+  readonly mergeBranches?: readonly string[];
+  /** Injected createTaskCore (real handler in prod, mock in tests). */
+  readonly createTaskCore: typeof CreateTaskCoreFn;
+  /** ISO timestamp (injected for testability). */
+  readonly now: string;
+  /**
+   * Trigger channel the child task is created under. Defaults to ``'linear'``.
+   * Threaded from the orchestration's release context so a non-Linear trigger
+   * attributes its children to the right plane. #247 trigger-agnostic seam.
+   */
+  readonly channelSource?: ChannelSource;
+}
+
+export type ReleaseChildResult =
+  | { readonly kind: 'released'; readonly taskId: string }
+  | { readonly kind: 'create_failed'; readonly statusCode: number; readonly body: string }
+  | { readonly kind: 'already_released' }
+  | { readonly kind: 'error'; readonly message: string };
+
+/** Build the child task description from the sub-issue's identifier/title. */
+function buildChildDescription(row: OrchestrationChildRow): string {
+  // #16: the synthetic integration node has no real sub-issue / feature
+  // work — its job is to merge all leaf branches (already merged into its
+  // branch by repo.py's predecessor-merge) into one combined result. Give
+  // the agent a merge-focused instruction rather than a feature prompt.
+  if (isIntegrationNode(row.sub_issue_id)) {
+    return [
+      'Integrate the completed sub-issue branches into one combined result.',
+      '',
+      "All predecessor sub-issue branches have already been merged into this task's",
+      'branch before you started. Your job:',
+      '- Resolve any merge conflicts left in the working tree.',
+      '- Ensure the combined result builds and existing tests pass (run the build/tests).',
+      '- Do NOT add new features — this is an integration/merge task only.',
+      '- Open a PR with the combined result so the epic has a single reviewable artifact.',
+    ].join('\n');
+  }
+  const parts: string[] = [];
+  if (row.linear_identifier && row.title) {
+    parts.push(`${row.linear_identifier}: ${row.title}`);
+  } else if (row.title) {
+    parts.push(row.title);
+  } else if (row.linear_identifier) {
+    parts.push(row.linear_identifier);
+  }
+  return parts.join('\n') || `Linear sub-issue ${row.sub_issue_id}`;
+}
+
+/**
+ * Release one orchestration child as an ABCA task. Idempotent: a
+ * duplicate call (stream redelivery, racing reconcile) does not create a
+ * second task, and the row flip to ``released`` is conditional.
+ *
+ * @returns
+ *   - ``released``         — task exists and this call won the row flip
+ *   - ``already_released`` — task exists but another caller flipped the row first
+ *   - ``create_failed``    — createTaskCore answered with a status other than 200/201
+ *   - ``error``            — createTaskCore threw, the success body carried no
+ *                            task id, or the row update failed for a reason
+ *                            other than the condition check
+ *   Never throws for those cases; the row is only written on the ``released`` path.
+ */
+export async function releaseChild(params: ReleaseChildParams): Promise<ReleaseChildResult> {
+  const {
+    ddb, tableName, row, platformUserId, baseBranch, mergeBranches, createTaskCore, now,
+  } = params;
+  const channelSource = params.channelSource ?? DEFAULT_ORCHESTRATION_CHANNEL;
+
+  const channelMetadata: Record<string, string> = {
+    linear_workspace_id: row.linear_workspace_id,
+    orchestration_id: row.orchestration_id,
+    // The reconciler maps the terminal task back via this (real or synthetic) id.
+    orchestration_sub_issue_id: row.sub_issue_id,
+    parent_linear_issue_id: row.parent_linear_issue_id,
+  };
+  // #16: only set linear_issue_id (the agent's reaction/comment target) for a
+  // REAL Linear sub-issue. A synthetic integration node has no Linear issue —
+  // passing its id would make the agent's reactionCreate 4xx. Omitting it lets
+  // the agent skip reactions cleanly.
+  if (!isIntegrationNode(row.sub_issue_id)) {
+    channelMetadata.linear_issue_id = row.sub_issue_id;
+  }
+  if (row.linear_identifier) channelMetadata.linear_issue_identifier = row.linear_identifier;
+  if (params.linearProjectId) channelMetadata.linear_project_id = params.linearProjectId;
+  if (params.linearOauthSecretArn) channelMetadata.linear_oauth_secret_arn = params.linearOauthSecretArn;
+  if (params.linearWorkspaceSlug) channelMetadata.linear_workspace_slug = params.linearWorkspaceSlug;
+  // #247 A4: stacked base branch + (diamond) predecessor merge-list. The
+  // orchestrator reads these to set the agent payload's base_branch +
+  // merge_branches. Absent for roots (agent branches off main as today).
+  if (baseBranch) channelMetadata.orchestration_base_branch = baseBranch;
+  if (mergeBranches && mergeBranches.length > 0) {
+    channelMetadata.orchestration_merge_branches = JSON.stringify(mergeBranches);
+  }
+
+  // Deterministic idempotency key: same child never creates two tasks.
+  // Separator is '_' (NOT '#') because createTaskCore validates the key
+  // against /^[a-zA-Z0-9_-]{1,128}$/ — a '#' is rejected with a 400 and
+  // the child silently never starts. orchestration_id (orch_<32hex>) +
+  // '_' + sub_issue_id (a UUID, all hyphens) stays within 128 chars and
+  // inside the allowed charset.
+  const idempotencyKey = `${row.orchestration_id}_${row.sub_issue_id}`;
+
+  let result;
+  try {
+    result = await createTaskCore(
+      {
+        repo: row.repo,
+        task_description: buildChildDescription(row),
+      },
+      {
+        userId: platformUserId,
+        channelSource,
+        channelMetadata,
+        idempotencyKey,
+      },
+      // requestId — reuse the idempotency key for trace correlation.
+      idempotencyKey,
+    );
+  } catch (err) {
+    logger.error('Orchestration child createTaskCore threw', {
+      orchestration_id: row.orchestration_id,
+      sub_issue_id: row.sub_issue_id,
+      error: err instanceof Error ? err.message : String(err),
+    });
+    return { kind: 'error', message: err instanceof Error ? err.message : String(err) };
+  }
+
+  // 201 = created; 200 = idempotent replay (task already existed). Both
+  // mean "a task exists for this child" — treat alike.
+  if (result.statusCode !== 201 && result.statusCode !== 200) {
+    // Log the RESPONSE BODY, not just the status — a bare "status:400"
+    // forces log-archaeology to find the cause (e.g. a rejected
+    // idempotency key, an un-onboarded repo, a guardrail block). The
+    // body carries the user-readable error message and code.
+    logger.warn('Orchestration child task creation returned non-success', {
+      orchestration_id: row.orchestration_id,
+      sub_issue_id: row.sub_issue_id,
+      repo: row.repo,
+      status: result.statusCode,
+      response_body: result.body,
+      idempotency_key: idempotencyKey,
+    });
+    return { kind: 'create_failed', statusCode: result.statusCode, body: result.body };
+  }
+
+  const { taskId, branchName } = extractTaskIdAndBranch(result.body);
+
+  // Extraction is best-effort and yields '' when the body is unparseable or
+  // has no task_id. Never stamp that onto the row: a ``released`` child with
+  // an empty child_task_id cannot be mapped back from its terminal task, so
+  // it would stay ``released`` forever. Leave the row untouched and surface
+  // an error; the idempotency key makes a later retry safe.
+  if (!taskId) {
+    logger.error('Orchestration child task response carried no task_id', {
+      orchestration_id: row.orchestration_id,
+      sub_issue_id: row.sub_issue_id,
+      status: result.statusCode,
+      response_body: result.body,
+      idempotency_key: idempotencyKey,
+    });
+    return { kind: 'error', message: 'createTaskCore succeeded but returned no task_id' };
+  }
+
+  // Flip the row to released, conditionally — only from a not-yet-started
+  // state. A racing release loses here (ConditionalCheckFailed) and
+  // returns already_released; createTaskCore's idempotency key means the
+  // loser created no second task.
+  //
+  // #247 A4: also persist the child's branch_name so a DEPENDENT child's
+  // release can stack on / merge it (selectBaseBranch reads predecessor
+  // branch names off these rows).
+  try {
+    await ddb.send(new UpdateCommand({
+      TableName: tableName,
+      Key: { orchestration_id: row.orchestration_id, sub_issue_id: row.sub_issue_id },
+      UpdateExpression:
+        'SET child_status = :released, child_task_id = :tid, child_branch_name = :bn, updated_at = :now',
+      ConditionExpression: 'child_status IN (:blocked, :ready)',
+      ExpressionAttributeValues: {
+        ':released': 'released',
+        ':tid': taskId,
+        ':bn': branchName,
+        ':now': now,
+        ':blocked': 'blocked',
+        ':ready': 'ready',
+      },
+    }));
+  } catch (err) {
+    if (isConditionalCheckFailed(err)) {
+      logger.info('Orchestration child already released (idempotent race)', {
+        orchestration_id: row.orchestration_id,
+        sub_issue_id: row.sub_issue_id,
+      });
+      return { kind: 'already_released' };
+    }
+    logger.error('Failed to mark orchestration child released', {
+      orchestration_id: row.orchestration_id,
+      sub_issue_id: row.sub_issue_id,
+      error: err instanceof Error ? err.message : String(err),
+    });
+    return { kind: 'error', message: err instanceof Error ? err.message : String(err) };
+  }
+
+  logger.info('Orchestration child released', {
+    orchestration_id: row.orchestration_id,
+    sub_issue_id: row.sub_issue_id,
+    task_id: taskId,
+    base_branch: baseBranch ?? 'main',
+  });
+  return { kind: 'released', taskId };
+}
+
+/**
+ * Release a batch of child rows (the ``ready`` ones), using a shared
+ * release context (from the meta row). Used both at seed time (release
+ * roots) and by the reconciler (release newly-unblocked dependents).
+ *
+ * Each child is released independently; one failure does not abort the
+ * rest (a transient create failure for child A shouldn't strand B). The
+ * caller logs/handles per-child results — a ``create_failed`` row stays
+ * ``ready`` and is retried on the next reconcile pass.
+ */
+export async function releaseReadyChildren(
+  ddb: DynamoDBDocumentClient,
+  tableName: string,
+  rows: readonly OrchestrationChildRow[],
+  releaseContext: OrchestrationReleaseContext,
+  createTaskCore: typeof CreateTaskCoreFn,
+  now: string,
+  /**
+   * #247 A4: the complete child set, not merely the rows being released, so
+   * that each child's base branch can be derived from the
+   * ``child_branch_name`` persisted on its predecessors. Falls back to
+   * ``rows`` when omitted (sufficient for root releases, which have no
+   * predecessors).
+   */
+  allChildren?: readonly OrchestrationChildRow[],
+  /** Repo default branch handed to base-branch selection. Defaults to 'main'. */
+  defaultBranch = 'main',
+  /**
+   * #331: upper bound on releases for this pass (the caller's free
+   * concurrency budget). ``undefined`` releases every ``ready`` row in
+   * input order; a number releases at most that many, chosen in
+   * ``sub_issue_id`` order, and leaves the remainder ``ready`` for a later
+   * pass. Zero or negative releases nothing.
+   */
+  maxToRelease?: number,
+): Promise<readonly ReleaseChildResult[]> {
+  // Persisted branch name per sub-issue; rows without one are left out so
+  // a lookup miss falls back to '' below.
+  const branchOf = new Map<string, string>();
+  for (const child of allChildren ?? rows) {
+    if (child.child_branch_name) {
+      branchOf.set(child.sub_issue_id, child.child_branch_name);
+    }
+  }
+
+  // #331: pick the rows to release. Unthrottled → all ready rows as given.
+  // Throttled → sort a copy by sub_issue_id and take the first ``budget``.
+  const ready = rows.filter((r) => r.child_status === 'ready');
+  let releasable: readonly OrchestrationChildRow[] = ready;
+  if (maxToRelease !== undefined) {
+    const limit = Math.max(0, maxToRelease);
+    releasable = [...ready]
+      .sort((a, b) => a.sub_issue_id.localeCompare(b.sub_issue_id))
+      .slice(0, limit);
+    if (releasable.length < ready.length) {
+      logger.info('Orchestration release throttled to concurrency budget', {
+        ready: ready.length,
+        releasing: releasable.length,
+        budget: maxToRelease,
+      });
+    }
+  }
+
+  // Fields taken from the shared release context are the same for every
+  // child, so assemble them once. Optional fields are included only when
+  // defined; ``releaseChild`` defaults the channel to 'linear' when absent.
+  const contextParams = {
+    platformUserId: releaseContext.platform_user_id,
+    ...(releaseContext.linear_oauth_secret_arn !== undefined && {
+      linearOauthSecretArn: releaseContext.linear_oauth_secret_arn,
+    }),
+    ...(releaseContext.linear_workspace_slug !== undefined && {
+      linearWorkspaceSlug: releaseContext.linear_workspace_slug,
+    }),
+    ...(releaseContext.linear_project_id !== undefined && {
+      linearProjectId: releaseContext.linear_project_id,
+    }),
+    ...(releaseContext.channel_source !== undefined && {
+      channelSource: releaseContext.channel_source as ChannelSource,
+    }),
+  };
+
+  // Release sequentially; every outcome is recorded, none aborts the batch.
+  const results: ReleaseChildResult[] = [];
+  for (const row of releasable) {
+    const predecessors = row.depends_on.map((sub) => ({
+      sub_issue_id: sub,
+      branch_name: branchOf.get(sub) ?? '',
+    }));
+    const selection = selectBaseBranch({ predecessors, defaultBranch });
+    const outcome = await releaseChild({
+      ddb,
+      tableName,
+      row,
+      ...contextParams,
+      // A 'root' selection passes no base branch, and an empty merge list is
+      // omitted, so roots keep today's off-main behavior.
+      ...(selection.shape !== 'root' && { baseBranch: selection.base_branch }),
+      ...(selection.merge_branches.length > 0 && { mergeBranches: selection.merge_branches }),
+      createTaskCore,
+      now,
+    });
+    results.push(outcome);
+  }
+  return results;
+}
+
+/** Pull task_id + branch_name out of a createTaskCore success body (best-effort).
*/
+function extractTaskIdAndBranch(body: string): { taskId: string; branchName: string } {
+  type Fields = { task_id?: string; branch_name?: string };
+  let envelope: (Fields & { data?: Fields }) | null;
+  try {
+    envelope = JSON.parse(body) as (Fields & { data?: Fields }) | null;
+  } catch {
+    // Not JSON at all → nothing to extract.
+    return { taskId: '', branchName: '' };
+  }
+  // Prefer the nested ``data`` envelope, then the top level, then ''.
+  // A JSON ``null`` body also ends up with both fields empty.
+  const nested = envelope?.data;
+  const taskId = nested?.task_id ?? envelope?.task_id ?? '';
+  const branchName = nested?.branch_name ?? envelope?.branch_name ?? '';
+  return { taskId, branchName };
+}
+
+/** True when ``err`` is an object whose ``name`` is 'ConditionalCheckFailedException'. */
+function isConditionalCheckFailed(err: unknown): boolean {
+  if (typeof err !== 'object' || err === null) return false;
+  if (!('name' in err)) return false;
+  return (err as { name?: string }).name === 'ConditionalCheckFailedException';
+}
diff --git a/cdk/src/handlers/shared/orchestration-restack.ts b/cdk/src/handlers/shared/orchestration-restack.ts
new file mode 100644
index 00000000..bb8aeda5
--- /dev/null
+++ b/cdk/src/handlers/shared/orchestration-restack.ts
@@ -0,0 +1,194 @@
+/**
+ * MIT No Attribution
+ *
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * A6 re-stack planning (#305) — pure logic.
+ * + * When a predecessor sub-issue's PR branch changes after a dependent already + * merged it in, the dependent is STALE. This module computes WHICH dependents + * must be re-stacked and IN WHAT ORDER, given an orchestration snapshot and + * the sub-issue whose branch changed. No I/O — the handler does the GitHub / + * DynamoDB / task-creation side effects using this plan. + * + * Rules: + * - Re-stack only the changed node's TRANSITIVE dependents (everything + * downstream that built on it, directly or indirectly). + * - A dependent is re-stackable only if it has actually started — it carries + * a ``child_task_id`` + ``child_branch_name`` (released). A still-``blocked`` + * dependent will pick up the new predecessor code when it is first + * released (A4), so it needs no re-stack. + * - Order dependents in topological order (a dependent is re-stacked only + * after the predecessors it merges have been re-stacked), so each re-stack + * merges already-current predecessor branches. + * - The changed node itself is NOT re-stacked (its own branch is what changed). + */ + +import type { OrchestrationChildRow } from './orchestration-store'; + +/** A single dependent to re-stack, with the predecessor branches to merge in. */ +export interface RestackStep { + /** The dependent sub-issue row to re-stack. */ + readonly child: OrchestrationChildRow; + /** + * The branches to merge into the dependent's branch — its predecessors' + * CURRENT head branches (the changed node's branch + any sibling + * predecessor branches the dependent also depends on). The agent merges + * these into the existing dependent branch. + */ + readonly mergeBranches: readonly string[]; +} + +const RELEASED_OR_TERMINAL: ReadonlySet<string> = new Set([ + 'released', 'succeeded', 'failed', 'skipped', +]); + +/** + * Compute the ordered re-stack plan for ``changedSubIssueId``. + * + * @param children the orchestration's child rows (full snapshot, excl. 
/**
 * Compute the ordered re-stack plan for ``changedSubIssueId``.
 *
 * Walks the dependency graph downstream from the changed node, keeps the
 * dependents that already carry a branch and a released/terminal status,
 * orders them topologically, and pairs each with the predecessor branches it
 * should merge. Steps with nothing to merge are dropped.
 *
 * @param children the orchestration's child rows (full snapshot, excl. meta)
 * @param changedSubIssueId the sub-issue whose PR branch changed
 * @returns dependents to re-stack, in topological order; empty if none
 *   (the changed node has no started dependents, or isn't in the graph).
 */
export function planRestack(
  children: readonly OrchestrationChildRow[],
  changedSubIssueId: string,
): readonly RestackStep[] {
  const rowById = new Map<string, OrchestrationChildRow>();
  for (const row of children) rowById.set(row.sub_issue_id, row);
  if (!rowById.has(changedSubIssueId)) return [];

  // Reverse adjacency: dependentsOf[x] = ids of the rows that depend ON x.
  const dependentsOf = new Map<string, string[]>();
  for (const row of children) {
    for (const predecessor of row.depends_on) {
      const bucket = dependentsOf.get(predecessor);
      if (bucket === undefined) dependentsOf.set(predecessor, [row.sub_issue_id]);
      else bucket.push(row.sub_issue_id);
    }
  }

  // Breadth-first walk down the DAG. The frontier grows while it is being
  // iterated; the array iterator picks up the appended ids, giving the same
  // visiting order as a FIFO queue.
  const affected = new Set<string>();
  const frontier = [...(dependentsOf.get(changedSubIssueId) ?? [])];
  for (const id of frontier) {
    if (affected.has(id)) continue;
    affected.add(id);
    frontier.push(...(dependentsOf.get(id) ?? []));
  }

  // Only dependents that have STARTED own a branch to re-stack; a blocked
  // dependent sees the new code when it is first released.
  const started: OrchestrationChildRow[] = [];
  for (const id of affected) {
    const row = rowById.get(id)!;
    if (row.child_branch_name && RELEASED_OR_TERMINAL.has(row.child_status)) started.push(row);
  }
  const startedIds = new Set(started.map((row) => row.sub_issue_id));

  // Topological order, so each step merges predecessors that have already
  // been re-stacked. Merge list = the changed node's branch plus the
  // branches of in-scope predecessors.
  const plan: RestackStep[] = [];
  for (const child of topoOrder(started, startedIds)) {
    const mergeBranches: string[] = [];
    for (const dep of child.depends_on) {
      if (dep !== changedSubIssueId && !startedIds.has(dep)) continue;
      const branch = rowById.get(dep)?.child_branch_name;
      if (branch) mergeBranches.push(branch);
    }
    if (mergeBranches.length > 0) plan.push({ child, mergeBranches });
  }
  return plan;
}
/**
 * Plan the re-stack of a changed node's DIRECT (one-hop) started dependents.
 *
 * A direct dependent qualifies only if it lists ``changedSubIssueId`` in
 * ``depends_on``, carries a branch, and has a released/terminal status. Its
 * merge list is the branch of every predecessor that currently has one (the
 * changed node plus any siblings), so a diamond fan-in re-merges all arms.
 *
 * @param children full orchestration child snapshot (excl. meta)
 * @param changedSubIssueId the node whose branch just changed
 * @returns the direct dependents to re-stack now, sorted by id; empty if the
 *   node is unknown or none of its dependents have started.
 */
export function planDirectRestack(
  children: readonly OrchestrationChildRow[],
  changedSubIssueId: string,
): readonly RestackStep[] {
  const rowById = new Map(children.map((row) => [row.sub_issue_id, row] as const));
  if (!rowById.has(changedSubIssueId)) return [];

  const startedDependents = children
    .filter((row) =>
      row.depends_on.includes(changedSubIssueId)
      && Boolean(row.child_branch_name)
      && RELEASED_OR_TERMINAL.has(row.child_status))
    .sort((a, b) => a.sub_issue_id.localeCompare(b.sub_issue_id));

  const plan: RestackStep[] = [];
  for (const child of startedDependents) {
    const mergeBranches: string[] = [];
    for (const dep of child.depends_on) {
      const branch = rowById.get(dep)?.child_branch_name;
      if (branch) mergeBranches.push(branch);
    }
    if (mergeBranches.length > 0) plan.push({ child, mergeBranches });
  }
  return plan;
}

/**
 * Kahn's algorithm over the in-scope sub-graph (deterministic by id).
 *
 * In-degrees are counted over DISTINCT in-scope predecessors. Counting raw
 * ``depends_on`` entries would leave a row that lists the same predecessor
 * twice stuck above zero (it is decremented once per predecessor), silently
 * dropping it and everything downstream from the order.
 *
 * @param nodes the rows to order
 * @param inScope ids whose edges count; edges to ids outside are ignored
 * @returns ``nodes`` in dependency order, ties broken by ascending id. Rows
 *   that never become ready (e.g. part of a cycle) are omitted.
 */
function topoOrder(
  nodes: readonly OrchestrationChildRow[],
  inScope: ReadonlySet<string>,
): readonly OrchestrationChildRow[] {
  const byId = new Map(nodes.map((c) => [c.sub_issue_id, c] as const));
  const pending = new Map<string, number>();
  const dependentsOf = new Map<string, string[]>();
  for (const node of nodes) {
    const predecessors = new Set(node.depends_on.filter((d) => inScope.has(d)));
    pending.set(node.sub_issue_id, predecessors.size);
    for (const predecessor of predecessors) {
      const bucket = dependentsOf.get(predecessor);
      if (bucket === undefined) dependentsOf.set(predecessor, [node.sub_issue_id]);
      else bucket.push(node.sub_issue_id);
    }
  }

  const ready = nodes
    .map((c) => c.sub_issue_id)
    .filter((id) => (pending.get(id) ?? 0) === 0)
    .sort();
  const out: OrchestrationChildRow[] = [];
  while (ready.length > 0) {
    const id = ready.shift()!;
    out.push(byId.get(id)!);
    let released = false;
    for (const dependent of dependentsOf.get(id) ?? []) {
      const remaining = (pending.get(dependent) ?? 0) - 1;
      pending.set(dependent, remaining);
      if (remaining === 0) {
        ready.push(dependent);
        released = true;
      }
    }
    // Keep the ready list id-sorted so the output is deterministic.
    if (released) ready.sort();
  }
  return out;
}
+ */ + +import { + EMOJI_FAILURE, + EMOJI_SUCCESS, + type LinearFeedbackContext, + postIssueComment, + swapIssueReaction, + transitionIssueState, + upsertStatusComment, +} from './linear-feedback'; +import { logger } from './logger'; +import { isIntegrationNode } from './orchestration-integration-node'; +import { ORCH_LOG } from './orchestration-log-events'; +import type { OrchestrationChildRow } from './orchestration-store'; +import { encodeMarkdownUrl } from './screenshot-url'; +import type { ChannelSource } from './types'; + +/** Which rollup we're posting — drives the heading + emoji. */ +export type RollupKind = 'complete' | 'partial_failure' | 'cancelled'; + +export interface RollupChildView { + readonly sub_issue_id: string; + readonly linear_identifier?: string; + readonly title?: string; + readonly child_status: string; + readonly child_task_id?: string; + /** + * The child task's PR url, when one was opened (#323). Resolved by the + * reconciler from the TaskTable at rollup time (pr_url lands on the + * TaskRecord in a separate write from the status transition, so it is + * not persisted on the orchestration row). Rendered as a link on the + * child's line; the integration node's PR is additionally surfaced as a + * prominent callout (it is the fan-out's combined deliverable). + */ + readonly pr_url?: string; +} + +const STATUS_ICON: Record<string, string> = { + succeeded: '✅', + failed: '❌', + skipped: '⏭️', + released: '🔄', + ready: '🔄', + blocked: '⏳', +}; + +/** + * Render the parent rollup comment body (pure). Lists each child with its + * status, and a one-line summary. ``kind`` is derived by the caller from + * the terminal child statuses. 
/**
 * Render the parent rollup comment body (pure).
 *
 * Layout: heading, blank line, a one-line count summary, an optional
 * combined-PR callout, blank line, then one bullet per child sorted by
 * Linear identifier (falling back to the sub-issue id). ``kind`` picks the
 * heading and is supplied by the caller.
 */
export function renderRollupComment(
  kind: RollupKind,
  children: readonly RollupChildView[],
): string {
  let succeeded = 0;
  let failed = 0;
  let skipped = 0;
  for (const child of children) {
    switch (child.child_status) {
      case 'succeeded': succeeded += 1; break;
      case 'failed': failed += 1; break;
      case 'skipped': skipped += 1; break;
      default: break;
    }
  }

  let heading: string;
  switch (kind) {
    case 'complete':
      heading = '✅ **ABCA orchestration complete**';
      break;
    case 'cancelled':
      heading = '🛑 **ABCA orchestration cancelled**';
      break;
    default:
      heading = '⚠️ **ABCA orchestration finished with failures**';
  }

  const ordered = [...children].sort((a, b) =>
    (a.linear_identifier ?? a.sub_issue_id).localeCompare(b.linear_identifier ?? b.sub_issue_id));
  const lines = ordered.map((child) => {
    const icon = STATUS_ICON[child.child_status] ?? '•';
    let label: string;
    if (child.linear_identifier) {
      label = child.title ? `${child.linear_identifier}: ${child.title}` : child.linear_identifier;
    } else {
      label = child.title ?? child.sub_issue_id;
    }
    // #323: append the child's PR link when one was opened, so the parent
    // rollup is a single place to reach every sub-issue's PR.
    const pr = child.pr_url ? ` — [PR](${child.pr_url})` : '';
    return `- ${icon} ${label} — ${child.child_status}${pr}`;
  });

  const summary = `${succeeded} succeeded, ${failed} failed, ${skipped} skipped (of ${children.length}).`;

  // #323: the integration node's combined PR gets a prominent callout, but
  // only when that node actually opened a PR.
  const integration = children.find((c) => isIntegrationNode(c.sub_issue_id) && c.pr_url);
  const callout: string[] = [];
  if (integration) {
    callout.push('', `🔗 **Combined PR (all sub-issues merged):** [${integration.pr_url}](${integration.pr_url})`);
  }

  return [heading, '', summary, ...callout, '', ...lines].join('\n');
}

/**
 * Render the LIVE status block (pure) — the in-flight view of a running
 * orchestration (#247 UX, #3).
 *
 * Heading shows ``done/total`` where done counts succeeded, failed and
 * skipped children. Each child line uses the rollup icons, a human-friendly
 * status word (released/ready read as "running"), and the PR link when known.
 */
export function renderStatusBlock(children: readonly RollupChildView[]): string {
  const settled = new Set(['succeeded', 'failed', 'skipped']);
  let done = 0;
  for (const child of children) {
    if (settled.has(child.child_status)) done += 1;
  }

  const heading = `🔄 **ABCA orchestration** · ${done}/${children.length} complete`;

  const ordered = [...children].sort((a, b) =>
    (a.linear_identifier ?? a.sub_issue_id).localeCompare(b.linear_identifier ?? b.sub_issue_id));
  const lines = ordered.map((child) => {
    const icon = STATUS_ICON[child.child_status] ?? '•';
    let label: string;
    if (child.linear_identifier) {
      label = child.title ? `${child.linear_identifier}: ${child.title}` : child.linear_identifier;
    } else {
      label = child.title ?? child.sub_issue_id;
    }
    // Human-friendly status words for the in-flight view.
    let word: string;
    switch (child.child_status) {
      case 'released':
      case 'ready':
        word = 'running';
        break;
      default:
        word = child.child_status;
    }
    // #323: link the PR as soon as it is known, even mid-run.
    const pr = child.pr_url ? ` — [PR](${child.pr_url})` : '';
    return `- ${icon} ${label} — ${word}${pr}`;
  });

  return [heading, '', ...lines, '', '_Updates live as sub-issues progress._'].join('\n');
}

// ───────────────────────────────────────────────────────────────────────────
// #247 UX redesign: the single MATURING panel comment. Supersedes the
// separate renderStatusBlock + renderRollupComment — ONE comment, edited in
// place, that shows the full DAG and matures from in-progress → complete and
// back to in-progress on an extend/revision. See project_247_ux_redesign.
// ───────────────────────────────────────────────────────────────────────────

/** Per-sub-issue view for the maturing panel — adds the 'updating' context the rollup/block can't express. */
export interface EpicPanelRow {
  readonly sub_issue_id: string;
  readonly linear_identifier?: string;
  readonly title?: string;
  /** Persisted orchestration status: blocked | ready | released | succeeded | failed | skipped. */
  readonly child_status: string;
  /** The sub-issue's current PR url, when one exists yet (omitted for a not-yet-PR'd first run). */
  readonly pr_url?: string;
  /**
   * When this row is being re-built by an in-flight cascade/iteration (its
   * persisted status is still 'succeeded' but a new task is updating its PR),
   * the human-readable reason — e.g. `per ABCA-289's "button doesn't work"` or
   * `to include ABCA-289's change`. Present → the row renders as 🔄 updating.
   */
  readonly updatingReason?: string;
}

export interface EpicPanelParams {
  readonly rows: readonly EpicPanelRow[];
  /**
   * True when any sub-issue is non-terminal OR any row is mid-update
   * (cascade in flight). Drives the in-progress header even when every
   * persisted status is terminal (a revision re-opens the epic).
   */
  readonly inProgress: boolean;
  /** Combined/integration PR url (the fan-out's merged deliverable), when one exists. */
  readonly combinedPrUrl?: string;
  /** Combined preview screenshot url, embedded in the panel (auto-refreshes; no separate comment). */
  readonly combinedScreenshotUrl?: string;
  /**
   * Live deploy-preview URL the combined screenshot was captured from (#247
   * UX.17). When present, the embedded combined preview becomes a clickable
   * deep-link to the running combined site. Ignored unless
   * ``combinedScreenshotUrl`` is also set.
   */
  readonly combinedPreviewUrl?: string;
}
const PANEL_FOOTER = '_One live panel — updates in place as the epic progresses; no comment stream._';

/**
 * Truncate a quoted comment for the "updating per …" row, keeping it short.
 * Exported so the caller (reconciler) builds the ``updatingReason`` string —
 * e.g. ``per ABCA-289's "${truncateQuote(commentBody)}"``.
 *
 * Whitespace runs (including newlines) collapse to single spaces and the
 * result is trimmed. Text longer than ``max`` UTF-16 units is cut to
 * ``max - 1`` units plus an ellipsis.
 *
 * @param s raw comment text
 * @param max maximum length of the result in UTF-16 code units (default 40);
 *   values below 1 yield just the ellipsis for any non-empty text
 * @returns the single-line, possibly truncated text
 */
export function truncateQuote(s: string, max = 40): string {
  const oneLine = s.replace(/\s+/g, ' ').trim();
  if (oneLine.length <= max) return oneLine;

  // Clamp the kept prefix at zero: a non-positive ``max`` would otherwise turn
  // ``slice(0, max - 1)`` into a negative-index slice that returns almost the
  // whole string.
  let keep = Math.max(0, Math.floor(max) - 1);
  // Never cut between the halves of a surrogate pair (emoji etc.) — a lone
  // high surrogate renders as a replacement character in the comment.
  if (keep > 0) {
    const lastKept = oneLine.charCodeAt(keep - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) keep -= 1;
  }
  return `${oneLine.slice(0, keep)}…`;
}

/**
 * SHORT friendly name for a node, used where a node is NAMED inside prose
 * (e.g. the cascade reason "updating to include <X>'s change").
 *
 * The integration node always reads "the integration". Any other node uses
 * its Linear identifier, then its title, then the generic "a predecessor".
 */
export function cascadeNodeLabel(
  subIssueId: string,
  linearIdentifier?: string,
  title?: string,
): string {
  if (isIntegrationNode(subIssueId)) return 'the integration';
  return linearIdentifier ?? title ?? 'a predecessor';
}
/**
 * Friendly label for a row: 'Integration — combined result' for the synthetic
 * integration node, otherwise the Linear identifier (with title when present),
 * then the bare title, then the raw sub-issue id.
 */
function panelLabel(row: EpicPanelRow): string {
  if (isIntegrationNode(row.sub_issue_id)) return 'Integration — combined result';
  const { linear_identifier: identifier, title } = row;
  if (!identifier) return title ?? row.sub_issue_id;
  return title ? `${identifier}: ${title}` : identifier;
}

/**
 * Render the single maturing epic panel (pure).
 *
 * - Header: ``inProgress`` → `🔄 N/M complete`, where N counts terminal rows
 *   that are not mid-update; otherwise ⚠️ if any row failed/was skipped,
 *   else ✅.
 * - Rows are sorted by Linear identifier (falling back to sub-issue id). A row
 *   with ``updatingReason`` renders as 🔄 "updating <reason>" regardless of
 *   its persisted status. The PR link appears only when ``pr_url`` is set.
 * - Optional combined-PR callout and embedded combined screenshot follow the
 *   rows; the fixed footer closes the panel.
 */
export function renderEpicPanel(params: EpicPanelParams): string {
  const { rows, inProgress, combinedPrUrl, combinedScreenshotUrl, combinedPreviewUrl } = params;
  const isTerminal = (status: string) =>
    status === 'succeeded' || status === 'failed' || status === 'skipped';

  // An updating row is back in flight, so it does not count as done.
  let done = 0;
  let anyBad = false;
  for (const row of rows) {
    if (isTerminal(row.child_status) && !row.updatingReason) done += 1;
    if (row.child_status === 'failed' || row.child_status === 'skipped') anyBad = true;
  }

  const heading = inProgress
    ? `🔄 **ABCA orchestration** · ${done}/${rows.length} complete`
    : anyBad
      ? '⚠️ **ABCA orchestration finished with failures**'
      : '✅ **ABCA orchestration complete**';

  const ordered = [...rows].sort((a, b) =>
    (a.linear_identifier ?? a.sub_issue_id).localeCompare(b.linear_identifier ?? b.sub_issue_id));
  const lines = ordered.map((row) => {
    const label = panelLabel(row);
    const pr = row.pr_url ? ` — [PR](${row.pr_url})` : '';
    // A mid-update row: 🔄 + the reason, regardless of persisted status.
    if (row.updatingReason) {
      return `- 🔄 ${label} — updating ${row.updatingReason}${pr}`;
    }
    const icon = STATUS_ICON[row.child_status] ?? '•';
    const isRunning = row.child_status === 'released' || row.child_status === 'ready';
    const word = isRunning ? 'running' : row.child_status;
    return `- ${icon} ${label} — ${word}${pr}`;
  });

  const callout: string[] = [];
  if (combinedPrUrl) {
    callout.push('', `🔗 **Combined PR (all sub-issues merged):** [${combinedPrUrl}](${combinedPrUrl})`);
  }

  // #247 UX.17: with a known live preview URL the screenshot becomes a
  // clickable linked image plus a plain link. The preview URL is passed
  // through encodeMarkdownUrl before it is placed in the markdown links; the
  // screenshot URL is interpolated as-is.
  const shot: string[] = [];
  if (combinedScreenshotUrl) {
    shot.push('', '🖼️ **Combined preview**', '');
    if (combinedPreviewUrl) {
      const safePreview = encodeMarkdownUrl(combinedPreviewUrl);
      shot.push(
        `[![combined preview](${combinedScreenshotUrl})](${safePreview})`,
        '',
        `[Open the combined preview](${safePreview})`,
      );
    } else {
      shot.push(`![combined preview](${combinedScreenshotUrl})`);
    }
  }

  return [heading, '', ...lines, ...callout, ...shot, '', PANEL_FOOTER].join('\n');
}
/**
 * Decide the rollup kind from the (terminal) child statuses.
 *  - any failed/skipped → partial_failure
 *  - otherwise (including an empty list) → complete
 * ``cancelled`` is never derived here; the cancel path passes it explicitly.
 */
export function rollupKindFromChildren(children: readonly RollupChildView[]): RollupKind {
  for (const child of children) {
    if (child.child_status === 'failed' || child.child_status === 'skipped') {
      return 'partial_failure';
    }
  }
  return 'complete';
}

/**
 * Build the {@link EpicPanelRow}s for a snapshot's children (#247 UX.2). Pure.
 *
 * Copies id/status from each persisted row, carries ``linear_identifier`` and
 * ``title`` only when defined, and attaches ``pr_url`` / ``updatingReason``
 * only for ids present in the respective lookup map.
 *
 * @param children persisted child rows
 * @param prUrls ``sub_issue_id → pr_url``
 * @param updating ``sub_issue_id → updatingReason`` (rows a cascade is rebuilding)
 */
export function buildPanelRows(
  children: readonly OrchestrationChildRow[],
  prUrls: Readonly<Record<string, string>> = {},
  updating: Readonly<Record<string, string>> = {},
): EpicPanelRow[] {
  return children.map((child) => {
    const { sub_issue_id: id, linear_identifier: identifier, title, child_status: status } = child;
    const prUrl = prUrls[id];
    const reason = updating[id];
    // Optional keys are omitted entirely (not set to undefined) when absent.
    return {
      sub_issue_id: id,
      ...(identifier !== undefined ? { linear_identifier: identifier } : {}),
      ...(title !== undefined ? { title } : {}),
      child_status: status,
      ...(prUrl !== undefined ? { pr_url: prUrl } : {}),
      ...(reason !== undefined ? { updatingReason: reason } : {}),
    };
  });
}

export interface UpsertEpicPanelParams {
  readonly ctx: LinearFeedbackContext;
  readonly parentLinearIssueId: string;
  /** Existing panel comment id (status_comment_id). When absent, a fresh comment is posted + the id returned. */
  readonly statusCommentId?: string;
  readonly children: readonly OrchestrationChildRow[];
  readonly prUrls?: Readonly<Record<string, string>>;
  /** sub_issue_id → human reason, for rows a cascade is currently rebuilding. */
  readonly updating?: Readonly<Record<string, string>>;
  readonly combinedPrUrl?: string;
  readonly combinedScreenshotUrl?: string;
  /** Live preview-deploy URL the combined screenshot was captured from (#247 UX.17). */
  readonly combinedPreviewUrl?: string;
  /**
   * Whether the epic is in progress. When omitted, derived: in progress iff any
   * child is non-terminal OR any row has an updating reason. Pass explicitly to
   * force (e.g. a revision just started → still in progress even if all
   * persisted statuses are terminal).
   */
  readonly inProgress?: boolean;
  /**
   * When true AND the epic is settled, mirror the outcome on the PARENT issue:
   * advance state In Review (complete) / leave (failures) + swap reaction to
   * ✅/❌. When in progress, revert: state → In Progress + reaction → 👀. Only
   * for the Linear channel. Default true.
   */
  readonly mirrorParentState?: boolean;
  /** Trigger channel; non-'linear' makes this a logged no-op (other planes unwired). */
  readonly channelSource?: ChannelSource;
}
/**
 * Render + upsert the single maturing epic panel, and (optionally) mirror the
 * outcome on the parent issue's state + reaction (#247 UX.2).
 *
 * - A non-'linear' channel is a logged no-op returning null.
 * - ``inProgress`` defaults to "any row non-terminal or carrying an updating
 *   reason" when not passed.
 * - The comment is written through ``upsertStatusComment`` — with the existing
 *   ``statusCommentId`` when given, without one otherwise. If that call throws,
 *   a warning is logged and null is returned.
 * - Unless ``mirrorParentState`` is false, the parent is then mirrored:
 *   in progress → In Progress + 👀; clean completion → In Review + ✅;
 *   failures → state left alone, ❌ reaction. These calls are awaited one after
 *   another, and an error from them is logged, not rethrown.
 *
 * @returns the comment id reported by ``upsertStatusComment``, or null.
 */
export async function upsertEpicPanel(params: UpsertEpicPanelParams): Promise<string | null> {
  const { ctx, parentLinearIssueId } = params;
  const channelSource = params.channelSource ?? 'linear';
  if (channelSource !== 'linear') {
    logger.info('Epic panel skipped — channel has no wired plane', {
      parent_linear_issue_id: parentLinearIssueId, channel_source: channelSource,
    });
    return null;
  }

  const rows = buildPanelRows(params.children, params.prUrls ?? {}, params.updating ?? {});
  const isTerminal = (status: string) =>
    status === 'succeeded' || status === 'failed' || status === 'skipped';
  const inProgress = params.inProgress
    ?? rows.some((row) => !isTerminal(row.child_status) || row.updatingReason !== undefined);
  const body = renderEpicPanel({
    rows,
    inProgress,
    ...(params.combinedPrUrl !== undefined && { combinedPrUrl: params.combinedPrUrl }),
    ...(params.combinedScreenshotUrl !== undefined && { combinedScreenshotUrl: params.combinedScreenshotUrl }),
    ...(params.combinedPreviewUrl !== undefined && { combinedPreviewUrl: params.combinedPreviewUrl }),
  });

  let commentId: string | null;
  try {
    // With an id: edit in place. Without: the call creates the comment and
    // returns its id.
    commentId = params.statusCommentId
      ? await upsertStatusComment(ctx, parentLinearIssueId, body, params.statusCommentId)
      : await upsertStatusComment(ctx, parentLinearIssueId, body);
  } catch (err) {
    logger.warn('Epic panel upsert threw (non-fatal)', {
      parent_linear_issue_id: parentLinearIssueId,
      error: err instanceof Error ? err.message : String(err),
    });
    return null;
  }

  if (params.mirrorParentState === false) return commentId;

  // Mirror parent state + reaction. Sequential (each fans out into several
  // Linear graphql reads; firing together self-throttles the 5s timeout).
  const anyBad = rows.some((row) => row.child_status === 'failed' || row.child_status === 'skipped');
  try {
    if (inProgress) {
      // Re-opened (or running): back to In Progress + 👀.
      await transitionIssueState(ctx, parentLinearIssueId, 'started', ['In Progress']);
      await swapIssueReaction(ctx, parentLinearIssueId, 'eyes');
    } else if (anyBad) {
      // Finished with failures: leave state; ❌ reaction conveys it.
      await swapIssueReaction(ctx, parentLinearIssueId, EMOJI_FAILURE);
    } else {
      // Clean completion: work done, awaiting human merge → In Review + ✅.
      await transitionIssueState(ctx, parentLinearIssueId, 'started', ['In Review']);
      await swapIssueReaction(ctx, parentLinearIssueId, EMOJI_SUCCESS);
    }
  } catch (err) {
    logger.warn('Epic panel parent-state mirror failed (non-fatal)', {
      parent_linear_issue_id: parentLinearIssueId,
      error: err instanceof Error ? err.message : String(err),
    });
  }
  return commentId;
}

export interface PostRollupParams {
  readonly ctx: LinearFeedbackContext;
  readonly orchestrationId: string;
  readonly parentLinearIssueId: string;
  readonly kind: RollupKind;
  readonly children: readonly OrchestrationChildRow[];
  /**
   * The orchestration's trigger channel. Defaults to ``'linear'`` — the only
   * wired rollup plane today. #247 trigger-agnostic seam: a future
   * GitHub/Slack/Jira trigger dispatches its own parent-rollup here (open a
   * tracking comment / update an epic) instead of the Linear comment +
   * state-transition + reaction below. An unrecognised channel is a logged
   * no-op so a mis-seeded orchestration never throws out of the reconciler.
   */
  readonly channelSource?: ChannelSource;
  /**
   * #247 #3: the live status-block comment id stamped at seed. When set, the
   * final rollup EDITS that comment in place (one comment for the whole run,
   * no stream). When absent (seed-time create failed, or an older
   * orchestration), the rollup posts a fresh comment.
   */
  readonly statusCommentId?: string;
  /**
   * #323: ``sub_issue_id → pr_url`` for children that opened a PR. Supplied
   * by the reconciler (batch-read from the TaskTable at rollup time, when
   * pr_urls have settled). Threaded into the rendered comment as per-child
   * links + the integration node's combined-PR callout. Absent/partial is
   * fine — a missing entry just renders no link.
   */
  readonly prUrls?: Readonly<Record<string, string>>;
}
/**
 * Post the parent rollup comment. Best-effort: never throws; logs a
 * stable event on both success and failure so automated tests can assert
 * on ``orch.rollup.posted`` / ``orch.rollup.failed``.
 *
 * - A non-'linear' channel is a logged no-op returning false.
 * - With ``statusCommentId`` the live status comment is edited into the final
 *   rollup; otherwise a fresh comment is posted.
 * - After a successful post the parent is mirrored: on ``complete`` the state
 *   advances to In Review, and the reaction is swapped to ✅ (complete) or ❌
 *   (anything else). A failure in this mirroring is logged and does NOT change
 *   the return value — the comment itself was posted.
 *
 * @returns true when the rollup comment was written, false otherwise.
 */
export async function postRollup(params: PostRollupParams): Promise<boolean> {
  const { ctx, orchestrationId, parentLinearIssueId, kind, children, statusCommentId } = params;
  const channelSource = params.channelSource ?? 'linear';

  // #247 trigger-agnostic dispatch. Only the Linear plane is wired today;
  // other channels are an explicit logged no-op. A new trigger adds its
  // branch here.
  if (channelSource !== 'linear') {
    logger.info('Parent rollup skipped — channel has no wired rollup plane', {
      event: ORCH_LOG.rollupFailed,
      orchestration_id: orchestrationId,
      channel_source: channelSource,
      rollup_kind: kind,
    });
    return false;
  }

  const prUrls = params.prUrls ?? {};
  const body = renderRollupComment(
    kind,
    children.map((c) => ({
      sub_issue_id: c.sub_issue_id,
      ...(c.linear_identifier !== undefined && { linear_identifier: c.linear_identifier }),
      ...(c.title !== undefined && { title: c.title }),
      child_status: c.child_status,
      ...(c.child_task_id !== undefined && { child_task_id: c.child_task_id }),
      ...(prUrls[c.sub_issue_id] !== undefined && { pr_url: prUrls[c.sub_issue_id] }),
    })),
  );

  let ok = false;
  try {
    // #247 #3: edit the live status block into the final rollup when we have
    // its id (one comment for the whole run); else post a fresh comment.
    if (statusCommentId) {
      ok = (await upsertStatusComment(ctx, parentLinearIssueId, body, statusCommentId)) !== null;
    } else {
      // postIssueComment returns a result object; ``ok`` carries the outcome.
      ok = (await postIssueComment(ctx, parentLinearIssueId, body)).ok;
    }
  } catch (err) {
    logger.warn('Parent rollup comment threw (non-fatal)', {
      event: ORCH_LOG.rollupFailed,
      orchestration_id: orchestrationId,
      parent_linear_issue_id: parentLinearIssueId,
      rollup_kind: kind,
      error: err instanceof Error ? err.message : String(err),
    });
    return false;
  }

  if (!ok) {
    logger.warn('Parent rollup comment post returned false', {
      event: ORCH_LOG.rollupFailed,
      orchestration_id: orchestrationId,
      parent_linear_issue_id: parentLinearIssueId,
      rollup_kind: kind,
    });
    return ok;
  }

  logger.info('Parent rollup comment posted', {
    event: ORCH_LOG.rollupPosted,
    orchestration_id: orchestrationId,
    parent_linear_issue_id: parentLinearIssueId,
    rollup_kind: kind,
    child_count: children.length,
  });

  // Mirror the outcome on the PARENT epic:
  //  - state: on a clean 'complete', advance to In Review (child PRs await a
  //    human merge — NOT Done). Otherwise leave the state in place.
  //  - reaction: swap to ✅ (complete) / ❌ (otherwise) so the parent shows
  //    exactly one marker at a time.
  // The two calls run SEQUENTIALLY: each fans out into several Linear calls,
  // and firing them together self-throttled the 5s-timeout graphql reads.
  // Guarded so a rejection here cannot escape — this function's contract is
  // "never throws", and the rollup comment has already been posted.
  try {
    if (kind === 'complete') {
      await transitionIssueState(ctx, parentLinearIssueId, 'started', ['In Review']);
    }
    await swapIssueReaction(ctx, parentLinearIssueId, kind === 'complete' ? EMOJI_SUCCESS : EMOJI_FAILURE);
  } catch (err) {
    logger.warn('Parent rollup state/reaction mirror failed (non-fatal)', {
      orchestration_id: orchestrationId,
      parent_linear_issue_id: parentLinearIssueId,
      rollup_kind: kind,
      error: err instanceof Error ? err.message : String(err),
    });
  }
  return ok;
}
+ *
+ * Idempotency (AC: idempotent on webhook replay): the
+ * ``orchestration_id`` is *derived deterministically* from the parent
+ * Linear issue id (not random), and ``seedOrchestration`` looks up the
+ * parent-meta row before it writes anything. A replay of the same parent
+ * trigger therefore re-derives the same id, finds that meta row, and
+ * returns without duplicating children. (The batch puts themselves are
+ * unconditional — the meta-row lookup is the gate.) The reconciler (A3)
+ * owns child-status transitions; this module seeds — and, on a re-trigger,
+ * extends — the ``blocked`` / ``ready`` rows and keeps the parent-meta
+ * bookkeeping (rollup claim, status comment id, comment-ack markers).
+ */
+
+import * as crypto from 'crypto';
+import {
+  type DynamoDBDocumentClient,
+  BatchWriteCommand,
+  GetCommand,
+  QueryCommand,
+  UpdateCommand,
+} from '@aws-sdk/lib-dynamodb';
+import type { SubIssueNode } from './linear-subissue-fetch';
+import { logger } from './logger';
+import { validateDag } from './orchestration-dag';
+import { resolveEpicTip } from './orchestration-epic-tip';
+
+/** Orchestration-local lifecycle marker on each sub-issue row. */
+export type ChildStatus =
+  | 'ready' // no predecessors / all predecessors succeeded — releasable
+  | 'blocked' // waiting on predecessors
+  | 'released' // child task created
+  | 'succeeded'
+  | 'failed'
+  | 'skipped'; // a predecessor failed; this child will never start
+
+/** One persisted sub-issue row. */
+export interface OrchestrationChildRow {
+  readonly orchestration_id: string;
+  readonly sub_issue_id: string;
+  readonly parent_linear_issue_id: string;
+  readonly linear_workspace_id: string;
+  readonly repo: string;
+  readonly depends_on: readonly string[];
+  readonly child_status: ChildStatus;
+  /**
+   * The ABCA ``task_id`` created for this child once released. Stamped by
+   * ``releaseChild`` alongside the ``child_status → released`` flip;
+   * absent until the child is released. The ``ChildTaskIndex`` GSI is
+   * keyed on this so the reconciler resolves a terminal task back to its
+   * orchestration row.
+   */
+  readonly child_task_id?: string;
+  /**
+   * The released child task's head branch (#247 A4). Persisted on the
+   * release flip so a DEPENDENT child can stack on / merge it. Absent
+   * until released.
+   */
+  readonly child_branch_name?: string;
+  /** Linear human identifier, when known (e.g. ``ENG-42``). */
+  readonly linear_identifier?: string;
+  /** Sub-issue title, used to build the child task description. */
+  readonly title?: string;
+  readonly created_at: string;
+  readonly updated_at: string;
+  /** TTL epoch (seconds) for eventual cleanup. */
+  readonly ttl?: number;
+}
+
+/**
+ * Release context persisted on the parent-meta row so the reconciler can
+ * release downstream children WITHOUT re-resolving auth (the webhook
+ * already resolved the platform user + Linear OAuth at seed time). The
+ * reconciler runs off the TaskTable stream and has no Linear webhook
+ * payload to re-derive these from.
+ */
+export interface OrchestrationReleaseContext {
+  /** Platform user the children are attributed to (parent's submitter). */
+  readonly platform_user_id: string;
+  /**
+   * The trigger channel that seeded this orchestration. Threaded onto child
+   * tasks (createTaskCore channelSource) and used by the reconciler to
+   * dispatch the parent rollup to the right plane. Defaults to ``'linear'``
+   * when absent (back-compat: orchestrations seeded before this field
+   * existed, and the only wired trigger today). #247 trigger-agnostic seam:
+   * a future GitHub/Slack/Jira trigger seeds with its own source and the
+   * release + rollup paths follow it without code changes here.
+   */
+  readonly channel_source?: string;
+  /** Linear OAuth secret ARN for the agent's outbound Linear MCP.
*/
+  readonly linear_oauth_secret_arn?: string;
+  readonly linear_workspace_slug?: string;
+  readonly linear_project_id?: string;
+}
+
+export interface SeedOrchestrationParams {
+  readonly ddb: DynamoDBDocumentClient;
+  readonly tableName: string;
+  readonly parentLinearIssueId: string;
+  readonly linearWorkspaceId: string;
+  readonly repo: string;
+  readonly children: readonly SubIssueNode[];
+  /** ISO timestamp for created_at/updated_at (injected for testability). */
+  readonly now: string;
+  /** Optional TTL epoch seconds. */
+  readonly ttl?: number;
+  /** Release context stamped on the meta row for the reconciler. */
+  readonly releaseContext: OrchestrationReleaseContext;
+}
+
+export interface SeedOrchestrationResult {
+  readonly orchestrationId: string;
+  readonly rowsWritten: number;
+  /** True when an existing orchestration was found (replay) — no new rows. */
+  readonly alreadyExisted: boolean;
+}
+
+/** Hex chars of the sha256 kept for the orchestration id (128 bits — ample to
+ * avoid collisions across a workspace's epics). */
+const ORCH_ID_HASH_HEX_LENGTH = 32;
+
+/**
+ * Deterministically derive the ``orchestration_id`` from the parent
+ * Linear issue id. Same parent → same id, which is what makes webhook
+ * replay idempotent. Prefixed + hashed so the id is opaque and
+ * fixed-length regardless of the Linear id format.
+ *
+ * @param parentLinearIssueId the parent issue id to hash.
+ * @returns ``orch_`` followed by the first 32 hex chars of the sha256.
+ */
+export function deriveOrchestrationId(parentLinearIssueId: string): string {
+  const hash = crypto.createHash('sha256').update(parentLinearIssueId).digest('hex').slice(0, ORCH_ID_HASH_HEX_LENGTH);
+  return `orch_${hash}`;
+}
+
+/** DynamoDB BatchWriteItem hard limit: at most 25 put/delete requests per call. */
+const DDB_BATCH_WRITE_MAX_ITEMS = 25;
+
+/** Marker SK for the parent-meta row (sorts before any UUID sub_issue_id). */
+const PARENT_META_SK = '#meta';
+
+/** Attempts per seed batch before giving up on rows DynamoDB keeps handing back. */
+const SEED_BATCH_WRITE_MAX_ATTEMPTS = 5;
+
+/** Base delay (ms) of the exponential backoff between those attempts. */
+const SEED_BATCH_WRITE_BACKOFF_MS = 50;
+
+/**
+ * Put one batch of rows (at most 25) and keep re-sending whatever DynamoDB
+ * returns in ``UnprocessedItems`` until the batch is fully written.
+ * BatchWriteItem can succeed as a call while leaving individual puts
+ * unprocessed, so the response has to be inspected.
+ *
+ * @throws Error when rows are still unprocessed after
+ *   ``SEED_BATCH_WRITE_MAX_ATTEMPTS`` attempts.
+ */
+async function putRowsWithRetry(
+  ddb: DynamoDBDocumentClient,
+  tableName: string,
+  rows: ReadonlyArray<Record<string, unknown>>,
+): Promise<void> {
+  let pending = rows.map((Item) => ({ PutRequest: { Item } }));
+  for (let attempt = 0; pending.length > 0; attempt += 1) {
+    if (attempt >= SEED_BATCH_WRITE_MAX_ATTEMPTS) {
+      throw new Error(
+        `Orchestration seed: ${pending.length} row(s) still unprocessed after ${SEED_BATCH_WRITE_MAX_ATTEMPTS} BatchWrite attempts`,
+      );
+    }
+    if (attempt > 0) {
+      // Back off before re-sending: 50ms, 100ms, 200ms, 400ms.
+      await new Promise((resolve) => setTimeout(resolve, SEED_BATCH_WRITE_BACKOFF_MS * 2 ** (attempt - 1)));
+    }
+    const res = await ddb.send(new BatchWriteCommand({
+      RequestItems: { [tableName]: pending },
+    }));
+    // Defensive ``?.``: tolerate a client that resolves with no body.
+    pending = (res?.UnprocessedItems?.[tableName] ?? []) as unknown as typeof pending;
+  }
+}
+
+/**
+ * Seed ``OrchestrationTable`` with one row per sub-issue plus a parent
+ * meta row. Idempotent: if the parent meta row already exists (replay),
+ * returns ``alreadyExisted: true`` and writes nothing.
+ *
+ * Initial ``child_status``: ``ready`` when ``depends_on`` is empty
+ * (a root — the reconciler releases these immediately), else
+ * ``blocked``.
+ *
+ * @returns the derived orchestration id, the number of rows written
+ *   (children + meta), and whether the orchestration already existed.
+ * @throws when a DynamoDB call fails, or when child/meta rows are still
+ *   unprocessed after the retry budget. In both cases the meta row has not
+ *   been written, so a replay re-seeds from scratch.
+ */
+export async function seedOrchestration(
+  params: SeedOrchestrationParams,
+): Promise<SeedOrchestrationResult> {
+  const { ddb, tableName, parentLinearIssueId, linearWorkspaceId, repo, children, now, ttl, releaseContext } = params;
+  const orchestrationId = deriveOrchestrationId(parentLinearIssueId);
+
+  // Idempotency gate: a prior run for this parent already seeded rows.
+  const existing = await ddb.send(new GetCommand({
+    TableName: tableName,
+    Key: { orchestration_id: orchestrationId, sub_issue_id: PARENT_META_SK },
+  }));
+  if (existing.Item) {
+    logger.info('Orchestration already seeded — skipping (idempotent replay)', {
+      orchestration_id: orchestrationId,
+      parent_linear_issue_id: parentLinearIssueId,
+    });
+    return { orchestrationId, rowsWritten: 0, alreadyExisted: true };
+  }
+
+  const childRows: OrchestrationChildRow[] = children.map((c) => ({
+    orchestration_id: orchestrationId,
+    sub_issue_id: c.id,
+    parent_linear_issue_id: parentLinearIssueId,
+    linear_workspace_id: linearWorkspaceId,
+    repo,
+    depends_on: c.depends_on,
+    child_status: c.depends_on.length === 0 ? 'ready' : 'blocked',
+    ...(c.identifier !== undefined && { linear_identifier: c.identifier }),
+    ...(c.title !== undefined && { title: c.title }),
+    created_at: now,
+    updated_at: now,
+    ...(ttl !== undefined && { ttl }),
+  }));
+
+  const metaRow = {
+    orchestration_id: orchestrationId,
+    sub_issue_id: PARENT_META_SK,
+    parent_linear_issue_id: parentLinearIssueId,
+    linear_workspace_id: linearWorkspaceId,
+    repo,
+    child_count: children.length,
+    // Release context for the reconciler (downstream releases run off the
+    // TaskTable stream with no Linear webhook payload to re-derive these).
+    platform_user_id: releaseContext.platform_user_id,
+    ...(releaseContext.channel_source !== undefined && {
+      channel_source: releaseContext.channel_source,
+    }),
+    ...(releaseContext.linear_oauth_secret_arn !== undefined && {
+      linear_oauth_secret_arn: releaseContext.linear_oauth_secret_arn,
+    }),
+    ...(releaseContext.linear_workspace_slug !== undefined && {
+      linear_workspace_slug: releaseContext.linear_workspace_slug,
+    }),
+    ...(releaseContext.linear_project_id !== undefined && {
+      linear_project_id: releaseContext.linear_project_id,
+    }),
+    created_at: now,
+    updated_at: now,
+    ...(ttl !== undefined && { ttl }),
+  };
+
+  // Child rows first, in chunks of 25 (DDB limit), each chunk retried until
+  // nothing is left unprocessed. The meta row then goes in its OWN final
+  // write: BatchWriteItem is neither ordered nor atomic, so a meta row that
+  // shares a batch with child rows could land while some of those children
+  // were dropped — and the idempotency gate above would then treat the
+  // half-seeded orchestration as complete on every replay. Written last and
+  // alone, a failure anywhere leaves no meta row, so a replay re-derives
+  // the same id, sees no meta row, and re-seeds.
+  for (let i = 0; i < childRows.length; i += DDB_BATCH_WRITE_MAX_ITEMS) {
+    const chunk = childRows.slice(i, i + DDB_BATCH_WRITE_MAX_ITEMS).map((r) => ({ ...r }));
+    await putRowsWithRetry(ddb, tableName, chunk);
+  }
+  await putRowsWithRetry(ddb, tableName, [{ ...metaRow }]);
+  const rowsWritten = childRows.length + 1;
+
+  logger.info('Orchestration seeded', {
+    orchestration_id: orchestrationId,
+    parent_linear_issue_id: parentLinearIssueId,
+    child_count: children.length,
+    rows_written: rowsWritten,
+  });
+
+  return { orchestrationId, rowsWritten, alreadyExisted: false };
+}
+
+/** Result of extending an already-seeded orchestration (#247 orchestration-extend). */
+export interface ExtendOrchestrationResult {
+  readonly orchestrationId: string;
+  /** Sub-issue ids newly ADDED to the DAG by this extend (empty if nothing new).
*/
+  readonly addedSubIssueIds: readonly string[];
+  /**
+   * Subset of ``addedSubIssueIds`` that are immediately releasable — their
+   * predecessors are all already ``succeeded`` (or they're new roots). The
+   * caller releases these now; the rest are ``blocked`` and the reconciler
+   * releases them as predecessors finish, exactly like seed-time children.
+   */
+  readonly releasableSubIssueIds: readonly string[];
+  /** Why an extend was rejected (cycle introduced by the new edges), if any. */
+  readonly rejected?: { readonly reason: string; readonly message: string };
+}
+
+/**
+ * Extend an ALREADY-SEEDED orchestration with sub-issues added to the Linear
+ * epic after the first seed (#247 orchestration-extend). The seed path is
+ * idempotent (frozen at first seed) so a graph can't grow on its own; this is
+ * the additive counterpart, invoked when a labeled parent that already has an
+ * orchestration is re-triggered.
+ *
+ * Diffs the freshly-fetched ``graph`` against the persisted children:
+ *  - existing nodes are LEFT UNTOUCHED (their status/branch/task are preserved
+ *    — we never re-seed or reset a node that already ran),
+ *  - genuinely-new nodes are validated (the augmented graph must stay acyclic),
+ *    then added as ``ready`` (deps all already succeeded, or no deps) or
+ *    ``blocked``,
+ *  - the meta ``child_count`` is bumped.
+ *
+ * Idempotent: re-running with no new nodes is a no-op (empty result). A cycle
+ * introduced by the new edges rejects WITHOUT writing anything.
+ *
+ * @param graph the full current sub-issue node set (post-#16 augmentation),
+ *   from the same source the seed used.
+ * @returns the ids added by this call and the subset that is immediately
+ *   releasable; ``rejected`` is set (and nothing written) when validation fails.
+ * @throws when a DynamoDB call fails, or when new rows are still unprocessed
+ *   after the retry budget — ``child_count`` is not bumped in that case.
+ */
+export async function extendOrchestration(params: {
+  readonly ddb: DynamoDBDocumentClient;
+  readonly tableName: string;
+  readonly parentLinearIssueId: string;
+  readonly linearWorkspaceId: string;
+  readonly repo: string;
+  readonly graph: readonly SubIssueNode[];
+  readonly now: string;
+  readonly ttl?: number;
+}): Promise<ExtendOrchestrationResult> {
+  const { ddb, tableName, parentLinearIssueId, linearWorkspaceId, repo, graph, now, ttl } = params;
+  const orchestrationId = deriveOrchestrationId(parentLinearIssueId);
+
+  const snapshot = await loadOrchestration(ddb, tableName, orchestrationId);
+  if (!snapshot) {
+    // No existing orchestration — caller should have seeded, not extended.
+    return { orchestrationId, addedSubIssueIds: [], releasableSubIssueIds: [] };
+  }
+
+  const existingIds = new Set(snapshot.children.map((c) => c.sub_issue_id));
+  const newNodes = graph.filter((n) => !existingIds.has(n.id));
+  if (newNodes.length === 0) {
+    return { orchestrationId, addedSubIssueIds: [], releasableSubIssueIds: [] };
+  }
+
+  // Validate the AUGMENTED graph (existing + new) — adding nodes/edges must not
+  // introduce a cycle or a dangling edge. Reject without writing if it does.
+  const validation = validateDag(graph.map((n) => ({ id: n.id, depends_on: n.depends_on })));
+  if (!validation.ok) {
+    logger.warn('Orchestration extend rejected — augmented graph invalid', {
+      orchestration_id: orchestrationId, reason: validation.reason,
+    });
+    return {
+      orchestrationId,
+      addedSubIssueIds: [],
+      releasableSubIssueIds: [],
+      rejected: { reason: validation.reason, message: validation.message },
+    };
+  }
+
+  // #247 UX.4: a new node with NO declared dependency must NOT branch off bare
+  // main — it inherits the epic's accumulated unmerged work by stacking on the
+  // epic TIP (the existing leaf frontier). We inject that as a synthetic
+  // ``depends_on`` so the existing A4 gating + base-branch stacking treat it
+  // like any other dependent; "fall back to main only when merged" is handled
+  // downstream by the agent's base-fetch fallback. Nodes that DECLARED a
+  // dependency keep their explicit edges (user intent wins over the tip).
+  const epicTip = resolveEpicTip(snapshot.children);
+  const withImplicitDeps = newNodes.map((n) => ({
+    node: n,
+    // Only unconstrained new nodes inherit the tip; and never self-depend
+    // (the tip is computed from EXISTING nodes, so a new id can't appear).
+    depends_on: n.depends_on.length > 0 ? n.depends_on : epicTip,
+  }));
+
+  // A node is immediately releasable iff every predecessor is already
+  // ``succeeded`` (or it has none). Predecessors may be existing (check their
+  // persisted status) or other new nodes (not succeeded yet → blocked).
+  const succeeded = new Set(
+    snapshot.children.filter((c) => c.child_status === 'succeeded').map((c) => c.sub_issue_id),
+  );
+  const releasable = new Set<string>();
+  const newRows: OrchestrationChildRow[] = withImplicitDeps.map(({ node: n, depends_on }) => {
+    const allDepsSucceeded = depends_on.every((d) => succeeded.has(d));
+    if (allDepsSucceeded) releasable.add(n.id);
+    return {
+      orchestration_id: orchestrationId,
+      sub_issue_id: n.id,
+      parent_linear_issue_id: parentLinearIssueId,
+      linear_workspace_id: linearWorkspaceId,
+      repo,
+      depends_on,
+      child_status: allDepsSucceeded ? 'ready' : 'blocked',
+      ...(n.identifier !== undefined && { linear_identifier: n.identifier }),
+      ...(n.title !== undefined && { title: n.title }),
+      created_at: now,
+      updated_at: now,
+      ...(ttl !== undefined && { ttl }),
+    };
+  });
+
+  // Persist new child rows (chunks of 25), then bump meta child_count.
+  // BatchWriteItem can succeed as a call yet hand individual puts back in
+  // ``UnprocessedItems`` (e.g. under throttling). Each chunk is re-sent with
+  // a short exponential backoff until nothing is left; if rows are STILL
+  // unprocessed after the budget we throw, so ``child_count`` is never
+  // bumped for rows that did not land.
+  const maxBatchAttempts = 5;
+  const batchBackoffBaseMs = 50;
+  for (let i = 0; i < newRows.length; i += DDB_BATCH_WRITE_MAX_ITEMS) {
+    let pending = newRows
+      .slice(i, i + DDB_BATCH_WRITE_MAX_ITEMS)
+      .map((Item) => ({ PutRequest: { Item } }));
+    for (let attempt = 0; pending.length > 0; attempt += 1) {
+      if (attempt >= maxBatchAttempts) {
+        throw new Error(
+          `Orchestration extend: ${pending.length} row(s) still unprocessed after ${maxBatchAttempts} BatchWrite attempts`,
+        );
+      }
+      if (attempt > 0) {
+        // 50ms, 100ms, 200ms, 400ms.
+        await new Promise((resolve) => setTimeout(resolve, batchBackoffBaseMs * 2 ** (attempt - 1)));
+      }
+      const res = await ddb.send(new BatchWriteCommand({
+        RequestItems: { [tableName]: pending },
+      }));
+      // Defensive ``?.``: tolerate a client that resolves with no body.
+      pending = (res?.UnprocessedItems?.[tableName] ?? []) as unknown as typeof pending;
+    }
+  }
+  // Bump child_count AND clear rollup_posted_at: if this epic had ALREADY
+  // reached all-terminal and posted its rollup, adding a node re-opens it.
+  // Clearing the claim lets the reconciler re-settle the parent state to
+  // complete (re-claim) once the new node finishes — without this, a
+  // post-completion addition would leave the epic stuck "in progress" forever
+  // (#247 UX.4 concurrency: mid-flight additions to a finished epic).
+  await ddb.send(new UpdateCommand({
+    TableName: tableName,
+    Key: { orchestration_id: orchestrationId, sub_issue_id: PARENT_META_SK },
+    UpdateExpression: 'SET child_count = :n, updated_at = :now REMOVE rollup_posted_at',
+    ExpressionAttributeValues: { ':n': snapshot.children.length + newRows.length, ':now': now },
+  }));
+
+  logger.info('Orchestration extended', {
+    orchestration_id: orchestrationId,
+    parent_linear_issue_id: parentLinearIssueId,
+    added: newRows.length,
+    releasable: releasable.size,
+    added_ids: newRows.map((r) => r.sub_issue_id),
+  });
+
+  return {
+    orchestrationId,
+    addedSubIssueIds: newRows.map((r) => r.sub_issue_id),
+    releasableSubIssueIds: [...releasable],
+  };
+}
+
+/**
+ * Claim the right to post the parent rollup comment exactly once (#247
+ * A5). The orchestration can reach "all children terminal" on more than
+ * one TaskTable-stream event (the last child's record often gets two
+ * MODIFYs — e.g. status→COMPLETED then pr_url/build_passed written — both
+ * observing all-terminal), which without a guard posts the rollup twice.
+ *
+ * Conditionally stamps ``rollup_posted_at`` on the parent-meta row.
The
+ * first caller wins (returns true → post the comment); a racing/repeat
+ * caller loses the conditional write (returns false → skip). Mirrors the
+ * release-flip idempotency pattern.
+ */
+export async function claimRollup(
+  ddb: DynamoDBDocumentClient,
+  tableName: string,
+  orchestrationId: string,
+  now: string,
+): Promise<boolean> {
+  const stampClaim = new UpdateCommand({
+    TableName: tableName,
+    Key: { orchestration_id: orchestrationId, sub_issue_id: PARENT_META_SK },
+    UpdateExpression: 'SET rollup_posted_at = :now',
+    ConditionExpression: 'attribute_not_exists(rollup_posted_at)',
+    ExpressionAttributeValues: { ':now': now },
+  });
+  try {
+    await ddb.send(stampClaim);
+  } catch (err) {
+    // Losing the conditional write is the expected "someone else claimed it"
+    // outcome; anything else is a real failure and propagates.
+    if (isConditionalCheckFailure(err)) return false;
+    throw err;
+  }
+  return true;
+}
+
+/** True when ``err`` is DynamoDB's failed-condition error (matched by ``name``). */
+function isConditionalCheckFailure(err: unknown): boolean {
+  return (err as { name?: string })?.name === 'ConditionalCheckFailedException';
+}
+
+/**
+ * Drop the once-only rollup claim so an epic that completes AGAIN can
+ * re-settle its parent state (#247 — stress-caught). When a completed epic
+ * re-opens (a cascade/iteration revives it), the ``rollup_posted_at`` stamp
+ * left by the FIRST completion would make {@link claimRollup} lose forever:
+ * the panel body would re-settle to ✅ while the parent reaction/state stayed
+ * on 👀/In Progress. ``extendOrchestration`` clears the stamp on the extend
+ * path; this is the same clear for the cascade re-open path. The REMOVE is
+ * unconditional, so it is a no-op when the stamp is already absent.
+ */
+export async function clearRollupClaim(
+  ddb: DynamoDBDocumentClient,
+  tableName: string,
+  orchestrationId: string,
+  now: string,
+): Promise<void> {
+  const metaKey = { orchestration_id: orchestrationId, sub_issue_id: PARENT_META_SK };
+  await ddb.send(new UpdateCommand({
+    TableName: tableName,
+    Key: metaKey,
+    UpdateExpression: 'SET updated_at = :now REMOVE rollup_posted_at',
+    ExpressionAttributeValues: { ':now': now },
+  }));
+}
+
+/**
+ * Claim the one-time "I responded to this comment" marker so that a webhook
+ * REDELIVERY does not re-post (#247 UX.20 — live-caught spam). Linear
+ * redelivers a comment webhook when the handler exceeds its ~5s ack window;
+ * without a claim, the parent-epic disambiguation reply was re-posted on
+ * every redelivery (50+ duplicates). The marker row is keyed on the
+ * orchestration id plus ``ack#<commentId>``, so the FIRST delivery creates it
+ * and every redelivery fails the create-once condition. The row carries the
+ * table's ``ttl`` attribute so it self-expires — it is only needed for the
+ * redelivery window.
+ *
+ * @param ttlEpochSeconds absolute epoch-seconds expiry for the marker row.
+ * @returns true only for the caller that created the marker.
+ */
+export async function claimCommentAck(
+  ddb: DynamoDBDocumentClient,
+  tableName: string,
+  orchestrationId: string,
+  commentId: string,
+  now: string,
+  ttlEpochSeconds: number,
+): Promise<boolean> {
+  const createMarker = new UpdateCommand({
+    TableName: tableName,
+    Key: { orchestration_id: orchestrationId, sub_issue_id: `ack#${commentId}` },
+    // attribute_not_exists on the PK is the standard "create-once" guard —
+    // a replay finds the row present and the condition fails. ``ttl`` is a
+    // DynamoDB reserved keyword → must be aliased via ExpressionAttributeNames.
+    UpdateExpression: 'SET acked_at = :now, #ttl = :ttl',
+    ConditionExpression: 'attribute_not_exists(orchestration_id)',
+    ExpressionAttributeNames: { '#ttl': 'ttl' },
+    ExpressionAttributeValues: { ':now': now, ':ttl': ttlEpochSeconds },
+  });
+  try {
+    await ddb.send(createMarker);
+  } catch (err) {
+    if (isConditionalCheckFailure(err)) return false;
+    throw err;
+  }
+  return true;
+}
+
+/** Sort-key of the parent-meta row. Exported so the reconciler can
+ * separate it from child rows after a Query. */
+export const ORCHESTRATION_META_SK = PARENT_META_SK;
+
+/** Parsed parent-meta row, including the reconciler's release context.
*/
+export interface OrchestrationMeta {
+  readonly orchestration_id: string;
+  readonly parent_linear_issue_id: string;
+  readonly linear_workspace_id: string;
+  readonly repo: string;
+  readonly child_count: number;
+  readonly release_context: OrchestrationReleaseContext;
+  /**
+   * Linear comment id of the live status block (#247 #3), stamped at seed.
+   * The reconciler edits this comment in place on each child transition and
+   * one last time with the final rollup. Absent if the seed-time create
+   * failed (best-effort) — the reconciler then falls back to a fresh
+   * comment for the final rollup.
+   */
+  readonly status_comment_id?: string;
+}
+
+/**
+ * Stamp the live status-block comment id on the parent-meta row (#247 #3).
+ * Called once at seed after the comment is created. Best-effort; a failure
+ * just means the reconciler can't edit-in-place and posts a fresh final
+ * rollup instead. Not conditional — the single seed path is the only writer.
+ */
+export async function setStatusCommentId(
+  ddb: DynamoDBDocumentClient,
+  tableName: string,
+  orchestrationId: string,
+  commentId: string,
+): Promise<void> {
+  await ddb.send(new UpdateCommand({
+    TableName: tableName,
+    Key: { orchestration_id: orchestrationId, sub_issue_id: PARENT_META_SK },
+    UpdateExpression: 'SET status_comment_id = :cid',
+    ExpressionAttributeValues: { ':cid': commentId },
+  }));
+}
+
+/** All rows for one orchestration: the meta row + every child row. */
+export interface OrchestrationSnapshot {
+  readonly meta: OrchestrationMeta;
+  readonly children: readonly OrchestrationChildRow[];
+}
+
+/**
+ * Load every row for an orchestration (meta + children) with a paginated
+ * Query on the partition key. A single Query response is capped in size, so
+ * ``LastEvaluatedKey`` is followed until the partition is exhausted —
+ * otherwise a large orchestration would load with children silently missing.
+ * Returns null when the orchestration id has no rows (e.g. TTL-reaped) or
+ * when rows exist but the meta row is missing (logged).
+ * The reconciler calls this after resolving a terminal child's
+ * orchestration via the ChildTaskIndex GSI.
+ */
+export async function loadOrchestration(
+  ddb: DynamoDBDocumentClient,
+  tableName: string,
+  orchestrationId: string,
+): Promise<OrchestrationSnapshot | null> {
+  const items: Array<Record<string, unknown>> = [];
+  let exclusiveStartKey: Record<string, unknown> | undefined;
+  do {
+    const page = await ddb.send(new QueryCommand({
+      TableName: tableName,
+      KeyConditionExpression: 'orchestration_id = :oid',
+      ExpressionAttributeValues: { ':oid': orchestrationId },
+      ...(exclusiveStartKey !== undefined && { ExclusiveStartKey: exclusiveStartKey }),
+    }));
+    items.push(...((page.Items ?? []) as Array<Record<string, unknown>>));
+    // Present only when DynamoDB stopped before the end of the partition.
+    exclusiveStartKey = page.LastEvaluatedKey;
+  } while (exclusiveStartKey !== undefined);
+  if (items.length === 0) return null;
+
+  const metaItem = items.find((i) => i.sub_issue_id === PARENT_META_SK);
+  if (!metaItem) {
+    logger.warn('Orchestration rows present but meta row missing', { orchestration_id: orchestrationId });
+    return null;
+  }
+
+  const children = items
+    // Exclude the meta row AND non-child marker rows (e.g. ``ack#<commentId>``
+    // dedup markers, #247 UX.20) — only real sub-issue rows are children.
+    // A real child SK is a Linear issue UUID or the ``…__integration`` synthetic
+    // id; markers use a ``<kind>#`` prefix that no real SK has.
+    .filter((i) => i.sub_issue_id !== PARENT_META_SK && !String(i.sub_issue_id).includes('#'))
+    .map((i) => i as unknown as OrchestrationChildRow);
+
+  const meta: OrchestrationMeta = {
+    orchestration_id: orchestrationId,
+    parent_linear_issue_id: metaItem.parent_linear_issue_id as string,
+    linear_workspace_id: metaItem.linear_workspace_id as string,
+    repo: metaItem.repo as string,
+    // Fall back to the counted children if the meta row carries no count.
+    child_count: (metaItem.child_count as number) ?? children.length,
+    release_context: {
+      platform_user_id: metaItem.platform_user_id as string,
+      ...(metaItem.channel_source !== undefined && {
+        channel_source: metaItem.channel_source as string,
+      }),
+      ...(metaItem.linear_oauth_secret_arn !== undefined && {
+        linear_oauth_secret_arn: metaItem.linear_oauth_secret_arn as string,
+      }),
+      ...(metaItem.linear_workspace_slug !== undefined && {
+        linear_workspace_slug: metaItem.linear_workspace_slug as string,
+      }),
+      ...(metaItem.linear_project_id !== undefined && {
+        linear_project_id: metaItem.linear_project_id as string,
+      }),
+    },
+    ...(metaItem.status_comment_id !== undefined && {
+      status_comment_id: metaItem.status_comment_id as string,
+    }),
+  };
+
+  return { meta, children };
+}
+
+/**
+ * Resolve a released child by its head branch, via the ChildBranchIndex GSI.
+ * Maps a branch name back to the child row (which carries
+ * ``orchestration_id`` + ``sub_issue_id``).
+ *
+ * RETAINED, currently unused. This backed the original A6 GitHub
+ * ``pull_request`` restack trigger, which the #247 A6 redesign replaced with
+ * a Linear-comment trigger + reconciler-driven cascade (the cascade resolves
+ * the changed node by sub_issue_id, not by branch). The helper + its GSI are
+ * deliberately kept rather than removed: dropping a GSI is a
+ * CFN-update-unfriendly stack change for zero functional gain, and a
+ * branch→child lookup is a plausible future need (e.g. a branch-delete
+ * cleanup path). If it stays unused long-term, remove the helper and the GSI
+ * together in a dedicated migration.
+ *
+ * Returns the child row, or null if no released child owns that branch. The
+ * GSI is sparse — only released children carry ``child_branch_name`` — so a
+ * miss is the common, cheap case. ``indexName`` is injected (the CDK construct
+ * owns the literal) to keep this module free of a CDK dependency.
+ */
+export async function findOrchestrationChildByBranch(
+  ddb: DynamoDBDocumentClient,
+  tableName: string,
+  indexName: string,
+  branchName: string,
+): Promise<OrchestrationChildRow | null> {
+  // One key-equality lookup on the branch index; only the first hit matters.
+  const lookup = new QueryCommand({
+    TableName: tableName,
+    IndexName: indexName,
+    KeyConditionExpression: 'child_branch_name = :b',
+    ExpressionAttributeValues: { ':b': branchName },
+    Limit: 1,
+  });
+  const res = await ddb.send(lookup);
+  const [match] = (res.Items ?? []) as OrchestrationChildRow[];
+  return match ?? null;
+}
diff --git a/cdk/src/handlers/shared/orchestrator.ts b/cdk/src/handlers/shared/orchestrator.ts
index 27a8a0a6..632eed3f 100644
--- a/cdk/src/handlers/shared/orchestrator.ts
+++ b/cdk/src/handlers/shared/orchestrator.ts
@@ -248,6 +248,8 @@ export async function loadBlueprintConfig(task: TaskRecord): Promise<BlueprintCo
     system_prompt_overrides: repoConfig?.system_prompt_overrides,
     github_token_secret_arn: repoConfig?.github_token_secret_arn ?? process.env.GITHUB_TOKEN_SECRET_ARN,
     poll_interval_ms: pollIntervalMs,
+    build_command: repoConfig?.build_command,
+    lint_command: repoConfig?.lint_command,
     cedar_policies: repoConfig?.cedar_policies,
     approval_gate_cap: repoConfig?.approval_gate_cap,
   };
@@ -513,6 +515,15 @@ export async function hydrateAndTransition(task: TaskRecord, blueprintConfig?: B
     resolved_workflow: task.resolved_workflow ?? { id: 'coding/new-task-v1', version: '1.0.0' },
     ...(task.pr_number !== undefined && { pr_number: task.pr_number }),
     ...(hydratedContext.resolved_base_branch && { base_branch: hydratedContext.resolved_base_branch }),
+    // #247 A4: orchestration children carry their stacked base branch +
+    // (diamond case) predecessor branches to merge in, via channel_metadata.
+    // The PR-task ``resolved_base_branch`` path above wins if both are set
+    // (a task is never both a PR-iteration and an orchestration child).
+    ...(!hydratedContext.resolved_base_branch
+      && task.channel_metadata?.orchestration_base_branch
+      && { base_branch: task.channel_metadata.orchestration_base_branch }),
+    ...(task.channel_metadata?.orchestration_merge_branches
+      && { merge_branches: parseMergeBranches(task.channel_metadata.orchestration_merge_branches) }),
     ...(task.task_description && { prompt: task.task_description }),
     max_turns: task.max_turns ?? blueprintConfig?.max_turns ?? DEFAULT_MAX_TURNS,
     ...(effectiveBudget !== undefined && { max_budget_usd: effectiveBudget }),
@@ -522,6 +533,11 @@ export async function hydrateAndTransition(task: TaskRecord, blueprintConfig?: B
     ...(task.trace === true && { trace: true }),
     ...(blueprintConfig?.model_id && { model_id: blueprintConfig.model_id }),
     ...(blueprintConfig?.system_prompt_overrides && { system_prompt_overrides: blueprintConfig.system_prompt_overrides }),
+    // #1: per-repo build/lint verification commands. Absent → agent defaults
+    // to ``mise run build`` / ``mise run lint``. Set for non-mise repos so
+    // build-regression gating actually runs the repo's real command.
+    ...(blueprintConfig?.build_command && { build_command: blueprintConfig.build_command }),
+    ...(blueprintConfig?.lint_command && { lint_command: blueprintConfig.lint_command }),
     ...(blueprintConfig?.cedar_policies && blueprintConfig.cedar_policies.length > 0 && { cedar_policies: blueprintConfig.cedar_policies }),
     // Cedar HITL: the agent's PreToolUse hook uses this to compute
     // the maxLifetime ceiling on per-gate approval timeouts (§6.5).
@@ -893,3 +909,25 @@ async function decrementConcurrency(userId: string): Promise<void> {
     }
   }
 }
+
+/**
+ * Decode the JSON-encoded list of predecessor branches that the
+ * orchestration release path stores under
+ * ``channel_metadata.orchestration_merge_branches`` (#247 A4, diamond
+ * case). Best-effort: anything other than a JSON array of strings is
+ * logged and treated as "no merge branches" instead of failing the
+ * task — the child then branches off its base without the predecessor
+ * code merged in (surfaced as a normal build failure if it needed it).
+ */
+function parseMergeBranches(raw: string): string[] {
+  let decoded: unknown;
+  try {
+    decoded = JSON.parse(raw);
+  } catch {
+    decoded = undefined;
+  }
+  const isStringList = Array.isArray(decoded) && decoded.every((entry) => typeof entry === 'string');
+  if (isStringList) return decoded as string[];
+  logger.warn('Ignoring malformed orchestration_merge_branches', { raw });
+  return [];
+}
diff --git a/cdk/src/handlers/shared/repo-config.ts b/cdk/src/handlers/shared/repo-config.ts
index 1753d568..a8303cd9 100644
--- a/cdk/src/handlers/shared/repo-config.ts
+++ b/cdk/src/handlers/shared/repo-config.ts
@@ -40,6 +40,14 @@ export interface RepoConfig {
   readonly system_prompt_overrides?: string;
   readonly github_token_secret_arn?: string;
   readonly poll_interval_ms?: number;
+  /**
+   * Per-repo build/lint verification commands (#1 build-gate fix). The agent
+   * runs these to gate build/lint regressions before opening a PR; default to
+   * ``mise run build`` / ``mise run lint`` when unset. Set for non-mise repos
+   * (e.g. ``npm run build``) so build-regression gating actually works.
+   */
+  readonly build_command?: string;
+  readonly lint_command?: string;
   readonly egress_allowlist?: string[];
   readonly cedar_policies?: string[];
   /**
@@ -67,6 +75,9 @@ export interface BlueprintConfig {
   readonly system_prompt_overrides?: string;
   readonly github_token_secret_arn?: string;
   readonly poll_interval_ms?: number;
+  /** Per-repo build/lint verification commands (#1). Default mise when unset. The orchestrator threads these into the agent payload.
*/ + readonly build_command?: string; + readonly lint_command?: string; readonly egress_allowlist?: string[]; readonly cedar_policies?: string[]; /** diff --git a/cdk/src/handlers/shared/screenshot-url.ts b/cdk/src/handlers/shared/screenshot-url.ts index 50294fd0..9e19a2c7 100644 --- a/cdk/src/handlers/shared/screenshot-url.ts +++ b/cdk/src/handlers/shared/screenshot-url.ts @@ -124,3 +124,23 @@ export function buildScreenshotKey(repo: string, sha: string, deploymentId?: num export function encodeMarkdownUrl(rawUrl: string): string { return rawUrl.replaceAll('(', '%28').replaceAll(')', '%29'); } + +// ``bgagent`` / ``{taskId}`` / ``{slug…}`` — the ABCA branch shape needs at +// least these three segments before a task id can be extracted. +const MIN_ABCA_BRANCH_SEGMENTS = 3; + +/** + * Pull the ABCA ``taskId`` out of a deploy PR's head branch (#247 — parent + * panel combined screenshot). ABCA names every task branch + * ``bgagent/{taskId}/{slug}`` (see ``generateBranchName``), so the task id is + * always the SECOND path segment. Returns null for any branch that doesn't + * match the ABCA shape (a human-created branch, a fork default, etc.) so the + * screenshot pipeline simply skips persistence for non-ABCA deploys. + */ +export function extractTaskIdFromBranch(branchName: string | null | undefined): string | null { + if (!branchName) return null; + const parts = branchName.split('/'); + if (parts.length < MIN_ABCA_BRANCH_SEGMENTS || parts[0] !== 'bgagent') return null; + // An empty second segment (``bgagent//slug``) is not a usable task id. 
+ return parts[1] || null; +} 
diff --git a/cdk/src/handlers/shared/types.ts b/cdk/src/handlers/shared/types.ts index 2584c722..fead79a6 100644 --- a/cdk/src/handlers/shared/types.ts +++ b/cdk/src/handlers/shared/types.ts @@ -95,10 +95,38 @@ export interface TaskRecord { readonly agent_heartbeat_at?: string; readonly execution_id?: string; readonly pr_url?: string; + /** + * Public CloudFront URL of the deploy-preview screenshot captured for this + * task's PR (#247). Persisted best-effort by the screenshot pipeline + * (github-webhook-processor) keyed off the taskId in the deploy branch, so + * the orchestration reconciler can embed the INTEGRATION node's combined + * preview in the parent epic panel. Absent until a preview deploys (and for + * tasks with no UI to screenshot). + */ + readonly screenshot_url?: string; + /** + * Live deploy-preview URL the {@link screenshot_url} image was captured from + * (e.g. the Vercel/Netlify preview deploy). Persisted alongside + * ``screenshot_url`` so the orchestration reconciler can make the INTEGRATION + * node's combined preview in the parent epic panel a clickable deep-link to + * the running combined site, not just a static image (#247 UX.17). Absent + * when no preview deployed. + */ + readonly screenshot_preview_url?: string; readonly error_message?: string; readonly idempotency_key?: string; readonly channel_source: ChannelSource; readonly channel_metadata?: Record<string, string>; + /** + * Linear issue UUID, hoisted to the top level from + * ``channel_metadata.linear_issue_id`` at task-create time (#247 UX.3). + * Top-level because a DynamoDB GSI (``LinearIssueIndex``) cannot key off a + * nested map field — the standalone ``@bgagent`` comment trigger queries + * this index to resolve a plain issue back to its newest ABCA task + PR. + * Present only for Linear-origin tasks; absent for GitHub/Slack/API tasks + * (which keeps the GSI sparse). 
+ */ + readonly linear_issue_id?: string; readonly status_created_at: string; readonly created_at: string; readonly updated_at: string; @@ -206,6 +234,33 @@ export interface TaskRecord { * atomically on resume (§10.2, §9). */ readonly awaiting_approval_request_id?: string; + /** + * Linear parent/sub-issue orchestration (issue #247, Mode A). + * ``orchestration_id`` PK of the row in ``OrchestrationTable`` whose + * DAG this task is a child of. Absent on ordinary (non-orchestrated) + * tasks. PR A1 introduces the field; graph discovery (A2) and the + * reconciler (A3) populate and read it. Until then it is always + * ``undefined`` at runtime. + */ + readonly orchestration_id?: string; + /** + * Linear orchestration (#247): the ``task_id`` of the parent task + * for attribution and rollup, when a parent task exists. Absent on + * non-orchestrated tasks and on root children whose parent is the + * Linear issue rather than an ABCA task. Introduced in PR A1; + * unused at runtime until A2/A3. + */ + readonly parent_task_id?: string; + /** + * Linear orchestration (#247): sibling ``sub_issue_id``s this child + * is blocked by — the predecessors that must reach terminal-success + * (``COMPLETED`` with ``build_passed !== false``) before the + * reconciler releases this child. Empty/absent for root children. + * Authoritative gating state lives on the ``OrchestrationTable`` row; + * this is the denormalized copy threaded onto the task record. + * Introduced in PR A1; unused at runtime until A3. + */ + readonly depends_on?: readonly string[]; } /** Per-channel override for one notification channel. 
See diff --git a/cdk/src/handlers/shared/workflows.ts b/cdk/src/handlers/shared/workflows.ts index ca665531..b867d404 100644 --- a/cdk/src/handlers/shared/workflows.ts +++ b/cdk/src/handlers/shared/workflows.ts @@ -122,6 +122,16 @@ const DESCRIPTORS: Record<string, WorkflowDescriptor> = { readOnly: true, requiredInputs: { allOf: ['pr_number'] }, }, + // A6 re-stack (#305): re-merge a changed predecessor into an existing + // stacked-child PR. Writeable, repo-bound, operates on an existing PR + // (pr_number). Platform-issued (the restack processor), not user-facing. + 'coding/restack-v1': { + id: 'coding/restack-v1', + version: '1.0.0', + requiresRepo: true, + readOnly: false, + requiredInputs: { allOf: ['pr_number'] }, + }, 'default/agent-v1': { id: 'default/agent-v1', version: '1.0.0', @@ -180,7 +190,21 @@ export type WorkflowResolutionError = 'unknown_id' | 'unsatisfiable_version'; * resolution ladder (WORKFLOWS.md §"Replacing task types"): * 1. explicit ``workflow_ref`` (id + optional ``@constraint``); * 2. (Blueprint default — Phase 4, not yet wired); - * 3. the platform default ``default/agent-v1``. + * 3a. no explicit ref BUT a repo is present ⇒ the coding default + * ``coding/new-task-v1`` (a repo-bound task is a coding task); + * 3b. otherwise the repo-less platform default ``default/agent-v1``. + * + * Rung 3a restores the pre-#296 behaviour: before workflow-driven tasks, a + * task with a repo and no explicit type ran as ``new_task`` → + * ``coding/new-task-v1`` (edit locally, commit, push, platform opens the PR + * via ``ensure_pr``). #296 introduced the resolution ladder but left the + * repo-aware rung unwired, so every repo task fell through to + * ``default/agent-v1`` — the freeform repo-less agent prompt with no git/PR + * discipline — which broke PR-url reporting, screenshot→issue routing, and + * #247 stacking (the agent improvised ``gh api``/``gh pr create`` against an + * empty local clone). 
``hasRepo`` re-wires that rung minimally until the + * Blueprint router (Phase 4) lands. Callers WITHOUT an explicit ref pass + * whether the request carries a repo. * * Returns ``null`` when an explicit ref cannot be resolved — either the id is * unknown OR an ``@constraint`` pins a version the platform does not ship. The @@ -188,9 +212,10 @@ export type WorkflowResolutionError = 'unknown_id' | 'unsatisfiable_version'; * like ``coding/new-task-v1@2.0.0`` fails admission rather than quietly running * ``1.0.0``. Use {@link resolveWorkflowRefError} for which case, to craft the 400. */ -export function resolveWorkflowRef(ref?: string | null): ResolvedWorkflow | null { +export function resolveWorkflowRef(ref?: string | null, hasRepo = false): ResolvedWorkflow | null { if (ref === undefined || ref === null || ref === '') { - const fallback = DESCRIPTORS[DEFAULT_WORKFLOW_ID]; + const fallbackId = hasRepo ? 'coding/new-task-v1' : DEFAULT_WORKFLOW_ID; + const fallback = DESCRIPTORS[fallbackId]; return { id: fallback.id, version: fallback.version }; } const { id, constraint } = parseWorkflowRef(ref); diff --git a/cdk/src/stacks/agent.ts b/cdk/src/stacks/agent.ts index 4ed6aac0..7c4d8412 100644 --- a/cdk/src/stacks/agent.ts +++ b/cdk/src/stacks/agent.ts @@ -44,9 +44,12 @@ import { FanOutConsumer } from '../constructs/fanout-consumer'; import { GitHubScreenshotIntegration } from '../constructs/github-screenshot-integration'; import { JiraIntegration } from '../constructs/jira-integration'; import { LinearIntegration } from '../constructs/linear-integration'; +import { OrchestrationReconciler } from '../constructs/orchestration-reconciler'; +import { OrchestrationTable } from '../constructs/orchestration-table'; import { PendingUploadCleanup } from '../constructs/pending-upload-cleanup'; import { RepoTable } from '../constructs/repo-table'; import { SlackIntegration } from '../constructs/slack-integration'; +import { StrandedOrchestrationReconciler } from 
'../constructs/stranded-orchestration-reconciler'; import { StrandedTaskReconciler } from '../constructs/stranded-task-reconciler'; import { TaskApi } from '../constructs/task-api'; import { TaskApprovalsTable } from '../constructs/task-approvals-table'; @@ -88,6 +91,8 @@ export class AgentStack extends Stack { const taskTable = new TaskTable(this, 'TaskTable'); const taskEventsTable = new TaskEventsTable(this, 'TaskEventsTable'); const taskNudgesTable = new TaskNudgesTable(this, 'TaskNudgesTable'); + // #247 Mode A: parent/sub-issue orchestration DAG state. + const orchestrationTable = new OrchestrationTable(this, 'OrchestrationTable'); // Cedar HITL approval-gate state (design §10.1). Agent writes PENDING // rows + GSI query powers `bgagent pending`; Chunk 5 wires the // Approve/Deny Lambdas + fan-out consumer. @@ -389,6 +394,31 @@ export class AgentStack extends Stack { runtimeArnHolder = runtime.agentRuntimeArn; + // --- AgentCore log-delivery: OPT-IN migration shim for ONE pre-existing + // stack whose logical IDs churned under an agentcore-alpha bump --- + // + // Background: the agentcore-alpha Runtime auto-creates AWS::Logs:: + // DeliverySource + Delivery + DeliveryDestination per loggingConfig. An + // alpha construct-path rename CHURNED both the CFN logical IDs and the + // account-scoped DeliverySource/DeliveryDestination ``Name`` of an + // ALREADY-DEPLOYED stack. Because those Names are account-unique, CFN's + // create-before-delete on the new ids collides with the live ones → + // ``AlreadyExists`` → whole-stack rollback. The fix is to re-pin the + // churned resources to the values CFN already has so it updates them in + // place instead of recreating. + // + // CRITICAL: this is needed ONLY by a stack that was deployed BEFORE the + // alpha bump. 
A fresh stack (a new env, CI, this PR on a clean account) + // has NO pre-existing resources to collide with and MUST synth the + // current alpha's natural ids — so the shim is OFF by default and is + // enabled per-stack via context: + // cdk deploy -c pinnedLogDeliveryStack=<stackName> + // (or the `pinnedLogDelivery` map in cdk.json). When the running stack + // doesn't match, NONE of the overrides apply and synth is pristine. + // Once the affected stack has been migrated + a clean redeploy confirmed, + // this shim and its context entry can be deleted outright. + maybePinChurnedLogResources(this, runtime); + // --- Session storage (preview) --- // The L2 construct does not yet expose filesystemConfigurations; use the // CFN escape hatch. /mnt/workspace mount backs the persistent cache @@ -611,10 +641,16 @@ export class AgentStack extends Stack { // }); // --- Task Orchestrator (durable Lambda function) --- + // Per-user concurrency cap, shared by the orchestrator (admission control) + // and the orchestration reconcilers (#331 release throttle), so the two + // never drift — the reconciler must throttle to the SAME ceiling admission + // enforces. + const maxConcurrentTasksPerUser = 10; const orchestrator = new TaskOrchestrator(this, 'TaskOrchestrator', { taskTable: taskTable.table, taskEventsTable: taskEventsTable.table, userConcurrencyTable: userConcurrencyTable.table, + maxConcurrentTasksPerUser, repoTable: repoTable.table, runtimeArn: runtime.agentRuntimeArn, githubTokenSecretArn: githubTokenSecret.secretArn, @@ -758,11 +794,145 @@ export class AgentStack extends Stack { taskTable: taskTable.table, taskEventsTable: taskEventsTable.table, repoTable: repoTable.table, + // #247 Mode A: enables the webhook processor's orchestration path + // (seed DAG + release roots). Sets ORCHESTRATION_TABLE_NAME. 
+ orchestrationTable: orchestrationTable.table, orchestratorFunctionArn: orchestrator.alias.functionArn, guardrailId: inputGuardrail.guardrailId, guardrailVersion: inputGuardrail.guardrailVersion, + // #331: throttle the seed-time root release to the free concurrency + // budget so a wide-root epic doesn't over-release roots admission then + // hard-fails (an unrecoverable failure — a root has no predecessor for + // the sweep to re-release from). + userConcurrencyTable: userConcurrencyTable.table, + maxConcurrentTasksPerUser, + // Image attachments extracted from issue descriptions upload here + // (otherwise createTaskCore 503s "Attachment storage is not configured"). + attachmentsBucket: attachmentsBucket.bucket, }); + // #247 Mode A: the reconciler consumes the TaskTable stream and + // releases dependency-unblocked children as predecessors reach + // terminal-success. It invokes createTaskCore in-process, so it needs + // the same task-creation env + invoke permission as the webhook + // processor. + const orchestrationReconciler = new OrchestrationReconciler(this, 'OrchestrationReconciler', { + taskTable: taskTable.table, + orchestrationTable: orchestrationTable.table, + taskEventsTable: taskEventsTable.table, + orchestratorFunctionArn: orchestrator.alias.functionArn, + }); + // createTaskCore (run inside the reconciler) screens descriptions with + // the input guardrail, reads repo onboarding/blueprint config, and + // async-invokes the orchestrator. Mirror the webhook processor's grants. 
+ repoTable.table.grantReadData(orchestrationReconciler.fn); + orchestrationReconciler.fn.addEnvironment('REPO_TABLE_NAME', repoTable.table.tableName); + orchestrationReconciler.fn.addEnvironment('GUARDRAIL_ID', inputGuardrail.guardrailId); + orchestrationReconciler.fn.addEnvironment('GUARDRAIL_VERSION', inputGuardrail.guardrailVersion); + orchestrationReconciler.fn.addEnvironment( + 'ORCHESTRATOR_FUNCTION_ARN', + orchestrator.alias.functionArn, + ); + // A5: the reconciler posts the parent rollup comment on completion — + // needs the workspace registry to resolve the per-workspace OAuth token. + linearIntegration.workspaceRegistryTable.grantReadData(orchestrationReconciler.fn); + orchestrationReconciler.fn.addEnvironment( + 'LINEAR_WORKSPACE_REGISTRY_TABLE_NAME', + linearIntegration.workspaceRegistryTable.tableName, + ); + // #331: read the user concurrency counter so a wide fan-out releases only + // up to the free budget (the cap throttles, not guillotines, children). + userConcurrencyTable.table.grantReadData(orchestrationReconciler.fn); + orchestrationReconciler.fn.addEnvironment( + 'USER_CONCURRENCY_TABLE_NAME', + userConcurrencyTable.table.tableName, + ); + orchestrationReconciler.fn.addEnvironment( + 'MAX_CONCURRENT_TASKS_PER_USER', + String(maxConcurrentTasksPerUser), + ); + orchestrationReconciler.fn.addToRolePolicy(new iam.PolicyStatement({ + actions: ['lambda:InvokeFunction'], + resources: [orchestrator.alias.functionArn], + })); + orchestrationReconciler.fn.addToRolePolicy(new iam.PolicyStatement({ + actions: ['bedrock:ApplyGuardrail'], + resources: [ + Stack.of(this).formatArn({ + service: 'bedrock', + resource: 'guardrail', + resourceName: inputGuardrail.guardrailId, + }), + ], + })); + // Released child tasks attributed to linear workspaces need the + // per-workspace OAuth secret prefix readable (createTaskCore stashes + // the ARN; agent reads it). Same prefix grant as the webhook processor. 
+ orchestrationReconciler.fn.addToRolePolicy(new iam.PolicyStatement({ + actions: ['secretsmanager:GetSecretValue'], + resources: [ + Stack.of(this).formatArn({ + service: 'secretsmanager', + resource: 'secret', + arnFormat: ArnFormat.COLON_RESOURCE_NAME, + resourceName: 'bgagent-linear-oauth-*', + }), + ], + })); + + // #303: scheduled backstop that recovers orchestrations whose terminal + // events were lost while the live reconciler was unavailable. Runs the + // same createTaskCore release path, so it needs the identical grants + // (repo config, guardrail, orchestrator invoke, linear-oauth secret). + const strandedOrchestrationReconciler = new StrandedOrchestrationReconciler( + this, 'StrandedOrchestrationReconciler', { + orchestrationTable: orchestrationTable.table, + taskTable: taskTable.table, + taskEventsTable: taskEventsTable.table, + orchestratorFunctionArn: orchestrator.alias.functionArn, + }, + ); + repoTable.table.grantReadData(strandedOrchestrationReconciler.fn); + strandedOrchestrationReconciler.fn.addEnvironment('REPO_TABLE_NAME', repoTable.table.tableName); + strandedOrchestrationReconciler.fn.addEnvironment('GUARDRAIL_ID', inputGuardrail.guardrailId); + strandedOrchestrationReconciler.fn.addEnvironment('GUARDRAIL_VERSION', inputGuardrail.guardrailVersion); + strandedOrchestrationReconciler.fn.addToRolePolicy(new iam.PolicyStatement({ + actions: ['lambda:InvokeFunction'], + resources: [orchestrator.alias.functionArn], + })); + strandedOrchestrationReconciler.fn.addToRolePolicy(new iam.PolicyStatement({ + actions: ['bedrock:ApplyGuardrail'], + resources: [ + Stack.of(this).formatArn({ + service: 'bedrock', + resource: 'guardrail', + resourceName: inputGuardrail.guardrailId, + }), + ], + })); + strandedOrchestrationReconciler.fn.addToRolePolicy(new iam.PolicyStatement({ + actions: ['secretsmanager:GetSecretValue'], + resources: [ + Stack.of(this).formatArn({ + service: 'secretsmanager', + resource: 'secret', + arnFormat: 
ArnFormat.COLON_RESOURCE_NAME, + resourceName: 'bgagent-linear-oauth-*', + }), + ], + })); + // #331: the sweep is the drain path for throttle-deferred children, so it + // throttles to the same free budget the live reconciler does. + userConcurrencyTable.table.grantReadData(strandedOrchestrationReconciler.fn); + strandedOrchestrationReconciler.fn.addEnvironment( + 'USER_CONCURRENCY_TABLE_NAME', + userConcurrencyTable.table.tableName, + ); + strandedOrchestrationReconciler.fn.addEnvironment( + 'MAX_CONCURRENT_TASKS_PER_USER', + String(maxConcurrentTasksPerUser), + ); + // Phase 2.0b-O2: agent runtime reads the per-workspace Linear OAuth // token directly from Secrets Manager. The CLI (`bgagent linear setup`) // creates `bgagent-linear-oauth-<slug>` secrets at install time; @@ -969,8 +1139,20 @@ export class AgentStack extends Stack { // workspace registry so token resolution reuses the per-workspace // OAuth secrets created by `bgagent linear setup`. linearWorkspaceRegistryTable: linearIntegration.workspaceRegistryTable, + // #247 (task #57): persist screenshot_url on the deploy task so the + // orchestration reconciler can embed the integration node's combined + // preview in the parent epic panel. + taskTable: taskTable.table, }); + // #247 A6 re-stack is NOT a GitHub-webhook path. It runs inside the + // orchestration reconciler (off the TaskTable stream): when a Linear + // @bgagent comment re-iterates a sub-issue's PR (coding/pr-iteration-v1) + // and that task completes, the reconciler cascades coding/restack-v1 + // tasks to the changed node's dependents. No inbound pull_request webhook + // (those are WAF-blocked by the API's managed rule set anyway), so there + // is no RestackProcessor Lambda to wire here. 
+ new CfnOutput(this, 'GitHubWebhookUrl', { value: `${taskApi.api.url}github/webhook`, description: 'URL to configure as the GitHub webhook target on demo repos (deployment_status events)', @@ -1089,3 +1271,90 @@ export class AgentStack extends Stack { }); } } + +/** + * A churned log-delivery resource to re-pin: the construct child id under the + * Runtime, the logical id CFN already has deployed, and (for the account-unique + * Source/Destination kinds) the deployed ``Name``. ``liveName`` is omitted for + * Delivery links, which have no Name. + */ +interface PinnedLogResource { + readonly childId: string; + readonly liveLogicalId: string; + readonly liveName?: string; +} + +/** + * Per-stack pin tables for the agentcore-alpha log-delivery churn (#247 #58). + * Keyed by ``stackName``. ONLY the listed stack is migrated; every other stack + * (fresh deploys, CI, new envs) is absent here → synth is pristine. A stack can + * also be supplied at deploy time via context (see {@link maybePinChurnedLogResources}). + * + * ``backgroundagent-dev`` was deployed before an alpha bump churned its + * DeliverySource/Destination/Delivery logical ids + account-unique Names; these + * values come from `aws cloudformation list-stack-resources` on that live stack. + * Delete this entry once that stack is migrated + a clean redeploy is confirmed. 
+ */ +const PINNED_LOG_DELIVERY_BY_STACK: Record<string, readonly PinnedLogResource[]> = { + 'backgroundagent-dev': [ + { + childId: 'ApplicationLogsDeliverySource', + liveLogicalId: 'RuntimeCDKSourceAPPLICATIONLOGSbackgroundagentdevRuntimeBC0AE9ED96A02E02', + liveName: 'cdk-applicationlogs-source-backgroundagentdevRuntimeBC0AE9ED', + }, + { + childId: 'UsageLogsDeliverySource', + liveLogicalId: 'RuntimeCDKSourceUSAGELOGSbackgroundagentdevRuntimeBC0AE9ED544FBB22', + liveName: 'cdk-usagelogs-source-backgroundagentdevRuntimeBC0AE9ED', + }, + { + childId: 'ApplicationLogsDest', + liveLogicalId: 'RuntimeCdkLogGroupApplicationLogsDeliverybackgroundagentdevRuntimeBC0AE9EDbackgroundagentdevRuntimeApplicationLogGroup454A95E8DestapplicationlogsE09F77DC', + liveName: 'cdk-cwl-Destapplication-logs-dest-backgrounp454A95E829BF8A27', + }, + { + childId: 'UsageLogsDest', + liveLogicalId: 'RuntimeCdkLogGroupUsageLogsDeliverybackgroundagentdevRuntimeBC0AE9EDbackgroundagentdevRuntimeUsageLogGroup7FA1FA67Destusagelogs9AB608D0', + liveName: 'cdk-cwl-Destusage-logs-dest-backgroundagroup7FA1FA67A8A16CEE', + }, + // Delivery links: logical-id pin only (no Name — unique per source/dest pair). + { + childId: 'ApplicationLogsDelivery', + liveLogicalId: 'RuntimeCdkLogGroupApplicationLogsDeliverybackgroundagentdevRuntimeBC0AE9EDbackgroundagentdevRuntimeApplicationLogGroup454A95E8Delivery92FE492C', + }, + { + childId: 'UsageLogsDelivery', + liveLogicalId: 'RuntimeCdkLogGroupUsageLogsDeliverybackgroundagentdevRuntimeBC0AE9EDbackgroundagentdevRuntimeUsageLogGroup7FA1FA67Delivery40F023D7', + }, + ], +}; + +/** + * OPT-IN migration shim (#247 #58): re-pin the agentcore-alpha-churned + * log-delivery resources of ONE already-deployed stack to the logical ids + + * Names CFN already has, so a stack deployed before an alpha bump updates them + * in place instead of hitting ``AWS::Logs::DeliverySource AlreadyExists`` on + * create-before-delete. 
Pins apply only when the running ``stackName`` has an + * entry in {@link PINNED_LOG_DELIVERY_BY_STACK}. The optional context value + * (`-c pinnedLogDeliveryStack=<name>`) can only narrow that: if it names a + * different stack, this one is left untouched; it never selects another + * stack's table entry. Unlisted stacks therefore synth their natural ids. + * Once the affected stack is migrated, delete this helper + its table entry. + */ +function maybePinChurnedLogResources(stack: Stack, runtime: agentcore.Runtime): void { + // The context value can restrict pinning to one named stack; absent, it + // defaults to the running stack's own name. The table lookup below always + // keys on the running stackName, so a renamed env needs its own table entry. + const targetStackName = (stack.node.tryGetContext('pinnedLogDeliveryStack') as string | undefined) + ?? stack.stackName; + if (targetStackName !== stack.stackName) return; // context names a different stack → don't touch this one + const pins = PINNED_LOG_DELIVERY_BY_STACK[stack.stackName]; + if (!pins) return; // not a pre-existing churned stack → pristine synth + + for (const pin of pins) { + const res = runtime.node.tryFindChild(pin.childId) as CfnResource | undefined; + if (!res) continue; // a future alpha rename → silently skip (re-derive then) + res.overrideLogicalId(pin.liveLogicalId); + if (pin.liveName !== undefined) res.addPropertyOverride('Name', pin.liveName); + } +} diff --git a/cdk/test/constructs/linear-integration.test.ts b/cdk/test/constructs/linear-integration.test.ts index 450d1a25..aa9206ab 100644 --- a/cdk/test/constructs/linear-integration.test.ts +++ b/cdk/test/constructs/linear-integration.test.ts @@ -22,6 +22,7 @@ import { Template, Match } from 'aws-cdk-lib/assertions'; import * as apigw from 'aws-cdk-lib/aws-apigateway'; import * as cognito from 'aws-cdk-lib/aws-cognito'; import * as dynamodb from 'aws-cdk-lib/aws-dynamodb'; +import * as s3 from 'aws-cdk-lib/aws-s3'; import { LinearIntegration } from 
'../../src/constructs/linear-integration'; 
describe('LinearIntegration construct', () => { @@ -120,3 +121,127 @@ describe('LinearIntegration construct', () => { }); }); }); + +describe('LinearIntegration construct — #331 seed-time root release throttle', () => { + // When orchestrationTable + userConcurrencyTable are both provided, the + // webhook processor env carries the concurrency table + cap so it throttles + // the seed-time ROOT release (a failed root is unrecoverable by the sweep). + function buildWith(opts: { withConcurrency: boolean }): Template { + const app = new App(); + const stack = new Stack(app, 'TestStack'); + const api = new apigw.RestApi(stack, 'TestApi'); + const userPool = new cognito.UserPool(stack, 'TestUserPool'); + const taskTable = new dynamodb.Table(stack, 'TaskTable', { + partitionKey: { name: 'task_id', type: dynamodb.AttributeType.STRING }, + }); + const taskEventsTable = new dynamodb.Table(stack, 'TaskEventsTable', { + partitionKey: { name: 'task_id', type: dynamodb.AttributeType.STRING }, + sortKey: { name: 'event_id', type: dynamodb.AttributeType.STRING }, + }); + const orchestrationTable = new dynamodb.Table(stack, 'OrchTable', { + partitionKey: { name: 'orchestration_id', type: dynamodb.AttributeType.STRING }, + sortKey: { name: 'sub_issue_id', type: dynamodb.AttributeType.STRING }, + }); + const userConcurrencyTable = opts.withConcurrency + ? 
new dynamodb.Table(stack, 'ConcTable', { + partitionKey: { name: 'user_id', type: dynamodb.AttributeType.STRING }, + }) + : undefined; + new LinearIntegration(stack, 'LinearIntegration', { + api, + userPool, + taskTable, + taskEventsTable, + orchestrationTable, + ...(userConcurrencyTable && { userConcurrencyTable, maxConcurrentTasksPerUser: 7 }), + }); + return Template.fromStack(stack); + } + + test('wires USER_CONCURRENCY_TABLE_NAME + cap when the concurrency table is provided', () => { + const t = buildWith({ withConcurrency: true }); + t.hasResourceProperties('AWS::Lambda::Function', { + Environment: { + Variables: Match.objectLike({ + ORCHESTRATION_TABLE_NAME: Match.anyValue(), + USER_CONCURRENCY_TABLE_NAME: Match.anyValue(), + MAX_CONCURRENT_TASKS_PER_USER: '7', + }), + }, + }); + }); + + test('does NOT set USER_CONCURRENCY_TABLE_NAME when the table is omitted (back-compat)', () => { + const t = buildWith({ withConcurrency: false }); + // The processor still has ORCHESTRATION_TABLE_NAME but no concurrency var. + const fns = t.findResources('AWS::Lambda::Function', { + Properties: { + Environment: { Variables: Match.objectLike({ USER_CONCURRENCY_TABLE_NAME: Match.anyValue() }) }, + }, + }); + expect(Object.keys(fns)).toHaveLength(0); + }); +}); + +describe('LinearIntegration construct — attachmentsBucket wiring', () => { + // Regression-guard: webhook processor needs ATTACHMENTS_BUCKET_NAME and S3 + // Put/Delete on the bucket so `extractImageUrlAttachments` can reach the + // bucket via createTaskCore. Without this, Linear-triggered tasks with + // markdown image attachments fail with 503 ("Attachment storage is not + // configured.") — the symptom that bit `linear-vercel` 2026-05-27. 
+ let template: Template; + + beforeAll(() => { + const app = new App(); + const stack = new Stack(app, 'TestStack'); + + const api = new apigw.RestApi(stack, 'TestApi'); + const userPool = new cognito.UserPool(stack, 'TestUserPool'); + const taskTable = new dynamodb.Table(stack, 'TaskTable', { + partitionKey: { name: 'task_id', type: dynamodb.AttributeType.STRING }, + }); + const taskEventsTable = new dynamodb.Table(stack, 'TaskEventsTable', { + partitionKey: { name: 'task_id', type: dynamodb.AttributeType.STRING }, + sortKey: { name: 'event_id', type: dynamodb.AttributeType.STRING }, + }); + const attachmentsBucket = new s3.Bucket(stack, 'AttachmentsBucket'); + + new LinearIntegration(stack, 'LinearIntegration', { + api, + userPool, + taskTable, + taskEventsTable, + attachmentsBucket, + }); + + template = Template.fromStack(stack); + }); + + test('processor env includes ATTACHMENTS_BUCKET_NAME when bucket provided', () => { + template.hasResourceProperties('AWS::Lambda::Function', { + Environment: { + Variables: Match.objectLike({ + ATTACHMENTS_BUCKET_NAME: Match.anyValue(), + LINEAR_PROJECT_MAPPING_TABLE_NAME: Match.anyValue(), + }), + }, + }); + }); + + test('processor role can PutObject and DeleteObject on the attachments bucket', () => { + template.hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: Match.arrayWith([ + Match.objectLike({ + Action: Match.arrayWith(['s3:PutObject']), + Effect: 'Allow', + }), + Match.objectLike({ + Action: 's3:DeleteObject*', + Effect: 'Allow', + }), + ]), + }, + }); + }); +}); diff --git a/cdk/test/constructs/orchestration-reconciler.test.ts b/cdk/test/constructs/orchestration-reconciler.test.ts new file mode 100644 index 00000000..7099f8d4 --- /dev/null +++ b/cdk/test/constructs/orchestration-reconciler.test.ts @@ -0,0 +1,112 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import { App, Stack } from 'aws-cdk-lib'; +import { Match, Template } from 'aws-cdk-lib/assertions'; +import * as dynamodb from 'aws-cdk-lib/aws-dynamodb'; +import { OrchestrationReconciler } from '../../src/constructs/orchestration-reconciler'; +import { OrchestrationTable } from '../../src/constructs/orchestration-table'; +import { TaskEventsTable } from '../../src/constructs/task-events-table'; +import { TaskTable } from '../../src/constructs/task-table'; + +function synth(): Template { + const app = new App(); + const stack = new Stack(app, 'TestStack'); + const taskTable = new TaskTable(stack, 'TaskTable'); + const orchestrationTable = new OrchestrationTable(stack, 'OrchestrationTable'); + const taskEventsTable = new TaskEventsTable(stack, 'TaskEventsTable'); + new OrchestrationReconciler(stack, 'OrchestrationReconciler', { + taskTable: taskTable.table, + orchestrationTable: orchestrationTable.table, + taskEventsTable: taskEventsTable.table, + orchestratorFunctionArn: 'arn:aws:lambda:us-east-1:123456789012:function:orch', + }); + return Template.fromStack(stack); +} + +describe('OrchestrationReconciler', () => { + let template: Template; 
+ beforeEach(() => { + template = synth(); + }); + + test('creates the reconciler Lambda with the orchestration table env', () => { + template.hasResourceProperties('AWS::Lambda::Function', { + Environment: { + Variables: Match.objectLike({ + ORCHESTRATION_TABLE_NAME: Match.anyValue(), + TASK_TABLE_NAME: Match.anyValue(), + }), + }, + }); + }); + + test('subscribes to the TaskTable stream via an event-source mapping', () => { + template.resourceCountIs('AWS::Lambda::EventSourceMapping', 1); + template.hasResourceProperties('AWS::Lambda::EventSourceMapping', { + StartingPosition: 'LATEST', + BisectBatchOnFunctionError: true, + }); + }); + + test('provisions a DLQ for poison stream records', () => { + // At least one SQS queue (the reconciler DLQ). + const queues = template.findResources('AWS::SQS::Queue'); + expect(Object.keys(queues).length).toBeGreaterThanOrEqual(1); + }); + + test('TaskTable has a stream enabled (reconciler source)', () => { + template.hasResourceProperties('AWS::DynamoDB::Table', { + StreamSpecification: { StreamViewType: 'NEW_IMAGE' }, + }); + }); +}); + +describe('OrchestrationReconciler — grants', () => { + test('grants the function read/write on the orchestration table', () => { + const template = synth(); + // The function role should have a policy referencing dynamodb actions. + const policies = template.findResources('AWS::IAM::Policy'); + const hasDdb = Object.values(policies).some((p) => { + const statements = (p.Properties as { PolicyDocument: { Statement: Array<{ Action?: unknown }> } }) + .PolicyDocument.Statement; + return JSON.stringify(statements).includes('dynamodb:'); + }); + expect(hasDdb).toBe(true); + }); +}); + +// Minimal sanity that the props type accepts an ITable. 
+describe('OrchestrationReconciler — typing', () => { + test('accepts imported tables', () => { + const app = new App(); + const stack = new Stack(app, 'T2'); + const taskTable = dynamodb.Table.fromTableAttributes(stack, 'TT', { + tableName: 'tasks', + tableStreamArn: 'arn:aws:dynamodb:us-east-1:123456789012:table/tasks/stream/2026', + }); + const orch = dynamodb.Table.fromTableName(stack, 'OT', 'orch'); + const events = dynamodb.Table.fromTableName(stack, 'ET', 'events'); + expect(() => new OrchestrationReconciler(stack, 'R', { + taskTable, + orchestrationTable: orch, + taskEventsTable: events, + })).not.toThrow(); + }); +}); diff --git a/cdk/test/constructs/orchestration-table.test.ts b/cdk/test/constructs/orchestration-table.test.ts new file mode 100644 index 00000000..bb2e7c80 --- /dev/null +++ b/cdk/test/constructs/orchestration-table.test.ts @@ -0,0 +1,154 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +import { App, RemovalPolicy, Stack } from 'aws-cdk-lib'; +import { Match, Template } from 'aws-cdk-lib/assertions'; +import { OrchestrationTable } from '../../src/constructs/orchestration-table'; + +describe('OrchestrationTable', () => { + let template: Template; + + beforeEach(() => { + const app = new App(); + const stack = new Stack(app, 'TestStack'); + new OrchestrationTable(stack, 'OrchestrationTable'); + template = Template.fromStack(stack); + }); + + test('creates a DynamoDB table with orchestration_id (PK) + sub_issue_id (SK)', () => { + template.hasResourceProperties('AWS::DynamoDB::Table', { + KeySchema: [ + { AttributeName: 'orchestration_id', KeyType: 'HASH' }, + { AttributeName: 'sub_issue_id', KeyType: 'RANGE' }, + ], + }); + }); + + test('uses PAY_PER_REQUEST billing mode', () => { + template.hasResourceProperties('AWS::DynamoDB::Table', { + BillingMode: 'PAY_PER_REQUEST', + }); + }); + + test('enables point-in-time recovery by default', () => { + template.hasResourceProperties('AWS::DynamoDB::Table', { + PointInTimeRecoverySpecification: { + PointInTimeRecoveryEnabled: true, + }, + }); + }); + + test('sets DESTROY removal policy by default', () => { + template.hasResource('AWS::DynamoDB::Table', { + DeletionPolicy: 'Delete', + UpdateReplacePolicy: 'Delete', + }); + }); + + test('enables TTL on ttl attribute', () => { + template.hasResourceProperties('AWS::DynamoDB::Table', { + TimeToLiveSpecification: { + AttributeName: 'ttl', + Enabled: true, + }, + }); + }); + + test('creates ChildTaskIndex GSI with child_task_id as PK', () => { + template.hasResourceProperties('AWS::DynamoDB::Table', { + GlobalSecondaryIndexes: Match.arrayWith([ + Match.objectLike({ + IndexName: 'ChildTaskIndex', + KeySchema: [ + { AttributeName: 'child_task_id', KeyType: 'HASH' }, + ], + Projection: { ProjectionType: 'ALL' }, + }), + ]), + }); + }); + + test('creates ChildBranchIndex GSI with child_branch_name as PK (#305 A6)', () => { + 
template.hasResourceProperties('AWS::DynamoDB::Table', { + GlobalSecondaryIndexes: Match.arrayWith([ + Match.objectLike({ + IndexName: 'ChildBranchIndex', + KeySchema: [ + { AttributeName: 'child_branch_name', KeyType: 'HASH' }, + ], + Projection: { ProjectionType: 'ALL' }, + }), + ]), + }); + }); + + test('declares all required attribute definitions', () => { + template.hasResourceProperties('AWS::DynamoDB::Table', { + AttributeDefinitions: Match.arrayWith([ + { AttributeName: 'orchestration_id', AttributeType: 'S' }, + { AttributeName: 'sub_issue_id', AttributeType: 'S' }, + { AttributeName: 'child_task_id', AttributeType: 'S' }, + { AttributeName: 'child_branch_name', AttributeType: 'S' }, + ]), + }); + }); + + test('static index name constants match actual GSI names', () => { + expect(OrchestrationTable.CHILD_TASK_INDEX).toBe('ChildTaskIndex'); + expect(OrchestrationTable.CHILD_BRANCH_INDEX).toBe('ChildBranchIndex'); + }); +}); + +describe('OrchestrationTable with custom props', () => { + test('accepts custom table name', () => { + const app = new App(); + const stack = new Stack(app, 'TestStack'); + new OrchestrationTable(stack, 'OrchestrationTable', { tableName: 'my-orchestrations' }); + const template = Template.fromStack(stack); + + template.hasResourceProperties('AWS::DynamoDB::Table', { + TableName: 'my-orchestrations', + }); + }); + + test('accepts custom removal policy', () => { + const app = new App(); + const stack = new Stack(app, 'TestStack'); + new OrchestrationTable(stack, 'OrchestrationTable', { removalPolicy: RemovalPolicy.RETAIN }); + const template = Template.fromStack(stack); + + template.hasResource('AWS::DynamoDB::Table', { + DeletionPolicy: 'Retain', + UpdateReplacePolicy: 'Retain', + }); + }); + + test('accepts point-in-time recovery disabled', () => { + const app = new App(); + const stack = new Stack(app, 'TestStack'); + new OrchestrationTable(stack, 'OrchestrationTable', { pointInTimeRecovery: false }); + const template = 
Template.fromStack(stack); + + template.hasResourceProperties('AWS::DynamoDB::Table', { + PointInTimeRecoverySpecification: { + PointInTimeRecoveryEnabled: false, + }, + }); + }); +}); diff --git a/cdk/test/constructs/task-table.test.ts b/cdk/test/constructs/task-table.test.ts index c9e58332..211b7530 100644 --- a/cdk/test/constructs/task-table.test.ts +++ b/cdk/test/constructs/task-table.test.ts @@ -104,6 +104,24 @@ describe('TaskTable', () => { }); }); + test('creates LinearIssueIndex GSI (PK linear_issue_id, SK created_at, INCLUDE projection)', () => { + template.hasResourceProperties('AWS::DynamoDB::Table', { + GlobalSecondaryIndexes: Match.arrayWith([ + Match.objectLike({ + IndexName: 'LinearIssueIndex', + KeySchema: [ + { AttributeName: 'linear_issue_id', KeyType: 'HASH' }, + { AttributeName: 'created_at', KeyType: 'RANGE' }, + ], + Projection: { + ProjectionType: 'INCLUDE', + NonKeyAttributes: Match.arrayWith(['pr_url', 'pr_number', 'status', 'repo', 'user_id', 'channel_metadata']), + }, + }), + ]), + }); + }); + test('declares all required attribute definitions', () => { template.hasResourceProperties('AWS::DynamoDB::Table', { AttributeDefinitions: Match.arrayWith([ @@ -113,6 +131,7 @@ describe('TaskTable', () => { { AttributeName: 'status', AttributeType: 'S' }, { AttributeName: 'created_at', AttributeType: 'S' }, { AttributeName: 'idempotency_key', AttributeType: 'S' }, + { AttributeName: 'linear_issue_id', AttributeType: 'S' }, ]), }); }); @@ -130,6 +149,7 @@ describe('TaskTable', () => { expect(TaskTable.USER_STATUS_INDEX).toBe('UserStatusIndex'); expect(TaskTable.STATUS_INDEX).toBe('StatusIndex'); expect(TaskTable.IDEMPOTENCY_INDEX).toBe('IdempotencyIndex'); + expect(TaskTable.LINEAR_ISSUE_INDEX).toBe('LinearIssueIndex'); }); }); diff --git a/cdk/test/handlers/fanout-task-events.test.ts b/cdk/test/handlers/fanout-task-events.test.ts index 98f6765c..3c4d1c7f 100644 --- a/cdk/test/handlers/fanout-task-events.test.ts +++ 
b/cdk/test/handlers/fanout-task-events.test.ts @@ -97,14 +97,25 @@ jest.mock('../../src/handlers/slack-notify', () => { // in `linear-feedback.ts` (#239). Mock it here so dispatcher tests // observe the call shape without exercising the real OAuth-resolver // + GraphQL path. Default ``{ ok: true }`` so a test that forgets to -// script the mock still drives the happy path. +// script the mock still drives the happy path (postIssueComment now returns +// a LinearPostResult — upstream #311/#332). const mockPostIssueComment: jest.Mock = jest.fn().mockResolvedValue({ ok: true }); +// #247 UX.3: standalone comment-triggered iterations get a threaded reply to +// the human's @bgagent comment, on top of the metrics comment. replyToComment +// returns the new reply's comment-id string (or null), NOT a LinearPostResult. +const mockReplyToComment: jest.Mock = jest.fn().mockResolvedValue('reply-id'); jest.mock('../../src/handlers/shared/linear-feedback', () => ({ postIssueComment: ( ctx: { linearWorkspaceId: string; registryTableName: string }, issueId: string, body: string, ) => mockPostIssueComment(ctx, issueId, body), + replyToComment: ( + ctx: { linearWorkspaceId: string; registryTableName: string }, + issueId: string, + parentCommentId: string, + body: string, + ) => mockReplyToComment(ctx, issueId, parentCommentId, body), })); process.env.TASK_TABLE_NAME = 'Tasks'; @@ -1341,6 +1352,7 @@ describe('fanout-task-events: Linear dispatcher (issue #239)', () => { beforeEach(() => { mockDdbSend.mockReset().mockResolvedValue({ Item: undefined }); mockPostIssueComment.mockReset().mockResolvedValue({ ok: true }); + mockReplyToComment.mockReset().mockResolvedValue('reply-id'); // Slack/GitHub mocks aren't asserted here but leaving them // un-reset would let prior-test rejections bleed in. 
mockDispatchSlackEvent.mockReset().mockResolvedValue(undefined); @@ -1618,6 +1630,84 @@ describe('fanout-task-events: Linear dispatcher (issue #239)', () => { // for max-turns errors is "Exceeded max turns" (see error-classifier.ts). expect(body).toContain('Exceeded max turns'); }); + + // #247 UX.3: a STANDALONE comment-triggered iteration (trigger_comment_id but + // no orchestration_iteration) gets a threaded ✅/❌ reply to the human's + // comment, on top of the metrics comment. Idempotent via the ack claim. + describe('UX.3 standalone iteration threaded reply', () => { + const STANDALONE = { + ...TASK_RECORD_LINEAR, + channel_metadata: { + linear_issue_id: 'issue-uuid-42', + linear_workspace_id: 'org-uuid-acme', + trigger_comment_id: 'human-cmt-7', + }, + pr_url: 'https://github.com/owner/repo/pull/13', + }; + + test('task_completed → ✅ threaded reply to the triggering comment, linking the PR', async () => { + mockGet(STANDALONE); + await handler({ Records: [mkEvent('task_completed', 't-lin')] }); + + expect(mockReplyToComment).toHaveBeenCalledTimes(1); + // Signature: replyToComment(ctx, issueId, parentCommentId, body). + const [, issueId, parentCommentId, body] = mockReplyToComment.mock.calls[0]; + expect(issueId).toBe('issue-uuid-42'); // the issue the comment lives on + expect(parentCommentId).toBe('human-cmt-7'); + expect(body).toMatch(/^✅ Updated — PR #13\./); + // The metrics comment is still posted too. 
+ expect(mockPostIssueComment).toHaveBeenCalledTimes(1); + }); + + test('task_failed (agent crash) → ❌ reply with classified reason + CloudWatch task id (UX.5)', async () => { + mockGet({ ...STANDALONE, error_message: 'agent_status="error_max_turns"' }); + await handler({ Records: [mkEvent('task_failed', 't-lin')] }); + const [, , , body] = mockReplyToComment.mock.calls[0]; + expect(body).toMatch(/^❌/); + expect(body).toMatch(/Exceeded max turns/i); // classified + expect(body).toMatch(/CloudWatch for task `t-lin`/); + expect(body).toMatch(/reply with guidance/i); + }); + + test('task_completed but build_passed=false → ❌ build/test reply pointing at PR checks (UX.5)', async () => { + mockGet({ ...STANDALONE, build_passed: false, error_message: undefined }); + await handler({ Records: [mkEvent('task_completed', 't-lin')] }); + const [, , , body] = mockReplyToComment.mock.calls[0]; + expect(body).toMatch(/build\/tests didn't pass/i); + expect(body).toMatch(/PR's checks/i); + expect(body).not.toMatch(/CloudWatch/i); + }); + + test('an ORCHESTRATION iteration (orchestration_iteration=true) is NOT replied here (reconciler owns it)', async () => { + mockGet({ + ...STANDALONE, + channel_metadata: { ...STANDALONE.channel_metadata, orchestration_iteration: 'true' }, + }); + await handler({ Records: [mkEvent('task_completed', 't-lin')] }); + expect(mockReplyToComment).not.toHaveBeenCalled(); + }); + + test('a plain Linear task WITHOUT trigger_comment_id gets no threaded reply', async () => { + mockGet(TASK_RECORD_LINEAR); // no trigger_comment_id + await handler({ Records: [mkEvent('task_completed', 't-lin')] }); + expect(mockReplyToComment).not.toHaveBeenCalled(); + }); + + test('idempotent: a redelivered terminal event that loses the ack claim does not double-reply', async () => { + // Get returns the record; the ack-claim Update throws ConditionalCheckFailed. 
+ mockDdbSend.mockReset().mockImplementation((cmd: { _type?: string; input?: { UpdateExpression?: string } }) => { + if (cmd?._type === 'Get') return Promise.resolve({ Item: STANDALONE }); + if (cmd?._type === 'Update' && cmd.input?.UpdateExpression?.includes('ack_replied_at')) { + const err = new Error('conditional'); + (err as { name?: string }).name = 'ConditionalCheckFailedException'; + return Promise.reject(err); + } + return Promise.resolve({}); + }); + await handler({ Records: [mkEvent('task_completed', 't-lin')] }); + expect(mockReplyToComment).not.toHaveBeenCalled(); + }); + }); }); // --------------------------------------------------------------------------- diff --git a/cdk/test/handlers/github-webhook-processor.test.ts b/cdk/test/handlers/github-webhook-processor.test.ts index cc554bb9..e2c286cd 100644 --- a/cdk/test/handlers/github-webhook-processor.test.ts +++ b/cdk/test/handlers/github-webhook-processor.test.ts @@ -23,6 +23,15 @@ jest.mock('@aws-sdk/client-s3', () => ({ PutObjectCommand: jest.fn((input: unknown) => ({ _type: 'Put', input })), })); +// DynamoDB doc client — drives persistScreenshotUrl (#247 UX.16/UX.17). 
+const ddbSend = jest.fn(); +jest.mock('@aws-sdk/client-dynamodb', () => ({ DynamoDBClient: jest.fn(() => ({})) })); +jest.mock('@aws-sdk/lib-dynamodb', () => ({ + DynamoDBDocumentClient: { from: jest.fn(() => ({ send: ddbSend })) }, + UpdateCommand: jest.fn((input: unknown) => ({ _type: 'Update', input })), + GetCommand: jest.fn((input: unknown) => ({ _type: 'Get', input })), +})); + const captureScreenshotMock = jest.fn(); jest.mock('../../src/handlers/shared/agentcore-browser', () => ({ captureScreenshot: (...args: unknown[]) => captureScreenshotMock(...args), @@ -45,15 +54,18 @@ jest.mock('../../src/handlers/shared/linear-feedback', () => ({ const findLinearIssueMock = jest.fn(); const extractLinearIdentifierMock = jest.fn(); +const extractFromBranchMock = jest.fn(); jest.mock('../../src/handlers/shared/linear-issue-lookup', () => ({ findLinearIssueByIdentifier: (...args: unknown[]) => findLinearIssueMock(...args), extractLinearIdentifier: (...args: unknown[]) => extractLinearIdentifierMock(...args), + extractLinearIdentifierFromBranch: (...args: unknown[]) => extractFromBranchMock(...args), })); process.env.SCREENSHOT_BUCKET_NAME = 'screenshot-bucket'; process.env.SCREENSHOT_PUBLIC_HOST = 'd1.cloudfront.net'; process.env.GITHUB_TOKEN_SECRET_ARN = 'arn:aws:secretsmanager:us-east-1:123:secret:gh-token'; process.env.LINEAR_WORKSPACE_REGISTRY_TABLE_NAME = 'LinearWorkspaceRegistry'; +process.env.TASK_TABLE_NAME = 'TaskTable'; import { handler } from '../../src/handlers/github-webhook-processor'; @@ -88,6 +100,11 @@ describe('github-webhook-processor handler', () => { postIssueCommentMock.mockReset(); findLinearIssueMock.mockReset(); extractLinearIdentifierMock.mockReset(); + extractFromBranchMock.mockReset(); + // Default: persistScreenshotUrl's UpdateItem succeeds with a NON-integration + // task record (no orchestration_sub_issue_id) → standalone Linear comment + // still posts, as the pre-existing tests expect. 
+ ddbSend.mockReset().mockResolvedValue({ Attributes: { channel_metadata: {} } }); jest.restoreAllMocks(); }); @@ -147,6 +164,32 @@ describe('github-webhook-processor handler', () => { } }); + test('picks the head-SHA owner when commit-pulls returns a stacked chain (#247)', async () => { + // A stacked sub-issue chain: the deploy SHA `abc1234` is the head of + // PR 73, but the commit-pulls API also lists PRs 74 and 75 stacked on + // top (their history contains the commit). The PR whose own head is + // the SHA must win, so the screenshot routes to 73's branch. + resolveGitHubTokenMock.mockResolvedValue('gh-tok'); + fetchOk([ + { number: 73, state: 'open', title: 't73', body: 'b73', head: { ref: 'bgagent/01T/abca-152-x', sha: 'abc1234' } }, + { number: 74, state: 'open', title: 't74', body: 'b74', head: { ref: 'bgagent/01T/abca-153-y', sha: 'def5678' } }, + { number: 75, state: 'open', title: 't75', body: 'b75', head: { ref: 'bgagent/01T/abca-154-z', sha: 'aaa9999' } }, + ]); + captureScreenshotMock.mockResolvedValueOnce(new Uint8Array([1])); + s3Send.mockResolvedValueOnce({}); + upsertTaskCommentMock.mockResolvedValueOnce({ commentId: 'cmt-1' }); + extractFromBranchMock.mockReturnValueOnce('ABCA-152'); + findLinearIssueMock.mockResolvedValueOnce({ issueId: 'issue-152', linearWorkspaceId: 'ws-1', workspaceSlug: 'abca' }); + postIssueCommentMock.mockResolvedValueOnce(true); + + await handler(payload()); + + const commentArg = upsertTaskCommentMock.mock.calls[0][0] as { issueOrPrNumber: number }; + expect(commentArg.issueOrPrNumber).toBe(73); + expect(extractFromBranchMock).toHaveBeenCalledWith('bgagent/01T/abca-152-x'); + expect(postIssueCommentMock.mock.calls[0][1]).toBe('issue-152'); + }); + test('happy path: PR found → screenshot → S3 → PR comment posted', async () => { resolveGitHubTokenMock.mockResolvedValue('gh-tok'); fetchOk([{ number: 17, state: 'open', title: 'feat: add x', body: 'body' }]); @@ -208,10 +251,12 @@ describe('github-webhook-processor 
handler', () => { test('Linear branch fires when registry table set + identifier in PR title', async () => { resolveGitHubTokenMock.mockResolvedValue('gh-tok'); - fetchOk([{ number: 17, state: 'open', title: 'ABCA-42 fix login', body: 'body' }]); + // No branch identifier here — exercises the title fallback path. + fetchOk([{ number: 17, state: 'open', title: 'ABCA-42 fix login', body: 'body', head: { ref: 'feature-x', sha: 'abc1234' } }]); captureScreenshotMock.mockResolvedValueOnce(new Uint8Array([1])); s3Send.mockResolvedValueOnce({}); upsertTaskCommentMock.mockResolvedValueOnce({ commentId: 'cmt-1' }); + extractFromBranchMock.mockReturnValueOnce(null); extractLinearIdentifierMock.mockReturnValueOnce('ABCA-42'); findLinearIssueMock.mockResolvedValueOnce({ issueId: 'issue-uuid', @@ -230,12 +275,48 @@ describe('github-webhook-processor handler', () => { expect(linearArg[2]).toMatch(/https:\/\/d1\.cloudfront\.net\/screenshots\/owner_repo\/abc1234-42-[0-9a-f]{16}\.png/); }); - test('falls back to extractor on PR body when title yields no identifier', async () => { + test('branch-name identifier wins over a predecessor named in the PR body (#247 stacked PR)', async () => { + // The #247 Lisbon-epic regression: PR #73 (closes ABCA-152) carries a + // body that mentions ABCA-151 ("cherry-picked from predecessor branch + // ABCA-151") BEFORE the issue it closes. Branch-first routing must win + // so the screenshot lands on ABCA-152, not the predecessor. + resolveGitHubTokenMock.mockResolvedValue('gh-tok'); + fetchOk([{ + number: 73, + state: 'open', + title: 'feat(destinations): add Lisbon destination card', + body: 'cherry-picked from predecessor branch ABCA-151 ... 
Closes ABCA-152', + head: { ref: 'bgagent/01TASK/abca-152-link-lisbon-from-destinationsht', sha: 'abc1234' }, + }]); + captureScreenshotMock.mockResolvedValueOnce(new Uint8Array([1])); + s3Send.mockResolvedValueOnce({}); + upsertTaskCommentMock.mockResolvedValueOnce({ commentId: 'cmt-1' }); + // Real branch extractor behaviour: pulls ABCA-152 from the branch. + extractFromBranchMock.mockReturnValueOnce('ABCA-152'); + findLinearIssueMock.mockResolvedValueOnce({ + issueId: 'issue-152', + linearWorkspaceId: 'ws-1', + workspaceSlug: 'abca', + }); + postIssueCommentMock.mockResolvedValueOnce(true); + + await handler(payload()); + + // Routed to ABCA-152 from the branch; title/body extractor never consulted. + expect(extractFromBranchMock).toHaveBeenCalledWith('bgagent/01TASK/abca-152-link-lisbon-from-destinationsht'); + expect(findLinearIssueMock).toHaveBeenCalledWith('ABCA-152', 'LinearWorkspaceRegistry'); + expect(extractLinearIdentifierMock).not.toHaveBeenCalled(); + expect(postIssueCommentMock).toHaveBeenCalledTimes(1); + expect(postIssueCommentMock.mock.calls[0][1]).toBe('issue-152'); + }); + + test('falls back to title then body when branch yields no identifier', async () => { resolveGitHubTokenMock.mockResolvedValue('gh-tok'); - fetchOk([{ number: 17, state: 'open', title: 'feat: add foo', body: 'closes ABCA-42' }]); + fetchOk([{ number: 17, state: 'open', title: 'feat: add foo', body: 'closes ABCA-42', head: { ref: 'random-branch', sha: 'abc1234' } }]); captureScreenshotMock.mockResolvedValueOnce(new Uint8Array([1])); s3Send.mockResolvedValueOnce({}); upsertTaskCommentMock.mockResolvedValueOnce({ commentId: 'cmt-1' }); + extractFromBranchMock.mockReturnValueOnce(null); // branch produces no match extractLinearIdentifierMock .mockReturnValueOnce(null) // title produces no match .mockReturnValueOnce('ABCA-42'); // body does @@ -248,6 +329,7 @@ describe('github-webhook-processor handler', () => { await handler(payload()); + 
expect(extractFromBranchMock).toHaveBeenCalledTimes(1); expect(extractLinearIdentifierMock).toHaveBeenCalledTimes(2); expect(postIssueCommentMock).toHaveBeenCalledTimes(1); }); @@ -297,4 +379,55 @@ describe('github-webhook-processor handler', () => { // No throw — postIssueComment returning false is just logged. await expect(handler(payload())).resolves.toBeUndefined(); }); + + test('#247 UX.17: persists BOTH screenshot_url and screenshot_preview_url on the task record', async () => { + resolveGitHubTokenMock.mockResolvedValue('gh-tok'); + fetchOk([{ number: 17, state: 'open', title: 't', body: '', head: { ref: 'bgagent/01TASKID/abca-42-x', sha: 'abc1234' } }]); + captureScreenshotMock.mockResolvedValueOnce(new Uint8Array([1])); + s3Send.mockResolvedValueOnce({}); + upsertTaskCommentMock.mockResolvedValueOnce({ commentId: 'cmt-1' }); + extractFromBranchMock.mockReturnValueOnce(null); + extractLinearIdentifierMock.mockReturnValue(null); + + await handler(payload()); + + const upd = ddbSend.mock.calls.find((c) => c[0]?._type === 'Update'); + expect(upd).toBeDefined(); + const input = upd![0].input as { Key: { task_id: string }; ExpressionAttributeValues: Record<string, string> }; + expect(input.Key.task_id).toBe('01TASKID'); // 2nd branch segment + expect(input.ExpressionAttributeValues[':u']).toMatch(/cloudfront\.net\/screenshots/); + expect(input.ExpressionAttributeValues[':p']).toBe('https://preview.example.com'); // the deploy preview URL + }); + + test('#247 UX.16: integration node deploy persists the URL but does NOT post a standalone Linear comment', async () => { + resolveGitHubTokenMock.mockResolvedValue('gh-tok'); + // The integration node's PR — branch + title both name the PARENT epic + // (ABCA-301), which WOULD route a Linear comment onto the parent. 
+ fetchOk([{ + number: 191, + state: 'open', + title: 'feat(pages): integrate FAQ + Reviews (ABCA-301 combined result)', + body: 'combined', + head: { ref: 'bgagent/01INTEGRATION/integrate-the-sub-issues', sha: 'abc1234' }, + }]); + captureScreenshotMock.mockResolvedValueOnce(new Uint8Array([1])); + s3Send.mockResolvedValueOnce({}); + upsertTaskCommentMock.mockResolvedValueOnce({ commentId: 'cmt-1' }); + // The persisted task record marks this as the synthetic integration node. + ddbSend.mockReset().mockResolvedValue({ + Attributes: { channel_metadata: { orchestration_sub_issue_id: 'orch_1__integration' } }, + }); + extractFromBranchMock.mockReturnValue(null); + extractLinearIdentifierMock.mockReturnValue('ABCA-301'); + + await handler(payload()); + + // URL persisted (panel embed path) … + expect(ddbSend.mock.calls.some((c) => c[0]?._type === 'Update')).toBe(true); + // … the GitHub PR comment still posts (load-bearing on the PR) … + expect(upsertTaskCommentMock).toHaveBeenCalledTimes(1); + // … but NO standalone Linear comment on the parent epic. + expect(findLinearIssueMock).not.toHaveBeenCalled(); + expect(postIssueCommentMock).not.toHaveBeenCalled(); + }); }); diff --git a/cdk/test/handlers/github-webhook.test.ts b/cdk/test/handlers/github-webhook.test.ts index d8d71f43..6e3328fe 100644 --- a/cdk/test/handlers/github-webhook.test.ts +++ b/cdk/test/handlers/github-webhook.test.ts @@ -123,8 +123,15 @@ describe('github-webhook receiver', () => { expect(lambdaSend).not.toHaveBeenCalled(); }); - test('200 silently ignores non-deployment_status events', async () => { - const res = await handler(event('{}', { 'X-GitHub-Event': 'pull_request' })); + test('200 silently ignores pull_request events (A6 is no longer a GitHub-webhook path)', async () => { + // #247 A6 redesign: re-stack is driven by the reconciler off a Linear + // @bgagent comment, not a GitHub pull_request webhook (those are + // WAF-blocked anyway). 
pull_request events are a plain 200 no-op — no + // restack invoke. + const res = await handler(event( + JSON.stringify({ action: 'synchronize', pull_request: { head: { ref: 'branch-A', sha: 's' } } }), + { 'X-GitHub-Event': 'pull_request' }, + )); expect(res.statusCode).toBe(200); expect(lambdaSend).not.toHaveBeenCalled(); }); diff --git a/cdk/test/handlers/linear-webhook-processor-orchestration.test.ts b/cdk/test/handlers/linear-webhook-processor-orchestration.test.ts new file mode 100644 index 00000000..71d2712c --- /dev/null +++ b/cdk/test/handlers/linear-webhook-processor-orchestration.test.ts @@ -0,0 +1,662 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +/** + * Tests the #247 Mode A orchestration routing in the Linear webhook + * processor — the env-var-gated branch that, when ORCHESTRATION_TABLE_NAME + * is set and a workspace token resolves, probes the labeled parent issue + * for a sub-issue graph and routes accordingly: + * seeded → no parent task (reconciler owns children) + * single_task → falls through to the normal one-issue→one-task path + * rejected/error → terminal ❌ comment, no task + * + * Kept separate from linear-webhook-processor.test.ts because the env + * var is read at module-eval time; this file enables it, the sibling + * file leaves it unset (proving the path is dormant by default). + * discoverOrchestration is mocked — its internals are covered by + * orchestration-discovery.test.ts. + */ + +const ddbSend = jest.fn(); +jest.mock('@aws-sdk/client-dynamodb', () => ({ DynamoDBClient: jest.fn(() => ({})) })); +jest.mock('@aws-sdk/lib-dynamodb', () => ({ + DynamoDBDocumentClient: { from: jest.fn(() => ({ send: ddbSend })) }, + GetCommand: jest.fn((input: unknown) => ({ _type: 'Get', input })), + QueryCommand: jest.fn((input: unknown) => ({ _type: 'Query', input })), + UpdateCommand: jest.fn((input: unknown) => ({ _type: 'Update', input })), + BatchWriteCommand: jest.fn((input: unknown) => ({ _type: 'BatchWrite', input })), +})); + +const createTaskCoreMock = jest.fn(); +jest.mock('../../src/handlers/shared/create-task-core', () => ({ + createTaskCore: (...args: unknown[]) => createTaskCoreMock(...args), +})); + +const reportIssueFailureMock = jest.fn(); +const swapIssueReactionMock = jest.fn(); +const transitionIssueStateMock = jest.fn(); +const upsertStatusCommentMock = jest.fn(); +const reactToCommentMock = jest.fn(); +const replyToCommentMock = jest.fn(); +jest.mock('../../src/handlers/shared/linear-feedback', () => ({ + reportIssueFailure: (...args: unknown[]) => reportIssueFailureMock(...args), + swapIssueReaction: (...args: unknown[]) => swapIssueReactionMock(...args), + 
transitionIssueState: (...args: unknown[]) => transitionIssueStateMock(...args), + upsertStatusComment: (...args: unknown[]) => upsertStatusCommentMock(...args), + reactToComment: (...args: unknown[]) => reactToCommentMock(...args), + replyToComment: (...args: unknown[]) => replyToCommentMock(...args), + EMOJI_STARTED: 'eyes', + EMOJI_SUCCESS: 'white_check_mark', + EMOJI_FAILURE: 'x', +})); + +const resolveLinearOauthTokenMock = jest.fn(); +jest.mock('../../src/handlers/shared/linear-oauth-resolver', () => ({ + resolveLinearOauthToken: (...args: unknown[]) => resolveLinearOauthTokenMock(...args), +})); + +const discoverOrchestrationMock = jest.fn(); +jest.mock('../../src/handlers/shared/orchestration-discovery', () => ({ + discoverOrchestration: (...args: unknown[]) => discoverOrchestrationMock(...args), +})); + +const fetchIssueParentIdMock = jest.fn(); +jest.mock('../../src/handlers/shared/linear-subissue-fetch', () => ({ + fetchIssueParentId: (...args: unknown[]) => fetchIssueParentIdMock(...args), +})); + +process.env.LINEAR_PROJECT_MAPPING_TABLE_NAME = 'LinearProjects'; +process.env.LINEAR_USER_MAPPING_TABLE_NAME = 'LinearUsers'; +process.env.LINEAR_WORKSPACE_REGISTRY_TABLE_NAME = 'LinearWorkspaceRegistry'; +process.env.TASK_TABLE_NAME = 'TaskTable'; +// Enable the orchestration path for this file (sibling file leaves it unset). 
+process.env.ORCHESTRATION_TABLE_NAME = 'OrchestrationTable'; + +import { handler } from '../../src/handlers/linear-webhook-processor'; + +function eventWith(payload: Record<string, unknown>): { raw_body: string } { + return { raw_body: JSON.stringify(payload) }; +} + +function issue(overrides: Record<string, unknown> = {}): Record<string, unknown> { + return { + action: 'create', + type: 'Issue', + organizationId: 'org-1', + actor: { id: 'user-1' }, + data: { + id: 'issue-1', + identifier: 'ABC-42', + title: 'Epic: ship the thing', + description: 'Parent epic.', + projectId: 'project-1', + teamId: 'team-1', + labels: [{ id: 'lbl-bg', name: 'bgagent' }], + }, + ...overrides, + }; +} + +/** Wire the common preamble: onboarded project, linked user, resolved token. */ +function happyPreamble(): void { + ddbSend + // 1: project mapping lookup → onboarded + active + .mockResolvedValueOnce({ Item: { status: 'active', repo: 'owner/repo', label_filter: 'bgagent' } }) + // 2: user mapping lookup → linked platform user + .mockResolvedValueOnce({ Item: { platform_user_id: 'platform-user-1' } }); + resolveLinearOauthTokenMock.mockResolvedValue({ + accessToken: 'access-tok', + oauthSecretArn: 'arn:secret', + workspaceSlug: 'acme', + }); +} + +describe('linear-webhook-processor — #247 orchestration routing', () => { + beforeEach(() => { + ddbSend.mockReset(); + createTaskCoreMock.mockReset(); + // Default: release path (now exercised in the seed test) returns a created task. 
+ createTaskCoreMock.mockResolvedValue({ statusCode: 201, body: JSON.stringify({ data: { task_id: 'child-task' } }) }); + reportIssueFailureMock.mockReset(); + reportIssueFailureMock.mockResolvedValue(undefined); + resolveLinearOauthTokenMock.mockReset(); + discoverOrchestrationMock.mockReset(); + swapIssueReactionMock.mockReset().mockResolvedValue(true); + transitionIssueStateMock.mockReset().mockResolvedValue(true); + upsertStatusCommentMock.mockReset().mockResolvedValue('cmt-status-1'); + fetchIssueParentIdMock.mockReset(); + }); + + test('seeded graph → no parent task created (reconciler owns children)', async () => { + happyPreamble(); + discoverOrchestrationMock.mockResolvedValueOnce({ + kind: 'seeded', + orchestrationId: 'orch_abc', + childCount: 3, + rootSubIssueIds: ['A'], + alreadyExisted: false, + }); + // After seeding, the handler loads the orchestration (Query) to release + // roots + post the initial panel. Return a real snapshot so the panel path + // runs (mirrors the parent start signal). Once the two preamble lookups are consumed, every later ddbSend call resolves with it, whatever the command type. + ddbSend.mockResolvedValue({ + Items: [ + { + sub_issue_id: '#meta', + orchestration_id: 'orch_abc', + parent_linear_issue_id: 'issue-1', + linear_workspace_id: 'org-1', + repo: 'owner/repo', + platform_user_id: 'u1', + }, + { + sub_issue_id: 'A', + orchestration_id: 'orch_abc', + depends_on: [], + child_status: 'ready', + parent_linear_issue_id: 'issue-1', + linear_workspace_id: 'org-1', + repo: 'owner/repo', + }, + ], + }); + + await handler(eventWith(issue())); + + expect(discoverOrchestrationMock).toHaveBeenCalledTimes(1); + expect(reportIssueFailureMock).not.toHaveBeenCalled(); + // The parent issue itself spawns no task FROM the single-task path — but + // releasing root A does call createTaskCore once (for the child). It must + // NOT be called with the parent's task_description (the single-task body). 
+ const calledWithParentBody = createTaskCoreMock.mock.calls.some( + (c) => (c[0] as { task_description?: string }).task_description?.includes('Epic: ship the thing')); + expect(calledWithParentBody).toBe(false); + // #247 UX.2: the initial panel is posted (upsertStatusComment) and the + // parent start signal mirrored — 👀 reaction + In Progress — via upsertEpicPanel. + expect(upsertStatusCommentMock).toHaveBeenCalled(); + expect(swapIssueReactionMock).toHaveBeenCalledWith(expect.anything(), expect.any(String), 'eyes'); + expect(transitionIssueStateMock).toHaveBeenCalledWith( + expect.anything(), expect.any(String), 'started', ['In Progress'], + ); + }); + + test('seeded → posts the live status block on the parent + stamps its id (#3)', async () => { + // project + user lookups (preamble) + ddbSend + .mockResolvedValueOnce({ Item: { status: 'active', repo: 'owner/repo', label_filter: 'bgagent' } }) + .mockResolvedValueOnce({ Item: { platform_user_id: 'u1' } }); + resolveLinearOauthTokenMock.mockResolvedValue({ accessToken: 'tok', oauthSecretArn: 'arn', workspaceSlug: 'acme' }); + discoverOrchestrationMock.mockResolvedValueOnce({ + kind: 'seeded', orchestrationId: 'orch_abc', childCount: 1, rootSubIssueIds: ['A'], alreadyExisted: false, + }); + // Every subsequent ddbSend call resolves with one snapshot (a meta row and + // one child): the release-path and post-release status Queries read it, and + // the Updates (release flip, setStatusCommentId) receive it too, not {}. 
+ const snapshotItems = { + Items: [ + { sub_issue_id: '#meta', orchestration_id: 'orch_abc', parent_linear_issue_id: 'issue-1', linear_workspace_id: 'org-1', repo: 'owner/repo', child_count: 1, platform_user_id: 'u1' }, + { sub_issue_id: 'A', orchestration_id: 'orch_abc', parent_linear_issue_id: 'issue-1', linear_workspace_id: 'org-1', repo: 'owner/repo', depends_on: [], child_status: 'released', linear_identifier: 'ABCA-1', title: 'Step A' }, + ], + }; + ddbSend.mockResolvedValue(snapshotItems); + + await handler(eventWith(issue())); + + // Status block posted (no existing id → create) and its id stamped back. + expect(upsertStatusCommentMock).toHaveBeenCalledTimes(1); + const [, parentArg, bodyArg, existingId] = upsertStatusCommentMock.mock.calls[0]; + expect(parentArg).toBe('issue-1'); + expect(bodyArg).toContain('ABCA orchestration'); + expect(existingId).toBeUndefined(); // create, not edit + // setStatusCommentId issues an Update with the returned comment id. + const stampUpdate = ddbSend.mock.calls.map((c) => c[0]?.input).find((i) => i?.UpdateExpression?.includes('status_comment_id')); + expect(stampUpdate?.ExpressionAttributeValues?.[':cid']).toBe('cmt-status-1'); + }); + + test('seeded on idempotent replay → no duplicate start signal on parent', async () => { + happyPreamble(); + discoverOrchestrationMock.mockResolvedValueOnce({ + kind: 'seeded', + orchestrationId: 'orch_abc', + childCount: 3, + rootSubIssueIds: ['A'], + alreadyExisted: true, // replay + }); + ddbSend.mockResolvedValueOnce({ Items: [] }); + + await handler(eventWith(issue())); + + // alreadyExisted ⇒ skip the start reaction/transition (already done on first seed). 
+ expect(swapIssueReactionMock).not.toHaveBeenCalled(); + expect(transitionIssueStateMock).not.toHaveBeenCalled(); + }); + + test('no sub-issues → single_task falls through to normal task creation', async () => { + happyPreamble(); + discoverOrchestrationMock.mockResolvedValueOnce({ kind: 'single_task', parentLinearIssueId: 'issue-1' }); + createTaskCoreMock.mockResolvedValueOnce({ statusCode: 201, body: JSON.stringify({ data: { task_id: 'T1' } }) }); + + await handler(eventWith(issue())); + + expect(discoverOrchestrationMock).toHaveBeenCalledTimes(1); + // Falls through → a single task is created as today. + expect(createTaskCoreMock).toHaveBeenCalledTimes(1); + }); + + test('rejected graph (cycle) → terminal comment, no task', async () => { + happyPreamble(); + discoverOrchestrationMock.mockResolvedValueOnce({ + kind: 'rejected', + reason: 'cycle', + message: 'The sub-issue blocking relations form a cycle.', + }); + + await handler(eventWith(issue())); + + expect(createTaskCoreMock).not.toHaveBeenCalled(); + expect(reportIssueFailureMock).toHaveBeenCalledTimes(1); + // reportIssueFailure(ctx, issueId, message) + const [ctx, issueId, message] = reportIssueFailureMock.mock.calls[0]; + expect(ctx).toMatchObject({ linearWorkspaceId: 'org-1' }); + expect(issueId).toBe('issue-1'); + expect(String(message)).toMatch(/cycle/i); + }); + + test('discovery error → terminal comment, no task, no silent single-task fallback', async () => { + happyPreamble(); + discoverOrchestrationMock.mockResolvedValueOnce({ kind: 'error', message: 'Could not reach the Linear API.' 
}); + + await handler(eventWith(issue())); + + expect(createTaskCoreMock).not.toHaveBeenCalled(); + expect(reportIssueFailureMock).toHaveBeenCalledTimes(1); + }); + + test('no workspace token → event dropped (no orchestration, no task)', async () => { + ddbSend + .mockResolvedValueOnce({ Item: { status: 'active', repo: 'owner/repo', label_filter: 'bgagent' } }) + .mockResolvedValueOnce({ Item: { platform_user_id: 'platform-user-1' } }); + // When the registry table is configured but the workspace token does + // not resolve, the handler drops the event (added in #200) rather than + // creating a task against a workspace ABCA can't recognize — outbound + // Linear comments would silently skip and we'd burn agent quota for no + // observable result. So neither orchestration NOR a single task fires. + resolveLinearOauthTokenMock.mockResolvedValue(null); + + await handler(eventWith(issue())); + + expect(discoverOrchestrationMock).not.toHaveBeenCalled(); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + }); +}); + +describe('linear-webhook-processor — #247 A6 comment trigger', () => { + /** Build a Comment:create webhook payload: an @bgagent comment ('comment-1') on issue 'sub-issue-1'. `overrides` is spread shallowly, so passing `data` replaces the whole data object. */ + function comment(overrides: Record<string, unknown> = {}): Record<string, unknown> { + return { + type: 'Comment', + action: 'create', + organizationId: 'org-1', + actor: { id: 'user-9' }, + data: { id: 'comment-1', body: '@bgagent change the timeout to 30 min', issueId: 'sub-issue-1' }, + ...overrides, + }; + } + + /** Mock ddbSend for an orchestration lookup: a plain Query (loadOrchestration) returns the meta row plus one child row for `subIssueId` (child_status 'succeeded'; child_task_id only when `childTaskId` is passed), and Get returns `{ pr_url }` when `prUrl` is passed, else an empty Item. + * The standalone LinearIssueIndex GSI query (Query w/ IndexName) returns empty unless `standalone` is given. 
*/ + function mockOrchWithChild(opts: { + subIssueId: string; + childTaskId?: string; + prUrl?: string; + standalone?: { task_id: string; user_id?: string; repo?: string; pr_url?: string; pr_number?: number }; + }): void { + const meta = { + sub_issue_id: '#meta', + orchestration_id: 'orch_x', + parent_linear_issue_id: 'PARENT', + linear_workspace_id: 'WS', + repo: 'o/r', + child_count: 1, + platform_user_id: 'release-user', + }; + const child: Record<string, unknown> = { + orchestration_id: 'orch_x', + sub_issue_id: opts.subIssueId, + depends_on: [], + child_status: 'succeeded', + repo: 'o/r', + parent_linear_issue_id: 'PARENT', + linear_workspace_id: 'WS', + }; + if (opts.childTaskId) child.child_task_id = opts.childTaskId; + ddbSend.mockImplementation(async (cmd: { _type: string; input: Record<string, unknown> }) => { + if (cmd._type === 'Query' && cmd.input.IndexName === 'LinearIssueIndex') { + return { Items: opts.standalone ? [opts.standalone] : [] }; // resolveTaskByLinearIssue + } + if (cmd._type === 'Query') return { Items: [meta, child] }; // loadOrchestration + if (cmd._type === 'Get') return { Item: opts.prUrl ? { pr_url: opts.prUrl } : {} }; // pr_url only when opts.prUrl was given + return {}; // any other command type + }); + } + + /** Mock for a PLAIN (non-orchestration) issue: fetchIssueParentId resolves null (no parent), and only the LinearIssueIndex GSI query returns data — `[standalone]`, or no items when `standalone` is null. Every other command resolves to {}. */ + function mockStandaloneOnly(standalone: { task_id: string; user_id?: string; repo?: string; pr_url?: string; pr_number?: number } | null): void { + fetchIssueParentIdMock.mockResolvedValue(null); // no parent ⇒ not a sub-issue + ddbSend.mockImplementation(async (cmd: { _type: string; input: Record<string, unknown> }) => { + if (cmd._type === 'Query' && cmd.input.IndexName === 'LinearIssueIndex') { + return { Items: standalone ? 
[standalone] : [] }; + } + return {}; + }); + } + + beforeEach(() => { + ddbSend.mockReset(); + createTaskCoreMock.mockReset().mockResolvedValue({ statusCode: 201, body: '{}' }); + resolveLinearOauthTokenMock.mockReset() + .mockResolvedValue({ accessToken: 'tok', oauthSecretArn: 'arn:secret', workspaceSlug: 'acme' }); + fetchIssueParentIdMock.mockReset().mockResolvedValue('PARENT'); + discoverOrchestrationMock.mockReset(); + reactToCommentMock.mockReset().mockResolvedValue(true); + replyToCommentMock.mockReset().mockResolvedValue(true); + }); + + test('@bgagent on a started sub-issue → pr-iteration task on its PR with cascade marker', async () => { + mockOrchWithChild({ subIssueId: 'sub-issue-1', childTaskId: 'task-sub-1', prUrl: 'https://github.com/o/r/pull/42' }); + await handler(eventWith(comment())); + + expect(createTaskCoreMock).toHaveBeenCalledTimes(1); + const [body, ctx] = createTaskCoreMock.mock.calls[0]; + expect(body.workflow_ref).toBe('coding/pr-iteration-v1'); + expect(body.pr_number).toBe(42); + expect(body.task_description).toBe('change the timeout to 30 min'); + expect(ctx.channelSource).toBe('linear'); + expect(ctx.channelMetadata.orchestration_iteration).toBe('true'); + expect(ctx.channelMetadata.orchestration_sub_issue_id).toBe('sub-issue-1'); + expect(ctx.channelMetadata.linear_issue_id).toBe('sub-issue-1'); + expect(ctx.idempotencyKey).toContain('comment-1'); + // #247 UX.3: the triggering comment id is threaded so the reconciler can + // reply ✅/❌ beneath it when the iteration lands. + expect(ctx.channelMetadata.trigger_comment_id).toBe('comment-1'); + }); + + test('@bgagent on a started sub-issue → instant 👀 ack on the TRIGGERING comment (#247 UX.3)', async () => { + mockOrchWithChild({ subIssueId: 'sub-issue-1', childTaskId: 'task-sub-1', prUrl: 'https://github.com/o/r/pull/42' }); + await handler(eventWith(comment())); + + // 👀 lands on the comment (commentId 'comment-1'), not the issue, with EMOJI_STARTED. 
+ expect(reactToCommentMock).toHaveBeenCalledTimes(1); + const [, commentId, emoji] = reactToCommentMock.mock.calls[0]; + expect(commentId).toBe('comment-1'); + expect(emoji).toBe('eyes'); + }); + + test('@bgagent THREAD-REPLY trigger → 👀 on the reply, but reply target is the thread ROOT (#247 UX.11)', async () => { + // A trigger comment that is itself a thread-reply carries parentId = the + // top-level root. Linear rejects replying to a reply, so trigger_comment_id + // must be the ROOT — but the 👀 still goes on the actual reply the human wrote. + mockOrchWithChild({ subIssueId: 'sub-issue-1', childTaskId: 'task-sub-1', prUrl: 'https://github.com/o/r/pull/42' }); + await handler(eventWith(comment({ + data: { id: 'reply-cmt-9', parentId: 'root-cmt-1', body: '@bgagent tweak it', issueId: 'sub-issue-1' }, + }))); + + // 👀 on the actual reply the human wrote. + expect(reactToCommentMock).toHaveBeenCalledWith(expect.anything(), 'reply-cmt-9', 'eyes'); + // But the ack replies to the thread ROOT, not the reply. + const ctx = createTaskCoreMock.mock.calls[0][1]; + expect(ctx.channelMetadata.trigger_comment_id).toBe('root-cmt-1'); + }); + + test('@bgagent that does NOT resolve to an actionable iteration → no premature 👀 ack', async () => { + // No childTaskId ⇒ un-started sub-issue ⇒ we bail before acting; don't ack. + mockOrchWithChild({ subIssueId: 'sub-issue-1' }); + await handler(eventWith(comment())); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + expect(reactToCommentMock).not.toHaveBeenCalled(); + }); + + test('comment WITHOUT @bgagent → no task (ordinary discussion / agent progress comment)', async () => { + await handler(eventWith(comment({ data: { id: 'c2', body: 'looks good to me!', issueId: 'sub-issue-1' } }))); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + // Never even fetched the parent (cheap short-circuit on the mention check). 
+ expect(fetchIssueParentIdMock).not.toHaveBeenCalled(); + }); + + test('@bgagent on an issue with no parent AND no ABCA task → clean no-op (not an ABCA issue)', async () => { + mockStandaloneOnly(null); // no parent, GSI miss + await handler(eventWith(comment())); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + expect(reactToCommentMock).not.toHaveBeenCalled(); // no premature ack + }); + + test('@bgagent on a sub-issue whose parent is not an orchestration AND no ABCA task → no task', async () => { + fetchIssueParentIdMock.mockResolvedValue('PARENT'); + ddbSend.mockImplementation(async (cmd: { _type: string; input: Record<string, unknown> }) => { + if (cmd._type === 'Query' && cmd.input.IndexName === 'LinearIssueIndex') return { Items: [] }; + return { Items: [] }; // loadOrchestration → no snapshot + }); + await handler(eventWith(comment())); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + }); + + test('@bgagent on an un-started sub-issue (no child_task_id) AND no ABCA task → no task', async () => { + mockOrchWithChild({ subIssueId: 'sub-issue-1' }); // no childTaskId, no standalone + await handler(eventWith(comment())); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + }); + + test('bare @bgagent (no text) → falls back to a generic iteration instruction', async () => { + mockOrchWithChild({ subIssueId: 'sub-issue-1', childTaskId: 'task-sub-1', prUrl: 'https://github.com/o/r/pull/7' }); + await handler(eventWith(comment({ data: { id: 'c3', body: '@bgagent', issueId: 'sub-issue-1' } }))); + expect(createTaskCoreMock).toHaveBeenCalledTimes(1); + expect(createTaskCoreMock.mock.calls[0][0].task_description).toMatch(/latest review feedback/i); + }); + + // #247 UX.3: the GENERALIZED trigger — a plain (non-orchestration) issue + // that ABCA opened a PR for, resolved via the LinearIssueIndex GSI. 
+ describe('standalone (non-orchestration) @bgagent trigger', () => { + test('plain issue with an ABCA PR → pr-iteration task, 👀 ack, trigger_comment_id but NO orchestration markers', async () => { + mockStandaloneOnly({ task_id: 'task-solo', user_id: 'u-solo', repo: 'o/r', pr_number: 99 }); + await handler(eventWith(comment())); + + expect(createTaskCoreMock).toHaveBeenCalledTimes(1); + const [body, ctx] = createTaskCoreMock.mock.calls[0]; + expect(body.workflow_ref).toBe('coding/pr-iteration-v1'); + expect(body.pr_number).toBe(99); + expect(body.repo).toBe('o/r'); + expect(ctx.userId).toBe('u-solo'); // attributed to the original task's user + expect(ctx.channelMetadata.trigger_comment_id).toBe('comment-1'); + expect(ctx.channelMetadata.linear_issue_id).toBe('sub-issue-1'); + // NOT an orchestration iteration — the reconciler must ignore it (fanout replies). + expect(ctx.channelMetadata.orchestration_id).toBeUndefined(); + expect(ctx.channelMetadata.orchestration_iteration).toBeUndefined(); + // 👀 ack on the comment. 
+ expect(reactToCommentMock).toHaveBeenCalledWith(expect.anything(), 'comment-1', 'eyes'); + }); + + test('plain issue resolves PR from pr_url when pr_number absent', async () => { + mockStandaloneOnly({ task_id: 'task-solo', user_id: 'u-solo', repo: 'o/r', pr_url: 'https://github.com/o/r/pull/123' }); + await handler(eventWith(comment())); + expect(createTaskCoreMock.mock.calls[0][0].pr_number).toBe(123); + }); + + test('plain issue whose ABCA task opened NO PR → no task, no ack', async () => { + mockStandaloneOnly({ task_id: 'task-solo', user_id: 'u-solo', repo: 'o/r' }); // no pr + await handler(eventWith(comment())); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + expect(reactToCommentMock).not.toHaveBeenCalled(); + }); + + test('plain issue task missing user_id → cannot attribute → no task', async () => { + mockStandaloneOnly({ task_id: 'task-solo', repo: 'o/r', pr_number: 5 }); // no user_id + await handler(eventWith(comment())); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + }); + }); + + // #247 UX.18: an @bgagent comment left on the PARENT epic (the panel lives + // there) routes to the sub-issue it names — instead of the old silent drop. + describe('parent-epic @bgagent comment routing', () => { + /** Mock so the COMMENTED issue id is itself the orchestration parent. The + * fan-out epic has two started sub-issues (footer + newsletter). 
*/ + function mockParentEpic(parentIssueId: string): void { + const meta = { + sub_issue_id: '#meta', + orchestration_id: 'orch_x', + parent_linear_issue_id: parentIssueId, + linear_workspace_id: 'WS', + repo: 'o/r', + child_count: 2, + platform_user_id: 'release-user', + }; + const footer = { + orchestration_id: 'orch_x', + sub_issue_id: 'sub-footer', + depends_on: [], + child_status: 'succeeded', + repo: 'o/r', + parent_linear_issue_id: parentIssueId, + linear_workspace_id: 'WS', + linear_identifier: 'ABCA-305', + title: 'Add a site-wide footer', + child_task_id: 'task-footer', + }; + const news = { + orchestration_id: 'orch_x', + sub_issue_id: 'sub-news', + depends_on: [], + child_status: 'succeeded', + repo: 'o/r', + parent_linear_issue_id: parentIssueId, + linear_workspace_id: 'WS', + linear_identifier: 'ABCA-306', + title: 'Add a newsletter signup section', + child_task_id: 'task-news', + }; + // Stateful ack-claim (#247 UX.20): the first Update whose Key.sub_issue_id starts with ack# + // succeeds; any repeat of that key throws an Error named ConditionalCheckFailedException, emulating a failed conditional write on redelivery. + const claimedAcks = new Set<string>(); + ddbSend.mockImplementation(async (cmd: { _type: string; input: Record<string, unknown> }) => { + if (cmd._type === 'Update') { + const sk = (cmd.input.Key as { sub_issue_id?: string })?.sub_issue_id ?? ''; + if (sk.startsWith('ack#')) { + if (claimedAcks.has(sk)) { + throw Object.assign(new Error('claim exists'), { name: 'ConditionalCheckFailedException' }); + } + claimedAcks.add(sk); + } + return {}; + } + if (cmd._type === 'Query' && cmd.input.IndexName === 'LinearIssueIndex') return { Items: [] }; + if (cmd._type === 'Query') return { Items: [meta, footer, news] }; // loadOrchestration (parent's own) + if (cmd._type === 'Get') { + const tid = (cmd.input.Key as { task_id: string }).task_id; + const pr = tid === 'task-footer' ? 193 : tid === 'task-news' ? 192 : null; + return { Item: pr ? 
{ pr_number: pr } : {} }; + } + return {}; + }); + } + + /** A comment ON the parent epic (issueId === the parent id). */ + function parentComment(body: string, id = 'pc-1'): Record<string, unknown> { + return { + type: 'Comment', + action: 'create', + organizationId: 'org-1', + actor: { id: 'user-9' }, + data: { id, body, issueId: 'PARENT-EPIC' }, + }; + } + + test('the live case: "@bgagent for the footer change it" on the epic → iterates ABCA-305 PR #193', async () => { + mockParentEpic('PARENT-EPIC'); + await handler(eventWith(parentComment('@bgagent for the footer can you change it to "unforgettable memories await you"'))); + + // 👀 on the parent comment (never a silent drop). + expect(reactToCommentMock).toHaveBeenCalledWith(expect.anything(), 'pc-1', 'eyes'); + // Routed to the footer sub-issue's PR with the cascade marker. + expect(createTaskCoreMock).toHaveBeenCalledTimes(1); + const [body, ctx] = createTaskCoreMock.mock.calls[0]; + expect(body.workflow_ref).toBe('coding/pr-iteration-v1'); + expect(body.pr_number).toBe(193); + expect(ctx.channelMetadata.orchestration_sub_issue_id).toBe('sub-footer'); + expect(ctx.channelMetadata.orchestration_iteration).toBe('true'); + expect(ctx.channelMetadata.linear_issue_id).toBe('sub-footer'); + // #247 UX.19: the trigger comment lives on the PARENT epic, so the reply + // must target the parent issue (not the sub-issue) — else Linear rejects it. + expect(ctx.channelMetadata.trigger_comment_issue_id).toBe('PARENT-EPIC'); + expect(ctx.channelMetadata.trigger_comment_id).toBe('pc-1'); + // No disambiguation reply — we acted. 
+ expect(replyToCommentMock).not.toHaveBeenCalled(); + }); + + test('targeting by Linear identifier on the epic → iterates that node', async () => { + mockParentEpic('PARENT-EPIC'); + await handler(eventWith(parentComment('@bgagent ABCA-306 tweak the newsletter copy'))); + expect(createTaskCoreMock.mock.calls[0][0].pr_number).toBe(192); + expect(createTaskCoreMock.mock.calls[0][1].channelMetadata.orchestration_sub_issue_id).toBe('sub-news'); + }); + + test('ambiguous comment on the epic → 👀 + a "which sub-issue?" reply, NO task, NO new issue', async () => { + mockParentEpic('PARENT-EPIC'); + await handler(eventWith(parentComment('@bgagent please update the copy'))); + // Acked, but did not act. + expect(reactToCommentMock).toHaveBeenCalledWith(expect.anything(), 'pc-1', 'eyes'); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + // Posted a disambiguation reply on the parent — never a silent drop. + expect(replyToCommentMock).toHaveBeenCalledTimes(1); + const [, issueId, , replyBody] = replyToCommentMock.mock.calls[0]; + expect(issueId).toBe('PARENT-EPIC'); + expect(replyBody).toContain('ABCA-305'); + expect(replyBody).toContain('ABCA-306'); + expect(replyBody.toLowerCase()).toContain('new work'); // the create-a-sub-issue path + }); + + test('no-match comment on the epic → 👀 + reply (never a silent drop), no task', async () => { + mockParentEpic('PARENT-EPIC'); + await handler(eventWith(parentComment('@bgagent looks great, ship it'))); + expect(reactToCommentMock).toHaveBeenCalledWith(expect.anything(), 'pc-1', 'eyes'); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + expect(replyToCommentMock).toHaveBeenCalledTimes(1); + }); + + test('#247 UX.20: webhook REDELIVERY of the same parent comment posts EXACTLY ONE reply (no spam)', async () => { + mockParentEpic('PARENT-EPIC'); + const evt = eventWith(parentComment('@bgagent looks great, ship it', 'pc-dup')); + // Linear redelivers the same comment webhook 3× (handler exceeded its ack window). 
+ await handler(evt); + await handler(evt); + await handler(evt); + // The conditional ack-claim lets only the FIRST delivery act: one 👀, one reply. + expect(replyToCommentMock).toHaveBeenCalledTimes(1); + expect(reactToCommentMock).toHaveBeenCalledTimes(1); + }); + + test('#247 UX.20: a matched-iteration parent comment also dedups under redelivery (one task, one ack)', async () => { + mockParentEpic('PARENT-EPIC'); + const evt = eventWith(parentComment('@bgagent for the footer change the tagline', 'pc-iter')); + await handler(evt); + await handler(evt); + expect(createTaskCoreMock).toHaveBeenCalledTimes(1); // one iteration, not two + expect(reactToCommentMock).toHaveBeenCalledTimes(1); + }); + }); +}); diff --git a/cdk/test/handlers/linear-webhook-processor.test.ts b/cdk/test/handlers/linear-webhook-processor.test.ts index 533e4314..bdc68031 100644 --- a/cdk/test/handlers/linear-webhook-processor.test.ts +++ b/cdk/test/handlers/linear-webhook-processor.test.ts @@ -39,6 +39,15 @@ jest.mock('../../src/handlers/shared/linear-oauth-resolver', () => ({ resolveLinearOauthToken: (...args: unknown[]) => resolveLinearOauthTokenMock(...args), })); +const probeLinearIssueContextMock = jest.fn(); +jest.mock('../../src/handlers/shared/linear-issue-context-probe', () => { + const actual = jest.requireActual('../../src/handlers/shared/linear-issue-context-probe'); + return { + ...actual, + probeLinearIssueContext: (...args: unknown[]) => probeLinearIssueContextMock(...args), + }; +}); + process.env.LINEAR_PROJECT_MAPPING_TABLE_NAME = 'LinearProjects'; process.env.LINEAR_USER_MAPPING_TABLE_NAME = 'LinearUsers'; process.env.LINEAR_WORKSPACE_REGISTRY_TABLE_NAME = 'LinearWorkspaceRegistry'; @@ -86,6 +95,14 @@ describe('linear-webhook-processor handler', () => { workspaceSlug: 'acme', oauthSecretArn: 'arn:aws:secretsmanager:us-east-1:123:secret:bgagent-linear-oauth-acme', }); + // Attachments-via-MCP probe (672bfa6): default to "nothing to fetch" so + // existing tests are 
unaffected; the context-discovery tests override. + probeLinearIssueContextMock.mockReset(); + probeLinearIssueContextMock.mockResolvedValue({ + attachmentTitles: [], + projectName: null, + projectHasDocuments: false, + }); }); test('skips missing raw_body', async () => { @@ -491,5 +508,194 @@ describe('linear-webhook-processor handler', () => { const [reqBody] = createTaskCoreMock.mock.calls[0]; expect(reqBody.attachments).toBeUndefined(); }); + + test('skips uploads.linear.app images so the unauthenticated URL resolver does not 401', async () => { + // Linear's CDN requires the workspace OAuth token to fetch, which the + // orchestrator's URL-resolver does NOT have. The agent picks these up + // at runtime via mcp__linear-server__extract_images instead, per the + // Linear-channel prompt addendum. + const payload = issue(); + const data = payload.data as Record<string, unknown>; + data.description = [ + '![paste](https://uploads.linear.app/15d12f61/090e5ce6/938f90d7)', + '![public](https://i.imgur.com/abc.png)', + ].join('\n'); + + await handler(eventWith(payload)); + + expect(createTaskCoreMock).toHaveBeenCalledTimes(1); + const [reqBody] = createTaskCoreMock.mock.calls[0]; + // Only the public image survives the filter. 
+ expect(reqBody.attachments).toHaveLength(1); + expect(reqBody.attachments[0].url).toBe('https://i.imgur.com/abc.png'); + }); + + test('drops attachments entirely when only uploads.linear.app images are present', async () => { + const payload = issue(); + const data = payload.data as Record<string, unknown>; + data.description = '![only](https://uploads.linear.app/x/y/z)'; + + await handler(eventWith(payload)); + + expect(createTaskCoreMock).toHaveBeenCalledTimes(1); + const [reqBody] = createTaskCoreMock.mock.calls[0]; + expect(reqBody.attachments).toBeUndefined(); + }); + }); + + // ─── Linear issue context probe (paperclip attachments + project docs) ────── + + describe('linear issue context probe', () => { + beforeEach(() => { + ddbSend + .mockResolvedValueOnce({ Item: { repo: 'org/repo', status: 'active' } }) + .mockResolvedValueOnce({ Item: { platform_user_id: 'cognito-user-1', status: 'active' } }); + createTaskCoreMock.mockResolvedValueOnce({ + statusCode: 201, + body: JSON.stringify({ data: { task_id: 'T1' } }), + }); + // Resolver must yield an access token for the probe to be called. 
+ resolveLinearOauthTokenMock.mockResolvedValue({ + accessToken: 'lin_oauth_token', + scope: 'read,write,issues:create,comments:create', + workspaceSlug: 'demo', + oauthSecretArn: 'arn:aws:secretsmanager:us-east-1:000:secret:bgagent-linear-oauth-demo-AbCdEf', + }); + }); + + test('probes Linear with the resolved access token and the issue id', async () => { + await handler(eventWith(issue())); + expect(probeLinearIssueContextMock).toHaveBeenCalledTimes(1); + const [token, issueId] = probeLinearIssueContextMock.mock.calls[0]; + expect(token).toBe('lin_oauth_token'); + expect(issueId).toBe('issue-1'); + }); + + test('prepends a hint listing paperclip attachment titles when present', async () => { + probeLinearIssueContextMock.mockResolvedValueOnce({ + attachmentTitles: ['design-spec.pdf', 'crash-trace.txt'], + projectName: 'Onboarding', + projectHasDocuments: false, + }); + + await handler(eventWith(issue())); + + expect(createTaskCoreMock).toHaveBeenCalledTimes(1); + const [reqBody] = createTaskCoreMock.mock.calls[0]; + expect(reqBody.task_description).toContain('Linear may have additional context'); + expect(reqBody.task_description).toContain('design-spec.pdf'); + expect(reqBody.task_description).toContain('crash-trace.txt'); + expect(reqBody.task_description).toContain('mcp__linear-server__get_attachment'); + // The original description must still be present, not replaced. 
+ expect(reqBody.task_description).toContain('Users cannot log in.'); + }); + + test('prepends a hint about project documents when the project has wiki docs', async () => { + probeLinearIssueContextMock.mockResolvedValueOnce({ + attachmentTitles: [], + projectName: 'Onboarding', + projectHasDocuments: true, + }); + + await handler(eventWith(issue())); + + const [reqBody] = createTaskCoreMock.mock.calls[0]; + expect(reqBody.task_description).toContain('project "Onboarding"'); + expect(reqBody.task_description).toContain('wiki documents'); + expect(reqBody.task_description).toContain('mcp__linear-server__list_documents'); + }); + + test('omits the hint when probe finds nothing', async () => { + // Default mock already returns an empty probe. + await handler(eventWith(issue())); + const [reqBody] = createTaskCoreMock.mock.calls[0]; + expect(reqBody.task_description).not.toContain('Linear may have additional context'); + // Sanity: original task description still in place. + expect(reqBody.task_description).toContain('ABC-42: Fix the login bug'); + }); + }); +}); + +// ─── Direct probe behavior — covers the GraphQL query shape ───────────────── + +describe('probeLinearIssueContext', () => { + // The mock above only intercepts the version imported by the handler under + // test. To verify the actual GraphQL query and field selections we exercise + // the real module against a stubbed fetch. 
+ const realModule = jest.requireActual('../../src/handlers/shared/linear-issue-context-probe') as { + probeLinearIssueContext: (token: string, issueId: string) => Promise<unknown>; + }; + + let originalFetch: typeof fetch; + let fetchMock: jest.Mock; + + beforeEach(() => { + originalFetch = global.fetch; + fetchMock = jest.fn(); + global.fetch = fetchMock as unknown as typeof fetch; + }); + + afterEach(() => { + global.fetch = originalFetch; + }); + + test('GraphQL query includes attachments and project.documents fields', async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ + data: { + issue: { + attachments: { nodes: [{ id: 'att1', title: 'spec.pdf' }] }, + project: { id: 'proj1', name: 'P1', documents: { nodes: [{ id: 'doc1' }] } }, + }, + }, + }), + }); + + const result = await realModule.probeLinearIssueContext('tok', 'issue-uuid-1') as { + attachmentTitles: string[]; + projectName: string | null; + projectHasDocuments: boolean; + }; + + expect(fetchMock).toHaveBeenCalledTimes(1); + const [, init] = fetchMock.mock.calls[0]; + const body = JSON.parse((init as { body: string }).body) as { query: string; variables: { id: string } }; + expect(body.variables.id).toBe('issue-uuid-1'); + expect(body.query).toContain('attachments'); + expect(body.query).toContain('project'); + expect(body.query).toContain('documents'); + expect(result).toEqual({ + attachmentTitles: ['spec.pdf'], + projectName: 'P1', + projectHasDocuments: true, + }); + }); + + test('returns empty probe on graphql errors', async () => { + fetchMock.mockResolvedValueOnce({ + ok: true, + json: async () => ({ errors: [{ message: 'boom' }] }), + }); + const result = await realModule.probeLinearIssueContext('tok', 'i') as { + attachmentTitles: string[]; + }; + expect(result.attachmentTitles).toEqual([]); + }); + + test('returns empty probe on non-2xx', async () => { + fetchMock.mockResolvedValueOnce({ ok: false, status: 401, json: async () => ({}) }); + const result = await 
realModule.probeLinearIssueContext('tok', 'i') as { + projectHasDocuments: boolean; + }; + expect(result.projectHasDocuments).toBe(false); + }); + + test('returns empty probe on network failure', async () => { + fetchMock.mockRejectedValueOnce(new Error('network down')); + const result = await realModule.probeLinearIssueContext('tok', 'i') as { + attachmentTitles: string[]; + }; + expect(result.attachmentTitles).toEqual([]); }); }); diff --git a/cdk/test/handlers/linear-webhook.test.ts b/cdk/test/handlers/linear-webhook.test.ts index a0d1cd89..c816f539 100644 --- a/cdk/test/handlers/linear-webhook.test.ts +++ b/cdk/test/handlers/linear-webhook.test.ts @@ -138,13 +138,13 @@ describe('linear-webhook handler', () => { expect(lambdaSend).not.toHaveBeenCalled(); }); - test('ignores non-Issue event types with 200', async () => { + test('ignores unrecognized event types with 200 (e.g. Reaction)', async () => { const body = JSON.stringify({ action: 'create', - type: 'Comment', + type: 'Reaction', webhookTimestamp: Date.now(), webhookId: 'wh-2', - data: { id: 'cmt-1' }, + data: { id: 'rx-1' }, }); const result = await handler(makeEvent(body, sign(body))); expect(result.statusCode).toBe(200); @@ -152,6 +152,36 @@ describe('linear-webhook handler', () => { expect(lambdaSend).not.toHaveBeenCalled(); }); + test('forwards a Comment:create event to the processor (#247 A6 trigger)', async () => { + const body = JSON.stringify({ + action: 'create', + type: 'Comment', + webhookTimestamp: Date.now(), + webhookId: 'wh-2c', + organizationId: 'org-1', + data: { id: 'cmt-1', body: '@bgagent fix it', issueId: 'iss-9' }, + }); + ddbSend.mockResolvedValueOnce({}); // dedup Put succeeds + lambdaSend.mockResolvedValueOnce({}); + const result = await handler(makeEvent(body, sign(body))); + expect(result.statusCode).toBe(200); + expect(ddbSend).toHaveBeenCalled(); // deduped + expect(lambdaSend).toHaveBeenCalled(); // forwarded to processor + }); + + test('ignores a non-create Comment event 
(edited/removed) with 200', async () => { + const body = JSON.stringify({ + action: 'update', + type: 'Comment', + webhookTimestamp: Date.now(), + webhookId: 'wh-2u', + data: { id: 'cmt-2', body: '@bgagent edited', issueId: 'iss-9' }, + }); + const result = await handler(makeEvent(body, sign(body))); + expect(result.statusCode).toBe(200); + expect(lambdaSend).not.toHaveBeenCalled(); // not forwarded + }); + test('400s when data.id is missing on an Issue event', async () => { const body = JSON.stringify({ action: 'create', diff --git a/cdk/test/handlers/orchestration-reconciler.test.ts b/cdk/test/handlers/orchestration-reconciler.test.ts new file mode 100644 index 00000000..b09059a5 --- /dev/null +++ b/cdk/test/handlers/orchestration-reconciler.test.ts @@ -0,0 +1,822 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +import type { DynamoDBRecord } from 'aws-lambda'; + +const ddbSend = jest.fn(); +jest.mock('@aws-sdk/client-dynamodb', () => ({ DynamoDBClient: jest.fn(() => ({})) })); +jest.mock('@aws-sdk/lib-dynamodb', () => ({ + DynamoDBDocumentClient: { from: jest.fn(() => ({ send: ddbSend })) }, + QueryCommand: jest.fn((input: unknown) => ({ _type: 'Query', input })), + UpdateCommand: jest.fn((input: unknown) => ({ _type: 'Update', input })), + GetCommand: jest.fn((input: unknown) => ({ _type: 'Get', input })), + BatchGetCommand: jest.fn((input: unknown) => ({ _type: 'BatchGet', input })), +})); + +const createTaskCoreMock = jest.fn(); +jest.mock('../../src/handlers/shared/create-task-core', () => ({ + createTaskCore: (...args: unknown[]) => createTaskCoreMock(...args), +})); + +const postIssueCommentMock = jest.fn(); +const upsertStatusCommentMock = jest.fn(); +const swapIssueReactionMock = jest.fn(); +const swapCommentReactionMock = jest.fn(); +const transitionIssueStateMock = jest.fn(); +const replyToCommentMock = jest.fn(); +jest.mock('../../src/handlers/shared/linear-feedback', () => ({ + postIssueComment: (...args: unknown[]) => postIssueCommentMock(...args), + upsertStatusComment: (...args: unknown[]) => upsertStatusCommentMock(...args), + swapIssueReaction: (...args: unknown[]) => swapIssueReactionMock(...args), + swapCommentReaction: (...args: unknown[]) => swapCommentReactionMock(...args), + transitionIssueState: (...args: unknown[]) => transitionIssueStateMock(...args), + replyToComment: (...args: unknown[]) => replyToCommentMock(...args), + EMOJI_SUCCESS: 'white_check_mark', + EMOJI_FAILURE: 'x', +})); + +jest.mock('../../src/handlers/shared/logger', () => ({ + logger: { info: jest.fn(), warn: jest.fn(), error: jest.fn() }, +})); + +process.env.ORCHESTRATION_TABLE_NAME = 'OrchestrationTable'; +process.env.TASK_TABLE_NAME = 'TaskTable'; +// A6 surfacing (#34/#35): the cascade posts Linear comments only when the +// workspace registry is configured. 
Set it so the surfacing path is exercised. +process.env.LINEAR_WORKSPACE_REGISTRY_TABLE_NAME = 'WorkspaceRegistry'; + +import { handler, parseTerminalTaskRecord } from '../../src/handlers/orchestration-reconciler'; + +/** Build a TaskTable stream MODIFY record. */ +function taskRecord(fields: { + task_id?: string; + status?: string; + build_passed?: boolean; + orchestration_id?: string; + eventName?: 'INSERT' | 'MODIFY' | 'REMOVE'; + // A6 cascade markers (channel_metadata fields on an iteration/restack task). + orchestration_sub_issue_id?: string; + restack_predecessor_sub_issue_id?: string; + orchestration_iteration?: boolean; + // #247 UX.3: the human comment that triggered an iteration. + trigger_comment_id?: string; + // #247 UX.19: the issue that trigger comment lives on (parent epic when routed). + trigger_comment_issue_id?: string; + // #247 UX.5: raw agent error_message (drives the failure-reply detail). + error_message?: string; +}): DynamoDBRecord { + const img: Record<string, unknown> = {}; + if (fields.task_id) img.task_id = { S: fields.task_id }; + if (fields.status) img.status = { S: fields.status }; + if (fields.build_passed !== undefined) img.build_passed = { BOOL: fields.build_passed }; + if (fields.error_message) img.error_message = { S: fields.error_message }; + // PRODUCTION SHAPE: createTaskCore persists orchestration_id INSIDE the + // nested channel_metadata MAP, not as a top-level attribute. The stream + // image must mirror that or the reconciler skips every orchestration + // child. (Regression: the first dev smoke had orchestration_id only in + // channel_metadata and the reconciler — reading it top-level — ignored + // all completions, so dependents never released.) 
+ const cm: Record<string, unknown> = {}; + if (fields.orchestration_id) cm.orchestration_id = { S: fields.orchestration_id }; + if (fields.orchestration_sub_issue_id) cm.orchestration_sub_issue_id = { S: fields.orchestration_sub_issue_id }; + if (fields.restack_predecessor_sub_issue_id) { + cm.restack_predecessor_sub_issue_id = { S: fields.restack_predecessor_sub_issue_id }; + } + if (fields.orchestration_iteration) cm.orchestration_iteration = { S: 'true' }; + if (fields.trigger_comment_id) cm.trigger_comment_id = { S: fields.trigger_comment_id }; + if (fields.trigger_comment_issue_id) cm.trigger_comment_issue_id = { S: fields.trigger_comment_issue_id }; + if (Object.keys(cm).length > 0) img.channel_metadata = { M: cm }; + return { + eventName: fields.eventName ?? 'MODIFY', + dynamodb: { NewImage: img as never }, + } as DynamoDBRecord; +} + +describe('parseTerminalTaskRecord', () => { + test('extracts a terminal orchestration child event', () => { + const evt = parseTerminalTaskRecord(taskRecord({ + task_id: 'T1', status: 'COMPLETED', build_passed: true, orchestration_id: 'orch_1', + })); + expect(evt).toEqual({ taskId: 'T1', status: 'COMPLETED', buildPassed: true, orchestrationId: 'orch_1' }); + }); + + test('skips non-terminal status', () => { + expect(parseTerminalTaskRecord(taskRecord({ task_id: 'T1', status: 'RUNNING', orchestration_id: 'orch_1' }))).toBeNull(); + }); + + test('skips tasks with no orchestration_id (non-orchestration tasks)', () => { + expect(parseTerminalTaskRecord(taskRecord({ task_id: 'T1', status: 'COMPLETED' }))).toBeNull(); + }); + + test('skips REMOVE events', () => { + expect(parseTerminalTaskRecord(taskRecord({ + task_id: 'T1', status: 'COMPLETED', orchestration_id: 'orch_1', eventName: 'REMOVE', + }))).toBeNull(); + }); + + test('skips records with no NewImage', () => { + expect(parseTerminalTaskRecord({ eventName: 'MODIFY', dynamodb: {} } as DynamoDBRecord)).toBeNull(); + }); +}); + +/** Mock the GSI lookup + loadOrchestration 
Query for a child set. */ +function mockOrchestration(opts: { + subIssueId: string; + children: Array<{ sub_issue_id: string; depends_on?: string[]; child_status: string }>; +}): void { + // Stateful, query-type-aware mock (robust to the reconciler's read + // pattern: GSI lookup + possibly-repeated loadOrchestration + status + // Updates). Status Updates mutate the in-memory rows so a subsequent + // fresh loadOrchestration reflects them — which is exactly what the + // concurrency-safe re-read relies on. + const meta = { + sub_issue_id: '#meta', + orchestration_id: 'orch_1', + parent_linear_issue_id: 'PARENT', + linear_workspace_id: 'WS', + repo: 'o/r', + child_count: opts.children.length, + platform_user_id: 'user-1', + }; + const rows: Record<string, Record<string, unknown>> = {}; + for (const c of opts.children) { + rows[c.sub_issue_id] = { + orchestration_id: 'orch_1', + sub_issue_id: c.sub_issue_id, + depends_on: c.depends_on ?? [], + child_status: c.child_status, + repo: 'o/r', + parent_linear_issue_id: 'PARENT', + linear_workspace_id: 'WS', + }; + } + ddbSend.mockImplementation(async (cmd: { _type: string; input: Record<string, unknown> }) => { + const { _type, input } = cmd; + if (_type === 'Query' && input.IndexName === 'ChildTaskIndex') { + return { Items: [{ ...rows[opts.subIssueId], sub_issue_id: opts.subIssueId }] }; + } + if (_type === 'Query') { // loadOrchestration + return { Items: [meta, ...Object.values(rows)] }; + } + if (_type === 'Update') { + const sk = (input.Key as { sub_issue_id: string }).sub_issue_id; + const vals = input.ExpressionAttributeValues as Record<string, unknown>; + const row = rows[sk]; + if (row) { + if (vals[':s'] !== undefined) row.child_status = vals[':s']; + if (vals[':released'] !== undefined) { row.child_status = 'released'; row.child_task_id = vals[':tid']; } + } + return {}; + } + return {}; + }); +} + +describe('orchestration-reconciler handler', () => { + beforeEach(() => { + ddbSend.mockReset(); + 
createTaskCoreMock.mockReset(); + createTaskCoreMock.mockResolvedValue({ statusCode: 201, body: JSON.stringify({ data: { task_id: 'child-task' } }) }); + }); + + test('A succeeds → releases blocked dependent B', async () => { + mockOrchestration({ + subIssueId: 'A', + children: [ + { sub_issue_id: 'A', child_status: 'released' }, + { sub_issue_id: 'B', depends_on: ['A'], child_status: 'blocked' }, + ], + }); + await handler({ Records: [taskRecord({ task_id: 'TA', status: 'COMPLETED', orchestration_id: 'orch_1' })] } as never); + + // B released via createTaskCore. + expect(createTaskCoreMock).toHaveBeenCalledTimes(1); + const ctx = createTaskCoreMock.mock.calls[0][1]; + expect(ctx.idempotencyKey).toBe('orch_1_B'); + }); + + test('A fails → no release, B skipped (createTaskCore not called)', async () => { + mockOrchestration({ + subIssueId: 'A', + children: [ + { sub_issue_id: 'A', child_status: 'released' }, + { sub_issue_id: 'B', depends_on: ['A'], child_status: 'blocked' }, + ], + }); + + await handler({ Records: [taskRecord({ task_id: 'TA', status: 'FAILED', orchestration_id: 'orch_1' })] } as never); + + expect(createTaskCoreMock).not.toHaveBeenCalled(); + }); + + test('COMPLETED with build_passed=false → treated as failure, B not released', async () => { + mockOrchestration({ + subIssueId: 'A', + children: [ + { sub_issue_id: 'A', child_status: 'released' }, + { sub_issue_id: 'B', depends_on: ['A'], child_status: 'blocked' }, + ], + }); + + await handler({ + Records: [taskRecord({ task_id: 'TA', status: 'COMPLETED', build_passed: false, orchestration_id: 'orch_1' })], + } as never); + + expect(createTaskCoreMock).not.toHaveBeenCalled(); + }); + + test('non-orchestration / non-terminal records are skipped entirely', async () => { + await handler({ + Records: [ + taskRecord({ task_id: 'T1', status: 'RUNNING', orchestration_id: 'orch_1' }), + taskRecord({ task_id: 'T2', status: 'COMPLETED' }), // no orchestration_id + ], + } as never); + 
expect(ddbSend).not.toHaveBeenCalled(); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + }); + + test('unresolvable sub_issue_id (GSI miss) → skip, no throw', async () => { + ddbSend.mockResolvedValueOnce({ Items: [] }); // GSI miss + await handler({ Records: [taskRecord({ task_id: 'TA', status: 'COMPLETED', orchestration_id: 'orch_1' })] } as never); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + }); + + test('#57: all-terminal epic with an integration node → embeds its combined screenshot in the panel', async () => { + upsertStatusCommentMock.mockReset().mockResolvedValue('panel-1'); + transitionIssueStateMock.mockReset().mockResolvedValue(true); + swapIssueReactionMock.mockReset().mockResolvedValue(true); + const meta = { + sub_issue_id: '#meta', + orchestration_id: 'orch_1', + parent_linear_issue_id: 'PARENT', + linear_workspace_id: 'WS', + repo: 'o/r', + child_count: 2, + platform_user_id: 'u1', + status_comment_id: 'panel-1', + }; + // A (real leaf) + integration node, BOTH succeeded → all-terminal. The + // integration node's task record carries a screenshot_url. 
+ const rows = [ + { + orchestration_id: 'orch_1', + sub_issue_id: 'A', + depends_on: [], + child_status: 'succeeded', + child_task_id: 'task-A', + repo: 'o/r', + parent_linear_issue_id: 'PARENT', + linear_workspace_id: 'WS', + linear_identifier: 'ENG-1', + }, + { + orchestration_id: 'orch_1', + sub_issue_id: 'orch_1__integration', + depends_on: ['A'], + child_status: 'succeeded', + child_task_id: 'task-int', + repo: 'o/r', + parent_linear_issue_id: 'PARENT', + linear_workspace_id: 'WS', + }, + ]; + ddbSend.mockImplementation(async (cmd: { _type: string; input: Record<string, unknown> }) => { + if (cmd._type === 'Query' && cmd.input.IndexName === 'ChildTaskIndex') { + return { Items: [{ ...rows[1] }] }; // the integration node just completed + } + if (cmd._type === 'Query') return { Items: [meta, ...rows] }; + if (cmd._type === 'BatchGet') { // resolveChildPrUrls + const keys = cmd.input.RequestItems as Record<string, { Keys: Array<{ task_id: string }> }>; + const tbl = Object.keys(keys)[0]; + return { Responses: { [tbl]: keys[tbl].Keys.map((k) => ({ task_id: k.task_id, pr_url: `https://github.com/o/r/pull/${k.task_id.length}` })) } }; + } + if (cmd._type === 'Get') { // resolveCombinedScreenshotUrl(task-int) + const tid = (cmd.input.Key as { task_id: string }).task_id; + return { + Item: tid === 'task-int' + ? { screenshot_url: 'https://cdn.example/combined.png', screenshot_preview_url: 'https://combined.vercel.app' } + : {}, + }; + } + return {}; + }); + + await handler({ + Records: [taskRecord({ + task_id: 'task-int', status: 'COMPLETED', orchestration_id: 'orch_1', + })], + } as never); + + expect(upsertStatusCommentMock).toHaveBeenCalled(); + const body = upsertStatusCommentMock.mock.calls.at(-1)![2] as string; + expect(body).toContain('✅'); // complete + // #247 UX.17: the panel embeds the image AND deep-links to the live combined deploy. 
+ expect(body).toContain('[![combined preview](https://cdn.example/combined.png)](https://combined.vercel.app)'); + expect(body).toContain('[Open the combined preview](https://combined.vercel.app)'); + }); +}); + +/** Detect a cascade marker in parseTerminalTaskRecord. */ +describe('parseTerminalTaskRecord — A6 cascade marker', () => { + test('a restack task (carries restack_predecessor) → cascadeSubIssueId set', () => { + const evt = parseTerminalTaskRecord(taskRecord({ + task_id: 'TR', + status: 'COMPLETED', + orchestration_id: 'orch_1', + orchestration_sub_issue_id: 'B', + restack_predecessor_sub_issue_id: 'A', + })); + expect(evt?.cascadeSubIssueId).toBe('B'); + }); + + test('an iteration task (orchestration_iteration=true) → cascadeSubIssueId set', () => { + const evt = parseTerminalTaskRecord(taskRecord({ + task_id: 'TI', + status: 'COMPLETED', + orchestration_id: 'orch_1', + orchestration_sub_issue_id: 'A', + orchestration_iteration: true, + })); + expect(evt?.cascadeSubIssueId).toBe('A'); + }); + + test('a normal child task (no markers) → cascadeSubIssueId undefined', () => { + const evt = parseTerminalTaskRecord(taskRecord({ + task_id: 'T1', status: 'COMPLETED', orchestration_id: 'orch_1', + })); + expect(evt?.cascadeSubIssueId).toBeUndefined(); + }); +}); + +/** Mock for the cascade path: loadOrchestration + per-dependent GetCommand pr_url. */ +function mockCascade(children: Array<{ + sub_issue_id: string; + depends_on?: string[]; + child_status: string; + child_task_id?: string; + child_branch_name?: string; + linear_identifier?: string; +}>): void { + const meta = { + sub_issue_id: '#meta', + orchestration_id: 'orch_1', + parent_linear_issue_id: 'PARENT', + linear_workspace_id: 'WS', + repo: 'o/r', + child_count: children.length, + platform_user_id: 'user-1', + // A panel comment exists → the cascade EDITS it (UX.2), rather than posting fresh. 
+ status_comment_id: 'panel-cmt-1', + }; + const rows = children.map((c) => ({ + orchestration_id: 'orch_1', + sub_issue_id: c.sub_issue_id, + depends_on: c.depends_on ?? [], + child_status: c.child_status, + repo: 'o/r', + parent_linear_issue_id: 'PARENT', + linear_workspace_id: 'WS', + ...(c.child_task_id && { child_task_id: c.child_task_id }), + ...(c.child_branch_name && { child_branch_name: c.child_branch_name }), + ...(c.linear_identifier && { linear_identifier: c.linear_identifier }), + })); + ddbSend.mockImplementation(async (cmd: { _type: string; input: Record<string, unknown> }) => { + if (cmd._type === 'Query') return { Items: [meta, ...rows] }; // loadOrchestration + if (cmd._type === 'Get') { // resolvePrNumber for a dependent task + const tid = (cmd.input.Key as { task_id: string }).task_id; + return { Item: { task_id: tid, pr_url: `https://github.com/o/r/pull/${tid.length}` } }; + } + if (cmd._type === 'BatchGet') { // resolveChildPrUrls for the panel + const keys = (cmd.input.RequestItems as Record<string, { Keys: Array<{ task_id: string }> }>); + const tbl = Object.keys(keys)[0]; + return { Responses: { [tbl]: keys[tbl].Keys.map((k) => ({ task_id: k.task_id, pr_url: `https://github.com/o/r/pull/${k.task_id.length}` })) } }; + } + return {}; + }); +} + +describe('orchestration-reconciler handler — A6 cascade', () => { + beforeEach(() => { + ddbSend.mockReset(); + createTaskCoreMock.mockReset(); + createTaskCoreMock.mockResolvedValue({ statusCode: 201, body: '{}' }); + postIssueCommentMock.mockReset().mockResolvedValue(true); + }); + + test('restack on B completes → re-stacks B\'s direct dependent C (one hop)', async () => { + // chain A→B→C, all started; the just-completed task re-stacked B. 
+ mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A' }, + { sub_issue_id: 'B', depends_on: ['A'], child_status: 'succeeded', child_task_id: 'task-B', child_branch_name: 'branch-B' }, + { sub_issue_id: 'C', depends_on: ['B'], child_status: 'succeeded', child_task_id: 'task-C', child_branch_name: 'branch-C' }, + ]); + await handler({ + Records: [taskRecord({ + task_id: 'restack-task-1', + status: 'COMPLETED', + orchestration_id: 'orch_1', + orchestration_sub_issue_id: 'B', + restack_predecessor_sub_issue_id: 'A', + })], + } as never); + + // Exactly one restack spawned — for C (B's direct dependent), NOT A. + expect(createTaskCoreMock).toHaveBeenCalledTimes(1); + const [body, ctx] = createTaskCoreMock.mock.calls[0]; + expect(body.workflow_ref).toBe('coding/restack-v1'); + expect(ctx.channelMetadata.orchestration_sub_issue_id).toBe('C'); + expect(ctx.channelMetadata.restack_predecessor_sub_issue_id).toBe('B'); + expect(ctx.channelMetadata.orchestration_merge_branches).toBe(JSON.stringify(['branch-B'])); + // Idempotency keyed on the SOURCE task id (converges, no loop). 
+ expect(ctx.idempotencyKey).toContain('restack-task-1'); + }); + + test('iteration on A completes → re-stacks A\'s direct dependent B', async () => { + mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A' }, + { sub_issue_id: 'B', depends_on: ['A'], child_status: 'succeeded', child_task_id: 'task-B', child_branch_name: 'branch-B' }, + ]); + await handler({ + Records: [taskRecord({ + task_id: 'iter-task-1', + status: 'COMPLETED', + orchestration_id: 'orch_1', + orchestration_sub_issue_id: 'A', + orchestration_iteration: true, + })], + } as never); + expect(createTaskCoreMock).toHaveBeenCalledTimes(1); + expect(createTaskCoreMock.mock.calls[0][1].channelMetadata.orchestration_sub_issue_id).toBe('B'); + }); + + test('UX.15: a cascade that RE-OPENS the epic clears rollup_posted_at (so parent state can re-settle)', async () => { + // A comment on an already-completed epic re-opens it. The first + // completion's rollup_posted_at stamp must be cleared, or claimRollup stays + // failed forever and the parent reaction/state never re-mirror (👀→✅). + mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A', linear_identifier: 'ENG-1' }, + { sub_issue_id: 'B', depends_on: ['A'], child_status: 'succeeded', child_task_id: 'task-B', child_branch_name: 'branch-B', linear_identifier: 'ENG-2' }, + ]); + await handler({ + Records: [taskRecord({ + task_id: 'iter-task-1', + status: 'COMPLETED', + orchestration_id: 'orch_1', + orchestration_sub_issue_id: 'A', + orchestration_iteration: true, + })], + } as never); + // An Update issued a `REMOVE rollup_posted_at` on the meta row. 
+ const clears = ddbSend.mock.calls + .map((c) => c[0]) + .filter((cmd) => cmd?._type === 'Update' + && typeof cmd.input?.UpdateExpression === 'string' + && cmd.input.UpdateExpression.includes('REMOVE rollup_posted_at')); + expect(clears.length).toBeGreaterThan(0); + }); + + test('FAILED iteration → no cascade', async () => { + mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A' }, + { sub_issue_id: 'B', depends_on: ['A'], child_status: 'succeeded', child_task_id: 'task-B', child_branch_name: 'branch-B' }, + ]); + await handler({ + Records: [taskRecord({ + task_id: 'iter-fail', + status: 'FAILED', + orchestration_id: 'orch_1', + orchestration_sub_issue_id: 'A', + orchestration_iteration: true, + })], + } as never); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + }); + + test('cascade source with no started dependents → no restack', async () => { + mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A' }, + { sub_issue_id: 'B', depends_on: ['A'], child_status: 'blocked' }, // not started + ]); + await handler({ + Records: [taskRecord({ + task_id: 'iter-1', + status: 'COMPLETED', + orchestration_id: 'orch_1', + orchestration_sub_issue_id: 'A', + orchestration_iteration: true, + })], + } as never); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + }); + + test('UX.15 regression: a re-stack of a NO-DEPENDENTS node still refreshes the panel + settles (not stuck)', async () => { + // The stress-caught hang: a cascade source with no dependents returned + // early without refreshing → the node's '🔄 updating' row never cleared and + // the epic never re-settled to ✅. Here every child is already terminal, so + // the completion settle must fire: panel edited + parent state mirrored. 
+ upsertStatusCommentMock.mockReset().mockResolvedValue('panel-cmt-1'); + transitionIssueStateMock.mockReset().mockResolvedValue(true); + swapIssueReactionMock.mockReset().mockResolvedValue(true); + mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A', linear_identifier: 'ENG-1' }, + // B is a leaf (nothing depends on it) AND has no dependents → planDirectRestack=0. + { sub_issue_id: 'B', depends_on: ['A'], child_status: 'succeeded', child_task_id: 'task-B', child_branch_name: 'branch-B', linear_identifier: 'ENG-2' }, + ]); + // A re-stack of B (the no-dependents leaf) completes. + await handler({ + Records: [taskRecord({ + task_id: 'restack-B', + status: 'COMPLETED', + orchestration_id: 'orch_1', + orchestration_sub_issue_id: 'B', + restack_predecessor_sub_issue_id: 'A', + })], + } as never); + + // No further restack (B has no dependents). + expect(createTaskCoreMock).not.toHaveBeenCalled(); + // But the panel WAS refreshed (settle) — and since all children are + // terminal, it shows complete + mirrors parent state. 
+ expect(upsertStatusCommentMock).toHaveBeenCalled(); + const body = upsertStatusCommentMock.mock.calls.at(-1)![2] as string; + expect(body).toMatch(/complete/i); + expect(body).not.toMatch(/updating/i); // the stale updating row is gone + expect(transitionIssueStateMock).toHaveBeenCalled(); // parent settled + }); + + test('a cascade source does NOT run normal child gating (no GSI sub-issue lookup)', async () => { + mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A' }, + { sub_issue_id: 'B', depends_on: ['A'], child_status: 'succeeded', child_task_id: 'task-B', child_branch_name: 'branch-B' }, + ]); + await handler({ + Records: [taskRecord({ + task_id: 'iter-1', + status: 'COMPLETED', + orchestration_id: 'orch_1', + orchestration_sub_issue_id: 'A', + orchestration_iteration: true, + })], + } as never); + // Never queried ChildTaskIndex (that's the normal-gating path). + const gsiCalls = ddbSend.mock.calls.filter( + (c) => c[0]?._type === 'Query' && c[0]?.input?.IndexName === 'ChildTaskIndex'); + expect(gsiCalls).toHaveLength(0); + }); +}); + +describe('orchestration-reconciler handler — A6 cascade surfacing via the panel (#247 UX.2)', () => { + beforeEach(() => { + ddbSend.mockReset(); + createTaskCoreMock.mockReset().mockResolvedValue({ statusCode: 201, body: '{}' }); + postIssueCommentMock.mockReset().mockResolvedValue(true); + upsertStatusCommentMock.mockReset().mockResolvedValue('panel-cmt-1'); + swapIssueReactionMock.mockReset().mockResolvedValue(true); + transitionIssueStateMock.mockReset().mockResolvedValue(true); + }); + + const iterEvent = (sub: string) => ({ + Records: [taskRecord({ + task_id: 'iter-task-1', + status: 'COMPLETED', + orchestration_id: 'orch_1', + orchestration_sub_issue_id: sub, + orchestration_iteration: true, + })], + }) as never; + + test('refreshes the panel with the impacted row as "updating per comment" — NO standalone parent/sub-issue comments', async () => { + 
mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A', linear_identifier: 'ENG-1' }, + { sub_issue_id: 'B', depends_on: ['A'], child_status: 'succeeded', child_task_id: 'task-B', child_branch_name: 'branch-B', linear_identifier: 'ENG-2' }, + ]); + await handler(iterEvent('A')); + // The panel is edited (upsertStatusComment), NOT a stream of new comments. + expect(upsertStatusCommentMock).toHaveBeenCalled(); + const body = upsertStatusCommentMock.mock.calls.at(-1)![2] as string; + // Impacted dependent B shows '🔄 … updating per ENG-1's comment'. + expect(body).toMatch(/ENG-2.*updating per ENG-1's comment/); + // The retired standalone '🔄 Re-stacked' / 'revised' parent comments are GONE. + expect(postIssueCommentMock).not.toHaveBeenCalled(); + }); + + test('idempotent replay (200, NOT 201) does NOT re-mark the panel as updating', async () => { + createTaskCoreMock.mockResolvedValue({ statusCode: 200, body: '{}' }); + mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A', linear_identifier: 'ENG-1' }, + { sub_issue_id: 'B', depends_on: ['A'], child_status: 'succeeded', child_task_id: 'task-B', child_branch_name: 'branch-B', linear_identifier: 'ENG-2' }, + ]); + await handler(iterEvent('A')); + // No NEW restack task created → no panel "updating" refresh from the cascade. 
+ expect(upsertStatusCommentMock).not.toHaveBeenCalled(); + }); + + test('integration-node dependent renders friendly in the panel (never raw id)', async () => { + mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A', linear_identifier: 'ENG-1' }, + { sub_issue_id: 'orch_1__integration', depends_on: ['A'], child_status: 'succeeded', child_task_id: 'task-int', child_branch_name: 'branch-int' }, + ]); + await handler(iterEvent('A')); + expect(upsertStatusCommentMock).toHaveBeenCalled(); + const body = upsertStatusCommentMock.mock.calls.at(-1)![2] as string; + expect(body).toContain('Integration — combined result'); + expect(body).not.toContain('orch_1__integration'); + }); + + test('a restack from a PREDECESSOR change (not a comment) says "updating to include … change"', async () => { + mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A', linear_identifier: 'ENG-1' }, + { sub_issue_id: 'B', depends_on: ['A'], child_status: 'succeeded', child_task_id: 'task-B', child_branch_name: 'branch-B', linear_identifier: 'ENG-2' }, + ]); + // restack source (carries restack_predecessor, NOT orchestration_iteration). 
+ await handler({ + Records: [taskRecord({ + task_id: 'restack-1', + status: 'COMPLETED', + orchestration_id: 'orch_1', + orchestration_sub_issue_id: 'A', + restack_predecessor_sub_issue_id: 'Z', + })], + } as never); + const body = upsertStatusCommentMock.mock.calls.at(-1)![2] as string; + expect(body).toMatch(/ENG-2.*updating to include ENG-1's change/); + }); +}); + +describe('orchestration-reconciler handler — A6 iteration ack reply (#247 UX.3)', () => { + beforeEach(() => { + ddbSend.mockReset(); + createTaskCoreMock.mockReset().mockResolvedValue({ statusCode: 201, body: '{}' }); + postIssueCommentMock.mockReset().mockResolvedValue(true); + upsertStatusCommentMock.mockReset().mockResolvedValue('panel-cmt-1'); + swapIssueReactionMock.mockReset().mockResolvedValue(true); + swapCommentReactionMock.mockReset().mockResolvedValue(true); + transitionIssueStateMock.mockReset().mockResolvedValue(true); + replyToCommentMock.mockReset().mockResolvedValue('reply-1'); + }); + + /** An iteration event carrying the human comment id that triggered it. */ + const iterEventWithComment = (status: string, commentId = 'human-cmt-1', buildPassed?: boolean, errorMessage?: string) => ({ + Records: [taskRecord({ + task_id: 'iter-task-1', + status, + orchestration_id: 'orch_1', + orchestration_sub_issue_id: 'A', + orchestration_iteration: true, + trigger_comment_id: commentId, + ...(buildPassed !== undefined && { build_passed: buildPassed }), + ...(errorMessage !== undefined && { error_message: errorMessage }), + })], + }) as never; + + test('successful iteration → ✅ threaded reply to the triggering comment, linking the PR', async () => { + mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A', linear_identifier: 'ENG-1' }, + ]); + await handler(iterEventWithComment('COMPLETED')); + + expect(replyToCommentMock).toHaveBeenCalledTimes(1); + // Signature: replyToComment(ctx, issueId, parentCommentId, body). 
+ const [, issueId, parentCommentId, body] = replyToCommentMock.mock.calls[0]; + expect(issueId).toBe('A'); // the sub-issue the comment lives on + expect(parentCommentId).toBe('human-cmt-1'); + expect(body).toMatch(/^✅ Updated — PR #\d+\./); + // #247 UX.21: the trigger comment's 👀 swaps to ✅, and the sub-issue + // advances to In Review (platform-owned settle, not agent-flapped). + expect(swapCommentReactionMock).toHaveBeenCalledWith(expect.anything(), 'human-cmt-1', 'white_check_mark'); + expect(transitionIssueStateMock).toHaveBeenCalledWith(expect.anything(), 'A', 'started', ['In Review']); + }); + + test('#247 UX.19: a PARENT-routed iteration replies on the PARENT issue, not the sub-issue', async () => { + // The human commented on the parent epic (UX.18 routed it to sub-issue A). + // The ✅/❌ reply must use the PARENT issue id as commentCreate's issueId — + // else Linear rejects the reply (parentId belongs to a different issue) and + // the human sees 👀 then silence (live-caught on ABCA-304). 
+ mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A', linear_identifier: 'ENG-1' }, + ]); + await handler({ + Records: [taskRecord({ + task_id: 'iter-task-1', + status: 'COMPLETED', + orchestration_id: 'orch_1', + orchestration_sub_issue_id: 'A', + orchestration_iteration: true, + trigger_comment_id: 'parent-cmt-1', + trigger_comment_issue_id: 'PARENT', // comment lives on the parent epic + })], + } as never); + + expect(replyToCommentMock).toHaveBeenCalledTimes(1); + const [, issueId, parentCommentId] = replyToCommentMock.mock.calls[0]; + expect(issueId).toBe('PARENT'); // NOT 'A' — the reply targets the parent comment's issue + expect(parentCommentId).toBe('parent-cmt-1'); + }); + + test('FAILED iteration (agent crash) → ❌ reply with classified reason + CloudWatch task id (UX.5)', async () => { + mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A', linear_identifier: 'ENG-1' }, + ]); + await handler(iterEventWithComment('FAILED', 'human-cmt-1', undefined, 'agent_status="error_max_turns"')); + + expect(replyToCommentMock).toHaveBeenCalledTimes(1); + const [, , , body] = replyToCommentMock.mock.calls[0]; + expect(body).toMatch(/^❌/); + expect(body).toMatch(/Exceeded max turns/i); // classified + expect(body).toMatch(/CloudWatch for task `iter-task-1`/); + expect(body).toMatch(/reply with guidance/i); + // A failed iteration still does not cascade onto dependents. + expect(createTaskCoreMock).not.toHaveBeenCalled(); + // #247 UX.21: the trigger comment's 👀 swaps to ❌, but the sub-issue state + // is LEFT in place on failure (the ❌ + reply convey it; never demote). 
+ expect(swapCommentReactionMock).toHaveBeenCalledWith(expect.anything(), 'human-cmt-1', 'x'); + expect(transitionIssueStateMock).not.toHaveBeenCalled(); + }); + + test('COMPLETED-but-build-failed iteration → ❌ build/test reply pointing at PR checks (UX.5)', async () => { + mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A', linear_identifier: 'ENG-1' }, + ]); + // COMPLETED, build_passed=false, NO error_message → build/test failure shape. + await handler(iterEventWithComment('COMPLETED', 'human-cmt-1', false)); + + expect(replyToCommentMock).toHaveBeenCalledTimes(1); + const [, , , body] = replyToCommentMock.mock.calls[0]; + expect(body).toMatch(/build\/tests didn't pass/i); + expect(body).toMatch(/PR's checks/i); + expect(body).not.toMatch(/CloudWatch/i); // build-fail copy omits the log pointer + // build_passed=false ⇒ not a success ⇒ no cascade onto dependents. + expect(createTaskCoreMock).not.toHaveBeenCalled(); + }); + + test('build_passed=false → ❌ reply (treated as not-successful)', async () => { + mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A', linear_identifier: 'ENG-1' }, + ]); + await handler(iterEventWithComment('COMPLETED', 'human-cmt-1', false)); + const [, , , body] = replyToCommentMock.mock.calls[0]; + expect(body).toMatch(/^❌/); + }); + + test('idempotent: redelivery loses the claim → no duplicate reply', async () => { + mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A', linear_identifier: 'ENG-1' }, + ]); + // First Update (the ack claim) wins; a second Update with the same key is + // rejected by the conditional → simulate the redelivery losing the claim. 
+ let ackClaims = 0; + const base = ddbSend.getMockImplementation()!; + ddbSend.mockImplementation(async (cmd: { _type: string; input: Record<string, unknown> }) => { + if (cmd._type === 'Update' && (cmd.input.UpdateExpression as string)?.includes('ack_replied_at')) { + ackClaims += 1; + if (ackClaims > 1) { + const err = new Error('conditional'); + (err as { name?: string }).name = 'ConditionalCheckFailedException'; + throw err; + } + return {}; + } + return base(cmd); + }); + + await handler(iterEventWithComment('COMPLETED')); + await handler(iterEventWithComment('COMPLETED')); // redelivery + + // Replied exactly once across both deliveries. + expect(replyToCommentMock).toHaveBeenCalledTimes(1); + }); + + test('a restack (no trigger_comment_id) → no ack reply', async () => { + mockCascade([ + { sub_issue_id: 'A', child_status: 'succeeded', child_task_id: 'task-A', child_branch_name: 'branch-A', linear_identifier: 'ENG-1' }, + { sub_issue_id: 'B', depends_on: ['A'], child_status: 'succeeded', child_task_id: 'task-B', child_branch_name: 'branch-B', linear_identifier: 'ENG-2' }, + ]); + await handler({ + Records: [taskRecord({ + task_id: 'restack-1', + status: 'COMPLETED', + orchestration_id: 'orch_1', + orchestration_sub_issue_id: 'A', + restack_predecessor_sub_issue_id: 'Z', + })], + } as never); + expect(replyToCommentMock).not.toHaveBeenCalled(); + }); +}); diff --git a/cdk/test/handlers/reconcile-stranded-orchestrations.test.ts b/cdk/test/handlers/reconcile-stranded-orchestrations.test.ts new file mode 100644 index 00000000..d57440a7 --- /dev/null +++ b/cdk/test/handlers/reconcile-stranded-orchestrations.test.ts @@ -0,0 +1,214 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/** + * #303 — stranded-orchestration backstop. Uses a stateful in-memory + * DynamoDB fake so the sweep's read-advance-release cycle is exercised + * for real (status writes are visible to the subsequent reload). + */ + +interface Row { [k: string]: unknown } +const orch = new Map<string, Row>(); // OrchestrationTable, key = `${oid} ${sk}` +const tasksTbl = new Map<string, Row>(); // TaskTable, key = task_id + +const fakeSend = jest.fn(async (cmd: { _type: string; input: Record<string, unknown> }) => { + const { _type, input } = cmd; + const tn = input.TableName as string; + if (_type === 'Scan') { + // meta-row scan on OrchestrationTable + const items = [...orch.values()].filter((r) => r.sub_issue_id === '#meta'); + return { Items: items }; + } + if (_type === 'Get') { + const k = input.Key as Row; + return { Item: tn.includes('Task') ? 
tasksTbl.get(String(k.task_id)) : orch.get(`${k.orchestration_id} ${k.sub_issue_id}`) }; + } + if (_type === 'Query') { + const oid = (input.ExpressionAttributeValues as Row)[':oid']; + return { Items: [...orch.values()].filter((r) => r.orchestration_id === oid) }; + } + if (_type === 'Update') { + const k = input.Key as Row; + const key = `${k.orchestration_id} ${k.sub_issue_id}`; + const vals = input.ExpressionAttributeValues as Row; + const row = orch.get(key); + if (row && input.ConditionExpression?.toString().includes('child_status <> :s') && row.child_status === vals[':s']) { + const e = new Error('c'); e.name = 'ConditionalCheckFailedException'; throw e; + } + if (row) { + if (vals[':s'] !== undefined) row.child_status = vals[':s']; + if (vals[':released'] !== undefined) { row.child_status = 'released'; row.child_task_id = vals[':tid']; } + } + return {}; + } + throw new Error(`fake: unhandled ${_type}`); +}); + +jest.mock('@aws-sdk/client-dynamodb', () => ({ DynamoDBClient: jest.fn(() => ({})) })); +jest.mock('@aws-sdk/lib-dynamodb', () => ({ + DynamoDBDocumentClient: { from: jest.fn(() => ({ send: fakeSend })) }, + ScanCommand: jest.fn((input: unknown) => ({ _type: 'Scan', input })), + GetCommand: jest.fn((input: unknown) => ({ _type: 'Get', input })), + QueryCommand: jest.fn((input: unknown) => ({ _type: 'Query', input })), + UpdateCommand: jest.fn((input: unknown) => ({ _type: 'Update', input })), +})); + +const createTaskCoreMock = jest.fn(); +jest.mock('../../src/handlers/shared/create-task-core', () => ({ + createTaskCore: (...args: unknown[]) => createTaskCoreMock(...args), +})); +jest.mock('../../src/handlers/shared/logger', () => ({ + logger: { info: jest.fn(), warn: jest.fn(), error: jest.fn() }, +})); + +process.env.ORCHESTRATION_TABLE_NAME = 'OrchestrationTable'; +process.env.TASK_TABLE_NAME = 'TaskTable'; + +import { handler } from '../../src/handlers/reconcile-stranded-orchestrations'; + +function seed(oid: string, children: Array<{ sk: 
string; deps?: string[]; status: string; taskId?: string }>): void { + orch.set(`${oid} #meta`, { + orchestration_id: oid, + sub_issue_id: '#meta', + parent_linear_issue_id: 'P', + linear_workspace_id: 'WS', + repo: 'o/r', + child_count: children.length, + platform_user_id: 'user-1', + }); + for (const c of children) { + orch.set(`${oid} ${c.sk}`, { + orchestration_id: oid, + sub_issue_id: c.sk, + depends_on: c.deps ?? [], + child_status: c.status, + repo: 'o/r', + parent_linear_issue_id: 'P', + linear_workspace_id: 'WS', + ...(c.taskId && { child_task_id: c.taskId }), + }); + } +} +const statusOf = (oid: string, sk: string) => orch.get(`${oid} ${sk}`)?.child_status; + +beforeEach(() => { + orch.clear(); tasksTbl.clear(); fakeSend.mockClear(); + createTaskCoreMock.mockReset(); + createTaskCoreMock.mockResolvedValue({ statusCode: 201, body: JSON.stringify({ data: { task_id: 'new-task' } }) }); +}); + +describe('#303 stranded-orchestration backstop', () => { + test('lost RELEASE event: A already succeeded, B blocked → sweep releases B', async () => { + // The live reconciler missed releasing B even though A is succeeded. 
+ seed('o1', [ + { sk: 'A', status: 'succeeded' }, + { sk: 'B', deps: ['A'], status: 'blocked' }, + ]); + await handler(); + expect(createTaskCoreMock).toHaveBeenCalledTimes(1); + expect(statusOf('o1', 'B')).toBe('released'); + }); + + test('lost TERMINAL event: A released + its task COMPLETED but row stuck → sweep advances A and releases B', async () => { + seed('o2', [ + { sk: 'A', status: 'released', taskId: 'task-A' }, + { sk: 'B', deps: ['A'], status: 'blocked' }, + ]); + tasksTbl.set('task-A', { task_id: 'task-A', status: 'COMPLETED', build_passed: true }); + await handler(); + expect(statusOf('o2', 'A')).toBe('succeeded'); + expect(statusOf('o2', 'B')).toBe('released'); + }); + + test('lost TERMINAL event with build_passed=false: A→failed, B→skipped, no release', async () => { + seed('o3', [ + { sk: 'A', status: 'released', taskId: 'task-A' }, + { sk: 'B', deps: ['A'], status: 'blocked' }, + ]); + tasksTbl.set('task-A', { task_id: 'task-A', status: 'COMPLETED', build_passed: false }); + await handler(); + expect(statusOf('o3', 'A')).toBe('failed'); + expect(statusOf('o3', 'B')).toBe('skipped'); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + }); + + test('transitive skip: A failed → B and C (chain) both skipped', async () => { + seed('o4', [ + { sk: 'A', status: 'failed' }, + { sk: 'B', deps: ['A'], status: 'blocked' }, + { sk: 'C', deps: ['B'], status: 'blocked' }, + ]); + await handler(); + expect(statusOf('o4', 'B')).toBe('skipped'); + expect(statusOf('o4', 'C')).toBe('skipped'); + }); + + test('still-running child is left alone (task not terminal)', async () => { + seed('o5', [ + { sk: 'A', status: 'released', taskId: 'task-A' }, + { sk: 'B', deps: ['A'], status: 'blocked' }, + ]); + tasksTbl.set('task-A', { task_id: 'task-A', status: 'RUNNING' }); + await handler(); + expect(statusOf('o5', 'A')).toBe('released'); // unchanged + expect(statusOf('o5', 'B')).toBe('blocked'); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + }); + + 
test('fully-terminal orchestration is skipped (no work, no release)', async () => { + seed('o6', [ + { sk: 'A', status: 'succeeded' }, + { sk: 'B', deps: ['A'], status: 'succeeded' }, + ]); + await handler(); + expect(createTaskCoreMock).not.toHaveBeenCalled(); + }); + + test('diamond: D releases only once BOTH B and C are succeeded', async () => { + seed('o7', [ + { sk: 'B', status: 'succeeded' }, + { sk: 'C', status: 'succeeded' }, + { sk: 'D', deps: ['B', 'C'], status: 'blocked' }, + ]); + await handler(); + expect(statusOf('o7', 'D')).toBe('released'); + }); + + test('diamond not-ready: one predecessor still running → D stays blocked', async () => { + seed('o8', [ + { sk: 'B', status: 'succeeded' }, + { sk: 'C', status: 'released', taskId: 'task-C' }, + { sk: 'D', deps: ['B', 'C'], status: 'blocked' }, + ]); + tasksTbl.set('task-C', { task_id: 'task-C', status: 'RUNNING' }); + await handler(); + expect(statusOf('o8', 'D')).toBe('blocked'); + }); + + test('idempotent: a second sweep over a healthy orchestration releases nothing new', async () => { + seed('o9', [ + { sk: 'A', status: 'succeeded' }, + { sk: 'B', deps: ['A'], status: 'blocked' }, + ]); + await handler(); + createTaskCoreMock.mockClear(); + await handler(); // B is now 'released' → no further release + expect(createTaskCoreMock).not.toHaveBeenCalled(); + }); +}); diff --git a/cdk/test/handlers/shared/create-task-core.test.ts b/cdk/test/handlers/shared/create-task-core.test.ts index 8edf6220..26d13864 100644 --- a/cdk/test/handlers/shared/create-task-core.test.ts +++ b/cdk/test/handlers/shared/create-task-core.test.ts @@ -523,7 +523,7 @@ describe('createTaskCore', () => { expect(result.statusCode).toBe(201); }); - test('resolves the default workflow when workflow_ref is omitted', async () => { + test('resolves the coding workflow when workflow_ref is omitted AND a repo is present', async () => { const result = await createTaskCore( { repo: 'org/repo', task_description: 'Fix the bug' }, makeContext(), 
@@ -531,8 +531,11 @@ describe('createTaskCore', () => { ); expect(result.statusCode).toBe(201); const body = JSON.parse(result.body); - // No workflow_ref ⇒ the resolution ladder falls to the platform default. - expect(body.data.resolved_workflow).toEqual({ id: 'default/agent-v1', version: '1.0.0' }); + // No workflow_ref BUT a repo is present ⇒ the repo-aware fallback resolves + // to the disciplined coding workflow, not the repo-less default/agent-v1. + // (Pre-#296 behaviour; #296 left this rung unwired and every repo task + // fell through to default/agent-v1, breaking pr_url/screenshot/stacking.) + expect(body.data.resolved_workflow).toEqual({ id: 'coding/new-task-v1', version: '1.0.0' }); }); test('creates a pr-iteration workflow task with pr_number', async () => { diff --git a/cdk/test/handlers/shared/failure-reply.test.ts b/cdk/test/handlers/shared/failure-reply.test.ts new file mode 100644 index 00000000..c7d456a5 --- /dev/null +++ b/cdk/test/handlers/shared/failure-reply.test.ts @@ -0,0 +1,115 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +import { TaskStatus } from '../../../src/constructs/task-status'; +import { renderFailureReply } from '../../../src/handlers/shared/failure-reply'; + +describe('renderFailureReply (#247 UX.5 — failure is a conversation)', () => { + describe('build/test failure — the REAL live-verified gating shape', () => { + // Live-verified 2026-06-16: a build/test regression persists as + // status=FAILED, build_passed=null, error_message="Task did not succeed + // (agent_status='success', build_ok=False)". The agent finished fine; only + // the build gate failed. (The previous COMPLETED+build_passed===false + // assumption NEVER occurs live — that bug shipped to dev and was caught by + // forcing a regression in UX.6.) + const body = renderFailureReply({ + status: TaskStatus.FAILED, + buildPassed: null, + errorMessage: "Task did not succeed (agent_status='success', build_ok=False)", + taskId: 't1', + }); + + test('points at the PR checks, not a raw dump', () => { + expect(body).toMatch(/^❌/); + expect(body).toMatch(/build\/tests didn't pass/i); + expect(body).toMatch(/PR's checks/i); + }); + + test('invites a reply (the retry seam)', () => { + expect(body).toMatch(/reply with guidance/i); + }); + + test('does NOT surface a CloudWatch task pointer (that is for agent failures)', () => { + expect(body).not.toMatch(/CloudWatch/i); + }); + + test('also matches the end_turn variant of the gating message', () => { + const b = renderFailureReply({ + status: TaskStatus.FAILED, + errorMessage: "Task did not succeed (agent_status='end_turn', build_ok=False)", + taskId: 't1b', + }); + expect(b).toMatch(/build\/tests didn't pass/i); + expect(b).not.toMatch(/CloudWatch/i); + }); + + test('defensive: explicit build_passed=false with no error_message still reads as build failure', () => { + const b = renderFailureReply({ status: TaskStatus.FAILED, buildPassed: false, taskId: 't1c' }); + expect(b).toMatch(/build\/tests didn't pass/i); + }); + }); + + describe('agent-itself failure 
(crash / cap / timeout before a clean terminal)', () => { + test('max-turns crash → classified title + CloudWatch task id + retry invite', () => { + const body = renderFailureReply({ + status: TaskStatus.FAILED, + errorMessage: 'Task did not succeed: agent_status="error_max_turns"', + taskId: 'task-xyz', + }); + expect(body).toMatch(/^❌/); + expect(body).toMatch(/Exceeded max turns/i); // classified title + expect(body).toMatch(/CloudWatch for task `task-xyz`/); + expect(body).toMatch(/reply with guidance/i); + }); + + test('truncates a long raw error to an excerpt with an ellipsis', () => { + const longErr = 'boom '.repeat(200); // 1000 chars + const body = renderFailureReply({ status: TaskStatus.FAILED, errorMessage: longErr, taskId: 't2' }); + expect(body).toContain('…'); + // The reply stays compact — nowhere near the 1000-char raw error. + expect(body.length).toBeLessThan(400); + }); + + test('unclassifiable error → generic fallback title, still points at CloudWatch', () => { + const body = renderFailureReply({ status: TaskStatus.FAILED, errorMessage: 'weird thing', taskId: 't3' }); + // UNKNOWN_CLASSIFICATION title is "Unexpected error". + expect(body).toMatch(/Unexpected error/i); + expect(body).toMatch(/CloudWatch for task `t3`/); + }); + + test('no error_message at all → still a coherent agent-failure reply', () => { + const body = renderFailureReply({ status: TaskStatus.FAILED, taskId: 't4' }); + expect(body).toMatch(/^❌/); + expect(body).toMatch(/CloudWatch for task `t4`/); + }); + + test('a genuine agent crash (agent_status=error_*) reads as agent failure, NOT build', () => { + // An agent crash mid-execution — distinct from the build-gate-failed + // shape (which carries agent_status='success'). Must get the CloudWatch + // pointer, not the softer "PR's checks" build copy. 
+ const body = renderFailureReply({ + status: TaskStatus.FAILED, + errorMessage: 'Task did not succeed (agent_status=\'error_during_execution\', build_ok=False)', + taskId: 't5', + }); + expect(body).toMatch(/CloudWatch for task `t5`/); + expect(body).not.toMatch(/PR's checks/i); + }); + }); +}); diff --git a/cdk/test/handlers/shared/linear-feedback.test.ts b/cdk/test/handlers/shared/linear-feedback.test.ts index 3a19f4d9..44bbe940 100644 --- a/cdk/test/handlers/shared/linear-feedback.test.ts +++ b/cdk/test/handlers/shared/linear-feedback.test.ts @@ -30,7 +30,13 @@ import { addIssueReaction, type LinearFeedbackContext, postIssueComment, + reactToComment, + replyToComment, reportIssueFailure, + swapCommentReaction, + swapIssueReaction, + transitionIssueState, + upsertStatusComment, } from '../../../src/handlers/shared/linear-feedback'; const CTX: LinearFeedbackContext = { @@ -158,6 +164,91 @@ describe('linear-feedback', () => { }); }); + describe('reactToComment (#247 UX.3 — instant "on it" ack on a comment)', () => { + test('reacts on the COMMENT (commentId), defaulting to 👀 (eyes)', async () => { + fetchMock.mockResolvedValue(jsonResponse({ data: { reactionCreate: { success: true } } })); + + const ok = await reactToComment(CTX, 'comment-77'); + + expect(ok).toBe(true); + const init = fetchMock.mock.calls[0][1]; + const body = JSON.parse(init.body as string) as { query: string; variables: { commentId: string; emoji: string } }; + expect(body.query).toContain('reactionCreate'); + // The variable is commentId — NOT issueId (reacts on the comment, not the issue). 
+ expect(body.variables.commentId).toBe('comment-77'); + expect(body.variables.emoji).toBe('eyes'); + }); + + test('honours an explicit emoji argument', async () => { + fetchMock.mockResolvedValue(jsonResponse({ data: { reactionCreate: { success: true } } })); + await reactToComment(CTX, 'comment-77', 'white_check_mark'); + const init = fetchMock.mock.calls[0][1]; + const body = JSON.parse(init.body as string) as { variables: { emoji: string } }; + expect(body.variables.emoji).toBe('white_check_mark'); + }); + + test('returns false when the token cannot be resolved (no fetch)', async () => { + resolveLinearOauthTokenMock.mockResolvedValueOnce(null); + const ok = await reactToComment(CTX, 'comment-77'); + expect(ok).toBe(false); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + test('returns false on network failure (swallowed)', async () => { + fetchMock.mockRejectedValueOnce(new Error('ECONNRESET')); + const ok = await reactToComment(CTX, 'comment-77'); + expect(ok).toBe(false); + }); + }); + + describe('replyToComment (#247 UX.3 — threaded reply that notifies)', () => { + test('POSTs commentCreate with BOTH issueId and parentId, returns the new reply id', async () => { + fetchMock.mockResolvedValue(jsonResponse({ data: { commentCreate: { success: true, comment: { id: 'reply-99' } } } })); + + const replyId = await replyToComment(CTX, ISSUE_ID, 'comment-77', '✅ Updated — PR #178'); + + expect(replyId).toBe('reply-99'); + const init = fetchMock.mock.calls[0][1]; + const body = JSON.parse(init.body as string) as { query: string; variables: { issueId: string; parentId: string; body: string } }; + expect(body.query).toContain('commentCreate'); + // CONTRACT (live-verified 2026-06-16): Linear's commentCreate REQUIRES + // issueId even for a threaded reply — parentId alone fails argument + // validation. Pin BOTH so the missing-issueId regression can't return. 
+ expect(body.variables.issueId).toBe(ISSUE_ID); + expect(body.variables.parentId).toBe('comment-77'); + expect(body.variables.body).toBe('✅ Updated — PR #178'); + }); + + test('the mutation declares issueId as a required argument (regression guard)', async () => { + fetchMock.mockResolvedValue(jsonResponse({ data: { commentCreate: { success: true, comment: { id: 'r' } } } })); + await replyToComment(CTX, ISSUE_ID, 'comment-77', 'body'); + const init = fetchMock.mock.calls[0][1]; + const query = (JSON.parse(init.body as string) as { query: string }).query; + // The GraphQL op must pass issueId INTO commentCreate's input — not just + // accept it as a variable. Catches a half-fix that drops it from input. + expect(query).toMatch(/commentCreate\(\s*input:\s*\{[^}]*issueId:\s*\$issueId/); + }); + + test('returns null when commentCreate did not succeed', async () => { + fetchMock.mockResolvedValue(jsonResponse({ data: { commentCreate: { success: false } } })); + const replyId = await replyToComment(CTX, ISSUE_ID, 'comment-77', 'body'); + expect(replyId).toBeNull(); + }); + + test('returns null on GraphQL errors (no throw)', async () => { + fetchMock.mockResolvedValueOnce(jsonResponse({ errors: [{ message: 'parent not found' }] })); + const replyId = await replyToComment(CTX, ISSUE_ID, 'comment-77', 'body'); + expect(replyId).toBeNull(); + }); + + test('returns null when the token cannot be resolved (no fetch)', async () => { + resolveLinearOauthTokenMock.mockResolvedValueOnce(null); + const replyId = await replyToComment(CTX, ISSUE_ID, 'comment-77', 'body'); + expect(replyId).toBeNull(); + expect(fetchMock).not.toHaveBeenCalled(); + }); + }); + describe('reportIssueFailure', () => { test('posts comment + ❌ in parallel via Promise.allSettled', async () => { await reportIssueFailure(CTX, ISSUE_ID, '❌ failed'); @@ -186,4 +277,221 @@ describe('linear-feedback', () => { await expect(reportIssueFailure(CTX, ISSUE_ID, 'msg')).resolves.toBeUndefined(); }); }); + + 
describe('swapIssueReaction (one marker at a time, #3)', () => { + const reactionsResp = (rs: Array<{ id: string; emoji: string }>) => + jsonResponse({ data: { issue: { reactions: rs } } }); + + test('👀 present → deletes it and adds the target (✅)', async () => { + fetchMock + .mockResolvedValueOnce(reactionsResp([{ id: 'r-eyes', emoji: 'eyes' }])) // query + .mockResolvedValueOnce(jsonResponse({ data: { reactionDelete: { success: true } } })) // delete 👀 + .mockResolvedValueOnce(jsonResponse({ data: { reactionCreate: { success: true } } })); // add ✅ + const ok = await swapIssueReaction(CTX, ISSUE_ID, 'white_check_mark'); + expect(ok).toBe(true); + const deleteVars = JSON.parse(fetchMock.mock.calls[1][1].body).variables; + expect(deleteVars).toEqual({ id: 'r-eyes' }); + const createVars = JSON.parse(fetchMock.mock.calls[2][1].body).variables; + expect(createVars).toEqual({ issueId: ISSUE_ID, emoji: 'white_check_mark' }); + }); + + test('target already present → deletes other bgagent markers, does NOT re-create', async () => { + fetchMock + .mockResolvedValueOnce(reactionsResp([ + { id: 'r-eyes', emoji: 'eyes' }, + { id: 'r-check', emoji: 'white_check_mark' }, + ])) + .mockResolvedValueOnce(jsonResponse({ data: { reactionDelete: { success: true } } })); // delete 👀 only + const ok = await swapIssueReaction(CTX, ISSUE_ID, 'white_check_mark'); + expect(ok).toBe(true); + // 1 query + 1 delete (the 👀); no create (✅ already there). 
+ expect(fetchMock).toHaveBeenCalledTimes(2); + expect(JSON.parse(fetchMock.mock.calls[1][1].body).variables).toEqual({ id: 'r-eyes' }); + }); + + test('never deletes a human (non-bgagent) reaction', async () => { + fetchMock + .mockResolvedValueOnce(reactionsResp([ + { id: 'r-eyes', emoji: 'eyes' }, + { id: 'r-tada', emoji: 'tada' }, // human reaction — must survive + ])) + .mockResolvedValueOnce(jsonResponse({ data: { reactionDelete: { success: true } } })) // delete 👀 + .mockResolvedValueOnce(jsonResponse({ data: { reactionCreate: { success: true } } })); // add ✅ + await swapIssueReaction(CTX, ISSUE_ID, 'white_check_mark'); + const deletedIds = fetchMock.mock.calls + .filter((c) => JSON.parse(c[1].body).query.includes('reactionDelete')) + .map((c) => JSON.parse(c[1].body).variables.id); + expect(deletedIds).toEqual(['r-eyes']); // only the bgagent marker, never r-tada + }); + + test('no existing markers → just adds the target', async () => { + fetchMock + .mockResolvedValueOnce(reactionsResp([])) + .mockResolvedValueOnce(jsonResponse({ data: { reactionCreate: { success: true } } })); + const ok = await swapIssueReaction(CTX, ISSUE_ID, 'eyes'); + expect(ok).toBe(true); + expect(fetchMock).toHaveBeenCalledTimes(2); // query + create, no deletes + }); + + test('no token → false, no fetch', async () => { + resolveLinearOauthTokenMock.mockResolvedValueOnce(null); + expect(await swapIssueReaction(CTX, ISSUE_ID, 'eyes')).toBe(false); + expect(fetchMock).not.toHaveBeenCalled(); + }); + }); + + describe('swapCommentReaction (#247 UX.21 — settle the trigger comment 👀→✅/❌)', () => { + const commentReactionsResp = (rs: Array<{ id: string; emoji: string }>) => + jsonResponse({ data: { comment: { reactions: rs } } }); + + test('👀 on the comment → deletes it and adds ✅ (on the COMMENT, not the issue)', async () => { + fetchMock + .mockResolvedValueOnce(commentReactionsResp([{ id: 'r-eyes', emoji: 'eyes' }])) + .mockResolvedValueOnce(jsonResponse({ data: { reactionDelete: { 
success: true } } })) + .mockResolvedValueOnce(jsonResponse({ data: { reactionCreate: { success: true } } })); + const ok = await swapCommentReaction(CTX, 'comment-77', 'white_check_mark'); + expect(ok).toBe(true); + // query targets the COMMENT + expect(JSON.parse(fetchMock.mock.calls[0][1].body).variables).toEqual({ commentId: 'comment-77' }); + // delete the stale 👀 + expect(JSON.parse(fetchMock.mock.calls[1][1].body).variables).toEqual({ id: 'r-eyes' }); + // create the ✅ via reactionCreate(commentId) + const createVars = JSON.parse(fetchMock.mock.calls[2][1].body).variables; + expect(createVars).toEqual({ commentId: 'comment-77', emoji: 'white_check_mark' }); + }); + + test('target already present → no re-create (idempotent under redelivery)', async () => { + fetchMock + .mockResolvedValueOnce(commentReactionsResp([ + { id: 'r-eyes', emoji: 'eyes' }, + { id: 'r-check', emoji: 'white_check_mark' }, + ])) + .mockResolvedValueOnce(jsonResponse({ data: { reactionDelete: { success: true } } })); + const ok = await swapCommentReaction(CTX, 'comment-77', 'white_check_mark'); + expect(ok).toBe(true); + expect(fetchMock).toHaveBeenCalledTimes(2); // query + delete 👀; ✅ already present + }); + + test('never deletes a human reaction on the comment', async () => { + fetchMock + .mockResolvedValueOnce(commentReactionsResp([ + { id: 'r-eyes', emoji: 'eyes' }, + { id: 'r-heart', emoji: 'heart' }, // human — must survive + ])) + .mockResolvedValueOnce(jsonResponse({ data: { reactionDelete: { success: true } } })) + .mockResolvedValueOnce(jsonResponse({ data: { reactionCreate: { success: true } } })); + await swapCommentReaction(CTX, 'comment-77', 'x'); + const deletedIds = fetchMock.mock.calls + .filter((c) => JSON.parse(c[1].body).query.includes('reactionDelete')) + .map((c) => JSON.parse(c[1].body).variables.id); + expect(deletedIds).toEqual(['r-eyes']); // never r-heart + }); + + test('no token → false, no fetch', async () => { + 
resolveLinearOauthTokenMock.mockResolvedValueOnce(null); + expect(await swapCommentReaction(CTX, 'comment-77', 'eyes')).toBe(false); + expect(fetchMock).not.toHaveBeenCalled(); + }); + }); + + describe('upsertStatusComment (#3 live status block)', () => { + test('no existing id → creates a comment and returns the new id', async () => { + fetchMock.mockResolvedValueOnce( + jsonResponse({ data: { commentCreate: { success: true, comment: { id: 'cmt-new' } } } }), + ); + const id = await upsertStatusComment(CTX, ISSUE_ID, 'body'); + expect(id).toBe('cmt-new'); + // create mutation carries issueId + body + const vars = JSON.parse(fetchMock.mock.calls[0][1].body).variables; + expect(vars).toEqual({ issueId: ISSUE_ID, body: 'body' }); + }); + + test('existing id → edits in place and returns the same id', async () => { + fetchMock.mockResolvedValueOnce(jsonResponse({ data: { commentUpdate: { success: true } } })); + const id = await upsertStatusComment(CTX, ISSUE_ID, 'new body', 'cmt-existing'); + expect(id).toBe('cmt-existing'); + const vars = JSON.parse(fetchMock.mock.calls[0][1].body).variables; + expect(vars).toEqual({ id: 'cmt-existing', body: 'new body' }); + }); + + test('create reporting success:false → null', async () => { + fetchMock.mockResolvedValueOnce(jsonResponse({ data: { commentCreate: { success: false } } })); + expect(await upsertStatusComment(CTX, ISSUE_ID, 'body')).toBeNull(); + }); + + test('update GraphQL failure → null (does not fabricate the id)', async () => { + fetchMock.mockResolvedValueOnce(jsonResponse({ errors: [{ message: 'not found' }] })); + expect(await upsertStatusComment(CTX, ISSUE_ID, 'body', 'cmt-x')).toBeNull(); + }); + + test('no token → null, no fetch', async () => { + resolveLinearOauthTokenMock.mockResolvedValueOnce(null); + expect(await upsertStatusComment(CTX, ISSUE_ID, 'body')).toBeNull(); + expect(fetchMock).not.toHaveBeenCalled(); + }); + }); + + describe('transitionIssueState', () => { + // Mirrors the real ABCA team's 
workflow states (by type + position). + const TEAM_STATES = [ + { id: 's-backlog', type: 'backlog', name: 'Backlog', position: 0 }, + { id: 's-todo', type: 'unstarted', name: 'Todo', position: 1 }, + { id: 's-inprogress', type: 'started', name: 'In Progress', position: 2 }, + { id: 's-inreview', type: 'started', name: 'In Review', position: 1002 }, + { id: 's-done', type: 'completed', name: 'Done', position: 3 }, + ]; + const statesResp = (current: { id: string; type: string; name: string; position: number }) => + jsonResponse({ data: { issue: { state: current, team: { states: { nodes: TEAM_STATES } } } } }); + const cur = (id: string) => TEAM_STATES.find((s) => s.id === id)!; + + test('Backlog → In Progress: picks the named started state, issues issueUpdate', async () => { + fetchMock + .mockResolvedValueOnce(statesResp(cur('s-backlog'))) // team-states query + .mockResolvedValueOnce(jsonResponse({ data: { issueUpdate: { success: true } } })); + const ok = await transitionIssueState(CTX, ISSUE_ID, 'started', ['In Progress']); + expect(ok).toBe(true); + // second call is the mutation with the resolved stateId + const mutationVars = JSON.parse(fetchMock.mock.calls[1][1].body).variables; + expect(mutationVars).toEqual({ issueId: ISSUE_ID, stateId: 's-inprogress' }); + }); + + test('In Progress → In Review: name preference wins over position among started states', async () => { + fetchMock + .mockResolvedValueOnce(statesResp(cur('s-inprogress'))) + .mockResolvedValueOnce(jsonResponse({ data: { issueUpdate: { success: true } } })); + const ok = await transitionIssueState(CTX, ISSUE_ID, 'started', ['In Review']); + expect(ok).toBe(true); + expect(JSON.parse(fetchMock.mock.calls[1][1].body).variables.stateId).toBe('s-inreview'); + }); + + test('already in target state → no mutation, returns false', async () => { + fetchMock.mockResolvedValueOnce(statesResp(cur('s-inreview'))); + const ok = await transitionIssueState(CTX, ISSUE_ID, 'started', ['In Review']); + 
expect(ok).toBe(false); + expect(fetchMock).toHaveBeenCalledTimes(1); // only the query, no mutation + }); + + test('never moves backward: Done (completed) is not demoted to In Review', async () => { + fetchMock.mockResolvedValueOnce(statesResp(cur('s-done'))); + const ok = await transitionIssueState(CTX, ISSUE_ID, 'started', ['In Review']); + expect(ok).toBe(false); + expect(fetchMock).toHaveBeenCalledTimes(1); + }); + + test('returns false when token cannot be resolved', async () => { + resolveLinearOauthTokenMock.mockResolvedValueOnce(null); + const ok = await transitionIssueState(CTX, ISSUE_ID, 'started', ['In Review']); + expect(ok).toBe(false); + expect(fetchMock).not.toHaveBeenCalled(); + }); + + test('returns false when the team has no state of the target type', async () => { + const noCompleted = TEAM_STATES.filter((s) => s.type !== 'completed'); + fetchMock.mockResolvedValueOnce( + jsonResponse({ data: { issue: { state: cur('s-inprogress'), team: { states: { nodes: noCompleted } } } } }), + ); + const ok = await transitionIssueState(CTX, ISSUE_ID, 'completed'); + expect(ok).toBe(false); + }); + }); }); diff --git a/cdk/test/handlers/shared/linear-issue-lookup.test.ts b/cdk/test/handlers/shared/linear-issue-lookup.test.ts index 47042fd3..3ca119b8 100644 --- a/cdk/test/handlers/shared/linear-issue-lookup.test.ts +++ b/cdk/test/handlers/shared/linear-issue-lookup.test.ts @@ -23,7 +23,10 @@ // regex's `lastIndex` reset behavior is exactly the kind of thing that // breaks silently between releases. (theagenticguy PR-241 review.) 
-import { extractLinearIdentifier } from '../../../src/handlers/shared/linear-issue-lookup'; +import { + extractLinearIdentifier, + extractLinearIdentifierFromBranch, +} from '../../../src/handlers/shared/linear-issue-lookup'; describe('extractLinearIdentifier', () => { test('returns null for null / undefined / empty input', () => { @@ -79,3 +82,31 @@ describe('extractLinearIdentifier', () => { expect(extractLinearIdentifier('fourth ABCA-1')).toBe('ABCA-1'); }); }); + +describe('extractLinearIdentifierFromBranch', () => { // branch name in → identifier such as 'ABCA-151', or null, out + test('pulls the canonical identifier from an ABCA task branch (lowercased slug)', () => { + // bgagent/{taskId}/{slug} where slug = slugify("ABCA-151: Add lisbon-guide.html") + expect( + extractLinearIdentifierFromBranch('bgagent/01KTSK8XGXHRMT0JX44GYRPJG7/abca-151-add-lisbon-guidehtml'), + ).toBe('ABCA-151'); // the lowercase 'abca-151' in the slug is expected back upper-cased + }); + + test('the ULID task-id segment does not false-match before the identifier', () => { + // The ULID has no dash, so it cannot produce a <KEY>-<n> match; the + // first real match is the issue identifier in the slug.
+ expect( + extractLinearIdentifierFromBranch('bgagent/01KTSKET9040HDJP3P2QE15DXC/abca-152-link-lisbon-from-destinationsht'), + ).toBe('ABCA-152'); + }); + + test('returns null for a branch with no identifier', () => { + expect(extractLinearIdentifierFromBranch('bgagent/01TASK/task')).toBeNull(); // bgagent-shaped branch whose slug has no <KEY>-<n> + expect(extractLinearIdentifierFromBranch('feature/some-thing')).toBeNull(); // contains a dash but no numeric suffix + }); + + test('returns null on null/undefined/empty', () => { + expect(extractLinearIdentifierFromBranch(null)).toBeNull(); + expect(extractLinearIdentifierFromBranch(undefined)).toBeNull(); + expect(extractLinearIdentifierFromBranch('')).toBeNull(); + }); +}); diff --git a/cdk/test/handlers/shared/linear-subissue-fetch.test.ts b/cdk/test/handlers/shared/linear-subissue-fetch.test.ts new file mode 100644 index 00000000..f2e949ef --- /dev/null +++ b/cdk/test/handlers/shared/linear-subissue-fetch.test.ts @@ -0,0 +1,175 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +import { fetchSubIssueGraph } from '../../../src/handlers/shared/linear-subissue-fetch'; + +/** Build a mock fetch returning a given JSON body + ok/status. */ +function mockFetch(body: unknown, init: { ok?: boolean; status?: number } = {}): typeof fetch { + return (async () => ({ + ok: init.ok ?? true, + status: init.status ?? 200, + json: async () => body, // handed back as-is, never serialized + })) as unknown as typeof fetch; +} + +/** Shape a Linear `issue.children` GraphQL response. */ +function graphResponse(children: Array<{ + id: string; + identifier?: string; + title?: string; + blockedBy?: string[]; // ids that block this child (inverseRelations type "blocks") +}>) { + return { + data: { + issue: { + id: 'PARENT', + children: { + nodes: children.map((c) => ({ + id: c.id, + identifier: c.identifier, + title: c.title, + inverseRelations: { + nodes: (c.blockedBy ?? []).map((bid) => ({ type: 'blocks', issue: { id: bid } })), + }, + })), + }, + }, + }, + }; +} + +describe('fetchSubIssueGraph — success shapes', () => { + test('maps children and blockedBy edges into depends_on', async () => { + const fetchImpl = mockFetch(graphResponse([ + { id: 'A', identifier: 'ENG-1', title: 'Root' }, + { id: 'B', identifier: 'ENG-2', title: 'Blocked by A', blockedBy: ['A'] }, + ])); + const result = await fetchSubIssueGraph('tok', 'PARENT', { fetchImpl }); + expect(result.kind).toBe('ok'); + if (result.kind === 'ok') { + expect(result.parentIssueId).toBe('PARENT'); + expect(result.children).toEqual([ + { id: 'A', identifier: 'ENG-1', title: 'Root', depends_on: [] }, + { id: 'B', identifier: 'ENG-2', title: 'Blocked by A', depends_on: ['A'] }, + ]); + } + }); + + test('drops blockedBy edges that point outside the child set', async () => { + // C is blocked by GHOST (not a sibling) — edge dropped.
+ const fetchImpl = mockFetch(graphResponse([ + { id: 'C', blockedBy: ['GHOST'] }, + ])); + const result = await fetchSubIssueGraph('tok', 'PARENT', { fetchImpl }); + expect(result).toMatchObject({ kind: 'ok', children: [{ id: 'C', depends_on: [] }] }); // unconditional: an error result now fails the test instead of skipping the check + }); + + test('ignores relation types other than "blocks"', async () => { + const fetchImpl = mockFetch({ + data: { + issue: { + id: 'PARENT', + children: { + nodes: [ + { id: 'A' }, + { + id: 'B', + inverseRelations: { + nodes: [ + { type: 'related', issue: { id: 'A' } }, // not a blocker + { type: 'duplicate', issue: { id: 'A' } }, + ], + }, + }, + ], + }, + }, + }, + }); + const result = await fetchSubIssueGraph('tok', 'PARENT', { fetchImpl }); + expect(result).toMatchObject({ kind: 'ok', children: [{ id: 'A' }, { id: 'B', depends_on: [] }] }); // kind is asserted together with B's empty depends_on + }); + + test('dedups duplicate blocker edges', async () => { + const fetchImpl = mockFetch({ + data: { + issue: { + id: 'PARENT', + children: { + nodes: [ + { id: 'A' }, + { + id: 'B', + inverseRelations: { + nodes: [ + { type: 'blocks', issue: { id: 'A' } }, + { type: 'blocks', issue: { id: 'A' } }, + ], + }, + }, + ], + }, + }, + }, + }); + const result = await fetchSubIssueGraph('tok', 'PARENT', { fetchImpl }); + expect(result).toMatchObject({ kind: 'ok', children: [{ id: 'A' }, { id: 'B', depends_on: ['A'] }] }); // nested arrays must match in length, so a doubled 'A' would fail + }); + + test('ignores a self-blocking edge from the raw payload', async () => { + const fetchImpl = mockFetch(graphResponse([{ id: 'A', blockedBy: ['A'] }])); + const result = await fetchSubIssueGraph('tok', 'PARENT', { fetchImpl }); + expect(result).toMatchObject({ kind: 'ok', children: [{ id: 'A', depends_on: [] }] }); // kind is asserted too, so a non-ok result cannot pass silently + }); +}); + +describe('fetchSubIssueGraph — no children', () => { + test('returns no_children when the issue has an empty children set', async () => { + const fetchImpl = mockFetch(graphResponse([])); + const result = await fetchSubIssueGraph('tok', 'PARENT', { fetchImpl }); + expect(result.kind).toBe('no_children'); + if (result.kind === 'no_children') expect(result.parentIssueId).toBe('PARENT'); +
}); +}); + +describe('fetchSubIssueGraph — error shapes', () => { + test('non-2xx → error', async () => { + const fetchImpl = mockFetch({}, { ok: false, status: 503 }); + const result = await fetchSubIssueGraph('tok', 'PARENT', { fetchImpl }); + expect(result.kind).toBe('error'); + }); + + test('GraphQL errors → error', async () => { + const fetchImpl = mockFetch({ errors: [{ message: 'boom' }] }); + const result = await fetchSubIssueGraph('tok', 'PARENT', { fetchImpl }); + expect(result.kind).toBe('error'); + }); + + test('network throw → error (never throws)', async () => { + const fetchImpl = (async () => { throw new Error('ECONNRESET'); }) as unknown as typeof fetch; + const result = await fetchSubIssueGraph('tok', 'PARENT', { fetchImpl }); + expect(result.kind).toBe('error'); + }); + + test('missing issue in payload → error', async () => { + const fetchImpl = mockFetch({ data: { issue: null } }); + const result = await fetchSubIssueGraph('tok', 'PARENT', { fetchImpl }); + expect(result.kind).toBe('error'); + }); +}); diff --git a/cdk/test/handlers/shared/linear-task-by-issue.test.ts b/cdk/test/handlers/shared/linear-task-by-issue.test.ts new file mode 100644 index 00000000..69bdceed --- /dev/null +++ b/cdk/test/handlers/shared/linear-task-by-issue.test.ts @@ -0,0 +1,81 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import { + prNumberFromTask, + resolveTaskByLinearIssue, +} from '../../../src/handlers/shared/linear-task-by-issue'; + +jest.mock('../../../src/handlers/shared/logger', () => ({ + logger: { info: jest.fn(), warn: jest.fn(), error: jest.fn() }, +})); + +describe('resolveTaskByLinearIssue', () => { + const send = jest.fn(); + const ddb = { send } as never; // minimal stand-in client: only send() is stubbed + + beforeEach(() => send.mockReset()); // clear recorded calls and queued one-shot results between tests + + test('queries LinearIssueIndex descending (newest task) and maps the row', async () => { + send.mockResolvedValueOnce({ + Items: [{ task_id: 'T9', user_id: 'u1', repo: 'o/r', pr_number: 42, status: 'COMPLETED' }], + }); + + const task = await resolveTaskByLinearIssue(ddb, 'TaskTable', 'issue-uuid'); + + expect(task).toEqual({ task_id: 'T9', user_id: 'u1', repo: 'o/r', pr_number: 42, status: 'COMPLETED' }); + const input = send.mock.calls[0][0].input; // first argument of the first send() call; presumably the query command object — confirm against the handler + expect(input.IndexName).toBe('LinearIssueIndex'); + expect(input.KeyConditionExpression).toContain('linear_issue_id'); + expect(input.ExpressionAttributeValues[':iid']).toBe('issue-uuid'); + expect(input.ScanIndexForward).toBe(false); // newest first + expect(input.Limit).toBe(1); + }); + + test('GSI miss (no rows) → null', async () => { + send.mockResolvedValueOnce({ Items: [] }); + expect(await resolveTaskByLinearIssue(ddb, 'TaskTable', 'x')).toBeNull(); + }); + + test('query error → null (swallowed, treated as non-ABCA issue)', async () => { + send.mockRejectedValueOnce(new Error('AccessDenied')); + expect(await resolveTaskByLinearIssue(ddb, 'TaskTable', 'x')).toBeNull(); + }); + + test('omits absent optional fields', async () => { + send.mockResolvedValueOnce({ Items: [{ task_id: 'T1' }] }); + const task = await resolveTaskByLinearIssue(ddb,
'TaskTable', 'x'); + expect(task).toEqual({ task_id: 'T1' }); + }); +}); + +describe('prNumberFromTask', () => { + test('prefers numeric pr_number', () => { + expect(prNumberFromTask({ task_id: 'T', pr_number: 7, pr_url: 'https://github.com/o/r/pull/9' })).toBe(7); // pr_number (7) wins over the 9 in pr_url + }); + + test('falls back to parsing pr_url', () => { + expect(prNumberFromTask({ task_id: 'T', pr_url: 'https://github.com/o/r/pull/123' })).toBe(123); + }); + + test('null when neither yields a number', () => { + expect(prNumberFromTask({ task_id: 'T' })).toBeNull(); + expect(prNumberFromTask({ task_id: 'T', pr_url: 'https://github.com/o/r/tree/main' })).toBeNull(); // URL has no /pull/<n> segment + }); +}); diff --git a/cdk/test/handlers/shared/orchestration-base-branch.test.ts b/cdk/test/handlers/shared/orchestration-base-branch.test.ts new file mode 100644 index 00000000..58cc75b0 --- /dev/null +++ b/cdk/test/handlers/shared/orchestration-base-branch.test.ts @@ -0,0 +1,84 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +import { selectBaseBranch } from '../../../src/handlers/shared/orchestration-base-branch'; + +const pred = (sub_issue_id: string, branch_name: string) => ({ sub_issue_id, branch_name }); // shorthand for one predecessor entry + +describe('selectBaseBranch', () => { + test('root (no predecessors) → default branch, no merges', () => { + expect(selectBaseBranch({ predecessors: [] })).toEqual({ // defaultBranch omitted; 'main' is expected below + base_branch: 'main', merge_branches: [], shape: 'root', + }); + }); + + test('respects a custom default branch for roots', () => { + expect(selectBaseBranch({ predecessors: [], defaultBranch: 'develop' }).base_branch).toBe('develop'); + }); + + test('linear (1 predecessor) → stack on its branch, no merges', () => { + const sel = selectBaseBranch({ predecessors: [pred('A', 'bgagent/taskA/step-a')] }); + expect(sel).toEqual({ + base_branch: 'bgagent/taskA/step-a', merge_branches: [], shape: 'linear', + }); + }); + + test('diamond (2 predecessors) → base main + merge both branches', () => { + const sel = selectBaseBranch({ + predecessors: [pred('B', 'bgagent/taskB/b'), pred('C', 'bgagent/taskC/c')], + }); + expect(sel.shape).toBe('diamond'); + expect(sel.base_branch).toBe('main'); + expect(sel.merge_branches).toEqual(['bgagent/taskB/b', 'bgagent/taskC/c']); + }); + + test('diamond merge list is deduped and sorted (deterministic)', () => { + const sel = selectBaseBranch({ + predecessors: [pred('C', 'z-branch'), pred('B', 'a-branch'), pred('D', 'a-branch')], // B and D share 'a-branch'; input lists 'z-branch' first + }); + expect(sel.merge_branches).toEqual(['a-branch', 'z-branch']); + }); + + test('diamond uses default branch as base, not a predecessor', () => { + const sel = selectBaseBranch({ + predecessors: [pred('B', 'feat-b'), pred('C', 'feat-c')], defaultBranch: 'trunk', + }); + expect(sel.base_branch).toBe('trunk'); + }); + + test('predecessors missing a branch_name are ignored', () => { + // One real predecessor branch + one empty → degrades to linear on the real one.
+ const sel = selectBaseBranch({ predecessors: [pred('A', 'feat-a'), pred('B', '')] }); + expect(sel.shape).toBe('linear'); + expect(sel.base_branch).toBe('feat-a'); + }); + + test('all predecessors missing branches → degrade to root (never invalid base)', () => { + const sel = selectBaseBranch({ predecessors: [pred('A', ''), pred('B', '')] }); + expect(sel).toEqual({ base_branch: 'main', merge_branches: [], shape: 'root' }); // same result as the no-predecessor case + }); + + test('two predecessors that share a branch collapse to a single (linear) merge', () => { + // After dedup, only one distinct branch → treated as linear, not diamond. + const sel = selectBaseBranch({ predecessors: [pred('A', 'same'), pred('B', 'same')] }); + expect(sel.shape).toBe('linear'); + expect(sel.base_branch).toBe('same'); + expect(sel.merge_branches).toEqual([]); + }); +}); diff --git a/cdk/test/handlers/shared/orchestration-comment-trigger.test.ts b/cdk/test/handlers/shared/orchestration-comment-trigger.test.ts new file mode 100644 index 00000000..b26dd9a5 --- /dev/null +++ b/cdk/test/handlers/shared/orchestration-comment-trigger.test.ts @@ -0,0 +1,125 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import { + buildIterationInstruction, + isBotAuthoredComment, + parseCommentTrigger, +} from '../../../src/handlers/shared/orchestration-comment-trigger'; + +describe('parseCommentTrigger', () => { + test('mention with instruction → triggered, instruction stripped + trimmed', () => { + const t = parseCommentTrigger('@bgagent the session timeout should be 30 min, not 60'); + expect(t.triggered).toBe(true); + expect(t.instruction).toBe('the session timeout should be 30 min, not 60'); + }); + + test('mention mid-sentence still triggers', () => { + const t = parseCommentTrigger('Hey @bgagent please add a dark-mode toggle'); + expect(t.triggered).toBe(true); + expect(t.instruction).toBe('Hey please add a dark-mode toggle'); // mention removed, one space left between its neighbours + }); + + test('case-insensitive', () => { + expect(parseCommentTrigger('@BgAgent fix it').triggered).toBe(true); + }); + + test('bare mention with no text → triggered with empty instruction', () => { + const t = parseCommentTrigger('@bgagent'); + expect(t.triggered).toBe(true); + expect(t.instruction).toBe(''); + }); + + test('no mention → not triggered (ordinary human discussion)', () => { + expect(parseCommentTrigger('I think this looks good, merging soon').triggered).toBe(false); + }); + + test('empty / null / undefined body → not triggered', () => { + expect(parseCommentTrigger('').triggered).toBe(false); + expect(parseCommentTrigger(null).triggered).toBe(false); + expect(parseCommentTrigger(undefined).triggered).toBe(false); + }); + + test('the agent\'s own progress comment (no mention) never triggers', () => { + expect(parseCommentTrigger('🤖 Starting on the task — cloning repo now.').triggered).toBe(false); + expect(parseCommentTrigger('✅ PR opened:
https://github.com/o/r/pull/5').triggered).toBe(false); + }); + + test('token boundary: @bgagentbot and email-like do NOT trigger', () => { + expect(parseCommentTrigger('ping @bgagentbot for help').triggered).toBe(false); + expect(parseCommentTrigger('email me at foo@bgagent.io').triggered).toBe(false); + }); + + test('multiple mentions are all stripped', () => { + const t = parseCommentTrigger('@bgagent do X and @bgagent also Y'); + expect(t.triggered).toBe(true); + expect(t.instruction).toBe('do X and also Y'); // both mentions removed + }); + + // #247 UX.20 — the self-trigger infinite loop. The bot's OWN comments must + // never re-trigger it, even when (esp. when) they contain a literal @bgagent. + describe('self-comment guard (#247 UX.20 loop prevention)', () => { + test('the disambiguation reply does NOT trigger, even though it embeds "@bgagent ABCA-123:"', () => { + // This EXACT body spammed ~50 replies live: it starts with 👋 and contains + // a literal @bgagent example, which the old regex re-matched → loop. + const body = '👋 I couldn\'t tell which sub-issue that\'s about.\n\nOtherwise, comment on the ' + 'specific sub-issue, or name it here — e.g. `@bgagent ABCA-123: <what to change>`.
The sub-issues are:'; + expect(parseCommentTrigger(body).triggered).toBe(false); + expect(isBotAuthoredComment(body)).toBe(true); + }); + + test('all bot template prefixes are recognized as bot-authored (never trigger)', () => { + for (const body of [ // every sample starts with an emoji marker; none contains an @bgagent mention + '👋 That could apply to more than one sub-issue…', + '✅ Updated — PR #193.', + '✅ **ABCA orchestration complete**', + '❌ I made the change, but the build/tests didn\'t pass.', + '⚠️ **ABCA orchestration finished with failures**', + '🔄 **ABCA orchestration** · 1/3 complete', + '🤖 Starting on this issue…', + '🖼️ **Preview screenshot**', + '🔗 PR opened: https://github.com/o/r/pull/9', + ]) { + expect(isBotAuthoredComment(body)).toBe(true); + expect(parseCommentTrigger(body).triggered).toBe(false); + } + }); + + test('a genuine human @bgagent comment is NOT misclassified as bot-authored', () => { + expect(isBotAuthoredComment('@bgagent for the footer change the tagline')).toBe(false); + expect(parseCommentTrigger('@bgagent for the footer change the tagline').triggered).toBe(true); + }); + + test('leading whitespace before a bot marker is still caught', () => { + expect(isBotAuthoredComment(' \n✅ Updated — PR #193.')).toBe(true); // marker preceded by a space and a newline + }); + }); +}); + +describe('buildIterationInstruction', () => { + test('uses the comment instruction when present', () => { + expect(buildIterationInstruction({ triggered: true, instruction: 'make the header sticky' })) + .toBe('make the header sticky'); + }); + + test('falls back to a generic directive for a bare mention', () => { + expect(buildIterationInstruction({ triggered: true, instruction: '' })) + .toMatch(/latest review feedback/i); // only this phrase is pinned, not the full wording + }); +}); diff --git a/cdk/test/handlers/shared/orchestration-dag.test.ts b/cdk/test/handlers/shared/orchestration-dag.test.ts new file mode 100644 index 00000000..4ef2ec4e --- /dev/null +++ b/cdk/test/handlers/shared/orchestration-dag.test.ts @@ -0,0 +1,148 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates.
All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import { validateDag, topologicalOrder, type DagNode } from '../../../src/handlers/shared/orchestration-dag'; + +const node = (id: string, ...depends_on: string[]): DagNode => ({ id, depends_on }); // shorthand: node('B', 'A') means B depends on A + +describe('validateDag — valid graphs', () => { + test('empty graph is valid with no layers', () => { + const result = validateDag([]); + expect(result).toEqual({ ok: true, layers: [] }); + }); + + test('single root node → one layer', () => { + const result = validateDag([node('A')]); + expect(result).toEqual({ ok: true, layers: [['A']] }); + }); + + test('independent siblings all land in layer 0', () => { + const result = validateDag([node('A'), node('B'), node('C')]); + expect(result.ok).toBe(true); + if (result.ok) expect(result.layers).toEqual([['A', 'B', 'C']]); + }); + + test('linear chain A→B→C produces three single-node layers', () => { + // B depends on A, C depends on B.
+ const result = validateDag([node('C', 'B'), node('B', 'A'), node('A')]); + expect(result.ok).toBe(true); + if (result.ok) expect(result.layers).toEqual([['A'], ['B'], ['C']]); + }); + + test('diamond A→{B,C}→D layers B and C together, D last', () => { + const result = validateDag([ + node('A'), + node('B', 'A'), + node('C', 'A'), + node('D', 'B', 'C'), + ]); + expect(result.ok).toBe(true); + if (result.ok) expect(result.layers).toEqual([['A'], ['B', 'C'], ['D']]); + }); + + test('layers are sorted for deterministic output', () => { + const result = validateDag([node('z'), node('a'), node('m')]); + expect(result).toEqual({ ok: true, layers: [['a', 'm', 'z']] }); // unconditional: a rejected graph now fails instead of skipping the check + }); + + test('tolerates a duplicated edge to the same predecessor', () => { + // depends_on lists A twice — should not double-count in-degree. + const result = validateDag([node('A'), { id: 'B', depends_on: ['A', 'A'] }]); + expect(result.ok).toBe(true); + if (result.ok) expect(result.layers).toEqual([['A'], ['B']]); + }); +}); + +describe('validateDag — rejected graphs', () => { + test('self-loop is a cycle', () => { + const result = validateDag([node('A', 'A')]); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.reason).toBe('cycle'); + expect(result.offendingIds).toEqual(['A']); + } + }); + + test('two-node cycle A↔B', () => { + const result = validateDag([node('A', 'B'), node('B', 'A')]); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.reason).toBe('cycle'); + expect(result.offendingIds).toEqual(['A', 'B']); + } + }); + + test('cycle is reported even when valid roots exist', () => { + // R is a clean root; X→Y→Z→X is a cycle hanging off nothing.
+ const result = validateDag([ + node('R'), + node('X', 'Z'), + node('Y', 'X'), + node('Z', 'Y'), + ]); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.reason).toBe('cycle'); + expect(result.offendingIds).toEqual(['X', 'Y', 'Z']); + } + }); + + test('dangling edge → depends_on points outside the node set', () => { + const result = validateDag([node('A'), node('B', 'GHOST')]); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.reason).toBe('dangling_edge'); + expect(result.offendingIds).toEqual(['B']); + } + }); + + test('duplicate id', () => { + const result = validateDag([node('A'), node('A')]); + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.reason).toBe('duplicate_id'); + expect(result.offendingIds).toEqual(['A']); + } + }); + + test('duplicate-id check precedes dangling/cycle checks', () => { + // Duplicate A plus a dangling edge — duplicate wins (checked first). + const result = validateDag([node('A'), node('A', 'GHOST')]); + expect(result).toMatchObject({ ok: false, reason: 'duplicate_id' }); // ok:false is asserted too, so an accepted graph fails the test + }); + + test('rejection carries a user-facing message', () => { + const result = validateDag([node('A', 'B'), node('B', 'A')]); + expect(result).toMatchObject({ + ok: false, + message: expect.stringMatching(/cycle/i), // a match implies a non-empty message + }); + }); +}); + +describe('topologicalOrder', () => { + test('returns a flat valid order for an accepted graph', () => { + const order = topologicalOrder([node('C', 'B'), node('B', 'A'), node('A')]); + expect(order).toEqual(['A', 'B', 'C']); + }); + + test('throws on an invalid graph', () => { + expect(() => topologicalOrder([node('A', 'B'), node('B', 'A')])).toThrow(/invalid dependency graph/i); + }); +}); diff --git a/cdk/test/handlers/shared/orchestration-discovery.test.ts b/cdk/test/handlers/shared/orchestration-discovery.test.ts new file mode 100644 index 00000000..8f3fb234 --- /dev/null +++ b/cdk/test/handlers/shared/orchestration-discovery.test.ts @@ -0,0 +1,279 @@ +/** +
* MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import { discoverOrchestration } from '../../../src/handlers/shared/orchestration-discovery'; +import { declarativeGraphSource } from '../../../src/handlers/shared/orchestration-graph-source'; + +jest.mock('../../../src/handlers/shared/logger', () => ({ + logger: { info: jest.fn(), warn: jest.fn(), error: jest.fn() }, +})); + +/** Mock fetch returning a Linear children payload. */ +function mockFetch(children: Array<{ id: string; blockedBy?: string[] }>): typeof fetch { + return (async () => ({ + ok: true, + status: 200, + json: async () => ({ + data: { + issue: { + id: 'PARENT', + children: { + nodes: children.map((c) => ({ + id: c.id, + inverseRelations: { nodes: (c.blockedBy ?? 
[]).map((b) => ({ type: 'blocks', issue: { id: b } })) }, + })), + }, + }, + }, + }), + })) as unknown as typeof fetch; +} + +function errorFetch(): typeof fetch { + return (async () => ({ ok: false, status: 500, json: async () => ({}) })) as unknown as typeof fetch; +} + +function emptyFetch(): typeof fetch { + return (async () => ({ + ok: true, + status: 200, + json: async () => ({ data: { issue: { id: 'PARENT', children: { nodes: [] } } } }), + })) as unknown as typeof fetch; +} + +const base = { + tableName: 'OrchestrationTable', + accessToken: 'tok', + parentLinearIssueId: 'PARENT', + linearWorkspaceId: 'WS', + repo: 'o/r', + now: '2026-06-09T12:00:00.000Z', + releaseContext: { platform_user_id: 'platform-user-1' }, +}; + +describe('discoverOrchestration', () => { + test('no sub-issues → single_task', async () => { + const ddb = { send: jest.fn() }; + const result = await discoverOrchestration({ + ...base, ddb: ddb as never, fetchOptions: { fetchImpl: emptyFetch() }, + }); + expect(result.kind).toBe('single_task'); + expect(ddb.send).not.toHaveBeenCalled(); // never touches the table + }); + + test('valid DAG → seeded with roots from layer 0', async () => { + const ddb = { send: jest.fn().mockResolvedValueOnce({ Item: undefined }).mockResolvedValueOnce({}) }; + const result = await discoverOrchestration({ + ...base, + ddb: ddb as never, + fetchOptions: { fetchImpl: mockFetch([{ id: 'A' }, { id: 'B', blockedBy: ['A'] }]) }, + }); + expect(result.kind).toBe('seeded'); + if (result.kind === 'seeded') { + expect(result.childCount).toBe(2); + expect(result.rootSubIssueIds).toEqual(['A']); + expect(result.alreadyExisted).toBe(false); + } + }); + + test('cycle → rejected, nothing persisted', async () => { + const ddb = { send: jest.fn() }; + const result = await discoverOrchestration({ + ...base, + ddb: ddb as never, + fetchOptions: { fetchImpl: mockFetch([{ id: 'A', blockedBy: ['B'] }, { id: 'B', blockedBy: ['A'] }]) }, + }); + expect(result.kind).toBe('rejected'); 
+ if (result.kind === 'rejected') { + expect(result.reason).toBe('cycle'); + expect(result.message).toMatch(/cycle/i); + } + expect(ddb.send).not.toHaveBeenCalled(); + }); + + test('Linear fetch error → error (does NOT fall back to single_task)', async () => { + const ddb = { send: jest.fn() }; + const result = await discoverOrchestration({ + ...base, ddb: ddb as never, fetchOptions: { fetchImpl: errorFetch() }, + }); + expect(result.kind).toBe('error'); + expect(ddb.send).not.toHaveBeenCalled(); + }); + + test('persistence throw → error', async () => { + const ddb = { send: jest.fn().mockResolvedValueOnce({ Item: undefined }).mockRejectedValueOnce(new Error('DDB down')) }; + const result = await discoverOrchestration({ + ...base, + ddb: ddb as never, + fetchOptions: { fetchImpl: mockFetch([{ id: 'A' }]) }, + }); + expect(result.kind).toBe('error'); + }); + + test('re-trigger of an existing epic with the SAME graph → extended, no new nodes', async () => { + // seedOrchestration's GetCommand sees the meta row (already seeded) → + // alreadyExisted, so discovery routes to extendOrchestration. extend's own + // loadOrchestration Query returns the existing children (A, B); the fetched + // graph is identical → no new nodes → extended with empty addedSubIssueIds. 
+ const orchId = '#meta'; + const ddb = { + send: jest.fn() + // 1) seedOrchestration GetCommand → meta exists + .mockResolvedValueOnce({ Item: { sub_issue_id: orchId } }) + // 2) extendOrchestration loadOrchestration Query → meta + A + B + .mockResolvedValueOnce({ + Items: [ + { sub_issue_id: '#meta', orchestration_id: 'orch_x', parent_linear_issue_id: 'P', linear_workspace_id: 'WS', repo: 'o/r', platform_user_id: 'u1' }, + { sub_issue_id: 'A', orchestration_id: 'orch_x', depends_on: [], child_status: 'succeeded', parent_linear_issue_id: 'P', linear_workspace_id: 'WS', repo: 'o/r' }, + { sub_issue_id: 'B', orchestration_id: 'orch_x', depends_on: ['A'], child_status: 'succeeded', parent_linear_issue_id: 'P', linear_workspace_id: 'WS', repo: 'o/r' }, + ], + }), + }; + const result = await discoverOrchestration({ + ...base, + ddb: ddb as never, + fetchOptions: { fetchImpl: mockFetch([{ id: 'A' }, { id: 'B', blockedBy: ['A'] }]) }, + }); + expect(result.kind).toBe('extended'); + if (result.kind === 'extended') expect(result.addedSubIssueIds).toEqual([]); + }); + + test('re-trigger with a NEW sub-issue → extended, adds the new node', async () => { + const ddb = { + send: jest.fn() + .mockResolvedValueOnce({ Item: { sub_issue_id: '#meta' } }) // seed: meta exists + .mockResolvedValueOnce({ + Items: [ // extend load: A + B exist + { sub_issue_id: '#meta', orchestration_id: 'orch_x', parent_linear_issue_id: 'P', linear_workspace_id: 'WS', repo: 'o/r', platform_user_id: 'u1' }, + { sub_issue_id: 'A', orchestration_id: 'orch_x', depends_on: [], child_status: 'succeeded', parent_linear_issue_id: 'P', linear_workspace_id: 'WS', repo: 'o/r' }, + { sub_issue_id: 'B', orchestration_id: 'orch_x', depends_on: ['A'], child_status: 'succeeded', parent_linear_issue_id: 'P', linear_workspace_id: 'WS', repo: 'o/r' }, + ], + }) + .mockResolvedValueOnce({}) // BatchWrite new rows + .mockResolvedValueOnce({}), // Update meta child_count + }; + // Fetched graph adds C (depends on the 
finished B) → C is new + releasable. + const result = await discoverOrchestration({ + ...base, + ddb: ddb as never, + fetchOptions: { fetchImpl: mockFetch([{ id: 'A' }, { id: 'B', blockedBy: ['A'] }, { id: 'C', blockedBy: ['B'] }]) }, + }); + expect(result.kind).toBe('extended'); + if (result.kind === 'extended') { + expect(result.addedSubIssueIds).toEqual(['C']); + expect(result.releasableSubIssueIds).toEqual(['C']); // B already succeeded + } + }); + + // #247/#299 trigger-agnostic seam: a custom graphSource (declarative / + // planner) drives the SAME validate→seed→reconcile pipeline, bypassing the + // Linear fetch entirely. + describe('graphSource seam (non-Linear)', () => { + test('declarative graph → seeded; never touches the Linear fetch', async () => { + const ddb = { send: jest.fn().mockResolvedValueOnce({ Item: undefined }).mockResolvedValueOnce({}) }; + // No fetchOptions/fetchImpl — if discovery hit the Linear fetch it would throw on the real network. + const result = await discoverOrchestration({ + ...base, + ddb: ddb as never, + graphSource: declarativeGraphSource([ + { id: 'phase-1', depends_on: [], title: 'Plan' }, + { id: 'phase-2', depends_on: ['phase-1'], title: 'Build' }, + { id: 'phase-3', depends_on: ['phase-2'], title: 'Verify' }, + ]), + }); + expect(result.kind).toBe('seeded'); + if (result.kind === 'seeded') { + expect(result.childCount).toBe(3); + expect(result.rootSubIssueIds).toEqual(['phase-1']); // layer 0 + } + }); + + test('declarative graph still rejects an invalid DAG (cycle)', async () => { + const ddb = { send: jest.fn() }; + const result = await discoverOrchestration({ + ...base, + ddb: ddb as never, + graphSource: declarativeGraphSource([ + { id: 'x', depends_on: ['y'] }, + { id: 'y', depends_on: ['x'] }, + ]), + }); + expect(result.kind).toBe('rejected'); + expect(ddb.send).not.toHaveBeenCalled(); + }); + + test('empty declarative graph → single_task', async () => { + const ddb = { send: jest.fn() }; + const result = await 
discoverOrchestration({ + ...base, ddb: ddb as never, graphSource: declarativeGraphSource([]), + }); + expect(result.kind).toBe('single_task'); + expect(ddb.send).not.toHaveBeenCalled(); + }); + }); + + // #16: auto-integration node for fan-out. + describe('fan-out integration node (#16)', () => { + test('pure fan-out (A→B, A→C) → seeds a synthetic integration node over the leaves', async () => { + const ddb = { send: jest.fn().mockResolvedValueOnce({ Item: undefined }).mockResolvedValueOnce({}) }; + const result = await discoverOrchestration({ + ...base, + ddb: ddb as never, + // two leaves B, C both depending on A + fetchOptions: { fetchImpl: mockFetch([{ id: 'A' }, { id: 'B', blockedBy: ['A'] }, { id: 'C', blockedBy: ['A'] }]) }, + }); + expect(result.kind).toBe('seeded'); + if (result.kind === 'seeded') { + // A, B, C + 1 synthetic integration node + expect(result.childCount).toBe(4); + expect(result.rootSubIssueIds).toEqual(['A']); // integration node is NOT a root + } + // The BatchWrite (2nd ddb call) includes the synthetic node depending on B + C. 
+ const puts = ddb.send.mock.calls[1][0].input.RequestItems[Object.keys(ddb.send.mock.calls[1][0].input.RequestItems)[0]] as Array<{ PutRequest: { Item: Record<string, unknown> } }>; + const integ = puts.map((p) => p.PutRequest.Item).find((i) => String(i.sub_issue_id).endsWith('__integration')); + expect(integ).toBeDefined(); + expect([...(integ!.depends_on as string[])].sort()).toEqual(['B', 'C']); + }); + + test('linear chain → NO integration node added', async () => { + const ddb = { send: jest.fn().mockResolvedValueOnce({ Item: undefined }).mockResolvedValueOnce({}) }; + const result = await discoverOrchestration({ + ...base, + ddb: ddb as never, + fetchOptions: { fetchImpl: mockFetch([{ id: 'A' }, { id: 'B', blockedBy: ['A'] }]) }, + }); + expect(result.kind).toBe('seeded'); + if (result.kind === 'seeded') expect(result.childCount).toBe(2); // no synthetic node + }); + + test('declarative fan-out also gets an integration node', async () => { + const ddb = { send: jest.fn().mockResolvedValueOnce({ Item: undefined }).mockResolvedValueOnce({}) }; + const result = await discoverOrchestration({ + ...base, + ddb: ddb as never, + graphSource: declarativeGraphSource([ + { id: 'x', depends_on: [] }, + { id: 'y', depends_on: [] }, + ]), + }); + expect(result.kind).toBe('seeded'); + if (result.kind === 'seeded') expect(result.childCount).toBe(3); // x, y + integration + }); + }); +}); diff --git a/cdk/test/handlers/shared/orchestration-epic-tip.test.ts b/cdk/test/handlers/shared/orchestration-epic-tip.test.ts new file mode 100644 index 00000000..87625143 --- /dev/null +++ b/cdk/test/handlers/shared/orchestration-epic-tip.test.ts @@ -0,0 +1,66 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import { resolveEpicTip, type TipCandidate } from '../../../src/handlers/shared/orchestration-epic-tip'; + +const node = (id: string, depends_on: string[] = [], created_at = '2026-01-01'): TipCandidate => + ({ sub_issue_id: id, depends_on, created_at }); + +describe('resolveEpicTip (#247 UX.4 — where a new unconstrained node stacks)', () => { + test('empty epic → no tip (degrade to root/main)', () => { + expect(resolveEpicTip([])).toEqual([]); + }); + + test('linear chain A→B→C → tip is the single leaf C', () => { + const epic = [node('A'), node('B', ['A']), node('C', ['B'])]; + expect(resolveEpicTip(epic)).toEqual(['C']); + }); + + test('single node epic → that node is the tip', () => { + expect(resolveEpicTip([node('A')])).toEqual(['A']); + }); + + test('fan-out (two independent leaves) → diamond: both leaves, sorted', () => { + // root R; B and C both depend on R, nothing depends on B or C. 
+ const epic = [node('R'), node('B', ['R']), node('C', ['R'])]; + expect(resolveEpicTip(epic)).toEqual(['B', 'C']); + }); + + test('integration node present → it IS the combined tip (stack on it alone, no redundant diamond)', () => { + // A and B are leaves; the integration node depends on both, so it is the + // single most-downstream node. A new node stacks on integration only. + const epic = [ + node('A'), + node('B'), + node('orch_x__integration', ['A', 'B']), + ]; + expect(resolveEpicTip(epic)).toEqual(['orch_x__integration']); + }); + + test('multiple roots, one chain → only the genuine leaf is the tip', () => { + // A→B (B is a leaf); D is a standalone leaf. Two leaves → diamond. + const epic = [node('A'), node('B', ['A']), node('D')]; + expect(resolveEpicTip(epic)).toEqual(['B', 'D']); + }); + + test('deterministic ordering regardless of input order', () => { + const epic = [node('C', ['R']), node('R'), node('B', ['R'])]; + expect(resolveEpicTip(epic)).toEqual(['B', 'C']); + }); +}); diff --git a/cdk/test/handlers/shared/orchestration-graph-source.test.ts b/cdk/test/handlers/shared/orchestration-graph-source.test.ts new file mode 100644 index 00000000..4ae53168 --- /dev/null +++ b/cdk/test/handlers/shared/orchestration-graph-source.test.ts @@ -0,0 +1,104 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +jest.mock('../../../src/handlers/shared/logger', () => ({ + logger: { info: jest.fn(), warn: jest.fn(), error: jest.fn() }, +})); + +import { + declarativeGraphSource, + linearGraphSource, +} from '../../../src/handlers/shared/orchestration-graph-source'; + +/** A `fetch` impl returning a Linear children payload, for the Linear source. */ +function linearFetch(children: Array<{ id: string; blockedBy?: string[] }>): typeof fetch { + return (async () => ({ + ok: true, + status: 200, + json: async () => ({ + data: { + issue: { + id: 'PARENT', + children: { + nodes: children.map((c) => ({ + id: c.id, + inverseRelations: { nodes: (c.blockedBy ?? []).map((b) => ({ type: 'blocks', issue: { id: b } })) }, + })), + }, + }, + }, + }), + })) as unknown as typeof fetch; +} + +describe('declarativeGraphSource', () => { + test('non-empty node list → ok with the same children', async () => { + const nodes = [ + { id: 'a', depends_on: [], title: 'A' }, + { id: 'b', depends_on: ['a'], title: 'B' }, + ]; + const result = await declarativeGraphSource(nodes)(); + expect(result.kind).toBe('ok'); + if (result.kind === 'ok') expect(result.children).toEqual(nodes); + }); + + test('empty node list → no_children (caller falls through to single task)', async () => { + const result = await declarativeGraphSource([])(); + expect(result.kind).toBe('no_children'); + }); + + test('never errors — validity is enforced downstream, not here', async () => { + // A cyclic graph is still "ok" from the source's perspective; validateDag + // (in discoverOrchestration) is what rejects it. 
+ const result = await declarativeGraphSource([ + { id: 'x', depends_on: ['y'] }, + { id: 'y', depends_on: ['x'] }, + ])(); + expect(result.kind).toBe('ok'); + }); +}); + +describe('linearGraphSource', () => { + test('maps a Linear children payload to ok', async () => { + const result = await linearGraphSource('tok', 'PARENT', { + fetchImpl: linearFetch([{ id: 'A' }, { id: 'B', blockedBy: ['A'] }]), + })(); + expect(result.kind).toBe('ok'); + if (result.kind === 'ok') { + expect(result.children.map((c) => c.id)).toEqual(['A', 'B']); + expect(result.children[1].depends_on).toEqual(['A']); + } + }); + + test('no children → no_children', async () => { + const empty = (async () => ({ + ok: true, + status: 200, + json: async () => ({ data: { issue: { id: 'PARENT', children: { nodes: [] } } } }), + })) as unknown as typeof fetch; + const result = await linearGraphSource('tok', 'PARENT', { fetchImpl: empty })(); + expect(result.kind).toBe('no_children'); + }); + + test('Linear API failure → error (not silently empty)', async () => { + const fail = (async () => ({ ok: false, status: 500, json: async () => ({}) })) as unknown as typeof fetch; + const result = await linearGraphSource('tok', 'PARENT', { fetchImpl: fail })(); + expect(result.kind).toBe('error'); + }); +}); diff --git a/cdk/test/handlers/shared/orchestration-integration-node.test.ts b/cdk/test/handlers/shared/orchestration-integration-node.test.ts new file mode 100644 index 00000000..2cc01129 --- /dev/null +++ b/cdk/test/handlers/shared/orchestration-integration-node.test.ts @@ -0,0 +1,92 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import type { SubIssueNode } from '../../../src/handlers/shared/linear-subissue-fetch'; +import { + computeLeaves, + INTEGRATION_NODE_SUFFIX, + isIntegrationNode, + withIntegrationNode, +} from '../../../src/handlers/shared/orchestration-integration-node'; + +const n = (id: string, deps: string[] = []): SubIssueNode => ({ id, depends_on: deps }); +const ORCH = 'orch_abc123'; + +describe('computeLeaves', () => { + test('linear chain A→B→C → only C is a leaf', () => { + expect(computeLeaves([n('A'), n('B', ['A']), n('C', ['B'])])).toEqual(['C']); + }); + + test('pure fan-out A→{B,C} → B and C are leaves (A is not)', () => { + expect([...computeLeaves([n('A'), n('B', ['A']), n('C', ['A'])])].sort()).toEqual(['B', 'C']); + }); + + test('diamond A→{B,C}→D → only D is a leaf', () => { + expect(computeLeaves([n('A'), n('B', ['A']), n('C', ['A']), n('D', ['B', 'C'])])).toEqual(['D']); + }); + + test('all independent roots → all are leaves', () => { + expect([...computeLeaves([n('A'), n('B'), n('C')])].sort()).toEqual(['A', 'B', 'C']); + }); +}); + +describe('withIntegrationNode', () => { + test('linear chain (1 leaf) → unchanged, not added', () => { 
+ const r = withIntegrationNode([n('A'), n('B', ['A'])], ORCH); + expect(r.added).toBe(false); + expect(r.nodes).toHaveLength(2); + }); + + test('explicit diamond (1 leaf D) → unchanged, not added', () => { + const r = withIntegrationNode([n('A'), n('B', ['A']), n('C', ['A']), n('D', ['B', 'C'])], ORCH); + expect(r.added).toBe(false); + }); + + test('pure fan-out (>1 leaf) → appends a synthetic node over all leaves', () => { + const r = withIntegrationNode([n('A'), n('B', ['A']), n('C', ['A'])], ORCH); + expect(r.added).toBe(true); + expect(r.nodes).toHaveLength(4); + const integ = r.nodes[r.nodes.length - 1]; + expect(integ.id).toBe(`${ORCH}${INTEGRATION_NODE_SUFFIX}`); + expect([...integ.depends_on].sort()).toEqual(['B', 'C']); + expect(integ.title).toContain('Integration'); + expect(integ.identifier).toBeUndefined(); + }); + + test('three independent roots → integration node depends on all three', () => { + const r = withIntegrationNode([n('A'), n('B'), n('C')], ORCH); + expect(r.added).toBe(true); + expect([...r.nodes[r.nodes.length - 1].depends_on].sort()).toEqual(['A', 'B', 'C']); + }); + + test('synthetic node id is idempotency-key safe (no "#", matches /^[A-Za-z0-9_-]+$/)', () => { + const r = withIntegrationNode([n('A'), n('B')], ORCH); + const id = r.nodes[r.nodes.length - 1].id; + // releaseChild builds `${orch}_${sub}` and createTaskCore validates it. 
+ expect(`${ORCH}_${id}`).toMatch(/^[a-zA-Z0-9_-]{1,128}$/); + }); +}); + +describe('isIntegrationNode', () => { + test('true for the synthetic suffix, false for real ids', () => { + expect(isIntegrationNode(`${ORCH}${INTEGRATION_NODE_SUFFIX}`)).toBe(true); + expect(isIntegrationNode('a1b2c3-uuid')).toBe(false); + expect(isIntegrationNode('#meta')).toBe(false); + }); +}); diff --git a/cdk/test/handlers/shared/orchestration-parent-comment.test.ts b/cdk/test/handlers/shared/orchestration-parent-comment.test.ts new file mode 100644 index 00000000..ef293523 --- /dev/null +++ b/cdk/test/handlers/shared/orchestration-parent-comment.test.ts @@ -0,0 +1,142 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +import { + type ParentCommentNode, + parseParentNodeReference, + renderParentDisambiguationReply, + suggestClosestNode, +} from '../../../src/handlers/shared/orchestration-parent-comment'; + +const NODES: ParentCommentNode[] = [ + { sub_issue_id: 'uuid-305', linear_identifier: 'ABCA-305', title: 'Add a site-wide footer', child_task_id: 't1' }, + { sub_issue_id: 'uuid-306', linear_identifier: 'ABCA-306', title: 'Add a newsletter signup section', child_task_id: 't2' }, + { sub_issue_id: 'orch_x__integration', title: 'Integration — combine sub-issue results', child_task_id: 't3' }, +]; + +describe('parseParentNodeReference (#247 UX.18 — parent comment → sub-issue)', () => { + test('the live case: "for the footer change it to ..." → ABCA-305 only', () => { + const r = parseParentNodeReference('for the footer can you change it to "unforgettable memories await you"', NODES); + expect(r.reason).toBeNull(); + expect(r.matches).toHaveLength(1); + expect(r.matches[0].linear_identifier).toBe('ABCA-305'); + }); + + test('keyword "newsletter" → ABCA-306 only', () => { + const r = parseParentNodeReference('tweak the newsletter copy please', NODES); + expect(r.reason).toBeNull(); + expect(r.matches[0].linear_identifier).toBe('ABCA-306'); + }); + + test('Linear identifier wins outright (even alongside a keyword for another node)', () => { + const r = parseParentNodeReference('ABCA-306 also mention the footer somewhere', NODES); + expect(r.reason).toBeNull(); + expect(r.matches).toHaveLength(1); + expect(r.matches[0].linear_identifier).toBe('ABCA-306'); + }); + + test('identifier is case-insensitive', () => { + const r = parseParentNodeReference('abca-305: bump the year', NODES); + expect(r.matches[0].linear_identifier).toBe('ABCA-305'); + }); + + test('no node referenced → reason "none"', () => { + const r = parseParentNodeReference('looks great, thanks!', NODES); + expect(r.reason).toBe('none'); + expect(r.matches).toHaveLength(0); + }); + + test('a keyword common to two 
titles → ambiguous (not a silent pick)', () => { + const nodes: ParentCommentNode[] = [ + { sub_issue_id: 'a', linear_identifier: 'ABCA-1', title: 'Add a pricing banner' }, + { sub_issue_id: 'b', linear_identifier: 'ABCA-2', title: 'Add a pricing table' }, + ]; + const r = parseParentNodeReference('update the pricing wording', nodes); + expect(r.reason).toBe('ambiguous'); + expect(r.matches).toHaveLength(2); + }); + + test('two identifiers named → ambiguous', () => { + const r = parseParentNodeReference('ABCA-305 and ABCA-306 both need the new tagline', NODES); + expect(r.reason).toBe('ambiguous'); + expect(r.matches).toHaveLength(2); + }); + + test('noise-only overlap does NOT match (e.g. "add", "page", "section")', () => { + // "add a section" shares only noise words with the titles → no match. + const r = parseParentNodeReference('please add a section somewhere', NODES); + expect(r.reason).toBe('none'); + expect(r.matches).toHaveLength(0); + }); + + test('integration node only matches on an explicit "integration"/"combined" mention', () => { + const r1 = parseParentNodeReference('check the integration result', NODES); + expect(r1.reason).toBeNull(); + expect(r1.matches[0].sub_issue_id).toBe('orch_x__integration'); + // A generic word from its title ("results") must NOT pull it in. + const r2 = parseParentNodeReference('the results look off', NODES); + expect(r2.matches.some((m) => m.sub_issue_id === 'orch_x__integration')).toBe(false); + }); + + test('empty / whitespace instruction → none', () => { + expect(parseParentNodeReference('', NODES).reason).toBe('none'); + expect(parseParentNodeReference(' ', NODES).reason).toBe('none'); + }); +}); + +describe('suggestClosestNode', () => { + test('returns the single best title-overlap node', () => { + // "footers" won't exact-match (plural) but "footer" stem won't either; + // use a word that overlaps a significant title word. 
+ const s = suggestClosestNode('the newsletter box looks cramped', NODES); + expect(s?.linear_identifier).toBe('ABCA-306'); + }); + + test('returns null when nothing overlaps', () => { + expect(suggestClosestNode('ship it', NODES)).toBeNull(); + }); + + test('never suggests the integration node', () => { + const s = suggestClosestNode('the combined integration result', NODES); + expect(s).toBeNull(); // integration excluded from suggestions + }); +}); + +describe('renderParentDisambiguationReply', () => { + test('lists the REAL sub-issues (not the integration node) + how to target one + new-work path', () => { + const body = renderParentDisambiguationReply('none', NODES); + expect(body).toContain('ABCA-305 — Add a site-wide footer'); + expect(body).toContain('ABCA-306 — Add a newsletter signup section'); + expect(body).not.toContain('Integration — combine'); // synthetic node hidden + expect(body).toContain('@bgagent ABCA-123:'); // the how-to hint + expect(body.toLowerCase()).toContain('new work'); // the create-a-sub-issue path + expect(body).toContain('`abca` label'); + }); + + test('surfaces a "did you mean" suggestion when provided', () => { + const body = renderParentDisambiguationReply('none', NODES, NODES[0]); + expect(body).toContain('Did you mean **ABCA-305 — Add a site-wide footer**?'); + expect(body).toContain('@bgagent ABCA-305:'); + }); + + test('ambiguous vs none give different lead copy', () => { + expect(renderParentDisambiguationReply('ambiguous', NODES)).toContain('more than one'); + expect(renderParentDisambiguationReply('none', NODES)).toContain("couldn't tell"); + }); +}); diff --git a/cdk/test/handlers/shared/orchestration-reconcile.test.ts b/cdk/test/handlers/shared/orchestration-reconcile.test.ts new file mode 100644 index 00000000..8c31cda2 --- /dev/null +++ b/cdk/test/handlers/shared/orchestration-reconcile.test.ts @@ -0,0 +1,176 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import { + computeReconcilePlan, + type ReconcileChild, + type TerminalOutcome, +} from '../../../src/handlers/shared/orchestration-reconcile'; +import type { ChildStatus } from '../../../src/handlers/shared/orchestration-store'; + +const row = ( + sub_issue_id: string, + child_status: ChildStatus, + depends_on: string[] = [], +): ReconcileChild => ({ sub_issue_id, depends_on, child_status }); + +/** Helper: map sub_issue_id → new status from a plan's updates. 
*/ +function updatesById(plan: ReturnType<typeof computeReconcilePlan>): Record<string, ChildStatus> { + return Object.fromEntries(plan.statusUpdates.map((u) => [u.sub_issue_id, u.child_status])); +} + +describe('computeReconcilePlan — success releases dependents', () => { + test('A succeeds → releases its blocked dependent B', () => { + const children = [row('A', 'released'), row('B', 'blocked', ['A'])]; + const outcome: TerminalOutcome = { sub_issue_id: 'A', status: 'COMPLETED' }; + const plan = computeReconcilePlan(outcome, children); + + expect(plan.terminalSucceeded).toBe(true); + expect(updatesById(plan).A).toBe('succeeded'); + expect(plan.toRelease).toEqual(['B']); + }); + + test('linear chain: A succeeds releases B but NOT C (C still blocked on B)', () => { + const children = [ + row('A', 'released'), + row('B', 'blocked', ['A']), + row('C', 'blocked', ['B']), + ]; + const plan = computeReconcilePlan({ sub_issue_id: 'A', status: 'COMPLETED' }, children); + expect(plan.toRelease).toEqual(['B']); + }); + + test('COMPLETED with build_passed=true is a success', () => { + const children = [row('A', 'released'), row('B', 'blocked', ['A'])]; + const plan = computeReconcilePlan({ sub_issue_id: 'A', status: 'COMPLETED', build_passed: true }, children); + expect(plan.terminalSucceeded).toBe(true); + expect(plan.toRelease).toEqual(['B']); + }); + + test('build_passed undefined still counts as success (legacy records)', () => { + const children = [row('A', 'released'), row('B', 'blocked', ['A'])]; + const plan = computeReconcilePlan({ sub_issue_id: 'A', status: 'COMPLETED' }, children); + expect(plan.terminalSucceeded).toBe(true); + }); +}); + +describe('computeReconcilePlan — case 1: COMPLETED but build failed', () => { + test('build_passed=false is NOT a success; dependents are skipped', () => { + const children = [row('A', 'released'), row('B', 'blocked', ['A'])]; + const plan = computeReconcilePlan({ sub_issue_id: 'A', status: 'COMPLETED', build_passed: false }, 
children); + + expect(plan.terminalSucceeded).toBe(false); + expect(updatesById(plan).A).toBe('failed'); + expect(plan.toRelease).toEqual([]); + expect(updatesById(plan).B).toBe('skipped'); + }); +}); + +describe('computeReconcilePlan — case 2: diamond needs ALL predecessors', () => { + test('D depends on B+C; B succeeds while C still running → D NOT released', () => { + const children = [ + row('B', 'released'), + row('C', 'released'), // C's task is running, not yet succeeded + row('D', 'blocked', ['B', 'C']), + ]; + const plan = computeReconcilePlan({ sub_issue_id: 'B', status: 'COMPLETED' }, children); + expect(plan.toRelease).toEqual([]); // C hasn't succeeded yet + }); + + test('D released only once BOTH B and C have succeeded', () => { + // C is the last to finish; B already succeeded. + const children = [ + row('B', 'succeeded'), + row('C', 'released'), + row('D', 'blocked', ['B', 'C']), + ]; + const plan = computeReconcilePlan({ sub_issue_id: 'C', status: 'COMPLETED' }, children); + expect(plan.toRelease).toEqual(['D']); + }); + + test('diamond with a failed leg: C fails → D skipped even though B succeeded', () => { + const children = [ + row('B', 'succeeded'), + row('C', 'released'), + row('D', 'blocked', ['B', 'C']), + ]; + const plan = computeReconcilePlan({ sub_issue_id: 'C', status: 'FAILED' }, children); + expect(updatesById(plan).C).toBe('failed'); + expect(updatesById(plan).D).toBe('skipped'); + expect(plan.toRelease).toEqual([]); + }); +}); + +describe('computeReconcilePlan — transitive skip + sibling isolation', () => { + test('A fails → B (dep A) and C (dep B) both skipped; independent D untouched', () => { + const children = [ + row('A', 'released'), + row('B', 'blocked', ['A']), + row('C', 'blocked', ['B']), + row('D', 'blocked'), // independent root that hasn't started + ]; + const plan = computeReconcilePlan({ sub_issue_id: 'A', status: 'FAILED' }, children); + const u = updatesById(plan); + expect(u.A).toBe('failed'); + 
expect(u.B).toBe('skipped'); + expect(u.C).toBe('skipped'); + expect(u.D).toBeUndefined(); // independent sibling not touched + }); + + test('CANCELLED and TIMED_OUT are failures for gating', () => { + for (const status of ['CANCELLED', 'TIMED_OUT'] as const) { + const children = [row('A', 'released'), row('B', 'blocked', ['A'])]; + const plan = computeReconcilePlan({ sub_issue_id: 'A', status }, children); + expect(plan.terminalSucceeded).toBe(false); + expect(updatesById(plan).B).toBe('skipped'); + } + }); + + test('does not skip a dependent that already started (released)', () => { + // B is already released (its task is running) when A fails — leave it + // to its own terminal event; do not retroactively skip. + const children = [row('A', 'released'), row('B', 'released', ['A'])]; + const plan = computeReconcilePlan({ sub_issue_id: 'A', status: 'FAILED' }, children); + expect(updatesById(plan).B).toBeUndefined(); + }); +}); + +describe('computeReconcilePlan — orchestrationComplete', () => { + test('true when the last child reaches terminal', () => { + const children = [row('A', 'succeeded'), row('B', 'released', ['A'])]; + const plan = computeReconcilePlan({ sub_issue_id: 'B', status: 'COMPLETED' }, children); + expect(plan.orchestrationComplete).toBe(true); + }); + + test('false while a released sibling is still running', () => { + const children = [ + row('A', 'released'), + row('B', 'released'), // independent, still running + ]; + const plan = computeReconcilePlan({ sub_issue_id: 'A', status: 'COMPLETED' }, children); + expect(plan.orchestrationComplete).toBe(false); + }); + + test('true when a failure skips all remaining work', () => { + const children = [row('A', 'released'), row('B', 'blocked', ['A'])]; + const plan = computeReconcilePlan({ sub_issue_id: 'A', status: 'FAILED' }, children); + // A→failed, B→skipped → all terminal. 
+ expect(plan.orchestrationComplete).toBe(true); + }); +}); diff --git a/cdk/test/handlers/shared/orchestration-release.test.ts b/cdk/test/handlers/shared/orchestration-release.test.ts new file mode 100644 index 00000000..15870fef --- /dev/null +++ b/cdk/test/handlers/shared/orchestration-release.test.ts @@ -0,0 +1,391 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +import { UpdateCommand } from '@aws-sdk/lib-dynamodb'; +import { + readConcurrencyBudget, + releaseChild, + releaseReadyChildren, +} from '../../../src/handlers/shared/orchestration-release'; +import { deriveOrchestrationId, type OrchestrationChildRow } from '../../../src/handlers/shared/orchestration-store'; +import { isValidIdempotencyKey } from '../../../src/handlers/shared/validation'; + +jest.mock('../../../src/handlers/shared/logger', () => ({ + logger: { info: jest.fn(), warn: jest.fn(), error: jest.fn() }, +})); + +const NOW = '2026-06-09T12:00:00.000Z'; + +/** Test fixture: returns an OrchestrationChildRow with fixed default ids, no dependencies and child_status 'ready'; `overrides` is spread last, so any field passed in replaces its default. */ function makeRow(overrides: Partial<OrchestrationChildRow> = {}): OrchestrationChildRow { + return { + orchestration_id: 'orch_abc', + sub_issue_id: 'SUB-1', + parent_linear_issue_id: 'PARENT', + linear_workspace_id: 'WS', + repo: 'owner/repo', + depends_on: [], + child_status: 'ready', + linear_identifier: 'ENG-1', + title: 'Build the thing', + created_at: NOW, + updated_at: NOW, + ...overrides, + }; +} + +/** Returns a jest mock used in place of createTaskCore: every call resolves to a 201 response whose JSON body carries `taskId` under data.task_id. */ function created(taskId: string) { + return jest.fn().mockResolvedValue({ statusCode: 201, body: JSON.stringify({ data: { task_id: taskId } }) }); +} + +describe('releaseChild — idempotency key is accepted by the REAL validator', () => { + // Regression: the key was originally `${orchestration_id}#${sub_issue_id}`, + // but createTaskCore validates against /^[a-zA-Z0-9_-]{1,128}$/ — the '#' + // was rejected with a 400 and the child silently never started. Mocked + // createTaskCore tests didn't catch it; this asserts the generated key + // against the actual validator with production-shaped ids. + test('generated key passes isValidIdempotencyKey for real-world ids', async () => { + const ddb = { send: jest.fn().mockResolvedValue({}) }; + const createTaskCore = created('T-1'); + const realRow = makeRow({ + // orch_<32 hex> — exactly what deriveOrchestrationId produces. 
+ orchestration_id: deriveOrchestrationId('d27fcf21-4876-4be2-96c0-78099bf152de'), + // sub_issue_id is a Linear UUID in production. + sub_issue_id: 'a00650a1-4b97-46a3-9977-baede9a8f001', + }); + + await releaseChild({ + ddb: ddb as never, + tableName: 'OrchestrationTable', + row: realRow, + platformUserId: 'user-1', + createTaskCore: createTaskCore as never, + now: NOW, + }); + + const ctx = createTaskCore.mock.calls[0][1]; + expect(isValidIdempotencyKey(ctx.idempotencyKey)).toBe(true); + expect(ctx.idempotencyKey).not.toContain('#'); + }); + + test('key stays within the 128-char limit for max-length ids', async () => { + const ddb = { send: jest.fn().mockResolvedValue({}) }; + const createTaskCore = created('T-1'); + await releaseChild({ + ddb: ddb as never, + tableName: 'OrchestrationTable', + row: makeRow({ + orchestration_id: deriveOrchestrationId('x'.repeat(64)), + sub_issue_id: 'a00650a1-4b97-46a3-9977-baede9a8f001', + }), + platformUserId: 'user-1', + createTaskCore: createTaskCore as never, + now: NOW, + }); + const ctx = createTaskCore.mock.calls[0][1]; + expect(ctx.idempotencyKey.length).toBeLessThanOrEqual(128); + expect(isValidIdempotencyKey(ctx.idempotencyKey)).toBe(true); + }); +}); + +describe('releaseChild — happy path', () => { + test('creates a task and flips the row to released', async () => { + const ddb = { send: jest.fn().mockResolvedValue({}) }; + const createTaskCore = created('T-100'); + + const result = await releaseChild({ + ddb: ddb as never, + tableName: 'OrchestrationTable', + row: makeRow(), + platformUserId: 'user-1', + createTaskCore: createTaskCore as never, + now: NOW, + }); + + expect(result).toEqual({ kind: 'released', taskId: 'T-100' }); + + // createTaskCore called with linear channel + orchestration metadata + idempotency key. 
+ const [body, ctx, requestId] = createTaskCore.mock.calls[0]; + expect(body).toMatchObject({ repo: 'owner/repo' }); + expect(body.task_description).toContain('ENG-1'); + expect(ctx).toMatchObject({ + userId: 'user-1', + channelSource: 'linear', + idempotencyKey: 'orch_abc_SUB-1', + }); + expect(ctx.channelMetadata).toMatchObject({ + orchestration_id: 'orch_abc', + orchestration_sub_issue_id: 'SUB-1', + parent_linear_issue_id: 'PARENT', + }); + expect(requestId).toBe('orch_abc_SUB-1'); + + // Conditional update flips status + stamps task id. + const update = ddb.send.mock.calls[0][0] as UpdateCommand; + expect(update).toBeInstanceOf(UpdateCommand); + expect(update.input.ConditionExpression).toContain('child_status IN'); + expect(update.input.ExpressionAttributeValues![':tid']).toBe('T-100'); + expect(update.input.ExpressionAttributeValues![':released']).toBe('released'); + }); + + test('defaults channelSource to linear when omitted (#247 back-compat)', async () => { + const createTaskCore = created('T-def'); + await releaseChild({ + ddb: { send: jest.fn().mockResolvedValue({}) } as never, + tableName: 'OrchestrationTable', + row: makeRow(), + platformUserId: 'user-1', + createTaskCore: createTaskCore as never, + now: NOW, + }); + expect(createTaskCore.mock.calls[0][1].channelSource).toBe('linear'); + }); + + test('threads an explicit channelSource onto the child task (#247 trigger-agnostic)', async () => { + const createTaskCore = created('T-ch'); + await releaseChild({ + ddb: { send: jest.fn().mockResolvedValue({}) } as never, + tableName: 'OrchestrationTable', + row: makeRow(), + platformUserId: 'user-1', + channelSource: 'webhook', + createTaskCore: createTaskCore as never, + now: NOW, + }); + expect(createTaskCore.mock.calls[0][1].channelSource).toBe('webhook'); + }); + + test('threads Linear OAuth metadata when provided', async () => { + const ddb = { send: jest.fn().mockResolvedValue({}) }; + const createTaskCore = created('T-1'); + await releaseChild({ + 
ddb: ddb as never, + tableName: 'OrchestrationTable', + row: makeRow(), + platformUserId: 'user-1', + linearOauthSecretArn: 'arn:secret', + linearWorkspaceSlug: 'acme', + linearProjectId: 'proj-1', + createTaskCore: createTaskCore as never, + now: NOW, + }); + const ctx = createTaskCore.mock.calls[0][1]; + expect(ctx.channelMetadata).toMatchObject({ + linear_oauth_secret_arn: 'arn:secret', + linear_workspace_slug: 'acme', + linear_project_id: 'proj-1', + }); + }); + + test('treats 200 idempotent replay as success', async () => { + const ddb = { send: jest.fn().mockResolvedValue({}) }; + const createTaskCore = jest.fn().mockResolvedValue({ + statusCode: 200, + body: JSON.stringify({ data: { task_id: 'T-existing' } }), + }); + const result = await releaseChild({ + ddb: ddb as never, + tableName: 'OrchestrationTable', + row: makeRow(), + platformUserId: 'user-1', + createTaskCore: createTaskCore as never, + now: NOW, + }); + expect(result).toEqual({ kind: 'released', taskId: 'T-existing' }); + }); +}); + +describe('releaseChild — idempotency + failure', () => { + test('ConditionalCheckFailed on the flip → already_released (no throw)', async () => { + const conditionalErr = Object.assign(new Error('conditional'), { name: 'ConditionalCheckFailedException' }); + const ddb = { send: jest.fn().mockRejectedValue(conditionalErr) }; + const createTaskCore = created('T-1'); + + const result = await releaseChild({ + ddb: ddb as never, + tableName: 'OrchestrationTable', + row: makeRow(), + platformUserId: 'user-1', + createTaskCore: createTaskCore as never, + now: NOW, + }); + expect(result).toEqual({ kind: 'already_released' }); + }); + + test('createTaskCore non-success → create_failed, no row update', async () => { + const ddb = { send: jest.fn() }; + const createTaskCore = jest.fn().mockResolvedValue({ statusCode: 503, body: '{"error":{"message":"down"}}' }); + + const result = await releaseChild({ + ddb: ddb as never, + tableName: 'OrchestrationTable', + row: makeRow(), + 
platformUserId: 'user-1', + createTaskCore: createTaskCore as never, + now: NOW, + }); + expect(result.kind).toBe('create_failed'); + if (result.kind === 'create_failed') expect(result.statusCode).toBe(503); + expect(ddb.send).not.toHaveBeenCalled(); + }); + + test('createTaskCore throw → error', async () => { + const ddb = { send: jest.fn() }; + const createTaskCore = jest.fn().mockRejectedValue(new Error('boom')); + const result = await releaseChild({ + ddb: ddb as never, + tableName: 'OrchestrationTable', + row: makeRow(), + platformUserId: 'user-1', + createTaskCore: createTaskCore as never, + now: NOW, + }); + expect(result.kind).toBe('error'); + expect(ddb.send).not.toHaveBeenCalled(); + }); + + test('non-conditional DDB error on flip → error', async () => { + const ddb = { send: jest.fn().mockRejectedValue(new Error('throttle')) }; + const createTaskCore = created('T-1'); + const result = await releaseChild({ + ddb: ddb as never, + tableName: 'OrchestrationTable', + row: makeRow(), + platformUserId: 'user-1', + createTaskCore: createTaskCore as never, + now: NOW, + }); + expect(result.kind).toBe('error'); + }); + + test('falls back to sub_issue_id in description when title absent', async () => { + const ddb = { send: jest.fn().mockResolvedValue({}) }; + const createTaskCore = created('T-1'); + await releaseChild({ + ddb: ddb as never, + tableName: 'OrchestrationTable', + row: makeRow({ title: undefined, linear_identifier: undefined }), + platformUserId: 'user-1', + createTaskCore: createTaskCore as never, + now: NOW, + }); + expect(createTaskCore.mock.calls[0][0].task_description).toContain('SUB-1'); + }); +}); + +describe('releaseReadyChildren — #331 concurrency throttle', () => { + // 5 ready leaves, all roots (no deps) so base selection is trivial. 
+ /** Builds `n` rows via makeRow, each 'ready' with no dependencies and a zero-padded sub_issue_id (L00, L01, …). */ const readyRows = (n: number): OrchestrationChildRow[] => + Array.from({ length: n }, (_, i) => + makeRow({ sub_issue_id: `L${String(i).padStart(2, '0')}`, child_status: 'ready', depends_on: [] })); + + /** Returns a jest mock whose calls each resolve to a 201 response with a distinct task_id (T-0, T-1, … from a counter private to that mock). */ function createOk() { + let i = 0; + return jest.fn().mockImplementation(() => + Promise.resolve({ statusCode: 201, body: JSON.stringify({ data: { task_id: `T-${i++}` } }) })); + } + + test('undefined budget → releases ALL ready children (back-compat)', async () => { + const ddb = { send: jest.fn().mockResolvedValue({}) }; + const createTaskCore = createOk(); + const results = await releaseReadyChildren( + ddb as never, 'OrchTable', readyRows(5), { platform_user_id: 'u1' } as never, + createTaskCore as never, NOW, readyRows(5), 'main', undefined, + ); + expect(results.filter((r) => r.kind === 'released')).toHaveLength(5); + expect(createTaskCore).toHaveBeenCalledTimes(5); + }); + + test('budget caps the number released; the rest are NOT created (no fail)', async () => { + const ddb = { send: jest.fn().mockResolvedValue({}) }; + const createTaskCore = createOk(); + const rows = readyRows(5); + const results = await releaseReadyChildren( + ddb as never, 'OrchTable', rows, { platform_user_id: 'u1' } as never, + createTaskCore as never, NOW, rows, 'main', 2, // budget = 2 free slots + ); + // Only 2 tasks created — the other 3 are simply not released this pass. 
+ expect(createTaskCore).toHaveBeenCalledTimes(2); + expect(results).toHaveLength(2); + expect(results.every((r) => r.kind === 'released')).toBe(true); + }); + + test('budget 0 → releases nothing this pass (no tasks created, no failures)', async () => { + const ddb = { send: jest.fn().mockResolvedValue({}) }; + const createTaskCore = createOk(); + const rows = readyRows(5); + const results = await releaseReadyChildren( + ddb as never, 'OrchTable', rows, { platform_user_id: 'u1' } as never, + createTaskCore as never, NOW, rows, 'main', 0, + ); + expect(createTaskCore).not.toHaveBeenCalled(); + expect(results).toHaveLength(0); + }); + + test('negative budget is treated as 0 (releases nothing)', async () => { + const ddb = { send: jest.fn().mockResolvedValue({}) }; + const createTaskCore = createOk(); + const rows = readyRows(3); + await releaseReadyChildren( + ddb as never, 'OrchTable', rows, { platform_user_id: 'u1' } as never, + createTaskCore as never, NOW, rows, 'main', -4, + ); + expect(createTaskCore).not.toHaveBeenCalled(); + }); + + test('release order is deterministic by sub_issue_id when throttled', async () => { + const ddb = { send: jest.fn().mockResolvedValue({}) }; + const createTaskCore = createOk(); + // Shuffled input; budget 2 should pick L00, L01 (sorted), not input order. + const rows = [makeRow({ sub_issue_id: 'L02', child_status: 'ready' }), + makeRow({ sub_issue_id: 'L00', child_status: 'ready' }), + makeRow({ sub_issue_id: 'L01', child_status: 'ready' })]; + const results = await releaseReadyChildren( + ddb as never, 'OrchTable', rows, { platform_user_id: 'u1' } as never, + createTaskCore as never, NOW, rows, 'main', 2, + ); + expect(results).toHaveLength(2); + // The two UpdateCommands that flip ready→released name L00 then L01. 
+ const releasedSubs = (ddb.send.mock.calls as { 0: { input?: { Key?: { sub_issue_id?: string } } } }[]) + .map((c) => c[0]?.input?.Key?.sub_issue_id) + .filter(Boolean); + expect(releasedSubs).toEqual(['L00', 'L01']); + }); +}); + +describe('readConcurrencyBudget — #331', () => { + test('free budget = cap - active_count', async () => { + const ddb = { send: jest.fn().mockResolvedValue({ Item: { active_count: 3 } }) }; + expect(await readConcurrencyBudget(ddb as never, 'ConcTable', 'u1', 10)).toBe(7); + }); + + test('no row yet → full cap available', async () => { + const ddb = { send: jest.fn().mockResolvedValue({}) }; + expect(await readConcurrencyBudget(ddb as never, 'ConcTable', 'u1', 10)).toBe(10); + }); + + test('at cap → 0 (never negative)', async () => { + const ddb = { send: jest.fn().mockResolvedValue({ Item: { active_count: 12 } }) }; + expect(await readConcurrencyBudget(ddb as never, 'ConcTable', 'u1', 10)).toBe(0); + }); + + test('read error → degrades to full cap (admission still gates)', async () => { + const ddb = { send: jest.fn().mockRejectedValue(new Error('ddb down')) }; + expect(await readConcurrencyBudget(ddb as never, 'ConcTable', 'u1', 10)).toBe(10); + }); +}); diff --git a/cdk/test/handlers/shared/orchestration-restack.test.ts b/cdk/test/handlers/shared/orchestration-restack.test.ts new file mode 100644 index 00000000..a0b9568d --- /dev/null +++ b/cdk/test/handlers/shared/orchestration-restack.test.ts @@ -0,0 +1,159 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import { planDirectRestack, planRestack } from '../../../src/handlers/shared/orchestration-restack'; +import type { OrchestrationChildRow } from '../../../src/handlers/shared/orchestration-store'; + +/** Build a child row. `started` → released with a branch; else blocked. */ +function row( + sub: string, + deps: string[] = [], + opts: { started?: boolean; status?: string } = {}, +): OrchestrationChildRow { + const started = opts.started ?? true; + return { + orchestration_id: 'orch_1', + sub_issue_id: sub, + parent_linear_issue_id: 'PARENT', + linear_workspace_id: 'WS', + repo: 'o/r', + depends_on: deps, + child_status: (opts.status ?? (started ? 'released' : 'blocked')) as never, + created_at: 'now', + updated_at: 'now', + ...(started && { child_task_id: `task-${sub}`, child_branch_name: `branch-${sub}` }), + }; +} + +describe('planRestack', () => { + test('linear chain A→B→C, A changes → re-stack B then C (topo order)', () => { + const steps = planRestack([row('A'), row('B', ['A']), row('C', ['B'])], 'A'); + expect(steps.map((s) => s.child.sub_issue_id)).toEqual(['B', 'C']); + // B merges A's branch; C merges B's branch (both in scope). 
+ expect(steps[0].mergeBranches).toEqual(['branch-A']); + expect(steps[1].mergeBranches).toEqual(['branch-B']); + }); + + test('the changed node itself is never re-stacked', () => { + const steps = planRestack([row('A'), row('B', ['A'])], 'A'); + expect(steps.map((s) => s.child.sub_issue_id)).not.toContain('A'); + }); + + test('only STARTED dependents are re-stacked; blocked ones are skipped', () => { + // A changed; B started (released), C still blocked. + const steps = planRestack([row('A'), row('B', ['A']), row('C', ['B'], { started: false })], 'A'); + expect(steps.map((s) => s.child.sub_issue_id)).toEqual(['B']); // C will get fresh code on its first release + }); + + test('diamond A→{B,C}→D, A changes → B, C, then D (D merges both updated preds)', () => { + const steps = planRestack( + [row('A'), row('B', ['A']), row('C', ['A']), row('D', ['B', 'C'])], + 'A', + ); + const ids = steps.map((s) => s.child.sub_issue_id); + expect(ids).toContain('B'); + expect(ids).toContain('C'); + expect(ids[ids.length - 1]).toBe('D'); // D is last (depends on B + C) + const dStep = steps.find((s) => s.child.sub_issue_id === 'D')!; + expect([...dStep.mergeBranches].sort()).toEqual(['branch-B', 'branch-C']); + }); + + test('mid-chain change A→B→C→D, C changes → only D re-stacks', () => { + const steps = planRestack( + [row('A'), row('B', ['A']), row('C', ['B']), row('D', ['C'])], + 'C', + ); + expect(steps.map((s) => s.child.sub_issue_id)).toEqual(['D']); + expect(steps[0].mergeBranches).toEqual(['branch-C']); + }); + + test('changed node with no dependents → empty plan', () => { + expect(planRestack([row('A'), row('B', ['A'])], 'B')).toEqual([]); + }); + + test('unknown changed node → empty plan', () => { + expect(planRestack([row('A')], 'nonexistent')).toEqual([]); + }); + + test('a re-stack with no resolvable predecessor branch is dropped', () => { + // B depends on A, but A somehow has no branch — nothing to merge. 
+ const a = { ...row('A'), child_branch_name: undefined }; + const steps = planRestack([a, row('B', ['A'])], 'A'); + expect(steps).toEqual([]); // B's only predecessor (A) has no branch → no merge → dropped + }); +}); + +describe('planDirectRestack (reconciler cascade — one hop)', () => { + test('linear chain A→B→C, A changes → re-stacks ONLY B (its direct dependent)', () => { + // C is NOT re-stacked now — it cascades when B's restack task completes. + const steps = planDirectRestack([row('A'), row('B', ['A']), row('C', ['B'])], 'A'); + expect(steps.map((s) => s.child.sub_issue_id)).toEqual(['B']); + expect(steps[0].mergeBranches).toEqual(['branch-A']); + }); + + test('next hop: B changes → re-stacks ONLY C', () => { + const steps = planDirectRestack([row('A'), row('B', ['A']), row('C', ['B'])], 'B'); + expect(steps.map((s) => s.child.sub_issue_id)).toEqual(['C']); + expect(steps[0].mergeBranches).toEqual(['branch-B']); + }); + + test('diamond A→{B,C}→D, A changes → re-stacks B and C (both direct), NOT D', () => { + const steps = planDirectRestack( + [row('A'), row('B', ['A']), row('C', ['A']), row('D', ['B', 'C'])], 'A', + ); + expect(steps.map((s) => s.child.sub_issue_id)).toEqual(['B', 'C']); + }); + + test('diamond fan-in: B changes → D re-stacks merging BOTH arms (B + C current branches)', () => { + const steps = planDirectRestack( + [row('A'), row('B', ['A']), row('C', ['A']), row('D', ['B', 'C'])], 'B', + ); + expect(steps.map((s) => s.child.sub_issue_id)).toEqual(['D']); + expect([...steps[0].mergeBranches].sort()).toEqual(['branch-B', 'branch-C']); + }); + + test('changed node itself is never in the plan', () => { + const steps = planDirectRestack([row('A'), row('B', ['A'])], 'A'); + expect(steps.map((s) => s.child.sub_issue_id)).not.toContain('A'); + }); + + test('only STARTED direct dependents are re-stacked', () => { + const steps = planDirectRestack([row('A'), row('B', ['A'], { started: false })], 'A'); + expect(steps).toEqual([]); // B not started → 
gets fresh code on its first release + }); + + test('changed node with no dependents → empty', () => { + expect(planDirectRestack([row('A'), row('B', ['A'])], 'B')).toEqual([]); + }); + + test('unknown changed node → empty', () => { + expect(planDirectRestack([row('A')], 'nope')).toEqual([]); + }); + + test('direct dependent whose every predecessor lacks a branch is dropped', () => { + const a = { ...row('A'), child_branch_name: undefined }; + expect(planDirectRestack([a, row('B', ['A'])], 'A')).toEqual([]); + }); + + test('does NOT recurse: grandchild is untouched even when started', () => { + // A→B→C all started; A changes → only B (C waits for B to finish). + const steps = planDirectRestack([row('A'), row('B', ['A']), row('C', ['B'])], 'A'); + expect(steps.map((s) => s.child.sub_issue_id)).not.toContain('C'); + }); +}); diff --git a/cdk/test/handlers/shared/orchestration-rollup.test.ts b/cdk/test/handlers/shared/orchestration-rollup.test.ts new file mode 100644 index 00000000..6424f1a9 --- /dev/null +++ b/cdk/test/handlers/shared/orchestration-rollup.test.ts @@ -0,0 +1,553 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +const postIssueCommentMock = jest.fn(); +const transitionIssueStateMock = jest.fn(); +const swapIssueReactionMock = jest.fn(); +const upsertStatusCommentMock = jest.fn(); +/** Replace linear-feedback with thin wrappers that forward every call to the jest.fn handles declared above; the two emoji exports are plain string stand-ins. */ jest.mock('../../../src/handlers/shared/linear-feedback', () => ({ + postIssueComment: (...args: unknown[]) => postIssueCommentMock(...args), + transitionIssueState: (...args: unknown[]) => transitionIssueStateMock(...args), + swapIssueReaction: (...args: unknown[]) => swapIssueReactionMock(...args), + upsertStatusComment: (...args: unknown[]) => upsertStatusCommentMock(...args), + EMOJI_SUCCESS: 'white_check_mark', + EMOJI_FAILURE: 'x', +})); +const loggerMock = { info: jest.fn(), warn: jest.fn(), error: jest.fn() }; +jest.mock('../../../src/handlers/shared/logger', () => ({ logger: loggerMock })); + +import { ORCH_LOG } from '../../../src/handlers/shared/orchestration-log-events'; +import { + renderRollupComment, + renderStatusBlock, + renderEpicPanel, + truncateQuote, + cascadeNodeLabel, + rollupKindFromChildren, + postRollup, + type RollupChildView, + type EpicPanelRow, +} from '../../../src/handlers/shared/orchestration-rollup'; +import type { OrchestrationChildRow } from '../../../src/handlers/shared/orchestration-store'; + +/** Builds a RollupChildView from positional args; linear_identifier, title and pr_url are included only when a truthy value is passed. */ const view = (sub: string, status: string, ident?: string, title?: string, pr_url?: string): RollupChildView => ({ + sub_issue_id: sub, + child_status: status, + ...(ident && { linear_identifier: ident }), + ...(title && { title }), + ...(pr_url && { pr_url }), +}); + +describe('renderRollupComment', () => { + test('complete: all succeeded → completion heading + counts', () => { + const body = renderRollupComment('complete', [ + view('a', 'succeeded', 'ENG-1', 'Step A'), + view('b', 
'succeeded', 'ENG-2', 'Step B'), + ]); + expect(body).toContain('orchestration complete'); + expect(body).toContain('2 succeeded, 0 failed, 0 skipped'); + expect(body).toContain('✅ ENG-1: Step A'); + }); + + test('partial_failure: shows failed + skipped with icons + summary', () => { + const body = renderRollupComment('partial_failure', [ + view('a', 'failed', 'ENG-1'), + view('b', 'skipped', 'ENG-2'), + view('c', 'succeeded', 'ENG-3'), + ]); + expect(body).toContain('finished with failures'); + expect(body).toContain('1 succeeded, 1 failed, 1 skipped'); + expect(body).toContain('❌ ENG-1'); + expect(body).toContain('⏭️ ENG-2'); + }); + + test('cancelled: cancellation heading', () => { + const body = renderRollupComment('cancelled', [view('a', 'failed', 'ENG-1')]); + expect(body).toContain('cancelled'); + }); + + test('children are sorted by identifier (deterministic comment)', () => { + const body = renderRollupComment('complete', [ + view('z', 'succeeded', 'ENG-9'), + view('a', 'succeeded', 'ENG-1'), + ]); + expect(body.indexOf('ENG-1')).toBeLessThan(body.indexOf('ENG-9')); + }); + + // #323: per-child PR links + integration-node combined-PR callout. + test('renders a PR link on a child line when pr_url is present', () => { + const body = renderRollupComment('complete', [ + view('a', 'succeeded', 'ENG-1', 'Step A', 'https://github.com/o/r/pull/10'), + view('b', 'succeeded', 'ENG-2', 'Step B'), // no PR + ]); + expect(body).toContain('✅ ENG-1: Step A — succeeded — [PR](https://github.com/o/r/pull/10)'); + // A child without a PR renders no link (no broken markdown). 
+ expect(body).toContain('✅ ENG-2: Step B — succeeded'); + expect(body).not.toContain('ENG-2: Step B — succeeded — [PR]'); + }); + + test('surfaces the integration node combined PR as a prominent callout', () => { + const body = renderRollupComment('complete', [ + view('a', 'succeeded', 'ENG-1', 'Leaf A', 'https://github.com/o/r/pull/1'), + view('b', 'succeeded', 'ENG-2', 'Leaf B', 'https://github.com/o/r/pull/2'), + view('orch_x__integration', 'succeeded', undefined, 'Integration — combine sub-issue results', 'https://github.com/o/r/pull/9'), + ]); + expect(body).toContain('🔗 **Combined PR (all sub-issues merged):** [https://github.com/o/r/pull/9](https://github.com/o/r/pull/9)'); + // The callout appears BEFORE the per-child list. + expect(body.indexOf('Combined PR')).toBeLessThan(body.indexOf('ENG-1')); + }); + + test('no combined-PR callout when the integration node opened no PR', () => { + const body = renderRollupComment('partial_failure', [ + view('a', 'succeeded', 'ENG-1', 'Leaf A', 'https://github.com/o/r/pull/1'), + view('orch_x__integration', 'skipped', undefined, 'Integration — combine sub-issue results'), // no PR (skipped) + ]); + expect(body).not.toContain('Combined PR'); + }); + + test('no combined-PR callout for a plain chain (no integration node)', () => { + const body = renderRollupComment('complete', [ + view('a', 'succeeded', 'ENG-1', 'A', 'https://github.com/o/r/pull/1'), + view('b', 'succeeded', 'ENG-2', 'B', 'https://github.com/o/r/pull/2'), + ]); + expect(body).not.toContain('Combined PR'); + }); +}); + +describe('renderStatusBlock (#3 live status)', () => { + test('header shows N/M complete (terminal children only)', () => { + const body = renderStatusBlock([ + view('a', 'succeeded', 'ENG-1', 'Guide'), + view('b', 'released', 'ENG-2', 'Cards'), + view('c', 'blocked', 'ENG-3', 'Quiz'), + ]); + expect(body).toContain('1/3 complete'); + expect(body).toContain('🔄 **ABCA orchestration**'); + }); + + test('maps in-flight statuses to human words 
(running / blocked)', () => { + const body = renderStatusBlock([ + view('a', 'released', 'ENG-1', 'A'), + view('b', 'blocked', 'ENG-2', 'B'), + ]); + expect(body).toContain('ENG-1: A — running'); + expect(body).toContain('ENG-2: B — blocked'); + }); + + test('links a child PR in the live block when pr_url is known (#323)', () => { + const body = renderStatusBlock([ + view('a', 'released', 'ENG-1', 'A', 'https://github.com/o/r/pull/7'), + view('b', 'blocked', 'ENG-2', 'B'), + ]); + expect(body).toContain('ENG-1: A — running — [PR](https://github.com/o/r/pull/7)'); + expect(body).toContain('ENG-2: B — blocked'); + expect(body).not.toContain('ENG-2: B — blocked — [PR]'); + }); + + test('terminal statuses keep their word + icon', () => { + const body = renderStatusBlock([ + view('a', 'succeeded', 'ENG-1'), + view('b', 'failed', 'ENG-2'), + view('c', 'skipped', 'ENG-3'), + ]); + expect(body).toContain('✅ ENG-1 — succeeded'); + expect(body).toContain('❌ ENG-2 — failed'); + expect(body).toContain('⏭️ ENG-3 — skipped'); + expect(body).toContain('3/3 complete'); + }); + + test('children sorted by identifier (stable edit-in-place body)', () => { + const body = renderStatusBlock([view('z', 'released', 'ENG-9'), view('a', 'released', 'ENG-1')]); + expect(body.indexOf('ENG-1')).toBeLessThan(body.indexOf('ENG-9')); + }); +}); + +describe('rollupKindFromChildren', () => { + test('all succeeded → complete', () => { + expect(rollupKindFromChildren([view('a', 'succeeded'), view('b', 'succeeded')])).toBe('complete'); + }); + test('any failed → partial_failure', () => { + expect(rollupKindFromChildren([view('a', 'succeeded'), view('b', 'failed')])).toBe('partial_failure'); + }); + test('any skipped → partial_failure', () => { + expect(rollupKindFromChildren([view('a', 'succeeded'), view('b', 'skipped')])).toBe('partial_failure'); + }); +}); + +const row = (sub: string, status: string): OrchestrationChildRow => ({ + orchestration_id: 'orch_1', + sub_issue_id: sub, + 
parent_linear_issue_id: 'PARENT', + linear_workspace_id: 'WS', + repo: 'o/r', + depends_on: [], + child_status: status as never, + created_at: 'now', + updated_at: 'now', +}); + +describe('postRollup', () => { + beforeEach(() => { + postIssueCommentMock.mockReset(); + transitionIssueStateMock.mockReset().mockResolvedValue(true); + swapIssueReactionMock.mockReset().mockResolvedValue(true); + upsertStatusCommentMock.mockReset().mockResolvedValue('cmt-1'); + loggerMock.info.mockReset(); + loggerMock.warn.mockReset(); + }); + + test('success → posts comment + logs orch.rollup.posted', async () => { + postIssueCommentMock.mockResolvedValue({ ok: true }); + const ok = await postRollup({ + ctx: { linearWorkspaceId: 'WS', registryTableName: 'REG' }, + orchestrationId: 'orch_1', + parentLinearIssueId: 'PARENT', + kind: 'complete', + children: [row('a', 'succeeded')], + }); + expect(ok).toBe(true); + expect(postIssueCommentMock).toHaveBeenCalledTimes(1); + // The stable log event automated tests grep for. 
+ const posted = loggerMock.info.mock.calls.find((c) => c[1]?.event === ORCH_LOG.rollupPosted); + expect(posted).toBeDefined(); + expect(posted![1]).toMatchObject({ orchestration_id: 'orch_1', parent_linear_issue_id: 'PARENT', rollup_kind: 'complete' }); + }); + + test('complete → advances parent to In Review + ✅ reaction (mirrors children)', async () => { + postIssueCommentMock.mockResolvedValue({ ok: true }); + await postRollup({ + ctx: { linearWorkspaceId: 'WS', registryTableName: 'REG' }, + orchestrationId: 'orch_1', + parentLinearIssueId: 'PARENT', + kind: 'complete', + children: [row('a', 'succeeded')], + }); + expect(transitionIssueStateMock).toHaveBeenCalledWith( + { linearWorkspaceId: 'WS', registryTableName: 'REG' }, 'PARENT', 'started', ['In Review'], + ); + expect(swapIssueReactionMock).toHaveBeenCalledWith( + { linearWorkspaceId: 'WS', registryTableName: 'REG' }, 'PARENT', 'white_check_mark', + ); + }); + + test('partial_failure → does NOT advance state, swaps to ❌ reaction', async () => { + postIssueCommentMock.mockResolvedValue({ ok: true }); + await postRollup({ + ctx: { linearWorkspaceId: 'WS', registryTableName: 'REG' }, + orchestrationId: 'orch_1', + parentLinearIssueId: 'PARENT', + kind: 'partial_failure', + children: [row('a', 'failed')], + }); + expect(transitionIssueStateMock).not.toHaveBeenCalled(); + expect(swapIssueReactionMock).toHaveBeenCalledWith( + { linearWorkspaceId: 'WS', registryTableName: 'REG' }, 'PARENT', 'x', + ); + }); + + test('comment fails → does NOT transition state or react (state mirrors only on posted rollup)', async () => { + postIssueCommentMock.mockResolvedValue({ ok: false, retryable: false }); + await postRollup({ + ctx: { linearWorkspaceId: 'WS', registryTableName: 'REG' }, + orchestrationId: 'orch_1', + parentLinearIssueId: 'PARENT', + kind: 'complete', + children: [row('a', 'succeeded')], + }); + expect(transitionIssueStateMock).not.toHaveBeenCalled(); + expect(swapIssueReactionMock).not.toHaveBeenCalled(); + 
}); + + test('post returns false → logs orch.rollup.failed, returns false', async () => { + postIssueCommentMock.mockResolvedValue({ ok: false, retryable: false }); + const ok = await postRollup({ + ctx: { linearWorkspaceId: 'WS', registryTableName: 'REG' }, + orchestrationId: 'orch_1', + parentLinearIssueId: 'PARENT', + kind: 'partial_failure', + children: [row('a', 'failed')], + }); + expect(ok).toBe(false); + expect(loggerMock.warn.mock.calls.some((c) => c[1]?.event === ORCH_LOG.rollupFailed)).toBe(true); + }); + + test('non-linear channelSource → no Linear post/transition/reaction, returns false (#247 seam)', async () => { + const ok = await postRollup({ + ctx: { linearWorkspaceId: 'WS', registryTableName: 'REG' }, + orchestrationId: 'orch_1', + parentLinearIssueId: 'PARENT', + kind: 'complete', + children: [row('a', 'succeeded')], + channelSource: 'slack', + }); + expect(ok).toBe(false); + expect(postIssueCommentMock).not.toHaveBeenCalled(); + expect(transitionIssueStateMock).not.toHaveBeenCalled(); + expect(swapIssueReactionMock).not.toHaveBeenCalled(); + }); + + test('explicit linear channelSource behaves like the default', async () => { + postIssueCommentMock.mockResolvedValue({ ok: true }); + const ok = await postRollup({ + ctx: { linearWorkspaceId: 'WS', registryTableName: 'REG' }, + orchestrationId: 'orch_1', + parentLinearIssueId: 'PARENT', + kind: 'complete', + children: [row('a', 'succeeded')], + channelSource: 'linear', + }); + expect(ok).toBe(true); + expect(postIssueCommentMock).toHaveBeenCalledTimes(1); + }); + + test('with statusCommentId → EDITS the live block in place (no fresh comment) (#3)', async () => { + upsertStatusCommentMock.mockResolvedValue('cmt-1'); + const ok = await postRollup({ + ctx: { linearWorkspaceId: 'WS', registryTableName: 'REG' }, + orchestrationId: 'orch_1', + parentLinearIssueId: 'PARENT', + kind: 'complete', + children: [row('a', 'succeeded')], + statusCommentId: 'cmt-1', + }); + expect(ok).toBe(true); + // Edited the 
existing comment; did NOT post a fresh one. + expect(upsertStatusCommentMock).toHaveBeenCalledWith( + { linearWorkspaceId: 'WS', registryTableName: 'REG' }, 'PARENT', expect.any(String), 'cmt-1', + ); + expect(postIssueCommentMock).not.toHaveBeenCalled(); + }); + + test('threads prUrls → rendered comment links child PRs + combined PR (#323)', async () => { + postIssueCommentMock.mockResolvedValue({ ok: true }); + await postRollup({ + ctx: { linearWorkspaceId: 'WS', registryTableName: 'REG' }, + orchestrationId: 'orch_1', + parentLinearIssueId: 'PARENT', + kind: 'complete', + children: [row('a', 'succeeded'), row('orch_1__integration', 'succeeded')], + prUrls: { + a: 'https://github.com/o/r/pull/3', + orch_1__integration: 'https://github.com/o/r/pull/9', + }, + }); + const body = postIssueCommentMock.mock.calls[0][2] as string; + expect(body).toContain('[PR](https://github.com/o/r/pull/3)'); + expect(body).toContain('🔗 **Combined PR (all sub-issues merged):**'); + expect(body).toContain('https://github.com/o/r/pull/9'); + }); + + test('without statusCommentId → posts a fresh comment (back-compat)', async () => { + postIssueCommentMock.mockResolvedValue({ ok: true }); + await postRollup({ + ctx: { linearWorkspaceId: 'WS', registryTableName: 'REG' }, + orchestrationId: 'orch_1', + parentLinearIssueId: 'PARENT', + kind: 'complete', + children: [row('a', 'succeeded')], + }); + expect(postIssueCommentMock).toHaveBeenCalledTimes(1); + expect(upsertStatusCommentMock).not.toHaveBeenCalled(); + }); + + test('post throws → swallowed, logs orch.rollup.failed, returns false', async () => { + postIssueCommentMock.mockRejectedValue(new Error('linear down')); + const ok = await postRollup({ + ctx: { linearWorkspaceId: 'WS', registryTableName: 'REG' }, + orchestrationId: 'orch_1', + parentLinearIssueId: 'PARENT', + kind: 'complete', + children: [row('a', 'succeeded')], + }); + expect(ok).toBe(false); + expect(loggerMock.warn.mock.calls.some((c) => c[1]?.event === 
ORCH_LOG.rollupFailed)).toBe(true); + }); +}); + +describe('truncateQuote', () => { + test('short text passes through, trimmed + whitespace-collapsed', () => { + expect(truncateQuote(' the button doesnt work ')).toBe('the button doesnt work'); + }); + test('long text is truncated with an ellipsis', () => { + const out = truncateQuote('a'.repeat(60), 40); + expect(out.length).toBe(40); + expect(out.endsWith('…')).toBe(true); + }); +}); + +describe('cascadeNodeLabel (#247 — short name inside the cascade reason)', () => { + test('integration node → "the integration" (not its raw synthetic title)', () => { + // Live-caught under UX.6 stress: the integration node title read clumsily + // in the possessive reason "Integration — combine sub-issue results's change". + const label = cascadeNodeLabel('orch_abc__integration', undefined, 'Integration — combine sub-issue results'); + expect(label).toBe('the integration'); + // Reads cleanly in the possessive: "the integration's change". + expect(`updating to include ${label}'s change`).toBe("updating to include the integration's change"); + }); + + test('real node prefers the Linear identifier', () => { + expect(cascadeNodeLabel('uuid-1', 'ABCA-42', 'Some title')).toBe('ABCA-42'); + }); + + test('real node with no identifier falls back to title, then a generic name', () => { + expect(cascadeNodeLabel('uuid-1', undefined, 'Some title')).toBe('Some title'); + expect(cascadeNodeLabel('uuid-1')).toBe('a predecessor'); + }); +}); + +describe('renderEpicPanel (#247 UX — the single maturing panel)', () => { + const row = (sub: string, status: string, opts: Partial<EpicPanelRow> = {}): EpicPanelRow => ({ + sub_issue_id: sub, child_status: status, ...opts, + }); + + test('in-progress header shows N/M complete', () => { + const body = renderEpicPanel({ + inProgress: true, + rows: [ + row('a', 'succeeded', { linear_identifier: 'ENG-1', title: 'A' }), + row('b', 'released', { linear_identifier: 'ENG-2', title: 'B' }), + row('c', 'blocked', 
{ linear_identifier: 'ENG-3', title: 'C' }), + ], + }); + expect(body).toContain('🔄 **ABCA orchestration** · 1/3 complete'); + expect(body).toContain('✅ ENG-1: A — succeeded'); + expect(body).toContain('🔄 ENG-2: B — running'); + expect(body).toContain('⏳ ENG-3: C — blocked'); + }); + + test('all settled + ok → complete header; failures → ⚠️', () => { + expect(renderEpicPanel({ inProgress: false, rows: [row('a', 'succeeded')] })) + .toContain('✅ **ABCA orchestration complete**'); + expect(renderEpicPanel({ inProgress: false, rows: [row('a', 'succeeded'), row('b', 'failed')] })) + .toContain('⚠️ **ABCA orchestration finished with failures**'); + }); + + test('PR link shown ONLY when a PR exists (first run mid-flight has none)', () => { + const body = renderEpicPanel({ + inProgress: true, + rows: [ + row('a', 'released', { linear_identifier: 'ENG-1', title: 'A' }), // running, no PR yet + row('b', 'succeeded', { linear_identifier: 'ENG-2', title: 'B', pr_url: 'https://github.com/o/r/pull/9' }), + ], + }); + expect(body).toContain('🔄 ENG-1: A — running\n'); // no — [PR] suffix + expect(body).not.toContain('ENG-1: A — running — [PR]'); + expect(body).toContain('✅ ENG-2: B — succeeded — [PR](https://github.com/o/r/pull/9)'); + }); + + test('a row with updatingReason renders 🔄 updating <reason>, even when status is succeeded', () => { + const body = renderEpicPanel({ + inProgress: true, + rows: [ + row('a', 'succeeded', { + linear_identifier: 'ENG-1', + title: 'UI', + pr_url: 'https://github.com/o/r/pull/7', + updatingReason: 'per ENG-2\'s "button doesnt work"', + }), + ], + }); + expect(body).toContain('🔄 ENG-1: UI — updating per ENG-2\'s "button doesnt work" — [PR](https://github.com/o/r/pull/7)'); + }); + + test('a mid-update row keeps the header in-progress (does NOT count as done)', () => { + // inProgress is passed true by the caller when any row is updating; the + // updating row is excluded from the done count. 
+ const body = renderEpicPanel({ + inProgress: true, + rows: [ + row('a', 'succeeded', { updatingReason: 'to include ENG-3\'s change' }), + row('b', 'succeeded'), + ], + }); + expect(body).toContain('· 1/2 complete'); // only b counts as done + }); + + test('integration node renders friendly, never its raw id', () => { + const body = renderEpicPanel({ + inProgress: false, + rows: [ + row('a', 'succeeded', { linear_identifier: 'ENG-1' }), + row('orch_x__integration', 'succeeded', { pr_url: 'https://github.com/o/r/pull/9' }), + ], + combinedPrUrl: 'https://github.com/o/r/pull/9', + }); + expect(body).toContain('Integration — combined result'); + expect(body).not.toContain('orch_x__integration'); + expect(body).toContain('🔗 **Combined PR (all sub-issues merged):**'); + }); + + test('embeds the combined preview screenshot when present', () => { + const body = renderEpicPanel({ + inProgress: false, + rows: [row('a', 'succeeded')], + combinedScreenshotUrl: 'https://cdn/x.png', + }); + expect(body).toContain('🖼️ **Combined preview**'); + expect(body).toContain('![combined preview](https://cdn/x.png)'); + }); + + test('#247 UX.17: makes the combined preview a clickable deep-link when the preview URL is known', () => { + const body = renderEpicPanel({ + inProgress: false, + rows: [row('a', 'succeeded')], + combinedScreenshotUrl: 'https://cdn/x.png', + combinedPreviewUrl: 'https://my-app-abc123.vercel.app', + }); + expect(body).toContain('🖼️ **Combined preview**'); + // Linked image: the embedded screenshot opens the running combined site. + expect(body).toContain('[![combined preview](https://cdn/x.png)](https://my-app-abc123.vercel.app)'); + // Plain "open it" link too, for clients that don't render linked images. 
+ expect(body).toContain('[Open the combined preview](https://my-app-abc123.vercel.app)'); + }); + + test('#247 UX.17: percent-encodes parens in the preview URL so it cannot break out of the markdown link', () => { + const body = renderEpicPanel({ + inProgress: false, + rows: [row('a', 'succeeded')], + combinedScreenshotUrl: 'https://cdn/x.png', + combinedPreviewUrl: 'https://preview.vercel.app/x)](https://evil/a.png)', + }); + // No raw `](` breakout delimiter from the attacker-controlled preview URL. + expect(body).not.toContain('x)](https://evil'); + expect(body).toContain('%29'); // encoded paren survives + }); + + test('#247 UX.17: falls back to a plain embedded image when no preview URL is known', () => { + const body = renderEpicPanel({ + inProgress: false, + rows: [row('a', 'succeeded')], + combinedScreenshotUrl: 'https://cdn/x.png', + }); + expect(body).toContain('![combined preview](https://cdn/x.png)'); + expect(body).not.toContain('[![combined preview]'); // not a linked image + expect(body).not.toContain('Open the combined preview'); + }); + + test('rows are sorted by identifier for a stable edited body', () => { + const body = renderEpicPanel({ + inProgress: true, + rows: [ + row('z', 'released', { linear_identifier: 'ENG-9' }), + row('a', 'released', { linear_identifier: 'ENG-1' }), + ], + }); + expect(body.indexOf('ENG-1')).toBeLessThan(body.indexOf('ENG-9')); + }); +}); diff --git a/cdk/test/handlers/shared/orchestration-store.test.ts b/cdk/test/handlers/shared/orchestration-store.test.ts new file mode 100644 index 00000000..ff831e0f --- /dev/null +++ b/cdk/test/handlers/shared/orchestration-store.test.ts @@ -0,0 +1,599 @@ +/** + * MIT No Attribution + * + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +import { GetCommand, BatchWriteCommand, UpdateCommand, QueryCommand } from '@aws-sdk/lib-dynamodb'; +import type { SubIssueNode } from '../../../src/handlers/shared/linear-subissue-fetch'; +import { + seedOrchestration, + extendOrchestration, + deriveOrchestrationId, + claimRollup, + clearRollupClaim, + claimCommentAck, + loadOrchestration, + findOrchestrationChildByBranch, +} from '../../../src/handlers/shared/orchestration-store'; + +jest.mock('../../../src/handlers/shared/logger', () => ({ + logger: { info: jest.fn(), warn: jest.fn(), error: jest.fn() }, +})); + +const child = (id: string, depends_on: string[] = [], extra: Partial<SubIssueNode> = {}): SubIssueNode => ({ + id, + depends_on, + ...extra, +}); + +interface MockDdb { + send: jest.Mock; +} + +function makeDdb(): MockDdb { + return { send: jest.fn() }; +} + +const TABLE = 'OrchestrationTable'; +const NOW = '2026-06-09T12:00:00.000Z'; +const RC = { platform_user_id: 'platform-user-1' }; + +describe('deriveOrchestrationId', () => { + test('is deterministic for the same parent id', () => { + expect(deriveOrchestrationId('ISSUE-123')).toBe(deriveOrchestrationId('ISSUE-123')); + }); + 
+ test('differs for different parent ids', () => { + expect(deriveOrchestrationId('A')).not.toBe(deriveOrchestrationId('B')); + }); + + test('is prefixed and fixed-length', () => { + const id = deriveOrchestrationId('anything'); + expect(id).toMatch(/^orch_[0-9a-f]{32}$/); + }); +}); + +describe('seedOrchestration — first write', () => { + test('writes one row per child plus a meta row', async () => { + const ddb = makeDdb(); + ddb.send + .mockResolvedValueOnce({ Item: undefined }) // GetCommand: no existing meta + .mockResolvedValueOnce({}); // BatchWrite + + const result = await seedOrchestration({ + ddb: ddb as never, + tableName: TABLE, + parentLinearIssueId: 'PARENT', + linearWorkspaceId: 'WS', + repo: 'o/r', + children: [child('A'), child('B', ['A'])], + now: NOW, + releaseContext: RC, + }); + + expect(result.alreadyExisted).toBe(false); + // 2 children + 1 meta row. + expect(result.rowsWritten).toBe(3); + expect(result.orchestrationId).toBe(deriveOrchestrationId('PARENT')); + + // First call is the idempotency GetCommand. + expect(ddb.send.mock.calls[0][0]).toBeInstanceOf(GetCommand); + // Second is the BatchWrite. 
+ const batch = ddb.send.mock.calls[1][0]; + expect(batch).toBeInstanceOf(BatchWriteCommand); + const puts = batch.input.RequestItems[TABLE]; + expect(puts).toHaveLength(3); + }); + + test('roots get child_status=ready, blocked children get blocked', async () => { + const ddb = makeDdb(); + ddb.send.mockResolvedValueOnce({ Item: undefined }).mockResolvedValueOnce({}); + + await seedOrchestration({ + ddb: ddb as never, + tableName: TABLE, + parentLinearIssueId: 'PARENT', + linearWorkspaceId: 'WS', + repo: 'o/r', + children: [child('A'), child('B', ['A'])], + now: NOW, + releaseContext: RC, + }); + + const puts = ddb.send.mock.calls[1][0].input.RequestItems[TABLE] as Array<{ PutRequest: { Item: Record<string, unknown> } }>; + const byId = Object.fromEntries(puts.map((p) => [p.PutRequest.Item.sub_issue_id, p.PutRequest.Item])); + expect(byId.A.child_status).toBe('ready'); + expect(byId.B.child_status).toBe('blocked'); + expect(byId.B.depends_on).toEqual(['A']); + }); + + test('persists linear_identifier and title when present', async () => { + const ddb = makeDdb(); + ddb.send.mockResolvedValueOnce({ Item: undefined }).mockResolvedValueOnce({}); + + await seedOrchestration({ + ddb: ddb as never, + tableName: TABLE, + parentLinearIssueId: 'PARENT', + linearWorkspaceId: 'WS', + repo: 'o/r', + children: [child('A', [], { identifier: 'ENG-1', title: 'Do thing' })], + now: NOW, + releaseContext: RC, + }); + + const puts = ddb.send.mock.calls[1][0].input.RequestItems[TABLE] as Array<{ PutRequest: { Item: Record<string, unknown> } }>; + const a = puts.find((p) => p.PutRequest.Item.sub_issue_id === 'A')!.PutRequest.Item; + expect(a.linear_identifier).toBe('ENG-1'); + expect(a.title).toBe('Do thing'); + }); + + test('chunks BatchWrite into groups of 25', async () => { + const ddb = makeDdb(); + ddb.send.mockResolvedValue({}); // Get + all batches + ddb.send.mockResolvedValueOnce({ Item: undefined }); // first call = Get + + // 30 children + 1 meta = 31 rows → 2 batches (25 + 
6). + const children = Array.from({ length: 30 }, (_, i) => child(`C${i}`)); + const result = await seedOrchestration({ + ddb: ddb as never, + tableName: TABLE, + parentLinearIssueId: 'PARENT', + linearWorkspaceId: 'WS', + repo: 'o/r', + children, + now: NOW, + releaseContext: RC, + }); + + expect(result.rowsWritten).toBe(31); + // 1 Get + 2 BatchWrite = 3 sends. + expect(ddb.send).toHaveBeenCalledTimes(3); + }); + + test('includes ttl on rows when provided', async () => { + const ddb = makeDdb(); + ddb.send.mockResolvedValueOnce({ Item: undefined }).mockResolvedValueOnce({}); + + await seedOrchestration({ + ddb: ddb as never, + tableName: TABLE, + parentLinearIssueId: 'PARENT', + linearWorkspaceId: 'WS', + repo: 'o/r', + children: [child('A')], + now: NOW, + releaseContext: RC, + ttl: 9999999999, + }); + + const puts = ddb.send.mock.calls[1][0].input.RequestItems[TABLE] as Array<{ PutRequest: { Item: Record<string, unknown> } }>; + expect(puts.every((p) => p.PutRequest.Item.ttl === 9999999999)).toBe(true); + }); + + test('persists channel_source on the meta row when supplied (#247 trigger-agnostic)', async () => { + const ddb = makeDdb(); + ddb.send.mockResolvedValueOnce({ Item: undefined }).mockResolvedValueOnce({}); + + await seedOrchestration({ + ddb: ddb as never, + tableName: TABLE, + parentLinearIssueId: 'PARENT', + linearWorkspaceId: 'WS', + repo: 'o/r', + children: [child('A')], + now: NOW, + releaseContext: { platform_user_id: 'u1', channel_source: 'linear' }, + }); + + const puts = ddb.send.mock.calls[1][0].input.RequestItems[TABLE] as Array<{ PutRequest: { Item: Record<string, unknown> } }>; + const meta = puts.find((p) => p.PutRequest.Item.sub_issue_id === '#meta')!.PutRequest.Item; + expect(meta.channel_source).toBe('linear'); + }); + + test('omits channel_source from the meta row when not supplied (back-compat)', async () => { + const ddb = makeDdb(); + ddb.send.mockResolvedValueOnce({ Item: undefined }).mockResolvedValueOnce({}); + + await 
seedOrchestration({ + ddb: ddb as never, + tableName: TABLE, + parentLinearIssueId: 'PARENT', + linearWorkspaceId: 'WS', + repo: 'o/r', + children: [child('A')], + now: NOW, + releaseContext: RC, // no channel_source + }); + + const puts = ddb.send.mock.calls[1][0].input.RequestItems[TABLE] as Array<{ PutRequest: { Item: Record<string, unknown> } }>; + const meta = puts.find((p) => p.PutRequest.Item.sub_issue_id === '#meta')!.PutRequest.Item; + expect(meta.channel_source).toBeUndefined(); + }); +}); + +describe('seedOrchestration — idempotent replay', () => { + test('skips writing when a meta row already exists', async () => { + const ddb = makeDdb(); + ddb.send.mockResolvedValueOnce({ Item: { orchestration_id: 'x', sub_issue_id: '#meta' } }); + + const result = await seedOrchestration({ + ddb: ddb as never, + tableName: TABLE, + parentLinearIssueId: 'PARENT', + linearWorkspaceId: 'WS', + repo: 'o/r', + children: [child('A'), child('B', ['A'])], + now: NOW, + releaseContext: RC, + }); + + expect(result.alreadyExisted).toBe(true); + expect(result.rowsWritten).toBe(0); + // Only the Get fired — no BatchWrite. 
+ expect(ddb.send).toHaveBeenCalledTimes(1); + expect(ddb.send.mock.calls[0][0]).toBeInstanceOf(GetCommand); + }); +}); + +describe('claimRollup — exactly-once parent rollup', () => { + function makeDdb(): MockDdb { return { send: jest.fn() }; } + + test('first claim wins (conditional write succeeds) → true', async () => { + const ddb = makeDdb(); + ddb.send.mockResolvedValueOnce({}); + const won = await claimRollup(ddb as never, TABLE, 'orch_1', NOW); + expect(won).toBe(true); + const cmd = ddb.send.mock.calls[0][0] as UpdateCommand; + expect(cmd).toBeInstanceOf(UpdateCommand); + expect(cmd.input.ConditionExpression).toContain('attribute_not_exists(rollup_posted_at)'); + expect(cmd.input.Key).toMatchObject({ sub_issue_id: '#meta' }); + }); + + test('second claim loses (ConditionalCheckFailed) → false, no throw', async () => { + const ddb = makeDdb(); + const e = Object.assign(new Error('c'), { name: 'ConditionalCheckFailedException' }); + ddb.send.mockRejectedValueOnce(e); + const won = await claimRollup(ddb as never, TABLE, 'orch_1', NOW); + expect(won).toBe(false); + }); + + test('non-conditional error propagates', async () => { + const ddb = makeDdb(); + ddb.send.mockRejectedValueOnce(new Error('throttle')); + await expect(claimRollup(ddb as never, TABLE, 'orch_1', NOW)).rejects.toThrow('throttle'); + }); +}); + +describe('clearRollupClaim — release the claim so a re-completing epic re-settles', () => { + test('REMOVEs rollup_posted_at on the meta row (unconditional, idempotent)', async () => { + const ddb = { send: jest.fn().mockResolvedValueOnce({}) }; + await clearRollupClaim(ddb as never, TABLE, 'orch_1', NOW); + const cmd = ddb.send.mock.calls[0][0] as UpdateCommand; + expect(cmd).toBeInstanceOf(UpdateCommand); + expect(cmd.input.UpdateExpression).toContain('REMOVE rollup_posted_at'); + expect(cmd.input.Key).toMatchObject({ sub_issue_id: '#meta', orchestration_id: 'orch_1' }); + // No conditional — a no-op when already absent. 
+ expect(cmd.input.ConditionExpression).toBeUndefined(); + }); +}); + +describe('claimCommentAck — exactly-once per comment (#247 UX.20 redelivery dedup)', () => { + test('first delivery wins → true, conditional create-once on a per-comment SK + TTL', async () => { + const ddb = { send: jest.fn().mockResolvedValueOnce({}) }; + const won = await claimCommentAck(ddb as never, TABLE, 'orch_1', 'cmt-9', NOW, 1781800000); + expect(won).toBe(true); + const cmd = ddb.send.mock.calls[0][0] as UpdateCommand; + expect(cmd).toBeInstanceOf(UpdateCommand); + expect(cmd.input.Key).toMatchObject({ orchestration_id: 'orch_1', sub_issue_id: 'ack#cmt-9' }); + expect(cmd.input.ConditionExpression).toContain('attribute_not_exists(orchestration_id)'); + expect(cmd.input.ExpressionAttributeValues).toMatchObject({ ':ttl': 1781800000 }); + // ``ttl`` is a DynamoDB reserved keyword — must be aliased, else the write + // 400s with ValidationException (live-caught: the unaliased form errored + // out the whole handler, silently dropping the comment). 
+ expect(cmd.input.ExpressionAttributeNames).toMatchObject({ '#ttl': 'ttl' }); + expect(cmd.input.UpdateExpression).toContain('#ttl'); + }); + + test('redelivery of the same comment loses (ConditionalCheckFailed) → false, no throw', async () => { + const ddb = { send: jest.fn().mockRejectedValueOnce(Object.assign(new Error('c'), { name: 'ConditionalCheckFailedException' })) }; + expect(await claimCommentAck(ddb as never, TABLE, 'orch_1', 'cmt-9', NOW, 1781800000)).toBe(false); + }); + + test('non-conditional error propagates', async () => { + const ddb = { send: jest.fn().mockRejectedValueOnce(new Error('throttle')) }; + await expect(claimCommentAck(ddb as never, TABLE, 'orch_1', 'cmt-9', NOW, 1781800000)).rejects.toThrow('throttle'); + }); +}); + +describe('loadOrchestration — marker rows are not children (#247 UX.20)', () => { + test('excludes ack#<commentId> marker rows from children (only real sub-issues count)', async () => { + const ddb = { + send: jest.fn().mockResolvedValueOnce({ + Items: [ + { orchestration_id: 'orch_1', sub_issue_id: '#meta', parent_linear_issue_id: 'P', linear_workspace_id: 'WS', repo: 'o/r', platform_user_id: 'u1', child_count: 2 }, + { orchestration_id: 'orch_1', sub_issue_id: 'uuid-A', depends_on: [], child_status: 'succeeded' }, + { orchestration_id: 'orch_1', sub_issue_id: 'orch_1__integration', depends_on: ['uuid-A'], child_status: 'succeeded' }, + { orchestration_id: 'orch_1', sub_issue_id: 'ack#cmt-9', acked_at: NOW, ttl: 1781800000 }, // marker — must NOT be a child + ], + }), + }; + const snap = await loadOrchestration(ddb as never, TABLE, 'orch_1'); + expect(snap).not.toBeNull(); + const ids = snap!.children.map((c) => c.sub_issue_id).sort(); + expect(ids).toEqual(['orch_1__integration', 'uuid-A']); // ack# row excluded; integration kept + }); +}); + +describe('findOrchestrationChildByBranch (#305 A6)', () => { + test('queries the ChildBranchIndex GSI by branch and returns the child row', async () => { + const ddb = makeDdb(); 
+ const row = { orchestration_id: 'orch_1', sub_issue_id: 'SUB-A', child_branch_name: 'bgagent/01T/abca-1-x' }; + ddb.send.mockResolvedValueOnce({ Items: [row] }); + + const result = await findOrchestrationChildByBranch( + ddb as never, TABLE, 'ChildBranchIndex', 'bgagent/01T/abca-1-x', + ); + + expect(result).toEqual(row); + const cmd = ddb.send.mock.calls[0][0] as QueryCommand; + expect(cmd).toBeInstanceOf(QueryCommand); + expect(cmd.input.IndexName).toBe('ChildBranchIndex'); + expect(cmd.input.KeyConditionExpression).toBe('child_branch_name = :b'); + expect(cmd.input.ExpressionAttributeValues).toEqual({ ':b': 'bgagent/01T/abca-1-x' }); + expect(cmd.input.Limit).toBe(1); + }); + + test('returns null when no released child owns the branch (non-orchestration PR)', async () => { + const ddb = makeDdb(); + ddb.send.mockResolvedValueOnce({ Items: [] }); + const result = await findOrchestrationChildByBranch( + ddb as never, TABLE, 'ChildBranchIndex', 'feature/some-human-branch', + ); + expect(result).toBeNull(); + }); +}); + +describe('extendOrchestration — add nodes to an already-seeded epic', () => { + const PARENT = 'parent-issue-1'; + const ORCH = deriveOrchestrationId(PARENT); + + /** A loadOrchestration Query response: meta + existing child rows. */ + function existing(children: Array<{ id: string; deps?: string[]; status: string }>) { + const meta = { + orchestration_id: ORCH, + sub_issue_id: '#meta', + parent_linear_issue_id: PARENT, + linear_workspace_id: 'WS', + repo: 'o/r', + child_count: children.length, + platform_user_id: 'u1', + created_at: NOW, + updated_at: NOW, + }; + const rows = children.map((c) => ({ + orchestration_id: ORCH, + sub_issue_id: c.id, + parent_linear_issue_id: PARENT, + linear_workspace_id: 'WS', + repo: 'o/r', + depends_on: c.deps ?? 
[], + child_status: c.status, + created_at: NOW, + updated_at: NOW, + })); + return { Items: [meta, ...rows] }; + } + + function extendParams(graph: SubIssueNode[]) { + return { + tableName: TABLE, + parentLinearIssueId: PARENT, + linearWorkspaceId: 'WS', + repo: 'o/r', + graph, + now: NOW, + }; + } + + test('adds a NEW node blocked-by a finished node → releasable immediately', async () => { + const ddb = makeDdb(); + // load (Query) → existing A succeeded; then BatchWrite (new rows) + Update (meta). + ddb.send + .mockResolvedValueOnce(existing([{ id: 'A', status: 'succeeded' }])) + .mockResolvedValueOnce({}) // BatchWrite + .mockResolvedValueOnce({}); // Update meta + // Graph now has A (existing) + B (new, depends on the finished A). + const result = await extendOrchestration({ + ddb: ddb as never, + ...extendParams([child('A'), child('B', ['A'], { title: 'UI' })]), + }); + expect(result.addedSubIssueIds).toEqual(['B']); + expect(result.releasableSubIssueIds).toEqual(['B']); // A already succeeded + // The new row was written as 'ready' (deps satisfied). 
+ const bw = ddb.send.mock.calls.find((c) => c[0] instanceof BatchWriteCommand)![0]; + const written = (bw.input.RequestItems[TABLE] as Array<{ PutRequest: { Item: { sub_issue_id: string; child_status: string } } }>)[0].PutRequest.Item; + expect(written.sub_issue_id).toBe('B'); + expect(written.child_status).toBe('ready'); + }); + + test('adds a NEW node whose predecessor is NOT yet done → blocked, not releasable', async () => { + const ddb = makeDdb(); + ddb.send + .mockResolvedValueOnce(existing([{ id: 'A', status: 'released' }])) // A still running + .mockResolvedValueOnce({}) + .mockResolvedValueOnce({}); + const result = await extendOrchestration({ + ddb: ddb as never, + ...extendParams([child('A'), child('B', ['A'])]), + }); + expect(result.addedSubIssueIds).toEqual(['B']); + expect(result.releasableSubIssueIds).toEqual([]); // A not succeeded → B blocked + }); + + // #247 UX.4: a new node with NO declared dependency stacks on the epic TIP + // (the leaf frontier of existing nodes), not bare main. + test('new UNCONSTRAINED node → implicit depends_on = epic tip (linear chain → its leaf)', async () => { + const ddb = makeDdb(); + ddb.send + .mockResolvedValueOnce(existing([ + { id: 'A', status: 'succeeded' }, + { id: 'B', deps: ['A'], status: 'succeeded' }, // B is the leaf / tip + ])) + .mockResolvedValueOnce({}) + .mockResolvedValueOnce({}); + // New node C declares NO dependency. + const result = await extendOrchestration({ + ddb: ddb as never, + ...extendParams([child('A'), child('B', ['A']), child('C', [], { title: 'New step' })]), + }); + expect(result.addedSubIssueIds).toEqual(['C']); + const bw = ddb.send.mock.calls.find((c) => c[0] instanceof BatchWriteCommand)![0]; + const written = (bw.input.RequestItems[TABLE] as Array<{ PutRequest: { Item: { sub_issue_id: string; depends_on: string[]; child_status: string } } }>)[0].PutRequest.Item; + expect(written.sub_issue_id).toBe('C'); + // Stacked on the tip B (not []), and B succeeded so C is releasable. 
+ expect(written.depends_on).toEqual(['B']); + expect(written.child_status).toBe('ready'); + expect(result.releasableSubIssueIds).toEqual(['C']); + }); + + test('new unconstrained node, tip NOT done → blocked on the tip (stacks, waits)', async () => { + const ddb = makeDdb(); + ddb.send + .mockResolvedValueOnce(existing([{ id: 'A', status: 'released' }])) // tip A still running + .mockResolvedValueOnce({}) + .mockResolvedValueOnce({}); + const result = await extendOrchestration({ + ddb: ddb as never, + ...extendParams([child('A'), child('B', [])]), + }); + const bw = ddb.send.mock.calls.find((c) => c[0] instanceof BatchWriteCommand)![0]; + const written = (bw.input.RequestItems[TABLE] as Array<{ PutRequest: { Item: { depends_on: string[]; child_status: string } } }>)[0].PutRequest.Item; + expect(written.depends_on).toEqual(['A']); // stacked on the tip + expect(written.child_status).toBe('blocked'); + expect(result.releasableSubIssueIds).toEqual([]); + }); + + test('new unconstrained node on a fan-out epic → diamond implicit deps (all leaves)', async () => { + const ddb = makeDdb(); + ddb.send + .mockResolvedValueOnce(existing([ + { id: 'R', status: 'succeeded' }, + { id: 'B', deps: ['R'], status: 'succeeded' }, + { id: 'C', deps: ['R'], status: 'succeeded' }, // B and C are both leaves + ])) + .mockResolvedValueOnce({}) + .mockResolvedValueOnce({}); + const result = await extendOrchestration({ + ddb: ddb as never, + ...extendParams([child('R'), child('B', ['R']), child('C', ['R']), child('D', [])]), + }); + const bw = ddb.send.mock.calls.find((c) => c[0] instanceof BatchWriteCommand)![0]; + const written = (bw.input.RequestItems[TABLE] as Array<{ PutRequest: { Item: { sub_issue_id: string; depends_on: string[] } } }>)[0].PutRequest.Item; + expect(written.depends_on).toEqual(['B', 'C']); // diamond over both leaves + expect(result.releasableSubIssueIds).toEqual(['D']); // both succeeded + }); + + test('new node WITH an explicit dependency keeps it (user intent wins 
over the tip)', async () => { + const ddb = makeDdb(); + ddb.send + .mockResolvedValueOnce(existing([ + { id: 'A', status: 'succeeded' }, + { id: 'B', deps: ['A'], status: 'succeeded' }, // tip would be B + ])) + .mockResolvedValueOnce({}) + .mockResolvedValueOnce({}); + // New node C explicitly depends on A (not the tip B). + const result = await extendOrchestration({ + ddb: ddb as never, + ...extendParams([child('A'), child('B', ['A']), child('C', ['A'])]), + }); + const bw = ddb.send.mock.calls.find((c) => c[0] instanceof BatchWriteCommand)![0]; + const written = (bw.input.RequestItems[TABLE] as Array<{ PutRequest: { Item: { depends_on: string[] } } }>)[0].PutRequest.Item; + expect(written.depends_on).toEqual(['A']); // explicit edge preserved, NOT overridden to ['B'] + expect(result.addedSubIssueIds).toEqual(['C']); + }); + + test('no new nodes (graph unchanged) → no-op, no writes', async () => { + const ddb = makeDdb(); + ddb.send.mockResolvedValueOnce(existing([{ id: 'A', status: 'succeeded' }])); + const result = await extendOrchestration({ + ddb: ddb as never, + ...extendParams([child('A')]), + }); + expect(result.addedSubIssueIds).toEqual([]); + // Only the load Query ran — no BatchWrite/Update. + expect(ddb.send.mock.calls.filter((c) => c[0] instanceof BatchWriteCommand)).toHaveLength(0); + expect(ddb.send.mock.calls.filter((c) => c[0] instanceof UpdateCommand)).toHaveLength(0); + }); + + test('a new edge that introduces a CYCLE → rejected, nothing written', async () => { + const ddb = makeDdb(); + ddb.send.mockResolvedValueOnce(existing([ + { id: 'A', status: 'succeeded' }, { id: 'B', deps: ['A'], status: 'succeeded' }, + ])); + // New node C depends on B, but the augmented graph also makes A depend on C → cycle. 
+ const result = await extendOrchestration({ + ddb: ddb as never, + ...extendParams([child('A', ['C']), child('B', ['A']), child('C', ['B'])]), + }); + expect(result.rejected?.reason).toBe('cycle'); + expect(result.addedSubIssueIds).toEqual([]); + expect(ddb.send.mock.calls.filter((c) => c[0] instanceof BatchWriteCommand)).toHaveLength(0); + }); + + test('no existing orchestration (load returns nothing) → empty result', async () => { + const ddb = makeDdb(); + ddb.send.mockResolvedValueOnce({ Items: [] }); // loadOrchestration → null + const result = await extendOrchestration({ + ddb: ddb as never, + ...extendParams([child('A')]), + }); + expect(result.addedSubIssueIds).toEqual([]); + }); + + test('bumps meta child_count by the number of added nodes', async () => { + const ddb = makeDdb(); + ddb.send + .mockResolvedValueOnce(existing([{ id: 'A', status: 'succeeded' }, { id: 'B', deps: ['A'], status: 'succeeded' }])) + .mockResolvedValueOnce({}) + .mockResolvedValueOnce({}); + await extendOrchestration({ + ddb: ddb as never, + ...extendParams([child('A'), child('B', ['A']), child('C', ['A']), child('D', ['B'])]), + }); + const upd = ddb.send.mock.calls.find((c) => c[0] instanceof UpdateCommand)![0]; + // 2 existing + 2 new (C, D) = 4. + expect(upd.input.ExpressionAttributeValues[':n']).toBe(4); + }); + + test('clears rollup_posted_at so a re-completed (post-completion) epic can rollup again (#247 UX.4)', async () => { + const ddb = makeDdb(); + ddb.send + .mockResolvedValueOnce(existing([{ id: 'A', status: 'succeeded' }])) + .mockResolvedValueOnce({}) + .mockResolvedValueOnce({}); + await extendOrchestration({ + ddb: ddb as never, + ...extendParams([child('A'), child('B', [])]), + }); + const upd = ddb.send.mock.calls.find((c) => c[0] instanceof UpdateCommand)![0]; + // The meta update REMOVEs rollup_posted_at so the reconciler can re-claim + // and re-settle the parent state when the added node finishes. 
+ expect(upd.input.UpdateExpression).toContain('REMOVE rollup_posted_at'); + }); +}); diff --git a/cdk/test/handlers/shared/screenshot-url.test.ts b/cdk/test/handlers/shared/screenshot-url.test.ts index 1a911b28..954bb4f5 100644 --- a/cdk/test/handlers/shared/screenshot-url.test.ts +++ b/cdk/test/handlers/shared/screenshot-url.test.ts @@ -17,7 +17,28 @@ * SOFTWARE. */ -import { buildScreenshotKey, encodeMarkdownUrl, isAllowedScreenshotUrl } from '../../../src/handlers/shared/screenshot-url'; +import { buildScreenshotKey, encodeMarkdownUrl, extractTaskIdFromBranch, isAllowedScreenshotUrl } from '../../../src/handlers/shared/screenshot-url'; + +describe('extractTaskIdFromBranch (#247 — screenshot → parent panel)', () => { + test('pulls the taskId from a standard ABCA branch (2nd segment)', () => { + expect(extractTaskIdFromBranch('bgagent/01TASKID123/abca-300-book-with-points')) + .toBe('01TASKID123'); + }); + test('tolerates extra trailing segments (taskId is always 2nd)', () => { + expect(extractTaskIdFromBranch('bgagent/01TASKID123/abca-300/extra')).toBe('01TASKID123'); + }); + test('null for a non-ABCA branch (human / fork default / too few segments)', () => { + expect(extractTaskIdFromBranch('main')).toBeNull(); + expect(extractTaskIdFromBranch('feature/foo')).toBeNull(); + expect(extractTaskIdFromBranch('bgagent')).toBeNull(); + expect(extractTaskIdFromBranch('bgagent//slug')).toBeNull(); // empty taskId + }); + test('null for empty / nullish', () => { + expect(extractTaskIdFromBranch('')).toBeNull(); + expect(extractTaskIdFromBranch(undefined)).toBeNull(); + expect(extractTaskIdFromBranch(null)).toBeNull(); + }); +}); describe('buildScreenshotKey', () => { test('produces a screenshots/<owner>_<repo>/<sha>-<id>-<suffix>.png shape', () => { diff --git a/cdk/test/handlers/shared/workflows.test.ts b/cdk/test/handlers/shared/workflows.test.ts index 53b6d75c..7e4d4977 100644 --- a/cdk/test/handlers/shared/workflows.test.ts +++ 
b/cdk/test/handlers/shared/workflows.test.ts @@ -72,10 +72,27 @@ describe('resolveWorkflowRef', () => { expect(resolveWorkflowRefError(undefined)).toBeNull(); }); - test('falls back to the platform default when ref is absent', () => { + test('falls back to the repo-less platform default when ref is absent AND no repo', () => { expect(resolveWorkflowRef(undefined)).toEqual({ id: DEFAULT_WORKFLOW_ID, version: '1.0.0' }); expect(resolveWorkflowRef(null)).toEqual({ id: DEFAULT_WORKFLOW_ID, version: '1.0.0' }); expect(resolveWorkflowRef('')).toEqual({ id: DEFAULT_WORKFLOW_ID, version: '1.0.0' }); + // explicit hasRepo=false is the same as omitting it + expect(resolveWorkflowRef(undefined, false)).toEqual({ id: DEFAULT_WORKFLOW_ID, version: '1.0.0' }); + }); + + test('falls back to coding/new-task-v1 when ref is absent BUT a repo is present (#296 regression fix)', () => { + // A repo-bound task with no explicit workflow_ref is a coding task — it must + // get the disciplined coding workflow (edit/commit/push, platform ensure_pr), + // NOT the freeform repo-less default/agent-v1 that broke pr_url/screenshot/stacking. + expect(resolveWorkflowRef(undefined, true)).toEqual({ id: 'coding/new-task-v1', version: '1.0.0' }); + expect(resolveWorkflowRef(null, true)).toEqual({ id: 'coding/new-task-v1', version: '1.0.0' }); + expect(resolveWorkflowRef('', true)).toEqual({ id: 'coding/new-task-v1', version: '1.0.0' }); + }); + + test('an EXPLICIT ref is honored regardless of hasRepo (repo-less workflow against a repo)', () => { + // hasRepo only steers the absent-ref fallback; an explicit ref always wins. 
+ expect(resolveWorkflowRef('default/agent-v1', true)).toEqual({ id: 'default/agent-v1', version: '1.0.0' }); + expect(resolveWorkflowRef('knowledge/web-research-v1', true)).toEqual({ id: 'knowledge/web-research-v1', version: '1.0.0' }); }); test('returns null for an unknown but well-formed ref', () => { @@ -194,7 +211,9 @@ describe('CDK descriptors stay in sync with agent/workflows/**', () => { const configPy = fs.readFileSync( path.resolve(__dirname, '../../../../agent/src/config.py'), 'utf8', ); - const match = configPy.match(/_KNOWN_WRITEABLE_WORKFLOW_IDS\s*=\s*frozenset\(\(([^)]*)\)\)/s); + // Tolerate ruff's formatting of the frozenset: it may render single-line + // ``frozenset(("a", "b"))`` or multi-line with whitespace between the parens. + const match = configPy.match(/_KNOWN_WRITEABLE_WORKFLOW_IDS\s*=\s*frozenset\(\s*\(([^)]*)\)\s*\)/s); expect(match).not.toBeNull(); const agentWriteable = new Set( [...match![1].matchAll(/"([^"]+)"/g)].map(m => m[1]), diff --git a/cdk/test/integration/orchestration-e2e.test.ts b/cdk/test/integration/orchestration-e2e.test.ts new file mode 100644 index 00000000..6bdc908a Binary files /dev/null and b/cdk/test/integration/orchestration-e2e.test.ts differ diff --git a/cdk/test/stacks/agent.test.ts b/cdk/test/stacks/agent.test.ts index 22a295fd..5b1dafd6 100644 --- a/cdk/test/stacks/agent.test.ts +++ b/cdk/test/stacks/agent.test.ts @@ -36,16 +36,18 @@ describe('AgentStack', () => { expect(template).toBeDefined(); }); - test('creates exactly 18 DynamoDB tables', () => { + test('creates exactly 19 DynamoDB tables', () => { // task, task-events, repo, user-concurrency, webhook, task-nudges, // task-approvals (Cedar HITL V2), // slack-installation, slack-user-mapping, // linear-project-mapping, linear-user-mapping, linear-webhook-dedup, // linear-workspace-registry (added in Phase 2.0b for OAuth bookkeeping), + // github-webhook-dedup (added by GitHubScreenshotIntegration), // jira-project-mapping, jira-user-mapping, 
jira-workspace-registry, - // jira-webhook-dedup (added for the Jira Cloud integration), - // github-webhook-dedup (added by GitHubScreenshotIntegration on main) - template.resourceCountIs('AWS::DynamoDB::Table', 18); + // jira-webhook-dedup (added for the Jira Cloud integration on main), + // orchestration (added by #247 — parent/sub-issue DAG state). + // = 14 shared/base + 4 Jira + 1 orchestration = 19. + template.resourceCountIs('AWS::DynamoDB::Table', 19); }); test('creates TaskApprovalsTable with user_id-status-index GSI', () => { diff --git a/cli/src/commands/linear.ts b/cli/src/commands/linear.ts index 475dbebe..e2326e4c 100644 --- a/cli/src/commands/linear.ts +++ b/cli/src/commands/linear.ts @@ -109,9 +109,9 @@ export function renderLinearAppTemplate(opts: LinearAppTemplateOptions = {}): st '', 'Click Save, copy the Client ID and Client Secret, then return here.', '', - 'Why these specific fields:', - ' • GitHub username with [bot] suffix gates the actor=app agent flow.', - ' Without it, Linear surfaces a misleading "Invalid redirect_uri" error.', + 'Non-obvious gotchas (Linear explains the fields themselves inline):', + ' • GitHub username is REQUIRED for actor=app — leaving it blank surfaces a', + ' misleading "Invalid redirect_uri" error, not a "missing username" one.', ' • Webhooks toggle must be ON for the same reason; the URL value is unused', ' by the OAuth dance and can be a placeholder.', ' • Wildcard callback URLs are not accepted by Linear; list each URL fully.', @@ -369,7 +369,9 @@ export function makeLinearCommand(): Command { console.log('In Linear → Settings → API → Webhooks → + New webhook, paste:'); console.log(); console.log(` URL: ${webhookUrl}`); - console.log(' Resource types: Issues'); + console.log(' Resource types: Issues, Comments'); + console.log(' (Issues = label-triggered tasks + epic orchestration;'); + console.log(' Comments = @bgagent re-iteration on a sub-issue PR)'); console.log(' Team: (whichever team owns the projects you 
map)'); console.log(); console.log('Save, then open the webhook detail page and copy the signing secret'); diff --git a/docs/decisions/ADR-001-stacked-pull-requests.md b/docs/decisions/ADR-001-stacked-pull-requests.md index 99679834..621eb9ac 100644 --- a/docs/decisions/ADR-001-stacked-pull-requests.md +++ b/docs/decisions/ADR-001-stacked-pull-requests.md @@ -38,15 +38,20 @@ This gives reviewers and agents immediate orientation. The "Next" section is opt - PR 1 targets `main` - PR N targets PR N-1's branch -- Final PR merges the full stack to `main` +- PRs merge **bottom-up, one at a time** — each to its current base — NOT by + merging the top PR and having the whole stack land at once. See §8 for the + merge sequence and GitHub's auto-retarget-on-delete behaviour. ``` main - └── feat/first-concern (PR 1) - └── feat/second-concern (PR 2) - └── feat/third-concern (PR 3 → merge to main) + └── feat/first-concern (PR 1, base: main) + └── feat/second-concern (PR 2, base: PR 1's branch) + └── feat/third-concern (PR 3, base: PR 2's branch) ``` +Merge order is PR 1 → PR 2 → PR 3, each landing on `main` after its +predecessor (§8), not a single "merge the tip" operation. + ### 3. Self-contained reviewability Each PR: @@ -91,8 +96,9 @@ When a lower PR changes after review feedback: ### 8. Merge semantics -The default topology is a **classic stack** — each PR targets its predecessor's branch. When an early PR merges to `main` before later PRs are reviewed: +The default topology is a **classic stack** — each PR targets its predecessor's branch. Merges proceed **bottom-up, one PR at a time**: there is no single operation that merges the tip and lands the whole stack. When an early PR merges to `main` before later PRs are reviewed: +0. **Deleting the merged branch is what triggers GitHub's auto-retarget.** When PR N's branch is deleted after merge, GitHub automatically retargets the PRs that pointed at it onto PR N's base (`main`). 
The merge *itself* does not retarget — the branch deletion does. If you keep the merged branch around, the child PRs keep showing the already-merged commits in their diff. Steps 1–3 are the manual fallback when auto-retarget doesn't apply (branch kept, base is a non-deleted intermediate, etc.). 1. **Retarget** all PRs that pointed at the merged branch to `main` (or to the next unmerged predecessor). Use `gh pr edit <N> --base main` or GitHub's "Retarget" button. 2. **Rebase** each retargeted PR onto its new base so the diff is clean — use `git rebase --skip` for commits whose content is already in main via the merged predecessor. 3. **Force-push with lease** (`--force-with-lease`) so the PR diff on GitHub shows only net-new changes, not already-merged content. @@ -104,6 +110,16 @@ After retargeting, the remaining PRs form a shorter stack rooted on `main`. This **When the stack diverges:** If review feedback on PR 2 invalidates assumptions in PRs 3+, prefer closing and re-opening the affected PRs over accumulating fixup commits that obscure intent. The parent issue remains the source of truth for what shipped and what remains. +### 9. Agent-orchestrated stacks (issue #247) + +§1–§8 describe a **human-authored** stack. ABCA's Linear orchestration (#247) builds the same topology **automatically** from a parent issue's sub-issue DAG, with three differences reviewers should know: + +- **Base branch is threaded, not retargeted by hand.** When the orchestrator releases a stacked child, it passes the predecessor's branch as the child's `base_branch` (persisted on the `TaskRecord`); the agent creates the child branch *from* that base and opens the PR against it. The classic stack of §2 is produced up front, so the §8 retarget dance is only needed if a human merges mid-run. A child is released only once all its predecessors have **succeeded** (task-complete), not merged. 
+- **Diamonds, not just linear stacks.** A sub-issue with multiple predecessors (fan-in) cannot target two bases. The orchestrator branches it off `main` and **merges each predecessor branch into the child's branch** before the agent starts, so the child sees all predecessors' code. Linear chains still use the single-predecessor base-targeting of §2. +- **Merge is still human + bottom-up.** The orchestrator opens the stack; it does **not** merge. A human merges bottom-up per §8, and GitHub's delete-triggers-retarget (§8.0) collapses the remaining children onto `main`. The parent epic carries a live status block + rollup (it is the §1 "position statement" / §6 source-of-truth, maintained by the platform). + +**Open follow-up (#305 / A6):** §5 rebase discipline and the diamond re-merge above are *initial-creation* only — if a predecessor branch is **edited after** a dependent child already merged it in, the child goes stale. Automatic re-stack / re-merge on predecessor change is tracked in #305 (A6) and is not yet wired. + ## Consequences - (+) Each PR stays in the "reviewable without fatigue" window (~15–40 min) diff --git a/docs/decisions/ADR-017-linear-agent-session-interaction.md b/docs/decisions/ADR-017-linear-agent-session-interaction.md new file mode 100644 index 00000000..22364083 --- /dev/null +++ b/docs/decisions/ADR-017-linear-agent-session-interaction.md @@ -0,0 +1,201 @@ +# ADR-017: Linear agent-session as a future interaction channel + +**Status:** proposed +**Date:** 2026-06-17 + +## Context + +ABCA's Linear integration today triggers and reports work through a +**hand-rolled comment protocol** layered on Linear's generic Issue/Comment +webhooks: + +- **Trigger** — a string match on `@bgagent` in a `Comment` webhook body + (`parseCommentTrigger`), plus a label-add on an issue to seed a #247 + orchestration. 
+- **Acknowledgement** — emoji reactions managed by hand (👀 on receipt → + ✅/❌ on settle via `swapCommentReaction`/`swapIssueReaction`), threaded + replies (`replyToComment`), and a single maturing "epic panel" comment + edited in place (`upsertEpicPanel`). + +This protocol works and is now well-tested (see the #247 UX.1–23 series), +but the comment seam has been the single richest source of edge-case bugs: +reply `issueId` vs `parentId` rules, "parent comment must be top-level" +threading, webhook-redelivery reply spam, self-trigger loops from our own +`@bgagent` example text, and reaction/state flapping. Each was a +consequence of bolting an agent protocol onto a human-comment surface. + +Linear now ships a first-class **Agents API** (agent-session model): +delegate or @mention an installed agent app → a typed `AgentSessionEvent` +webhook (`created`/`prompted`) → the agent emits typed **activities** +(`thought` / `action` / `response` / `elicitation` / `error`) and Linear +derives a native session **state** (`pending`/`active`/`awaitingInput`/ +`error`/`complete`/`stale`) with a built-in "thinking"/activity UI. + +Two facts establish the starting point: + +1. **The auth migration is already done.** ABCA's OAuth flow + (`cli/src/linear-oauth.ts`) requests + `read write app:assignable app:mentionable` with `actor=app`. Verified + live on `backgroundagent-dev` (2026-06-17): both deployed workspace + tokens (`bgagent-linear-oauth-maguireb`, `…-demo-abca`) carry exactly + that scope. **bgagent is already installed as an app actor** — it is + assignable, mentionable, and delegatable today. No auth work is needed + to adopt agent sessions. +2. **Linear is an interaction layer, not compute.** Adopting agent sessions + changes *how we are triggered* and *how status is shown*. All compute + (clone, run the coding agent, build/test, open the PR) still runs on + ABCA's own AgentCore Runtime + ECS. 
The switch offloads nothing to + Linear and does not change the AWS architecture or cost model. + +## Decision + +**Adopt the Linear agent-session model as an ADDITIONAL, flag-gated +trigger/ack channel once Linear marks the Agents API GA — not now, and not +as a replacement for the comment path.** + +The orchestration **engine** is channel-agnostic by design (the #247 +trigger-agnostic seams): graph discovery, the reconciler, the epic +panel/rollup, base-branch stacking, and the cascade do not care how a task +was triggered. Agent sessions slot in as a new front end to that engine, +mapping cleanly onto what we already built: + +| ABCA today (hand-rolled) | Linear agent-session (native) | +|-------------------------------------|-----------------------------------| +| `@bgagent` string match in comment | `created` AgentSessionEvent (mention/delegate) | +| 👀 reaction "on it" | `thought` activity | +| 🤖 Starting / 🔗 PR opened | `action` activity (+ result) | +| ✅ Updated / completion | `response` activity | +| ❌ failure reply | `error` activity | +| "reply with guidance" retry (UX.9) | `elicitation` + `prompted` webhook + conversation history | +| panel header state (🔄/✅/⚠️) | session state (active/complete/error) | + +### Preview-API spike (2026-06-17, UX.24) + +A time-boxed, no-infra spike validated the API surface against the deployed +**app-actor** token (`bgagent`, workspace `maguireb`) — read-only schema +probes + mutation input validation, no migration code: + +- **API reachable by our token.** Introspection confirms `agentActivityCreate`, + `agentSessionCreateOnIssue`/`OnComment`/`Create`, `AgentSession` (fields incl. + `status`, `issue`, `comment`, `appUser`), and `AgentActivityType` = + `thought, action, response, elicitation, error, prompt` — exactly the docs. 
+- **Activity input shape verified callable.** `agentActivityCreate(input: + {agentSessionId, content: JSONObject, signal, ephemeral})` accepts our + `{type:'thought', body}` content — a call failed only on session-id lookup, + not schema/enablement, so the ack-emission half of the loop is proven. +- **BLOCKER (config, not code):** `agentSessionCreateOnIssue` returns + `"Agent sessions are not enabled for this application."` The bgagent OAuth + app has the scopes + `actor=app` but has **not been enabled as an agent** in + its Linear Application settings. Per docs, enabling = edit the app at + *Settings → API → Applications*, enable webhooks, and select the **"Agent + session events"** category. App-owner action; no waitlist mentioned. +- **The 10s-ack-vs-long-compute risk is therefore NOT yet proven end-to-end** — + it needs a real `agentSessionId`, which is gated on the enablement toggle + above. The pieces it depends on (immediate `thought` ack, then later + `action`/`response` activities) are individually confirmed callable; the + remaining unknown is purely whether Linear marks the session unresponsive if + our spawn exceeds 10s after the initial `thought` (docs say the `thought` + ack within 10s is sufficient, which our processor can emit synchronously + before the async spawn — same shape as today's 👀). + +Net (first pass): the spike de-risked reachability + the activity model and +pinpointed the single enablement step, without committing to migration. + +**Spike re-run (2026-06-17, after the app owner enabled "Agent session events") +— the core risk is RESOLVED end-to-end:** + +- `agentSessionCreateOnIssue` now succeeds → session `status: active`. +- **The 10s-vs-long-compute question is answered:** emit a `thought` at t+0 + (status `active`), then **wait 14s with no further activity** → session + **stays `active`** (not stale/unresponsive). 
The 10s rule governs only the + *initial* ack; once a `thought` lands, an arbitrarily long gap before the + next activity is fine. ABCA's webhook can emit the `thought` synchronously + (exactly like today's 👀) and let the >10s async spawn proceed — **no + architectural conflict.** +- **Full lifecycle derives correctly**, matching the mapping table above: + `thought`→active, `action`→active, `action`+result→active, + `response`→**complete**; on a second session `elicitation`→**awaitingInput**, + `error`→**error**. All five emittable types accepted; states auto-derive + from the last activity. (`AgentActivityContent` is a union — + `AgentActivityActionContent`/`…ElicitationContent`/`…ErrorContent`/etc. — so + each type persists as a distinct typed record.) + +Conclusion: the **trigger/ack half is fully validated** against the live +Preview API. The remaining gate for an actual additive channel is unchanged — +it's the per-issue-session vs. cross-issue-epic-rollup gap (engine stays ours) +plus the Preview→GA stability wait, NOT any technical blocker we found. The +spike issues were created + deleted; no migration code written. + +> **⚠️ The enablement toggle is NOT a side-effect-free no-op (2026-06-17).** +> Leaving "Agent session events" ON after the spike means **every `@bgagent` +> mention now also spawns a native agent session** that Linear expects answered +> via `agentActivityCreate` within 10s. Our deployed code answers on the +> **comment** path (👀 + reply) and emits no session activity, so the session +> gets zero activities, goes `stale`, and Linear surfaces a misleading +> **"bgagent did not respond"** banner — even though the comment reply posted +> fine (observed live on ABCA-310: reply at t+2s, session `stale`, activities +> `[]`). **Consequence for phasing:** adoption is *not* "additive alongside the +> comment path for free" — once the toggle is on, mentions route to sessions +> and the adapter MUST emit activities or every mention looks dead.
So the +> toggle stays **OFF** until the flag-gated adapter (Phase 2 below) ships in the +> same change that flips it. Interim action after the spike: **turn the toggle +> off** (app owner, Settings → API → Applications). + +### Why a channel, not a rewrite + +- The win is **real but partial**: agent sessions retire the brittle + *trigger + per-comment ack* seam (the bug class above), but Linear agent + sessions are **per-issue delegations with no native cross-issue epic + rollup**. The #247 parent-epic panel, fan-out integration node, dependency + cascade, and base-branch stacking stay ABCA's responsibility either way — + so roughly half of the recent bug classes (panel settle, cross-issue + concurrency) are unaffected by the migration. +- The Agents API is a **Developer Preview** (confirmed against + `developers.linear.app`, 2026-06-17): "in active development… may change + before GA." Ripping out a working, now-hardened comment path to depend on + an unstable API is the wrong trade today. +- Treating it as an additive channel behind a flag (per ADR-006) lets us + reuse the channel-agnostic engine, run both paths side by side during + evaluation, and revert via the flag if the Preview API shifts. + +## Consequences + +- **Positive:** removes the highest-friction seam (string-match trigger + + hand-rolled threading/reactions); native progress UI; conversation-history + retry replaces our bespoke loop; no auth work (already app-actor). +- **Negative / risk:** Preview API churn; hard runtime constraints (webhook + receiver must return within ~5s; an activity or external URL must be + emitted within ~10s of `created` or the session is marked unresponsive) — + ABCA's task spawn is async and slower than 10s, so the `created` handler + must emit an immediate `thought` ack and hand off, exactly as the current + processor 👀s then spawns. 
+- **No-op surfaces:** the orchestration engine, panel/rollup renderer, + reconciler, cascade, and base-branch logic are untouched by this decision. + +## Phasing + +1. **Now (this ADR):** record the decision; auth verified; do not build. + Keep the hardened comment path as the sole Linear interaction channel. +2. **When Linear GAs the Agents API:** spike a flag-gated `agent-session` + trigger/ack adapter behind the existing channel-agnostic engine — + `created`→seed/iterate, activities↔our ack states — running in parallel + with the comment path on `backgroundagent-dev`. +3. **After evaluation:** if the native path is strictly better, default the + flag on and deprecate the `@bgagent` string-match trigger; keep the + panel/rollup engine. + +## Out of scope (this ADR) + +- Any implementation. This is a direction + go/no-go record only. +- Changes to the orchestration engine, OAuth/token storage (done, ADR-016 + governs pluggable identity), or the Slack/Jira channels. +- The Mode B planner (#299) — orthogonal. + +## References + +- `cli/src/linear-oauth.ts` — `actor=app`, `app:assignable`/`app:mentionable` +- `cdk/src/handlers/linear-webhook-processor.ts` — current comment trigger + acks +- ADR-006 (feature flags), ADR-015 (Jira integration), ADR-016 (pluggable identity and auth) +- Linear Agents API — `https://linear.app/developers/agents`, + `https://linear.app/developers/agent-interaction` (Developer Preview, fetched 2026-06-17) +- #247 UX.16–23 — the comment-path bug classes this would retire diff --git a/docs/design/SECURITY.md b/docs/design/SECURITY.md index 4066f578..464d8668 100644 --- a/docs/design/SECURITY.md +++ b/docs/design/SECURITY.md @@ -54,7 +54,7 @@ Input screening happens at two points in the pipeline, forming a defense-in-dept ### Submission-time screening - **Input validation** - Required fields, types, and size limits are enforced before any processing. Task descriptions are capped at 10,000 characters. 
-- **Bedrock Guardrails** - A `PROMPT_ATTACK` content filter at `MEDIUM` input strength screens task descriptions for prompt injection. +- **Bedrock Guardrails** - A `PROMPT_ATTACK` content filter at `MEDIUM` input strength screens task descriptions for prompt injection. `MEDIUM` is deliberate: `HIGH` (which also blocks LOW-confidence) false-positives on ordinary imperative task descriptions ("make no changes, just inspect…", "ignore the legacy config and migrate…"). A 2026-06 empirical pass against the live guardrail confirmed `MEDIUM` blocks the prompt-injection class (instructions to ignore/override/reveal the system prompt, exfiltrate credentials) while passing benign imperatives with no false positives. **Scope:** this filter catches *attacks on the model*, not *destructive-but-honest task requests* (e.g. "delete .github/workflows and force-push to main") — those are not prompt injection and are intentionally NOT this layer's job. They are caught downstream at the agent tool-use layer by the Cedar HITL gates (`force_push_main`, `write_git_internals`, `rm_rf_root`; see [CEDAR_HITL_GATES.md](./CEDAR_HITL_GATES.md)). Input screening + Cedar tool gates are complementary layers, not redundant. - **Attachment screening** - All attachments (images, text files, URLs) pass through security screening before reaching the agent. Images (PNG and JPEG only) are validated via magic bytes and dimension checks, then screened through Bedrock Guardrails (image content blocks). Text files and PDFs are extracted and screened through Bedrock Guardrails text content screening. URL attachments undergo SSRF protection (DNS resolution pinning, private IP blocking, redirect validation) and content screening during hydration. See [ATTACHMENTS.md](./ATTACHMENTS.md) for the full screening pipeline. - **Fail-closed** - If the Bedrock API is unavailable, submissions are rejected (HTTP 503). Unscreened content never reaches the agent. 
diff --git a/docs/guides/DEVELOPER_GUIDE.md b/docs/guides/DEVELOPER_GUIDE.md index 79dfdf39..4cef3144 100644 --- a/docs/guides/DEVELOPER_GUIDE.md +++ b/docs/guides/DEVELOPER_GUIDE.md @@ -81,12 +81,22 @@ new Blueprint(this, 'MyServiceBlueprint', { systemPromptOverrides: 'Extra instructions...', // appended to the platform prompt }, credentials: { githubTokenSecretArn: '...' }, // per-repo GitHub token secret - pipeline: { pollIntervalMs: 5000 }, // poll interval awaiting completion + pipeline: { + pollIntervalMs: 5000, // poll interval awaiting completion + buildCommand: 'npm run build && npm test', // build/test verification (default: mise run build) + lintCommand: 'npm run lint', // lint verification (default: mise run lint) + }, }); ``` If you use a custom `compute.runtimeArn` or `credentials.githubTokenSecretArn`, pass the ARNs to `TaskOrchestrator` via `additionalRuntimeArns` and `additionalSecretArns` so the Lambda has IAM permission. See [Repo onboarding](../design/REPO_ONBOARDING.md) for the full model. +#### Build-regression gating (important for non-mise repos) + +Before opening a PR, the agent runs a **build** and **lint** command in its cloud container — once on the clean clone (baseline) and again after its changes. If the build was green before and fails after, the task fails (a build-**regression** gate). This is a compile/test verification, **not** a deployment — your app's actual deploy stays in your own CI/CD after the PR merges. + +The command defaults to **`mise run build`** / **`mise run lint`**. A repo that uses [mise](https://mise.jdx.dev/) with `build` / `lint` tasks gets gating for free. A repo that uses npm, gradle, cargo, make, etc. **must set `pipeline.buildCommand`** (and optionally `lintCommand`) to its real command — otherwise the default `mise run build` finds no task, **build-regression gating is silently OFF, and a change that breaks the build still reports success**. 
When that happens the agent surfaces a `⚠️ Build-regression gating is OFF` warning on the PR so the gap is visible, but the fix is to configure the command. For #247 orchestration this matters doubly: dependent sub-issues stack onto a predecessor's branch, so an unverified broken predecessor propagates downstream. + Redeploy after changing Blueprints: `mise //cdk:deploy`. ### Customizing the agent image diff --git a/docs/guides/LINEAR_SETUP_GUIDE.md b/docs/guides/LINEAR_SETUP_GUIDE.md index 423bd63a..fe502ee5 100644 --- a/docs/guides/LINEAR_SETUP_GUIDE.md +++ b/docs/guides/LINEAR_SETUP_GUIDE.md @@ -65,6 +65,11 @@ bgagent linear webhook-info This prints the URL and values to paste into Linear. Open `https://linear.app/<slug>/settings/api/webhooks` and create the webhook with those values. +Under **Resource types**, enable both **Issues** and **Comments**: + +- **Issues** — label-triggered tasks and parent/sub-issue epic orchestration. +- **Comments** — the `@bgagent` re-iteration trigger: a reviewer comments `@bgagent <change>` on a sub-issue and ABCA updates that sub-issue's PR, then re-stacks its dependents. Without the Comments subscription this trigger silently never fires. + Then open the webhook detail page and copy the **signing secret** (`lin_wh_…`). ### 5. Tell ABCA the signing secret @@ -148,12 +153,61 @@ The fallback path keeps existing single-workspace deployments working without re **Trust model.** The `organizationId` in the body is attacker-controlled, but it only **selects** which secret to verify against; an attacker still needs the matching signing secret to forge a valid signature. Cross-workspace impersonation is prevented by the no-fallback-on-mismatch rule. 
+## Attachments and documents + +Beyond the issue title and description, Linear stores additional context the agent may need: + +- **Paperclip attachments** (PDFs, logs, spec files attached to an issue) +- **Project documents** (Linear's wiki-style docs attached to a project) +- **Comments posted after the task starts** (clarifications, approve / deny signals) + +ABCA does not pre-fetch this material into S3 or run it through Bedrock Guardrails — it stays in Linear, and the agent fetches it on demand at runtime via the Linear MCP. Concretely: + +- The webhook processor calls Linear's GraphQL API once per triggered issue to check for paperclip attachments and project documents. If anything is present it prepends a one-line hint (`Linear may have additional context for this issue: …`) to the task description, naming the relevant MCP tools. +- The agent's system prompt addendum tells it to call `mcp__linear-server__get_issue` for the full issue (including the `attachments` connection), `mcp__linear-server__get_attachment` per paperclip, `mcp__linear-server__list_documents` / `get_document` for project wikis, and `mcp__linear-server__list_comments` before opening the PR to pick up new comments. + +No additional setup is required — once Linear MCP is wired (steps above), this works automatically. Only embedded markdown images in the issue description (`![alt](https://…)`) are still pre-fetched and screened at task-creation time, because they enter the agent's context as URL attachments. + ## Usage - **Trigger a task**: apply the trigger label to an issue in a mapped Linear project. The issue title + description becomes the task description. - **Check status**: from the Linear issue (progress comments) or `bgagent list` / `bgagent status <task-id>`. - **Cancel**: `bgagent cancel <task-id>`. Removing the Linear label does not cancel a running task. 
+## Parent/sub-issue orchestration + +If you apply the trigger label to a **parent issue that has sub-issues**, ABCA orchestrates the whole epic instead of creating one task: + +1. **Discovery** — it reads the sub-issues and their `blocked by` / `blocking` relations, builds a dependency graph (DAG), and rejects cycles with a terminal comment on the parent. +2. **Dependency-ordered execution** — root sub-issues (no blockers) start immediately; a blocked sub-issue does not start until **all** its blockers reach terminal-success (a sub-issue that completes but fails its build does **not** release its dependents). Independent sub-issues run in parallel. +3. **Stacked PRs** — a sub-issue with a single predecessor branches from that predecessor's branch (so it sees its code before merge); a sub-issue with multiple predecessors branches from the default branch and merges all predecessor branches in. Review/merge the resulting stack bottom-up. +4. **Rollup** — when every sub-issue reaches a terminal state, ABCA posts an aggregate **rollup comment on the parent** (succeeded / failed / skipped counts + per-child status). Each sub-issue also gets its own final-status comment. +5. **Failure handling** — if a sub-issue fails (or is cancelled), its transitive dependents are **skipped** (never started); independent siblings still finish. The parent rollup reflects the partial outcome. + +Notes and current limitations: + +- The parent issue itself spawns **no task** — a human-authored sub-issue graph is treated as consent to execute. +- **No "cancel the whole epic" button yet.** Cancelling an individual sub-issue's task (`bgagent cancel <task-id>`) stops it and skips its dependents, but there is no single command to cancel a whole in-flight orchestration. Tracked as a follow-up. +- A scheduled backstop (every ~10 min) recovers sub-issues whose terminal events were lost during a transient outage, so a stalled orchestration self-heals rather than hanging. 
+- Multi-predecessor ("diamond") sub-issues merge their predecessors' branches at start time; if a predecessor is later edited in review, re-integration of the dependent is a tracked follow-up. + +## Parent/sub-issue orchestration + +If you apply the trigger label to a **parent issue that has sub-issues**, ABCA orchestrates the whole epic instead of creating one task: + +1. **Discovery** — it reads the sub-issues and their `blocked by` / `blocking` relations, builds a dependency graph (DAG), and rejects cycles with a terminal comment on the parent. +2. **Dependency-ordered execution** — root sub-issues (no blockers) start immediately; a blocked sub-issue does not start until **all** its blockers reach terminal-success (a sub-issue that completes but fails its build does **not** release its dependents). Independent sub-issues run in parallel. +3. **Stacked PRs** — a sub-issue with a single predecessor branches from that predecessor's branch (so it sees its code before merge); a sub-issue with multiple predecessors branches from the default branch and merges all predecessor branches in. Review/merge the resulting stack bottom-up. +4. **Rollup** — when every sub-issue reaches a terminal state, ABCA posts an aggregate **rollup comment on the parent** (succeeded / failed / skipped counts + per-child status). Each sub-issue also gets its own final-status comment. +5. **Failure handling** — if a sub-issue fails (or is cancelled), its transitive dependents are **skipped** (never started); independent siblings still finish. The parent rollup reflects the partial outcome. + +Notes and current limitations: + +- The parent issue itself spawns **no task** — a human-authored sub-issue graph is treated as consent to execute. +- **No "cancel the whole epic" button yet.** Cancelling an individual sub-issue's task (`bgagent cancel <task-id>`) stops it and skips its dependents, but there is no single command to cancel a whole in-flight orchestration. Tracked as a follow-up. 
+- A scheduled backstop (every ~10 min) recovers sub-issues whose terminal events were lost during a transient outage, so a stalled orchestration self-heals rather than hanging. +- Multi-predecessor ("diamond") sub-issues merge their predecessors' branches at start time; if a predecessor is later edited in review, re-integration of the dependent is a tracked follow-up. + ## Troubleshooting ### Webhook doesn't trigger a task @@ -181,11 +235,11 @@ If the failing event's `organizationId` doesn't match any registered workspace a ### "Invalid redirect_uri parameter for the application" during step 3 -Linear's misleading error for `actor=app` flows where the OAuth app config is incomplete. In your Linear app settings: +Linear's misleading error for `actor=app` flows where the OAuth app config is incomplete (it reports `Invalid redirect_uri` regardless of which required field is actually missing). In your Linear app settings, confirm: -- **GitHub username** must end with `[bot]` (e.g. `bgagent[bot]`) -- **Webhooks** toggle must be ON -- The Callback URL must be on a **single line** (line-wrapped URLs become two malformed entries Linear silently rejects) +- **GitHub username** is filled in (Linear's inline help describes the field and the `[bot]` suffix) — a blank value triggers this error. +- **Webhooks** toggle is ON. +- The Callback URL is on a **single line** (line-wrapped URLs become two malformed entries Linear silently rejects). Re-run `bgagent linear setup` after fixing. diff --git a/docs/guides/ROADMAP.md b/docs/guides/ROADMAP.md index 3e8aa438..119c42a1 100644 --- a/docs/guides/ROADMAP.md +++ b/docs/guides/ROADMAP.md @@ -85,6 +85,7 @@ What's shipped and what's coming next. - [x] **Slack integration** - @mention task submission, `bgagent slack link` / `setup`, file attachments on submit, threaded progress notifications. 
See [SLACK_SETUP_GUIDE.md](./SLACK_SETUP_GUIDE.md) - [x] **Linear integration** - Label-triggered tasks, `bgagent linear setup` / `link`, progress comments on issues. See [LINEAR_SETUP_GUIDE.md](./LINEAR_SETUP_GUIDE.md) +- [x] **Linear parent/sub-issue orchestration** - Label a parent issue with sub-issues and a `blocked by` graph; ABCA builds a DAG (rejecting cycles), creates child tasks in dependency order, gates blocked children until predecessors succeed, stacks child PRs on their predecessors' branches, and posts an aggregate rollup comment on the parent. A scheduled backstop recovers children whose terminal events were lost. See [LINEAR_SETUP_GUIDE.md](./LINEAR_SETUP_GUIDE.md#parentsub-issue-orchestration) (#247) - [x] **Jira integration** - Label-triggered tasks on Jira Cloud, `bgagent jira setup` / `map` / `link`, progress comments via the Jira REST v3 API. See [JIRA_SETUP_GUIDE.md](./JIRA_SETUP_GUIDE.md) and [ADR-015](../decisions/ADR-015-jira-integration.md) ### Observability diff --git a/docs/research/a4-stacked-base-branch-design.md b/docs/research/a4-stacked-base-branch-design.md new file mode 100644 index 00000000..eb4b4140 --- /dev/null +++ b/docs/research/a4-stacked-base-branch-design.md @@ -0,0 +1,74 @@ +# A4 — base-branch targeting design (#247) + +Decided: **stacking, no delay, full code visibility**, and **multi-dep +(diamond) is first-class** (not deferred). 
+ +## The uniform rule + +Every child branches so it *sees all its predecessors' code* without +waiting for a human merge: + +| Child shape | Base branch | Mechanism | PR diff | +|---|---|---|---| +| **0 predecessors** (root) | `main` | branch off main (today) | clean | +| **1 predecessor** (linear) | predecessor's branch | true stack (`base = pred branch`) | clean (only child's changes) | +| **N predecessors** (diamond) | `main` + merge all predecessor branches in | branch off main, merge each predecessor branch in turn | noisier (shows merged-in code until predecessors land) | + +Single-predecessor is the clean stacked-PR case. Multi-predecessor can't +target two bases, so the child branches off main and **merges its +predecessor branches into its own branch** before the agent starts — D +sees B's and C's code, starts as soon as both are task-complete (no human +merge needed). + +### Sub-decision: merge-into-D vs `bc` join branch +Build **merge-into-D directly** for MVP. The `bc` shared-join-branch +optimization only pays off when distinct children share the *same* +predecessor set; it adds no-PR/no-review branches + collapse bookkeeping. +Start with merge-into-D (always correct, no synthetic branches); add the +join-branch optimization later iff shared fan-in shows up in real epics. + +## Data flow (threading base + merge-list) + +1. **`createTaskCore`** — accept optional `base_branch` (string) and + `merge_branches` (string[]) on the request; persist onto TaskRecord. +2. **release path** (`releaseChild` / reconciler / #303 sweep) — when + releasing child C, look up each predecessor's `branch_name` from its + TaskRecord (predecessors are `succeeded`, so their branch is known): + - 1 predecessor → `base_branch = pred.branch_name`, `merge_branches = []` + - N predecessors → `base_branch = main`, `merge_branches = [all pred branches]` + - 0 → neither (root, today's behavior) +3. 
**orchestrator** — forward `base_branch` + `merge_branches` into the + agent payload (base_branch wiring mostly exists for PR tasks). +4. **agent `repo.py`** — for `new_task`: branch from `base_branch` if set + (today it ignores it for new_task); then `git merge` each + `merge_branches` entry. Conflict on a predecessor-merge → **agent + resolves it** (same ABCA-native stance as #305: it's a coding task; PR + review is the safety net). Fall back to a clear failure if unresolvable. +5. **agent `post_hooks.py`** — open the PR with `--base <base_branch>` + (today hardcoded to default branch for non-PR tasks). + +## Predecessor branch_name availability +A predecessor is `succeeded` before its dependent releases, so its +TaskRecord (and `branch_name`) exists. NOTE: `branch_name` is generated at +create-time as `bgagent/{task_id}/{slug}` and may be updated to the agent's +resolved head ref — the release path must read the CURRENT persisted +`branch_name`, not reconstruct it. + +## What this does NOT change +- Gating (release-on-predecessor-success) — unchanged from A1–A3. +- #303 backstop — already release-path-based, inherits base selection. +- Merge flow — humans still merge bottom-up (A5 docs); GitHub auto-retargets. + +## Open risk (flagged, not blocking) +Multi-predecessor merge-into-D re-merge churn: if B is edited in review +after D merged it in, D's branch has stale B. This is the same restack +concern #305 (auto-restack) addresses — multi-dep children are in scope +for that follow-up's re-merge handling. + +## Build order +1. base-selection logic (pure, testable) — given predecessor rows, return + `{base_branch, merge_branches}`. ← start here +2. `createTaskCore` + types: accept/persist the fields. +3. release path: compute selection from predecessor TaskRecords, pass through. +4. agent `repo.py` + `post_hooks.py`: honor base + merge for new_task. +5. orchestrator forwarding + tests + synth. 
diff --git a/docs/research/orchestration-branch-maintenance-design.md b/docs/research/orchestration-branch-maintenance-design.md new file mode 100644 index 00000000..b0fe3607 --- /dev/null +++ b/docs/research/orchestration-branch-maintenance-design.md @@ -0,0 +1,214 @@ +# Orchestration branch maintenance design (#247 → #305/A6 + #16) + +**Status:** proposed (design only — not built). Sequenced after the verified +A1–A5 executor + A4 stacking + the parent lifecycle / trigger-agnostic work. + +Two related gaps in stacked orchestration, both **post-DAG-creation branch +maintenance**, hence one design: + +- **#16 — combined result for fan-out.** When an epic's DAG has multiple + *leaves* (sub-issues with no successors), there is no single artifact that + combines them. Each leaf is an independent PR; nothing shows "everything + together." +- **#305 / A6 — re-stack on predecessor change.** A4 merges a predecessor's + code into a dependent child *once, at child-creation time*. If the + predecessor's PR is **edited after** the dependent already merged it in, + the dependent goes stale (it has old predecessor code). + +A4 handles *initial* base/merge selection; this design handles *keeping that +relationship correct over the epic's life* and *guaranteeing one combined +result*. + +--- + +## Part 1 — #16: auto-integration node for fan-out leaves + +### Decision + +When a validated DAG has **more than one leaf**, the platform appends a +**synthetic integration node** that depends on all leaves. It is a diamond +fan-in over the leaves, so it reuses A4's existing multi-predecessor merge +**unchanged** — its branch is cut from `main` and every leaf branch is +merged in, producing one combined PR/preview. That PR *is* the "see it all +together" artifact, surfaced on the parent epic. 
+ +| Case | Today | With #16 | +|---|---|---| +| linear chain (1 leaf) | last node is cumulative ✓ | unchanged — no integration node added | +| explicit diamond (1 fan-in leaf) | fan-in node is the combined result ✓ | unchanged — already 1 leaf | +| **pure fan-out (N leaves)** | N independent PRs, **no combined result** | synthetic integration node merges all N → 1 combined PR | + +### Where it's injected + +`orchestration-discovery.ts`, **after `validateDag` succeeds, before +`seedOrchestration`** — NOT in the graph-source layer (graph sources are +channel-agnostic producers; "compute leaves + integrate" is an +orchestration concern that needs the validated DAG shape). `validateDag` +exposes roots (`layers[0]`) but not leaves, so compute leaves here: +a leaf is any node id that appears in no other node's `depends_on`. + +``` +children = graphSource() # tier 1/2/3 (#11) +validateDag(children) # cycle / dangling / dup +leaves = nodesWithNoSuccessors(children) +if leaves.length > 1: + children += syntheticIntegrationNode(depends_on = leaves) + validateDag(children) # re-validate: still acyclic, no dangles +seedOrchestration(children) +``` + +### Synthetic node shape + +``` +id: `${orchestrationId}#integration` (NOT a real Linear issue id) +depends_on: [all leaf ids] +title: "Integration — combine sub-issue results" +identifier: undefined +``` + +It flows through the whole pipeline unchanged. Verified seam-by-seam: + +| Seam | Behaviour with synthetic node | +|---|---| +| `selectBaseBranch` (diamond) | N predecessors → base `main` + merge all leaf branches. **Reused as-is.** | +| `repo.py` `_merge_predecessor_branch` | merges each leaf branch into the integration branch (conflict → abort + note, agent resolves). **Reused as-is.** | +| release / `createTaskCore` | normal child release; idempotency key `${orch}_${orch}#integration`. `sub_issue_id` is an opaque DDB SK — any string works. 
| +| status block / rollup render | label falls back to `title` when `linear_identifier` is absent → renders "Integration — …". **Graceful.** | +| **agent reactions** (`linear_reactions.py`) | 👀/✅/❌ `reactionCreate(issueId=<synthetic>)` **fails 4xx** — there's no real Linear issue. Already best-effort/advisory (logged, never gates the task). **Acceptable graceful-degrade.** | + +### Open sub-decisions (#16) + +1. **Integration task description.** It's a merge-and-reconcile task, not a + feature task. Description should tell the agent: "all sub-issue branches + are merged into your branch; resolve any conflicts, ensure the combined + result builds, open a PR." Likely wants its own workflow + (`coding/integration-v1`) rather than `coding/new-task-v1`, so the prompt + is merge-focused. (TBD — could start with new-task-v1 + a templated + description.) +2. **Where the combined result shows on the parent.** The rollup/status + block should link the integration node's PR as the headline "combined + result" (vs. the per-leaf PRs). Small render change. +3. **Skip when a single leaf already integrates.** Linear chains + explicit + diamonds already have one leaf — no node added (the `leaves.length > 1` + guard). Confirm we never double-integrate. + +--- + +## Part 2 — #305 / A6: re-stack on predecessor change + +### The staleness + +A4 merges predecessor code into a dependent **once**, when the dependent is +released. Lifecycle that breaks it: + +1. Child D released; A4 merges predecessor B's branch into D. D's PR is correct. +2. Reviewer asks B's author (the agent or a human) for changes; **B's branch + gets new commits**. +3. D still has B's *old* code. D's PR is now stale — it will conflict or ship + wrong behaviour when merged. 
+ +### Detection: webhook (primary) + sweep (backstop) + +| | Webhook | Sweep | +|---|---|---| +| trigger | `pull_request: synchronize` (new commits on a PR) | scheduled scan (extend `reconcile-stranded-orchestrations`) | +| latency | seconds | minutes | +| role | **primary** | recovery (missed/failed webhooks) | + +The GitHub webhook receiver (`github-webhook.ts`) today handles **only** +`deployment_status`; it's a general signed App webhook, so adding a +`pull_request` branch is a filter + parse + dispatch extension, not new +infra. The sweep already iterates all orchestrations and can compare each +released child's predecessor head SHA against what the child last merged. + +### The missing lookup (required for either path) + +There is **no PR/branch → orchestration-child index** today (only +`ChildTaskIndex` on `child_task_id`). When a `pull_request` event arrives we +have the head branch; we must find *which orchestration children depend on +the sub-issue whose branch this is*. Options: + +1. **New sparse GSI on `child_branch_name`** — O(1) "who is on this branch", + then walk the orchestration's rows for dependents. **Recommended.** +2. Parse `{taskId}` out of the `bgagent/{taskId}/...` branch and use the + existing `ChildTaskIndex`. Fragile if the agent renamed the branch (see + the session's branch-discipline fixes) — but post-fix the branch is the + provisioned one, so viable as a fallback. + +### The re-stack action — reuse, don't reinvent + +A re-stack of dependent D against changed predecessor B is: fetch B's new +branch, merge it into D's branch, push. This is **exactly** +`_merge_predecessor_branch` again, run as a follow-up task on D's existing +branch. So model it as a **`coding/restack-v1` workflow** that uses the +`pr_iteration` family's `ensure_pr(push_resolve)` strategy (push follow-up +commits to the existing PR branch, resolve the existing PR URL — no new PR). 
+ +Idempotency key includes the predecessor SHA so the same predecessor update +doesn't re-stack twice: `restack_${orch}_${childSub}_${predHeadSha}`. + +### The key design call: conflict → agent, NOT human + +When the re-merge **conflicts**, do **not** escalate to a human approval +gate. Spawn the re-stack as a normal agent task whose job is to resolve the +conflict and push — **PR review is the safety net** (a human reviews the +re-stacked PR like any other). This matches the existing +`_merge_predecessor_branch` philosophy (abort the raw merge, hand the agent +a clean tree + a note) and avoids turning every predecessor edit into a +human interrupt. Rationale: the agent already resolves merge conflicts as +part of normal work; a stale-dependent is a coding task, not a policy +decision. + +### Cascade + bounding + +- A re-stack of D pushes new commits to D → if D itself has dependents, they + are now stale → cascade. Re-stack walks **down** the DAG from the changed + node, re-stacking each dependent in topo order. +- **Bound the cascade**: an idempotency key per (child, predecessor-SHA) + prevents loops; a per-orchestration re-stack budget (mirror the + approval-gate cap) prevents a thrash storm if PRs are being rapidly edited. +- Re-stack only **released, non-terminal-merged** children. A child whose PR + is already merged to main is out of the stack — leave it (its code is in + main; GitHub's auto-retarget-on-delete handles the rest, per ADR-001 §8). + +### What this does NOT do + +- Not auto-**merge** the stack — merge stays human + bottom-up (ADR-001 §8/§9). +- Not re-stack on every `push` — only `pull_request: synchronize` on a branch + that is a *predecessor of a still-open dependent in an active orchestration*. + +--- + +## Build order + +1. 
**#16 first** (small, self-contained, no new infra): leaf computation + + synthetic node in discovery; render the integration PR as the combined + result on the parent; tests (multi-leaf → node added, single-leaf → + not, synthetic node renders, reuses diamond merge). Live-verify with a + pure-fan-out epic. +2. **#305 lookup**: add the `child_branch_name` GSI; PR→child resolver. +3. **#305 detection**: extend `github-webhook.ts` for `pull_request: + synchronize`; dispatch to a re-stack handler; mirror into the sweep as + backstop. +4. **#305 action**: `coding/restack-v1` workflow (push_resolve + re-merge); + cascade in topo order; idempotency + budget bound; conflict → agent task. + +## Open risks + +- **Re-stack thrash** during active review of an early predecessor — bounded + by the per-(child,SHA) idempotency key + per-orchestration budget, but + worth a metric + cap-fires log. +- **Synthetic-node identity** leaks into any future code that assumes + `sub_issue_id` is a real Linear issue — guard with a clear + `#integration`-suffixed id and a helper `isSyntheticNode()`. +- Diamond re-merge conflict resolution quality is only as good as the agent; + PR review remains the gate (by design). 
+ +## References + +- `docs/research/a4-stacked-base-branch-design.md` — the initial stacking it extends +- `docs/decisions/ADR-001-stacked-pull-requests.md` §8/§9 — merge semantics + #247 extension +- `cdk/src/handlers/shared/orchestration-base-branch.ts` — `selectBaseBranch` (reused) +- `cdk/src/handlers/shared/orchestration-discovery.ts` — injection point for #16 +- `cdk/src/handlers/github-webhook.ts` — webhook to extend for #305 +- `cdk/src/handlers/reconcile-stranded-orchestrations.ts` — sweep backstop diff --git a/docs/research/orchestration-reconciler-correctness.md b/docs/research/orchestration-reconciler-correctness.md new file mode 100644 index 00000000..cfd417c5 --- /dev/null +++ b/docs/research/orchestration-reconciler-correctness.md @@ -0,0 +1,181 @@ +# Orchestration reconciler — correctness as a proof problem (#247) + +A worksheet for reasoning about the Mode A reconciler's gating logic +rigorously, rather than patching failures one at a time. Work the proof +obligations + adversarial schedules below by hand; each is a place a bug +can hide. Known findings (from the integration test) are listed at the +end — try to *derive* them before reading. + +--- + +## 1. The model + +**State.** An orchestration is a DAG of children. Each child `c` has: +- `deps(c)` ⊆ children — its predecessors (immutable after discovery), +- `status(c) ∈ {blocked, ready, released, succeeded, failed, skipped}`, +- at most one `task(c)` (an ABCA task), created when released. + +Persisted in DynamoDB: one row per child (PK `orchestration_id`, SK +`sub_issue_id`), plus a `#meta` row. A `ChildTaskIndex` GSI maps +`task_id → row`. + +**Events.** The only inputs are **terminal task events** arriving on the +TaskTable stream: `complete(c, build_passed)`, `fail(c)`, +`cancel(c)`, `timeout(c)`. Each is delivered **at least once** (stream +redelivery) and events for *different* children may be processed +**concurrently** by separate Lambda invocations. 
Roots are released once +at seed time (separate path). + +**Success predicate.** `succ(c) ≝ status(c)=succeeded`, set only by a +`complete(c, true)`. (`complete(c,false)` → `failed`; see Obligation O3.) + +**Release rule (intended).** A child `c` becomes releasable iff +`status(c)=blocked ∧ ∀d∈deps(c): succ(d)`. Releasing creates `task(c)` +and sets `status(c)=released`. + +**Operations available** (their atomicity matters): +- `Put(item, cond)` — conditional put, atomic. +- `Update(key, set, cond)` — conditional update, atomic per item. +- `Query(partition | GSI)` — **not** atomic with any write. +- `createTaskCore(...)` — internally does `Query(IdempotencyIndex)` then + `Put(cond: attribute_not_exists(task_id))`. **Check-then-act across two + calls → NOT atomic.** A new `task_id` (ulid) is minted each call, so the + `attribute_not_exists` condition does **not** dedup two calls with the + same idempotency key. + +--- + +## 2. Invariants to preserve (state these as ∀-properties) + +- **I1 (no premature start):** if `status(c)∈{released,succeeded}` then at + the moment of release `∀d∈deps(c): succ(d)`. +- **I2 (exactly-once task):** at most one `task(c)` is ever created per `c`. +- **I3 (no lost release):** if at any quiescent point + `∀d∈deps(c): succ(d)` and `status(c)=blocked`, then eventually `c` is + released. (Liveness — no stranding.) +- **I4 (terminal monotonicity):** `succeeded/failed/skipped` are terminal; + no event moves `c` out of them. +- **I5 (failure closure):** if `∃d∈deps*(c)` (transitive) with + `status(d)∈{failed,skipped}` then `c` is eventually `skipped`, never + released. (No child runs on a failed predecessor.) +- **I6 (completion soundness):** the orchestration is reported complete iff + `∀c: status(c)∈{succeeded,failed,skipped}`. + +--- + +## 3. 
Proof obligations + +For the reconcile procedure `R(e)` run per event `e`, prove each holds +under **(a)** single-threaded sequential delivery, **(b)** at-least-once +redelivery, **(c)** concurrent delivery of distinct-child events. + +- **O1.** `R` preserves I1. *(Does the release decision read a state in + which all deps are truly `succeeded`, or a stale snapshot?)* +- **O2.** `R` preserves I2 under (c). *(If two events each conclude `c` is + releasable, how many `task(c)` get created? Which step is the + serialization point — the row flip or the task create? Does the + serialization point come **before** or **after** the irreversible + `createTaskCore`?)* +- **O3.** `complete(c, false)` is treated as `fail(c)` for all of + I1/I5. *(Build-passed gate.)* +- **O4.** `R` preserves I3 under (c). *(The "diamond race": `d∈deps(D)` and + `e∈deps(D)` complete concurrently; each invocation persists only its own + child as succeeded. Construct a schedule where **neither** invocation + sees both `succ(d)∧succ(e)` → D stranded. What read ordering defeats + it?)* +- **O5.** `R` preserves I2 **and** I3 simultaneously. *(This is the crux: + O4's fix — "re-read fresh and release if all deps succeeded" — can + reintroduce O2 violations. Show whether your `R` can satisfy both, or + prove they require a single atomic compare-and-release.)* +- **O6.** Redelivery of an already-processed `e` is a no-op (idempotent). +- **O7.** Termination: `R` halts and the DAG reaches all-terminal in + finite events (no infinite re-release loop). + +--- + +## 4. Adversarial schedules to run by hand + +Use `▸` for "invocation reads", `✎` for "invocation writes". Two +invocations P, Q. Find the interleaving that breaks an invariant. + +**S1 — diamond, simultaneous (O4):** D deps {B,C}, both `released`. +Events `complete(B,true)`, `complete(C,true)` processed by P, Q. 
+``` +P▸snapshot{B:released,C:released,D:blocked} +Q▸snapshot{B:released,C:released,D:blocked} +P✎ B:=succeeded +Q✎ C:=succeeded +P: in P's snapshot, C≠succeeded → P does NOT release D +Q: in Q's snapshot, B≠succeeded → Q does NOT release D +⇒ D stranded blocked, both deps succeeded. I3 violated. +``` +Fix attempt: each invocation, after writing its own child, RE-READS. +Re-derive — does re-read alone guarantee someone sees both? (Hint: depends +whether the re-read happens-after both writes; construct the schedule where +both re-reads still precede the other's write.) + +**S2 — double release (O2/O5):** continue S1 with the re-read fix, where +both re-reads DO see {B:succeeded, C:succeeded}. +``` +P▸fresh{B:succ,C:succ,D:blocked} → P decides release D +Q▸fresh{B:succ,C:succ,D:blocked} → Q decides release D +P✎ createTaskCore(D) → task_P (idempotency Query saw nothing yet) +Q✎ createTaskCore(D) → task_Q (idempotency Query saw nothing yet) +P✎ flip D:blocked→released (cond) ✓ +Q✎ flip D:blocked→released (cond) ✗ ConditionalCheckFailed +⇒ TWO tasks created, one orphaned. I2 violated. +``` +Question: reorder so the **conditional flip precedes the task create**. +Does flip-then-create satisfy I2? What new failure does it admit (crash +between flip and create → I3 / stranded `released`-with-no-task)? Is that +recoverable by the #303 stranded sweep? State the trade. + +**S3 — redelivery during release (O6):** `complete(B,true)` delivered +twice, processed by P then Q after P fully finished. Show I2/I3 hold. + +**S4 — failed leg + concurrent success (O5×O3):** D deps {B,C}; +`complete(B,true)` and `fail(C)` concurrent. Show D ends `skipped`, never +released, regardless of interleaving, AND B ends `succeeded`. + +**S5 — skip vs release ordering:** A fails; B deps {A}; C deps {B}. +`fail(A)` and a stale `complete`-driven attempt to release B race. Show C +never starts. + +--- + +## 5. 
The central design question (decide, then prove) + +The irreversible action is `createTaskCore`. I2 (exactly-once) requires a +**single serialization point that gates the irreversible action**. Options: + +1. **create-then-flip** (current): create always happens; flip dedups the + row. → I2 broken under concurrency (S2). I3 safe. +2. **flip-then-create**: only the invocation that wins the conditional + `blocked→released` flip calls createTaskCore. → I2 safe (one winner). + New risk: crash/throw after flip, before create → `released` row, no + task → I3 needs the #303 stranded sweep to recover (re-create for a + `released` row with no live task). +3. **atomic claim**: flip `blocked→releasing` (cond) as the claim; winner + creates + sets `released`+`task_id`; sweep recovers stuck `releasing`. + A 3-state version of (2). + +Prove which of {2,3} gives I2 ∧ I3 (with the sweep as the I3 backstop), +and whether (1) is salvageable at all under at-least-once + concurrent +delivery. The integration test `concurrent predecessors (wired)` is the +executable witness for S2. + +--- + +## 6. Known findings (try to derive before reading) + +- **F1 (= S1):** stale-snapshot release decision strands D under + simultaneous predecessor completion. *Lost update.* (Attempted fix: + re-read fresh.) +- **F2 (= S2):** the re-read fix then admits double task creation, because + `createTaskCore` idempotency is check-then-act (non-atomic) and + `releaseChild` is create-then-flip, so the flip (the only serialization + point) happens *after* the irreversible create. *Double create.* +- **Open:** adopt flip-then-create (Option 2/3) so the conditional flip is + the gate, with #303's stranded sweep as the I3 backstop for a + crash-after-flip. Prove I2 ∧ I3 for the chosen option, then encode S1–S5 + as tests. 
diff --git a/docs/research/stacked-pr-merge-practices.md b/docs/research/stacked-pr-merge-practices.md new file mode 100644 index 00000000..9f7cc176 --- /dev/null +++ b/docs/research/stacked-pr-merge-practices.md @@ -0,0 +1,118 @@ +# Stacked PR merge practices — research findings (#247 A4/A5) + +> Compiled 2026-06-10 to settle how ABCA's Linear orchestration (Mode A) +> should structure child PRs and how they get merged. Sources are +> live-fetched (URLs inline). Where a claim is industry practice rather +> than documented behavior, it is labelled. + +## TL;DR for #247 + +- **Children stack: PR-A → main, PR-B → A's branch, PR-C → B's branch.** +- **A downstream PR does NOT wait for upstream PRs to merge.** Because + C's branch is cut from B's (which was cut from A's), C's branch + *already physically contains* A's and B's commits. C's author/agent + works on top of them immediately; C's PR *diff* shows only C's changes + (diffed against B). Review status of A/B is irrelevant to this. +- **Merge is bottom-up, one PR at a time** — NOT "merge the top and the + whole stack lands." Merge A, then B, then C. +- **GitHub auto-retargets** the dependent PRs as lower ones merge — but + only **when you delete the merged head branch** (see exact quote). +- **Auto-merging a stack** is a real, supported pattern via merge queues, + gated on required approvals + green CI. It is a deliberate follow-up for + ABCA, not MVP (#247 lists "auto-merge when all children complete" as + out of scope). + +## Q1/Q2 — Merge flow + retargeting (GitHub native) + +**GitHub automatically retargets dependent PRs when the merged branch is +deleted** (not from the merge itself): + +> "If you delete a branch that has open pull requests based on it, GitHub +> automatically updates any such pull requests, changing their base +> branch to the merged pull request's base branch." 
+> — GitHub Docs, *Deleting and restoring branches* +> https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/managing-branches-in-your-repository/deleting-and-restoring-branches-in-a-repository + +Practical consequence for a stack A←B←C: merge A to `main` and delete +`feat/A` → B's base auto-flips from `feat/A` to `main`, and B's diff stays +clean (it no longer double-counts A's commits, since A is now in main). +Repeat for B, then C. This is the **bottom-up, one-at-a-time** model. + +## Q3 — How downstream work proceeds during review (the key mechanic) + +Stacking decouples *building dependent work* from *merging*. From the +Pragmatic Engineer analysis of stacked diffs +(https://newsletter.pragmaticengineer.com/p/stacked-diffs): + +> stacks "can be built continuously, one on top of the other, allowing +> engineers to stay unblocked." + +And the unit of change becomes the individual commit/diff, each of which +"can be tested, reviewed, landed, and reverted individually." The +dependency is physical (git branch lineage), so a downstream change sees +upstream code the moment the branch exists — **no waiting for merge.** + +When an upstream PR changes after review, the stack must be **restacked** +(rebased): "later diffs cannot be landed to the main branch while they +don't contain changes from the updated Diff 1" → resolved via +`git rebase -i` up the stack (Pragmatic Engineer). Tools (ghstack, +Graphite) automate this restack. + +## Q4 — Tooling: ghstack (Meta's open-source tool) + +ghstack (https://github.com/ezyang/ghstack) — "Conveniently submit stacks +of diffs to GitHub as separate pull requests." +- Each commit on top of `main` becomes its own PR. +- Land with `ghstack land $PR_URL` — lands a ghstack'd PR (handles the + base rewriting so the rest of the stack stays correct). +- Stack another PR by `git commit` on top + re-run `ghstack`. 
+This is the closest reference for an **automated agent** opening stacked +PRs: one branch/PR per commit, tool owns the base-branch bookkeeping. + +## Q5 — Auto-merging a stack (GitHub merge queue) + +GitHub **merge queue** supports ordered, stack-like merging +(https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/configuring-pull-request-merges/managing-a-merge-queue): + +- Entry gate: "Once a pull request has passed all required branch + protection checks, a user with write access ... can add the pull + request to the queue." → **required status checks + approvals** gate it. +- Ordering: "merged in a first-in-first-out order where the required + checks are always satisfied." +- Stacking semantics: each queued PR's temp branch "contains code changes + from the target branch, pull request #1, and pull request #2" — i.e. + later entries build on earlier ones, exactly like a stack. +- Caveat: cannot be used with wildcard (`*`) branch protection patterns. + +So "auto-merge the stack once all green + approved" is a real pattern, but +it rides on branch-protection + merge-queue config — a per-repo/per-project +policy decision, hence a follow-up for ABCA rather than MVP. + +## On research papers + +Stacked diffs is an **industry practice**, not an academic topic — there +is no peer-reviewed literature on "stacked PRs" mechanics. The scholarly +grounding is for the *premise* (small, incremental changes review better), +not the stacking technique: +- Bacchelli & Bird, *Expectations, Outcomes, and Challenges of Modern Code + Review*, ICSE 2013 — foundational modern-code-review empirical study. +- Rigby & Bird, *Convergent Contemporary Software Peer Review Practices*, + FSE 2013 — documents small-incremental-change review. +Treat blogs/tool-docs (above) as authoritative for the *mechanics*; the +papers only justify *why small stacked PRs beat one large PR*. 
+ +## Implications for #247 A4 / A5 + +- **A4 (base-branch targeting):** child B's branch must be cut from A's + branch and B's PR `base` set to `feat/A` (GitHub API `base` param on + `POST /repos/{owner}/{repo}/pulls`). Roots target `main`. This makes the + downstream-sees-upstream-code property hold without waiting on merges. +- **A5 (rollup + docs):** "orchestration complete" means *all child PRs + opened*, NOT merged. Document the **human bottom-up merge + delete-branch + (for auto-retarget)** flow. Auto-merge stays a follow-up (per-project + opt-in, gated on approvals+CI via merge queue). +- **ADR-001 ambiguity to resolve:** the ADR says both "PR N targets PR + N-1's branch" and "Final PR merges the full stack to main." Per GitHub's + actual behavior, the correct reading is **bottom-up sequential merges + with auto-retarget on branch delete**, not a single top-merge. Worth a + clarifying ADR-001 amendment. diff --git a/docs/src/content/docs/architecture/Security.md b/docs/src/content/docs/architecture/Security.md index b1efd930..3f532e0f 100644 --- a/docs/src/content/docs/architecture/Security.md +++ b/docs/src/content/docs/architecture/Security.md @@ -58,7 +58,7 @@ Input screening happens at two points in the pipeline, forming a defense-in-dept ### Submission-time screening - **Input validation** - Required fields, types, and size limits are enforced before any processing. Task descriptions are capped at 10,000 characters. -- **Bedrock Guardrails** - A `PROMPT_ATTACK` content filter at `MEDIUM` input strength screens task descriptions for prompt injection. +- **Bedrock Guardrails** - A `PROMPT_ATTACK` content filter at `MEDIUM` input strength screens task descriptions for prompt injection. `MEDIUM` is deliberate: `HIGH` (which also blocks LOW-confidence) false-positives on ordinary imperative task descriptions ("make no changes, just inspect…", "ignore the legacy config and migrate…"). 
A 2026-06 empirical pass against the live guardrail confirmed `MEDIUM` blocks the prompt-injection class (instructions to ignore/override/reveal the system prompt, exfiltrate credentials) while passing benign imperatives with no false positives. **Scope:** this filter catches *attacks on the model*, not *destructive-but-honest task requests* (e.g. "delete .github/workflows and force-push to main") — those are not prompt injection and are intentionally NOT this layer's job. They are caught downstream at the agent tool-use layer by the Cedar HITL gates (`force_push_main`, `write_git_internals`, `rm_rf_root`; see [CEDAR_HITL_GATES.md](/architecture/cedar-hitl-gates)). Input screening + Cedar tool gates are complementary layers, not redundant. - **Attachment screening** - All attachments (images, text files, URLs) pass through security screening before reaching the agent. Images (PNG and JPEG only) are validated via magic bytes and dimension checks, then screened through Bedrock Guardrails (image content blocks). Text files and PDFs are extracted and screened through Bedrock Guardrails text content screening. URL attachments undergo SSRF protection (DNS resolution pinning, private IP blocking, redirect validation) and content screening during hydration. See [ATTACHMENTS.md](/architecture/attachments) for the full screening pipeline. - **Fail-closed** - If the Bedrock API is unavailable, submissions are rejected (HTTP 503). Unscreened content never reaches the agent. diff --git a/docs/src/content/docs/decisions/Adr-001-stacked-pull-requests.md b/docs/src/content/docs/decisions/Adr-001-stacked-pull-requests.md index 77062c04..71b58375 100644 --- a/docs/src/content/docs/decisions/Adr-001-stacked-pull-requests.md +++ b/docs/src/content/docs/decisions/Adr-001-stacked-pull-requests.md @@ -42,15 +42,20 @@ This gives reviewers and agents immediate orientation. 
The "Next" section is opt - PR 1 targets `main` - PR N targets PR N-1's branch -- Final PR merges the full stack to `main` +- PRs merge **bottom-up, one at a time** — each to its current base — NOT by + merging the top PR and having the whole stack land at once. See §8 for the + merge sequence and GitHub's auto-retarget-on-delete behaviour. ``` main - └── feat/first-concern (PR 1) - └── feat/second-concern (PR 2) - └── feat/third-concern (PR 3 → merge to main) + └── feat/first-concern (PR 1, base: main) + └── feat/second-concern (PR 2, base: PR 1's branch) + └── feat/third-concern (PR 3, base: PR 2's branch) ``` +Merge order is PR 1 → PR 2 → PR 3, each landing on `main` after its +predecessor (§8), not a single "merge the tip" operation. + ### 3. Self-contained reviewability Each PR: @@ -95,8 +100,9 @@ When a lower PR changes after review feedback: ### 8. Merge semantics -The default topology is a **classic stack** — each PR targets its predecessor's branch. When an early PR merges to `main` before later PRs are reviewed: +The default topology is a **classic stack** — each PR targets its predecessor's branch. Merges proceed **bottom-up, one PR at a time**: there is no single operation that merges the tip and lands the whole stack. When an early PR merges to `main` before later PRs are reviewed: +0. **Deleting the merged branch is what triggers GitHub's auto-retarget.** When PR N's branch is deleted after merge, GitHub automatically retargets the PRs that pointed at it onto PR N's base (`main`). The merge *itself* does not retarget — the branch deletion does. If you keep the merged branch around, the child PRs keep showing the already-merged commits in their diff. Steps 1–3 are the manual fallback when auto-retarget doesn't apply (branch kept, base is a non-deleted intermediate, etc.). 1. **Retarget** all PRs that pointed at the merged branch to `main` (or to the next unmerged predecessor). Use `gh pr edit <N> --base main` or GitHub's "Retarget" button. 2. 
**Rebase** each retargeted PR onto its new base so the diff is clean — use `git rebase --skip` for commits whose content is already in main via the merged predecessor. 3. **Force-push with lease** (`--force-with-lease`) so the PR diff on GitHub shows only net-new changes, not already-merged content. @@ -108,6 +114,16 @@ After retargeting, the remaining PRs form a shorter stack rooted on `main`. This **When the stack diverges:** If review feedback on PR 2 invalidates assumptions in PRs 3+, prefer closing and re-opening the affected PRs over accumulating fixup commits that obscure intent. The parent issue remains the source of truth for what shipped and what remains. +### 9. Agent-orchestrated stacks (issue #247) + +§1–§8 describe a **human-authored** stack. ABCA's Linear orchestration (#247) builds the same topology **automatically** from a parent issue's sub-issue DAG, with three differences reviewers should know: + +- **Base branch is threaded, not retargeted by hand.** When the orchestrator releases a stacked child, it passes the predecessor's branch as the child's `base_branch` (persisted on the `TaskRecord`); the agent creates the child branch *from* that base and opens the PR against it. The classic stack of §2 is produced up front, so the §8 retarget dance is only needed if a human merges mid-run. A child is released only once all its predecessors have **succeeded** (task-complete), not merged. +- **Diamonds, not just linear stacks.** A sub-issue with multiple predecessors (fan-in) cannot target two bases. The orchestrator branches it off `main` and **merges each predecessor branch into the child's branch** before the agent starts, so the child sees all predecessors' code. Linear chains still use the single-predecessor base-targeting of §2. +- **Merge is still human + bottom-up.** The orchestrator opens the stack; it does **not** merge. A human merges bottom-up per §8, and GitHub's delete-triggers-retarget (§8.0) collapses the remaining children onto `main`. 
The parent epic carries a live status block + rollup (it is the §1 "position statement" / §6 source-of-truth, maintained by the platform). + +**Open follow-up (#305 / A6):** §5 rebase discipline and the diamond re-merge above are *initial-creation* only — if a predecessor branch is **edited after** a dependent child already merged it in, the child goes stale. Automatic re-stack / re-merge on predecessor change is tracked in #305 (A6) and is not yet wired. + ## Consequences - (+) Each PR stays in the "reviewable without fatigue" window (~15–40 min) diff --git a/docs/src/content/docs/decisions/Adr-017-linear-agent-session-interaction.md b/docs/src/content/docs/decisions/Adr-017-linear-agent-session-interaction.md new file mode 100644 index 00000000..6ddf361f --- /dev/null +++ b/docs/src/content/docs/decisions/Adr-017-linear-agent-session-interaction.md @@ -0,0 +1,205 @@ +--- +title: Adr 017 linear agent session interaction +--- + +# ADR-017: Linear agent-session as a future interaction channel + +**Status:** proposed +**Date:** 2026-06-17 + +## Context + +ABCA's Linear integration today triggers and reports work through a +**hand-rolled comment protocol** layered on Linear's generic Issue/Comment +webhooks: + +- **Trigger** — a string match on `@bgagent` in a `Comment` webhook body + (`parseCommentTrigger`), plus a label-add on an issue to seed a #247 + orchestration. +- **Acknowledgement** — emoji reactions managed by hand (👀 on receipt → + ✅/❌ on settle via `swapCommentReaction`/`swapIssueReaction`), threaded + replies (`replyToComment`), and a single maturing "epic panel" comment + edited in place (`upsertEpicPanel`). 
+ +This protocol works and is now well-tested (see the #247 UX.1–23 series), +but the comment seam has been the single richest source of edge-case bugs: +reply `issueId` vs `parentId` rules, "parent comment must be top-level" +threading, webhook-redelivery reply spam, self-trigger loops from our own +`@bgagent` example text, and reaction/state flapping. Each was a +consequence of bolting an agent protocol onto a human-comment surface. + +Linear now ships a first-class **Agents API** (agent-session model): +delegate or @mention an installed agent app → a typed `AgentSessionEvent` +webhook (`created`/`prompted`) → the agent emits typed **activities** +(`thought` / `action` / `response` / `elicitation` / `error`) and Linear +derives a native session **state** (`pending`/`active`/`awaitingInput`/ +`error`/`complete`/`stale`) with a built-in "thinking"/activity UI. + +Two facts establish the starting point: + +1. **The auth migration is already done.** ABCA's OAuth flow + (`cli/src/linear-oauth.ts`) requests + `read write app:assignable app:mentionable` with `actor=app`. Verified + live on `backgroundagent-dev` (2026-06-17): both deployed workspace + tokens (`bgagent-linear-oauth-maguireb`, `…-demo-abca`) carry exactly + that scope. **bgagent is already installed as an app actor** — it is + assignable, mentionable, and delegatable today. No auth work is needed + to adopt agent sessions. +2. **Linear is an interaction layer, not compute.** Adopting agent sessions + changes *how we are triggered* and *how status is shown*. All compute + (clone, run the coding agent, build/test, open the PR) still runs on + ABCA's own AgentCore Runtime + ECS. The switch offloads nothing to + Linear and does not change the AWS architecture or cost model. 
+ +## Decision + +**Adopt the Linear agent-session model as an ADDITIONAL, flag-gated +trigger/ack channel once Linear marks the Agents API GA — not now, and not +as a replacement for the comment path.** + +The orchestration **engine** is channel-agnostic by design (the #247 +trigger-agnostic seams): graph discovery, the reconciler, the epic +panel/rollup, base-branch stacking, and the cascade do not care how a task +was triggered. Agent sessions slot in as a new front end to that engine, +mapping cleanly onto what we already built: + +| ABCA today (hand-rolled) | Linear agent-session (native) | +|-------------------------------------|-----------------------------------| +| `@bgagent` string match in comment | `created` AgentSessionEvent (mention/delegate) | +| 👀 reaction "on it" | `thought` activity | +| 🤖 Starting / 🔗 PR opened | `action` activity (+ result) | +| ✅ Updated / completion | `response` activity | +| ❌ failure reply | `error` activity | +| "reply with guidance" retry (UX.9) | `elicitation` + `prompted` webhook + conversation history | +| panel header state (🔄/✅/⚠️) | session state (active/complete/error) | + +### Preview-API spike (2026-06-17, UX.24) + +A time-boxed, no-infra spike validated the API surface against the deployed +**app-actor** token (`bgagent`, workspace `maguireb`) — read-only schema +probes + mutation input validation, no migration code: + +- **API reachable by our token.** Introspection confirms `agentActivityCreate`, + `agentSessionCreateOnIssue`/`OnComment`/`Create`, `AgentSession` (fields incl. + `status`, `issue`, `comment`, `appUser`), and `AgentActivityType` = + `thought, action, response, elicitation, error, prompt` — exactly the docs. 
+- **Activity input shape verified callable.** `agentActivityCreate(input: + {agentSessionId, content: JSONObject, signal, ephemeral})` accepts our + `{type:'thought', body}` content — a call failed only on session-id lookup, + not schema/enablement, so the ack-emission half of the loop is proven. +- **BLOCKER (config, not code):** `agentSessionCreateOnIssue` returns + `"Agent sessions are not enabled for this application."` The bgagent OAuth + app has the scopes + `actor=app` but has **not been enabled as an agent** in + its Linear Application settings. Per docs, enabling = edit the app at + *Settings → API → Applications*, enable webhooks, and select the **"Agent + session events"** category. App-owner action; no waitlist mentioned. +- **The 10s-ack-vs-long-compute risk is therefore NOT yet proven end-to-end** — + it needs a real `agentSessionId`, which is gated on the enablement toggle + above. The pieces it depends on (immediate `thought` ack, then later + `action`/`response` activities) are individually confirmed callable; the + remaining unknown is purely whether Linear marks the session unresponsive if + our spawn exceeds 10s after the initial `thought` (docs say the `thought` + ack within 10s is sufficient, which our processor can emit synchronously + before the async spawn — same shape as today's 👀). + +Net (first pass): the spike de-risked reachability + the activity model and +pinpointed the single enablement step, without committing to migration. + +**Spike re-run (2026-06-17, after the app owner enabled "Agent session events") +— the core risk is RESOLVED end-to-end:** + +- `agentSessionCreateOnIssue` now succeeds → session `status: active`. +- **The 10s-vs-long-compute question is answered:** emit a `thought` at t+0 + (status `active`), then **wait 14s with no further activity** → session + **stays `active`** (not stale/unresponsive). 
The 10s rule governs only the + *initial* ack; once a `thought` lands, an arbitrarily long gap before the + next activity is fine. ABCA's webhook can emit the `thought` synchronously + (exactly like today's 👀) and let the >10s async spawn proceed — **no + architectural conflict.** +- **Full lifecycle derives correctly**, matching the mapping table above: + `thought`→active, `action`→active, `action`+result→active, + `response`→**complete**; on a second session `elicitation`→**awaitingInput**, + `error`→**error**. All five emittable types accepted; states auto-derive + from the last activity. (`AgentActivityContent` is a union — + `AgentActivityActionContent`/`…ElicitationContent`/`…ErrorContent`/etc. — so + each type persists as a distinct typed record.)
So the +> toggle stays **OFF** until the flag-gated adapter (Phase 2 below) ships in the +> same change that flips it. Interim action after the spike: **turn the toggle +> off** (app owner, Settings → API → Applications). + +### Why a channel, not a rewrite + +- The win is **real but partial**: agent sessions retire the brittle + *trigger + per-comment ack* seam (the bug class above), but Linear agent + sessions are **per-issue delegations with no native cross-issue epic + rollup**. The #247 parent-epic panel, fan-out integration node, dependency + cascade, and base-branch stacking stay ABCA's responsibility either way — + so roughly half of the recent bug classes (panel settle, cross-issue + concurrency) are unaffected by the migration. +- The Agents API is a **Developer Preview** (confirmed against + `developers.linear.app`, 2026-06-17): "in active development… may change + before GA." Ripping out a working, now-hardened comment path to depend on + an unstable API is the wrong trade today. +- Treating it as an additive channel behind a flag (per ADR-006) lets us + reuse the channel-agnostic engine, run both paths side by side during + evaluation, and revert via the flag if the Preview API shifts. + +## Consequences + +- **Positive:** removes the highest-friction seam (string-match trigger + + hand-rolled threading/reactions); native progress UI; conversation-history + retry replaces our bespoke loop; no auth work (already app-actor). +- **Negative / risk:** Preview API churn; hard runtime constraints (webhook + receiver must return within ~5s; an activity or external URL must be + emitted within ~10s of `created` or the session is marked unresponsive) — + ABCA's task spawn is async and slower than 10s, so the `created` handler + must emit an immediate `thought` ack and hand off, exactly as the current + processor 👀s then spawns. 
+- **No-op surfaces:** the orchestration engine, panel/rollup renderer, + reconciler, cascade, and base-branch logic are untouched by this decision. + +## Phasing + +1. **Now (this ADR):** record the decision; auth verified; do not build. + Keep the hardened comment path as the sole Linear interaction channel. +2. **When Linear GAs the Agents API:** spike a flag-gated `agent-session` + trigger/ack adapter behind the existing channel-agnostic engine — + `created`→seed/iterate, activities↔our ack states — running in parallel + with the comment path on `backgroundagent-dev`. +3. **After evaluation:** if the native path is strictly better, default the + flag on and deprecate the `@bgagent` string-match trigger; keep the + panel/rollup engine. + +## Out of scope (this ADR) + +- Any implementation. This is a direction + go/no-go record only. +- Changes to the orchestration engine, OAuth/token storage (done, ADR-016 + governs pluggable identity), or the Slack/Jira channels. +- The Mode B planner (#299) — orthogonal. 
+ +## References + +- `cli/src/linear-oauth.ts` — `actor=app`, `app:assignable`/`app:mentionable` +- `cdk/src/handlers/linear-webhook-processor.ts` — current comment trigger + acks +- ADR-006 (feature flags), ADR-015 (Jira integration), ADR-016 (pluggable identity and auth) +- Linear Agents API — `https://linear.app/developers/agents`, + `https://linear.app/developers/agent-interaction` (Developer Preview, fetched 2026-06-17) +- #247 UX.16–23 — the comment-path bug classes this would retire diff --git a/docs/src/content/docs/developer-guide/Repository-preparation.md b/docs/src/content/docs/developer-guide/Repository-preparation.md index 01a0e240..5531288b 100644 --- a/docs/src/content/docs/developer-guide/Repository-preparation.md +++ b/docs/src/content/docs/developer-guide/Repository-preparation.md @@ -53,12 +53,22 @@ new Blueprint(this, 'MyServiceBlueprint', { systemPromptOverrides: 'Extra instructions...', // appended to the platform prompt }, credentials: { githubTokenSecretArn: '...' }, // per-repo GitHub token secret - pipeline: { pollIntervalMs: 5000 }, // poll interval awaiting completion + pipeline: { + pollIntervalMs: 5000, // poll interval awaiting completion + buildCommand: 'npm run build && npm test', // build/test verification (default: mise run build) + lintCommand: 'npm run lint', // lint verification (default: mise run lint) + }, }); ``` If you use a custom `compute.runtimeArn` or `credentials.githubTokenSecretArn`, pass the ARNs to `TaskOrchestrator` via `additionalRuntimeArns` and `additionalSecretArns` so the Lambda has IAM permission. See [Repo onboarding](/architecture/repo-onboarding) for the full model. +#### Build-regression gating (important for non-mise repos) + +Before opening a PR, the agent runs a **build** and **lint** command in its cloud container — once on the clean clone (baseline) and again after its changes. If the build was green before and fails after, the task fails (a build-**regression** gate). 
This is a compile/test verification, **not** a deployment — your app's actual deploy stays in your own CI/CD after the PR merges. + +The commands default to **`mise run build`** / **`mise run lint`**. A repo that uses [mise](https://mise.jdx.dev/) with `build` / `lint` tasks gets gating for free. A repo that uses npm, gradle, cargo, make, etc. **must set `pipeline.buildCommand`** (and optionally `lintCommand`) to its real command — otherwise the default `mise run build` finds no task, **build-regression gating is silently OFF, and a change that breaks the build still reports success**. When that happens the agent surfaces a `⚠️ Build-regression gating is OFF` warning on the PR so the gap is visible, but the fix is to configure the command. For #247 orchestration this matters doubly: dependent sub-issues stack onto a predecessor's branch, so an unverified broken predecessor propagates downstream. + Redeploy after changing Blueprints: `mise //cdk:deploy`. ### Customizing the agent image diff --git a/docs/src/content/docs/roadmap/Roadmap.md b/docs/src/content/docs/roadmap/Roadmap.md index 1d54f545..b20bad2c 100644 --- a/docs/src/content/docs/roadmap/Roadmap.md +++ b/docs/src/content/docs/roadmap/Roadmap.md @@ -89,6 +89,7 @@ What's shipped and what's coming next. - [x] **Slack integration** - @mention task submission, `bgagent slack link` / `setup`, file attachments on submit, threaded progress notifications. See [SLACK_SETUP_GUIDE.md](/using/slack-setup-guide) - [x] **Linear integration** - Label-triggered tasks, `bgagent linear setup` / `link`, progress comments on issues. See [LINEAR_SETUP_GUIDE.md](/using/linear-setup-guide) +- [x] **Linear parent/sub-issue orchestration** - Label a parent issue with sub-issues and a `blocked by` graph; ABCA builds a DAG (rejecting cycles), creates child tasks in dependency order, gates blocked children until predecessors succeed, stacks child PRs on their predecessors' branches, and posts an aggregate rollup comment on the parent.
A scheduled backstop recovers children whose terminal events were lost. See [LINEAR_SETUP_GUIDE.md](/using/linear-setup-guide#parentsub-issue-orchestration) (#247) - [x] **Jira integration** - Label-triggered tasks on Jira Cloud, `bgagent jira setup` / `map` / `link`, progress comments via the Jira REST v3 API. See [JIRA_SETUP_GUIDE.md](/using/jira-setup-guide) and [ADR-015](/architecture/adr-015-jira-integration) ### Observability diff --git a/docs/src/content/docs/using/Linear-setup-guide.md b/docs/src/content/docs/using/Linear-setup-guide.md index aa39d07c..a2d433d4 100644 --- a/docs/src/content/docs/using/Linear-setup-guide.md +++ b/docs/src/content/docs/using/Linear-setup-guide.md @@ -69,6 +69,11 @@ bgagent linear webhook-info This prints the URL and values to paste into Linear. Open `https://linear.app/<slug>/settings/api/webhooks` and create the webhook with those values. +Under **Resource types**, enable both **Issues** and **Comments**: + +- **Issues** — label-triggered tasks and parent/sub-issue epic orchestration. +- **Comments** — the `@bgagent` re-iteration trigger: a reviewer comments `@bgagent <change>` on a sub-issue and ABCA updates that sub-issue's PR, then re-stacks its dependents. Without the Comments subscription this trigger silently never fires. + Then open the webhook detail page and copy the **signing secret** (`lin_wh_…`). ### 5. Tell ABCA the signing secret @@ -152,12 +157,61 @@ The fallback path keeps existing single-workspace deployments working without re **Trust model.** The `organizationId` in the body is attacker-controlled, but it only **selects** which secret to verify against; an attacker still needs the matching signing secret to forge a valid signature. Cross-workspace impersonation is prevented by the no-fallback-on-mismatch rule. 
+## Attachments and documents + +Beyond the issue title and description, Linear stores additional context the agent may need: + +- **Paperclip attachments** (PDFs, logs, spec files attached to an issue) +- **Project documents** (Linear's wiki-style docs attached to a project) +- **Comments posted after the task starts** (clarifications, approve / deny signals) + +ABCA does not pre-fetch this material into S3 or run it through Bedrock Guardrails — it stays in Linear, and the agent fetches it on demand at runtime via the Linear MCP. Concretely: + +- The webhook processor calls Linear's GraphQL API once per triggered issue to check for paperclip attachments and project documents. If anything is present it prepends a one-line hint (`Linear may have additional context for this issue: …`) to the task description, naming the relevant MCP tools. +- The agent's system prompt addendum tells it to call `mcp__linear-server__get_issue` for the full issue (including the `attachments` connection), `mcp__linear-server__get_attachment` per paperclip, `mcp__linear-server__list_documents` / `get_document` for project wikis, and `mcp__linear-server__list_comments` before opening the PR to pick up new comments. + +No additional setup is required — once Linear MCP is wired (steps above), this works automatically. Only embedded markdown images in the issue description (`![alt](https://…)`) are still pre-fetched and screened at task-creation time, because they enter the agent's context as URL attachments. + ## Usage - **Trigger a task**: apply the trigger label to an issue in a mapped Linear project. The issue title + description becomes the task description. - **Check status**: from the Linear issue (progress comments) or `bgagent list` / `bgagent status <task-id>`. - **Cancel**: `bgagent cancel <task-id>`. Removing the Linear label does not cancel a running task. 
+## Parent/sub-issue orchestration + +If you apply the trigger label to a **parent issue that has sub-issues**, ABCA orchestrates the whole epic instead of creating one task: + +1. **Discovery** — it reads the sub-issues and their `blocked by` / `blocking` relations, builds a dependency graph (DAG), and rejects cycles with a terminal comment on the parent. +2. **Dependency-ordered execution** — root sub-issues (no blockers) start immediately; a blocked sub-issue does not start until **all** its blockers reach terminal-success (a sub-issue that completes but fails its build does **not** release its dependents). Independent sub-issues run in parallel. +3. **Stacked PRs** — a sub-issue with a single predecessor branches from that predecessor's branch (so it sees its code before merge); a sub-issue with multiple predecessors branches from the default branch and merges all predecessor branches in. Review/merge the resulting stack bottom-up. +4. **Rollup** — when every sub-issue reaches a terminal state, ABCA posts an aggregate **rollup comment on the parent** (succeeded / failed / skipped counts + per-child status). Each sub-issue also gets its own final-status comment. +5. **Failure handling** — if a sub-issue fails (or is cancelled), its transitive dependents are **skipped** (never started); independent siblings still finish. The parent rollup reflects the partial outcome. + +Notes and current limitations: + +- The parent issue itself spawns **no task** — a human-authored sub-issue graph is treated as consent to execute. +- **No "cancel the whole epic" button yet.** Cancelling an individual sub-issue's task (`bgagent cancel <task-id>`) stops it and skips its dependents, but there is no single command to cancel a whole in-flight orchestration. Tracked as a follow-up. +- A scheduled backstop (every ~10 min) recovers sub-issues whose terminal events were lost during a transient outage, so a stalled orchestration self-heals rather than hanging. 
+- Multi-predecessor ("diamond") sub-issues merge their predecessors' branches at start time; if a predecessor is later edited in review, re-integration of the dependent is a tracked follow-up. + +## Parent/sub-issue orchestration + +If you apply the trigger label to a **parent issue that has sub-issues**, ABCA orchestrates the whole epic instead of creating one task: + +1. **Discovery** — it reads the sub-issues and their `blocked by` / `blocking` relations, builds a dependency graph (DAG), and rejects cycles with a terminal comment on the parent. +2. **Dependency-ordered execution** — root sub-issues (no blockers) start immediately; a blocked sub-issue does not start until **all** its blockers reach terminal-success (a sub-issue that completes but fails its build does **not** release its dependents). Independent sub-issues run in parallel. +3. **Stacked PRs** — a sub-issue with a single predecessor branches from that predecessor's branch (so it sees its code before merge); a sub-issue with multiple predecessors branches from the default branch and merges all predecessor branches in. Review/merge the resulting stack bottom-up. +4. **Rollup** — when every sub-issue reaches a terminal state, ABCA posts an aggregate **rollup comment on the parent** (succeeded / failed / skipped counts + per-child status). Each sub-issue also gets its own final-status comment. +5. **Failure handling** — if a sub-issue fails (or is cancelled), its transitive dependents are **skipped** (never started); independent siblings still finish. The parent rollup reflects the partial outcome. + +Notes and current limitations: + +- The parent issue itself spawns **no task** — a human-authored sub-issue graph is treated as consent to execute. +- **No "cancel the whole epic" button yet.** Cancelling an individual sub-issue's task (`bgagent cancel <task-id>`) stops it and skips its dependents, but there is no single command to cancel a whole in-flight orchestration. Tracked as a follow-up. 
+- A scheduled backstop (every ~10 min) recovers sub-issues whose terminal events were lost during a transient outage, so a stalled orchestration self-heals rather than hanging. +- Multi-predecessor ("diamond") sub-issues merge their predecessors' branches at start time; if a predecessor is later edited in review, re-integration of the dependent is a tracked follow-up. + ## Troubleshooting ### Webhook doesn't trigger a task @@ -185,11 +239,11 @@ If the failing event's `organizationId` doesn't match any registered workspace a ### "Invalid redirect_uri parameter for the application" during step 3 -Linear's misleading error for `actor=app` flows where the OAuth app config is incomplete. In your Linear app settings: +Linear's misleading error for `actor=app` flows where the OAuth app config is incomplete (it reports `Invalid redirect_uri` regardless of which required field is actually missing). In your Linear app settings, confirm: -- **GitHub username** must end with `[bot]` (e.g. `bgagent[bot]`) -- **Webhooks** toggle must be ON -- The Callback URL must be on a **single line** (line-wrapped URLs become two malformed entries Linear silently rejects) +- **GitHub username** is filled in (Linear's inline help describes the field and the `[bot]` suffix) — a blank value triggers this error. +- **Webhooks** toggle is ON. +- The Callback URL is on a **single line** (line-wrapped URLs become two malformed entries Linear silently rejects). Re-run `bgagent linear setup` after fixing. diff --git a/scripts/linear_epic.py b/scripts/linear_epic.py new file mode 100644 index 00000000..496967ca --- /dev/null +++ b/scripts/linear_epic.py @@ -0,0 +1,191 @@ +#!/usr/bin/env python3 +# +# MIT No Attribution — Copyright Amazon.com, Inc. or its affiliates. +# +# Linear epic harness for #247 orchestration stress testing (Mode A). 
+#
+# Creates a parent "epic" issue plus a DAG of child sub-issues wired with
+# "blocked by" relations, then (optionally) applies the trigger label to
+# fire the orchestration. Also inspects and tears down test epics. Kept as a
+# real .py file so the GraphQL payloads don't fight shell quoting.
+#
+# Auth: reads the Linear PAT from $LINEAR_PAT or /tmp/linear_pat (never
+# echoed). Workspace ids are the ABCA-demo defaults, hard-coded in the
+# TEAM_ID / PROJECT_ID / TRIGGER_LABEL constants below; edit those constants
+# to target another workspace (there is no command-line override).
+#
+# Usage:
+#   linear_epic.py create-epic --spec <spec.json>        # build + wire a DAG (no trigger)
+#   linear_epic.py trigger  --issue <uuid|identifier>    # add trigger label → orchestrate
+#   linear_epic.py inspect  --issue <uuid|identifier>    # parent + children + deps + state
+#   linear_epic.py teardown --issue <uuid|identifier>    # archive parent + all children
+#
+# A DAG spec is JSON: {"title": "...", "nodes": [{"key":"A","title":"...",
+# "description":"...","depends_on":["B",...]}, ...]}. Node "key" is a local
+# alias used only to express edges; real Linear ids are resolved after create.
+
+import argparse
+import json
+import os
+import sys
+import urllib.request
+import urllib.error
+
+LINEAR_URL = "https://api.linear.app/graphql"
+TEAM_ID = "8ab50246-938f-4b85-aff8-3df416787075"  # ABCA
+PROJECT_ID = "f369205b-2c33-4b1b-ac5f-52c640c3243e"  # abca-demo → isadeks/vercel-abca-linear
+TRIGGER_LABEL = "abca"
+
+
+def pat():
+    """Return the Linear personal access token, or exit if none is found.
+
+    $LINEAR_PAT takes precedence; /tmp/linear_pat is the fallback. The token
+    is only returned to the caller, never printed.
+    """
+    p = os.environ.get("LINEAR_PAT")
+    if not p:
+        try:
+            with open("/tmp/linear_pat") as f:
+                p = f.read().strip()
+        except OSError:
+            # A missing/unreadable fallback file is not an error by itself;
+            # the check below reports the missing token.
+            pass
+    if not p:
+        sys.exit("No Linear PAT in $LINEAR_PAT or /tmp/linear_pat")
+    return p
+
+
+def gql(query, variables=None):
+    """POST one GraphQL operation to Linear and return its ``data`` payload.
+
+    Exits the process with a short diagnostic on an HTTP error status, on a
+    transport failure, or when the response carries a GraphQL ``errors`` key.
+    """
+    body = json.dumps({"query": query, "variables": variables or {}}).encode()
+    req = urllib.request.Request(
+        LINEAR_URL, data=body,
+        headers={"Authorization": pat(), "Content-Type": "application/json"},
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=30) as r:
+            out = json.load(r)
+    except urllib.error.HTTPError as e:
+        sys.exit(f"HTTP {e.code}: {e.read().decode()[:400]}")
+    except OSError as e:
+        # URLError (DNS failure, refused connection) and socket timeouts are
+        # OSError subclasses; report them like every other failure instead of
+        # dumping a traceback. HTTPError is handled above, so it never lands here.
+        sys.exit(f"Request to {LINEAR_URL} failed: {e}")
+    if "errors" in out:
+        sys.exit("GraphQL errors: " + json.dumps(out["errors"])[:600])
+    return out["data"]
+
+
+def label_id(name):
+    """Return the id of the TEAM_ID label called *name*, or exit if absent.
+
+    Only the first 50 labels are fetched, so a label beyond that page is
+    reported as not found.
+    """
+    d = gql(
+        'query($t:String!){ team(id:$t){ labels(first:50){ nodes{ id name } } } }',
+        {"t": TEAM_ID},
+    )
+    for n in d["team"]["labels"]["nodes"]:
+        if n["name"] == name:
+            return n["id"]
+    sys.exit(f"Label {name!r} not found on team")
+
+
+def resolve_issue_id(ref):
+    """Accept a UUID or an identifier like ABCA-123 → return the UUID.
+
+    A ref whose first dash-separated segment is purely alphabetic is treated
+    as an identifier and looked up; anything else is returned unchanged.
+    """
+    if "-" in ref and ref.split("-")[0].isalpha():
+        d = gql('query($id:String!){ issue(id:$id){ id } }', {"id": ref})
+        return d["issue"]["id"]
+    return ref
+
+
+def create_issue(title, description, parent_id=None):
+    """Create an issue in TEAM_ID/PROJECT_ID and return ``(id, identifier)``.
+
+    When *parent_id* is given the new issue is created as its sub-issue.
+    """
+    inp = {
+        "teamId": TEAM_ID,
+        "projectId": PROJECT_ID,
+        "title": title,
+        "description": description,
+    }
+    if parent_id:
+        inp["parentId"] = parent_id
+    d = gql(
+        'mutation($i:IssueCreateInput!){ issueCreate(input:$i){ success issue{ id identifier } } }',
+        {"i": inp},
+    )
+    iss = d["issueCreate"]["issue"]
+    return iss["id"], iss["identifier"]
+
+
+def create_blocks(blocker_id, blocked_id):
+    """blocker_id BLOCKS blocked_id → blocked_id depends_on blocker_id."""
+    gql(
+        'mutation($i:IssueRelationCreateInput!){ issueRelationCreate(input:$i){ success } }',
+        {"i": {"issueId": blocker_id, "relatedIssueId": blocked_id, "type": "blocks"}},
+    )
+
+
+def add_label(issue_id, lbl_id):
+    """Add label *lbl_id* to *issue_id* (existing labels are kept)."""
+    gql(
+        'mutation($id:String!,$l:[String!]){ issueUpdate(id:$id, input:{addedLabelIds:$l}){ success } }',
+        {"id": issue_id, "l": [lbl_id]},
+    )
+
+
+def cmd_create_epic(args):
+    """Create the parent issue, one child per spec node, then the edges.
+
+    Prints a human-readable log followed by one JSON line mapping the spec's
+    node keys to the created Linear issue ids.
+    """
+    # Context manager so the spec file handle is closed deterministically.
+    with open(args.spec, encoding="utf-8") as f:
+        spec = json.load(f)
+    # Validate edges before creating anything: an unknown depends_on key would
+    # otherwise raise KeyError only after the issues already exist in Linear.
+    known_keys = {node["key"] for node in spec["nodes"]}
+    for node in spec["nodes"]:
+        unknown = [dep for dep in node.get("depends_on", []) if dep not in known_keys]
+        if unknown:
+            sys.exit(f"Node {node['key']!r} depends_on unknown key(s): {unknown}")
+    parent_id, parent_ident = create_issue(
+        spec["title"], spec.get("description", "Orchestration stress-test epic."),
+    )
+    print(f"PARENT {parent_ident} {parent_id} {spec['title']}")
+    key_to_id = {}
+    for node in spec["nodes"]:
+        cid, cident = create_issue(
+            node["title"], node.get("description", ""), parent_id=parent_id,
+        )
+        key_to_id[node["key"]] = cid
+        print(f" CHILD {cident} {cid} key={node['key']} {node['title']}")
+    # Wire edges: for child C depends_on P, P BLOCKS C.
+    for node in spec["nodes"]:
+        for dep in node.get("depends_on", []):
+            create_blocks(key_to_id[dep], key_to_id[node["key"]])
+            print(f" EDGE {dep} blocks {node['key']}")
+    print(f"\nReady. Trigger with: scripts/linear_epic.py trigger --issue {parent_ident}")
+    print(json.dumps({"parent_id": parent_id, "parent_identifier": parent_ident,
+                      "children": key_to_id}))
+
+
+def cmd_trigger(args):
+    """Apply TRIGGER_LABEL to the issue named by ``--issue``."""
+    iid = resolve_issue_id(args.issue)
+    add_label(iid, label_id(TRIGGER_LABEL))
+    print(f"Trigger label {TRIGGER_LABEL!r} applied to {args.issue} → orchestration firing.")
+
+
+def cmd_inspect(args):
+    """Print the parent, its labels, and each child with state and blockers.
+
+    At most 50 children and 20 inverse relations per child are fetched.
+    """
+    iid = resolve_issue_id(args.issue)
+    d = gql(
+        '''query($id:String!){ issue(id:$id){ identifier title
+             state{ name type } labels{ nodes{ name } }
+             children(first:50){ nodes{ identifier title state{ name type }
+               inverseRelations(first:20){ nodes{ type issue{ identifier } } } } } } }''',
+        {"id": iid},
+    )
+    i = d["issue"]
+    print(f"PARENT {i['identifier']} [{i['state']['name']}] {i['title']}")
+    print(f" labels: {[lbl['name'] for lbl in i['labels']['nodes']]}")
+    for c in i["children"]["nodes"]:
+        # An inverse "blocks" relation means the other issue blocks this child.
+        deps = [r["issue"]["identifier"] for r in c["inverseRelations"]["nodes"]
+                if r["type"] == "blocks"]
+        print(f" {c['identifier']:10} [{c['state']['name']:11}] blocked_by={deps} {c['title'][:46]}")
+
+
+def cmd_teardown(args):
+    """Archive every child (first 50) of the issue, then the issue itself."""
+    iid = resolve_issue_id(args.issue)
+    d = gql(
+        'query($id:String!){ issue(id:$id){ identifier children(first:50){ nodes{ id identifier } } } }',
+        {"id": iid},
+    )
+    i = d["issue"]
+    for c in i["children"]["nodes"]:
+        gql('mutation($id:String!){ issueArchive(id:$id){ success } }', {"id": c["id"]})
+        print(f" archived child {c['identifier']}")
+    gql('mutation($id:String!){ issueArchive(id:$id){ success } }', {"id": iid})
+    print(f"archived parent {i['identifier']}")
+
+
+def main():
+    """Parse the subcommand and dispatch to its ``cmd_*`` handler."""
+    ap = argparse.ArgumentParser()
+    sub = ap.add_subparsers(dest="cmd", required=True)
+    # (subcommand, its single required option, handler)
+    for name, flag, handler in (
+        ("create-epic", "--spec", cmd_create_epic),
+        ("trigger", "--issue", cmd_trigger),
+        ("inspect", "--issue", cmd_inspect),
+        ("teardown", "--issue", cmd_teardown),
+    ):
+        p = sub.add_parser(name)
+        p.add_argument(flag, required=True)
+        p.set_defaults(fn=handler)
+    args = ap.parse_args()
+    args.fn(args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/orchestration-debug.sh b/scripts/orchestration-debug.sh
new file mode 100755
index 00000000..394418d0
--- /dev/null
+++ b/scripts/orchestration-debug.sh
@@ -0,0 +1,76 @@
+#!/usr/bin/env bash
+#
+# MIT No Attribution — Copyright Amazon.com, Inc. or its affiliates.
+#
+# Orchestration debug helper for Linear parent/sub-issue orchestration
+# (issue #247, Mode A). One command to see the full state of an
+# orchestration run + the reconciler/processor logs — instead of
+# hand-writing DynamoDB scans and `aws logs tail` each time.
+#
+# Usage:
+#   scripts/orchestration-debug.sh                      # list all orchestrations
+#   scripts/orchestration-debug.sh <orchestration_id>   # full DAG state for one run
+#   scripts/orchestration-debug.sh logs [minutes]       # tail processor + reconciler logs
+#
+# Env overrides (auto-discovered from the deployed stack if unset):
+#   STACK_NAME (default: backgroundagent-dev)
+#   AWS_REGION (default: us-east-1)
+#
+set -euo pipefail
+
+STACK_NAME="${STACK_NAME:-backgroundagent-dev}"
+REGION="${AWS_REGION:-us-east-1}"
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PP="python3 ${HERE}/orchestration_debug.py"
+
+orch_table() {
+  aws dynamodb list-tables --region "$REGION" --output text --query 'TableNames' \
+    | tr '\t' '\n' | grep -i "${STACK_NAME}-OrchestrationTable" | head -1
+}
+processor_log() {
+  echo "/aws/lambda/$(aws lambda list-functions --region "$REGION" \
+    --query "Functions[?contains(FunctionName,'WebhookProces')].FunctionName" \
+    --output text | tr '\t' '\n' | head -1)"
+}
+reconciler_log() {
+  echo "/aws/lambda/$(aws lambda list-functions --region "$REGION" \
+    --query "Functions[?contains(FunctionName,'OrchestrationReconciler')].FunctionName" \
+    --output text | tr '\t' '\n' | head -1)"
+}
+
+# First argument selects the mode: "list" (default), "logs", or an
+# orchestration id to dump.
+CMD="${1:-list}"
+
+if [[ "$CMD" == "logs" ]]; then
+  MINUTES="${2:-15}"
+  echo "═══ webhook processor (last ${MINUTES}m) ═══"
+  # `|| echo` covers grep's exit 1 on "no match" so `set -e` does not abort.
+  aws logs tail "$(processor_log)" --region "$REGION" --since "${MINUTES}m" --format short 2>&1 \
+    | grep -iE 'orchestration|seeded|release|reconcil|non-success|response_body|rejected|cycle|error' \
+    || echo " (no orchestration log lines)"
+  echo ""
+  echo "═══ reconciler (last ${MINUTES}m) ═══"
+  aws logs tail "$(reconciler_log)" --region "$REGION" --since "${MINUTES}m" --format short 2>&1 \
+    | grep -iE 'orchestration|released|skip|complete|reconcil|non-success|response_body|error' \
+    || echo " (no reconciler log lines — has it fired yet?)"
+  exit 0
+fi
+
+# With `set -e` + `pipefail`, the table lookup's grep exits 1 when no table
+# matches, which would abort the script on this assignment before the
+# friendly error below could run. `|| true` keeps the empty result so the
+# -z check is reachable.
+TABLE="$(orch_table || true)"
+if [[ -z "$TABLE" ]]; then
+  echo "OrchestrationTable not found in stack $STACK_NAME ($REGION). Is it deployed?" >&2
+  exit 1
+fi
+
+if [[ "$CMD" == "list" ]]; then
+  echo "═══ all orchestrations in $TABLE ═══"
+  # Meta rows are the items whose sort key is the literal "#meta".
+  aws dynamodb scan --table-name "$TABLE" --region "$REGION" \
+    --filter-expression "sub_issue_id = :m" \
+    --expression-attribute-values '{":m":{"S":"#meta"}}' \
+    --output json 2>&1 | $PP list
+  exit 0
+fi
+
+# NOTE(review): $CMD is interpolated into the JSON below unescaped; an id
+# containing a double quote or backslash would produce invalid JSON.
+echo "═══ orchestration $CMD ═══"
+aws dynamodb query --table-name "$TABLE" --region "$REGION" \
+  --key-condition-expression "orchestration_id = :o" \
+  --expression-attribute-values "{\":o\":{\"S\":\"$CMD\"}}" \
+  --output json 2>&1 | $PP rows
diff --git a/scripts/orchestration_debug.py b/scripts/orchestration_debug.py
new file mode 100644
index 00000000..aaec71eb
--- /dev/null
+++ b/scripts/orchestration_debug.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+#
+# MIT No Attribution — Copyright Amazon.com, Inc. or its affiliates.
+#
+# Pretty-printer for Linear orchestration state (issue #247, Mode A).
+# Reads DynamoDB JSON from stdin. Modes: "list" (meta rows) or "rows"
+# (one orchestration's full DAG). Kept as a real .py file (not an inline
+# heredoc) so the f-strings don't fight shell quoting.

import sys
import json

# child_status → text shown in the table. Only "failed" is restyled
# (upper-cased) so it stands out; a status not listed here is printed verbatim.
STAT = {
    "ready": "ready",
    "blocked": "blocked",
    "released": "released",
    "succeeded": "succeeded",
    "failed": "FAILED",
    "skipped": "skipped",
}


def s(item, key, default=""):
    """Return the string ("S") value of DynamoDB attribute *key* in *item*.

    *default* is returned when the attribute is absent or carries no "S"
    member (i.e. it is not stored as a string).
    """
    return item.get(key, {}).get("S", default)


def _child_count(meta):
    """Return the meta row's ``child_count`` number ("N") as text, or "?"."""
    return meta.get("child_count", {}).get("N", "?")


def _print_list(items):
    """Print one summary line per orchestration meta row ("list" mode)."""
    if not items:
        print(" (none — no orchestration has been triggered yet)")
        return
    for m in items:
        print(f" {s(m, 'orchestration_id')} issue={s(m, 'parent_linear_issue_id')} repo={s(m, 'repo')} children={_child_count(m)}")
    print("\nInspect one with: scripts/orchestration-debug.sh <orchestration_id>")


def _print_rows(items):
    """Print one orchestration: meta row(s) first, then children by identifier."""
    if not items:
        print(" (no rows for this orchestration_id)")
        return
    meta = [i for i in items if s(i, "sub_issue_id") == "#meta"]
    kids = [i for i in items if s(i, "sub_issue_id") != "#meta"]

    for m in meta:
        # Print ONLY whether an OAuth secret is present, never its value — and
        # test key PRESENCE (``in``) so the secret ARN string is never even read.
        # NOTE: CodeQL's py/clear-text-logging-sensitive-data still flags the
        # prints below because it taints the whole stdin-derived meta dict as
        # sensitive and follows any ``s(m, …)`` read into a print — a false
        # positive (this dev-only debug helper logs only ids + a yes/no flag).
        has_oauth = "yes" if "linear_oauth_secret_arn" in m else "no"
        print(f" PARENT issue={s(m, 'parent_linear_issue_id')} repo={s(m, 'repo')} children={_child_count(m)}")
        print(f" release_ctx: user={s(m, 'platform_user_id')} oauth={has_oauth}")

    for k in sorted(kids, key=lambda i: s(i, "linear_identifier")):
        st = s(k, "child_status")
        deps = [x.get("S", "") for x in k.get("depends_on", {}).get("L", [])]
        tid = s(k, "child_task_id") or "-"
        # Fall back to a short prefix of the sub-issue id when no identifier is stored.
        label = s(k, "linear_identifier") or s(k, "sub_issue_id")[:8]
        print(f" {label:10} {STAT.get(st, st):11} deps={deps or '[]'} task={tid}")


def main():
    """Pretty-print DynamoDB JSON read from stdin.

    ``sys.argv[1]`` selects the mode: "list" prints the meta rows; any other
    value (default "rows") prints one orchestration's rows. Exits with a
    one-line diagnostic when stdin is not JSON.
    """
    mode = sys.argv[1] if len(sys.argv) > 1 else "rows"
    raw = sys.stdin.read()
    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        # The shell wrapper pipes `aws … 2>&1`, so an AWS CLI failure arrives
        # here as plain text. Surface its first line instead of a traceback;
        # only the first line is echoed so a truncated JSON document can never
        # leak attribute values.
        first = next((ln.strip() for ln in raw.splitlines() if ln.strip()), "(empty input)")
        sys.exit(f"orchestration_debug: stdin is not DynamoDB JSON: {first[:300]}")
    # A non-object payload (e.g. a bare list) has no "Items"; treat it as empty.
    items = data.get("Items", []) if isinstance(data, dict) else []

    if mode == "list":
        _print_list(items)
    else:
        _print_rows(items)


if __name__ == "__main__":
    main()