protoLabsAI · mabry1985 · Jun 6, 2026 · Jun 6, 2026 · Jun 6, 2026 · Jun 6, 2026
diff --git a/.gitattributes b/.gitattributes
@@ -2,4 +2,8 @@
 # both sides add entries at the top. `merge=union` keeps BOTH sides instead of
 # raising a conflict — a fork's entries and upstream's coexist (re-order/dedupe
 # by hand at release time if needed). Confirmed pain across every downstream port.
+#
+# FORKS: switch this to `CHANGELOG.md merge=ours` (+ `git config merge.ours.driver
+# true` per clone) so an upstream sync keeps YOUR curated changelog instead of
+# splicing upstream's whole changelog back in. See docs/guides/upstream-sync.md.
 CHANGELOG.md merge=union
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
@@ -34,6 +34,35 @@ jobs:
           git clone --depth 1 \
             https://github.com/protoLabsAI/release-tools /tmp/release-tools
           node /tmp/release-tools/bin/verify-workspace-config.mjs --root "$GITHUB_WORKSPACE"
+      - name: Guard against the deleted server.py entrypoint
+        # ADR 0023 promoted the monolith to the server/ package (launch:
+        # `python -m server`). A deploy file launching the old single-file binary
+        # CrashLoopBackOffs. Fail CI if any deploy artifact still does. Scoped to
+        # deploy surfaces (excludes .github so this guard can't match itself).
+        # The interpreter pattern accepts a dotted version suffix so `python3`
+        # and `python3.12` are caught as well as bare `python`. Only the
+        # space-separated (shell-form) launch is matched.
+        run: |
+          if grep -rnE 'python[0-9.]* +server\.py' \
+               --include='*.sh' --include='*.yaml' --include='*.yml' \
+               --include='Dockerfile*' --exclude-dir=.github . ; then
+            echo "::error::Stale single-file launch found — use 'python -m server' (ADR 0023)."
+            exit 1
+          fi
+          echo "ok: no stale single-file launches in deploy artifacts"
+
+  lint:
+    name: Lint (ruff)
+    runs-on: namespace-profile-protolabs-linux
+    timeout-minutes: 5
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Ruff check
+        # Pinned so a new ruff release can't add a rule that fails the gate
+        # out from under a PR. Config + ignores live in pyproject.toml.
+        run: |
+          pip install ruff==0.15.10
+          ruff check .
 
   tests:
     name: Python tests
@@ -58,6 +87,33 @@ jobs:
           . .venv/bin/activate
           python -m pytest tests/ -q
 
+  live-smoke:
+    name: A2A live smoke (lean tier)
+    runs-on: namespace-profile-protolabs-linux
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+      - name: Install lean (core) deps
+        # requirements-core.txt is the PRODUCTION image's dependency set (the
+        # `--ui none` tier). Installing it here — not the full requirements.txt —
+        # also guards the lean-image import gap class (e.g. the FastAPI-missing
+        # bug that only push-to-main caught, #426).
+        run: |
+          python -m venv .venv
+          . .venv/bin/activate
+          python -m pip install --upgrade pip
+          pip install -r requirements-core.txt
+      - name: Boot the real server vs a fake model + drive a real A2A turn
+        # Catches the green-but-wire-broken class unit/mock tests miss — CRLF SSE
+        # framing, A2A routing + version negotiation, agent-card build, lean boot.
+        run: |
+          . .venv/bin/activate
+          python scripts/live_smoke.py
+
   web-e2e:
     name: Web E2E smoke
     runs-on: namespace-profile-protolabs-linux

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -102,7 +102,15 @@ jobs:
           build-args: |
             VERSION=${{ steps.version.outputs.version }}
 
+      # Build-provenance attestation needs the GitHub attestations feature, which
+      # is unavailable on private repos without a paid plan — so a fork would get
+      # a noisy red error here for a step it can't use. Opt-in (fork-friendly):
+      # set the `ATTESTATIONS_ENABLED` repo variable to `true` to run it; forks
+      # leave it unset and the step is skipped. Enable without editing this file
+      # so upstream changes don't conflict on re-sync (same pattern as
+      # `RELEASE_ENABLED`). `continue-on-error` stays as a belt-and-suspenders fallback.
       - name: Attest build provenance
+        if: vars.ATTESTATIONS_ENABLED == 'true'
         continue-on-error: true
         uses: actions/attest-build-provenance@v1
         with:

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,41 @@
 ## [Unreleased]
 
+## [0.16.0] - 2026-06-06
+
+### Added
+- **Eval-case gating (`requires_env`)** — an eval case can now declare
+  `requires_env: [VAR, …]`; when any is unset the case is **skipped** (shown
+  `SKIP`, excluded from the pass/fail tally) instead of run, so a case needing an
+  optional integration doesn't break the default board. This release uses it to ship a gated
+  `code_with_delegation` case (ADR 0024) that verifies end-to-end coding-agent
+  delegation over a live A2A turn — run it with `EVAL_CODING_AGENT=1` once a
+  coding agent is configured. See [Eval your fork](docs/guides/evals.md).
+- **Spawn CLI coding agents over ACP** — a new opt-in `coding_agent` plugin
+  (ADR 0024) adds a `code_with(agent, task)` tool that hands a real, repo-scoped
+  coding job to a purpose-built CLI coding agent (protoCLI `proto`, Claude Code,
+  Codex, Gemini CLI) and returns its result. protoAgent is the
+  [ACP](https://agentclientprotocol.com) *client* — it launches the agent as a
+  subprocess and drives one session over JSON-RPC 2.0 on its stdio
+  (`initialize` → `session/new` → `session/prompt`), accumulating the agent's
+  message as the answer. The ACP client is a port of ORBIS's canonical
+  implementation. Ships **disabled with no agents configured** — each agent gets
+  file + shell access in its (config-pinned, auto-allowed) workdir, so it's a
+  deliberate opt-in; enable with `plugins: { enabled: [coding_agent] }` and
+  declare agents under the `coding_agent` config section. One client (subprocess +
+  session) is cached per agent so follow-up calls continue the same thread.
+  Synchronous (final answer returned; `tool_call` titles logged).
+  See [the guide](docs/guides/coding-agents.md).
+- **Coding-agent permission controls** (ADR 0024) — each configured agent takes a
+  by-kind permission policy applied to the coding agent's `session/request_permission`
+  requests: `auto` (allow all, default), `allowlist` (allow all but
+  `execute`/`delete`), or `readonly` (read-like kinds only) — overridable with
+  `allow_kinds` / `deny_kinds`. Plus a per-call consent gate (`confirm: true`)
+  that asks the operator via `ask_human` before each `code_with` call. Ships
+  agent recipes for protoCLI, Claude Code, Codex, and Gemini CLI. (Per-action
+  live HITL is deferred — pausing a blocking subprocess session mid-turn is
+  incompatible with LangGraph's resume model; use `readonly`/`allowlist` for
+  deterministic per-action control.)
+
 ## [0.15.1] - 2026-06-05
 
 ### Fixed

diff --git a/Dockerfile b/Dockerfile
@@ -104,4 +104,11 @@ USER sandbox
 WORKDIR /sandbox
 
 EXPOSE 7870
+
+# Readiness/health: /healthz returns 200 only once the agent graph is compiled
+# (503 during the model-cold-start window). start-period covers the
+# frozen-sidecar / first-compile boot so a slow start isn't marked unhealthy.
+HEALTHCHECK --interval=30s --timeout=3s --start-period=60s --retries=3 \
+    CMD curl -fsS http://localhost:7870/healthz || exit 1
+
 CMD ["/opt/protoagent/entrypoint.sh"]
diff --git a/README.md b/README.md
@@ -43,8 +43,8 @@ rename / release-pipeline wiring.
 
 | Concern | Where it lives | What it does |
 |---|---|---|
-| A2A server | `a2a_handler.py` | JSON-RPC 2.0 over `/a2a`, SSE streaming, `tasks/*` lifecycle, push notifications, well-known agent card, dual token-shape parsing |
-| Agent runtime | `graph/agent.py`, `server.py` | LangGraph `create_agent()` wired to the A2A handler, with streaming token capture for cost-v1 |
+| A2A server | `server/a2a.py`, `a2a_executor.py` | JSON-RPC 2.0 over `/a2a`, SSE streaming, `tasks/*` lifecycle, push notifications, well-known agent card, dual token-shape parsing |
+| Agent runtime | `graph/agent.py`, `server/` | LangGraph `create_agent()` wired to the A2A handler, with streaming token capture for cost-v1 |
 | LLM gateway | `graph/llm.py` | OpenAI-compatible client pointed at LiteLLM — swap models by editing the gateway config, not the fork |
 | Subagents | `graph/subagents/config.py` | DeerFlow-pattern delegation via a `task()` tool; one worked example ships — a `researcher` (web + memory, plan→search→synthesize→cite) |
 | Starter tools | `tools/lg_tools.py`, `tools/github_tools.py` | Default-on set: 4 keyless general (`current_time`, `calculator` safe AST eval, `web_search` via DuckDuckGo, `fetch_url`) + 2 HITL (`ask_human`, `request_user_input`) + 4 GitHub read tools over the `gh` CLI + 4 notes + 5 memory + 3 scheduler + 4 beads + inbox/peer (conditional). Drop any via `tools.disabled`; add via a plugin. See [Starter tools](./docs/reference/starter-tools.md) |
@@ -88,7 +88,7 @@ own GHCR: [Customize & deploy](./docs/guides/customize-and-deploy.md).
 
 ```
 ┌──────────────┐     A2A JSON-RPC + SSE      ┌─────────────────┐
-│   Consumer   │ ──────────────────────────▶ │  a2a_handler    │
+│   Consumer   │ ──────────────────────────▶ │  A2A handler    │
 │  (any A2A    │                             │  (FastAPI)      │
 │   client)    │ ◀──── cost-v1 DataPart ─────│                 │
 └──────────────┘                             └────────┬────────┘
@@ -120,9 +120,9 @@ subagent `task()` delegation, and the structured-output protocol.
 | `a2a.trace` propagation | No (it's a protocol convention, not a card extension) | Yes — reads caller's Langfuse trace context from `params.metadata["a2a.trace"]` and nests this agent's trace under it |
 
 Declare additional extensions on the card in
-`server.py::_build_agent_card` when your agent's skills actually
-mutate shared state (see `effect-domain-v1` in the Workstacean
-docs for when this applies).
+`server/a2a.py::_build_agent_card_proto` when your agent's skills
+actually mutate shared state (see `effect-domain-v1` in the
+Workstacean docs for when this applies).
 
 ## Push notification support
 
@@ -142,7 +142,7 @@ The A2A handler supports both token shapes the spec permits:
 Both produce `Authorization: Bearer shared-secret` on outgoing
 webhooks. If your fork is getting 401s on callbacks, check which
 shape the consumer is sending before changing anything —
-`_extract_push_token` in `a2a_handler.py` reads both and the
+the dual-token parser in `a2a_auth.py` reads both and the
 test suite covers both.
 
 ## Observability
@@ -224,6 +224,6 @@ complete end-to-end example and cron setup.
 ## Contributing
 
 This is a template repo — bugs and improvements to the shared
-runtime (`a2a_handler.py`, `graph/agent.py`, extension support,
-release pipeline) land here. Domain-specific agent logic lives
-in the fork, not here.
+runtime (the `server/` package, `graph/agent.py`, extension
+support, release pipeline) land here. Domain-specific agent logic
+lives in the fork, not here.
diff --git a/TEMPLATE.md b/TEMPLATE.md
@@ -120,20 +120,29 @@ its own recursion budget.
 
 If your agent doesn't need the subagent pattern at all, delete
 the registry entry and call `create_agent_graph(config,
-include_subagents=False)` in `server.py`.
-
-## 6. Rewrite the agent card
-
-`server.py::_build_agent_card` has a placeholder card. Replace:
+include_subagents=False)` in `server/agent_init.py`.
+
+## 6. Declare the agent card
+
+Don't edit the card builder for identity — that part is
+config-driven (#570). Declare your `description` + `skills` in the
+`a2a:` section of `config/langgraph-config.yaml` (or contribute
+skills from a plugin via `register_a2a_skill`):
+
+```yaml
+a2a:
+  description: "What your agent does, in one line."
+  skills:
+    - id: my_skill        # what A2A callers dispatch to
+      name: My Skill
+      description: ...
+```
 
-- `name` and `description` with the agent's real surface
-- `skills` — each skill is what A2A callers dispatch to. IDs
-  should match what your tools can actually accomplish.
-- `capabilities.extensions` — declare any A2A extensions your
-  agent implements. `cost-v1` is declared by default because
-  the runtime emits it automatically; add `effect-domain-v1`
-  if your skills mutate shared state you want Workstacean's
-  planner to be aware of.
+- `name` already follows `identity.name` (the setup wizard).
+- `capabilities.extensions` — `cost-v1` is declared by default
+  (the runtime emits it automatically); add `effect-domain-v1`
+  in `server/a2a.py::_build_agent_card_proto` if your skills
+  mutate shared state Workstacean's planner should know about.
 
 ## 7. Set up the model
 
@@ -197,12 +206,12 @@ sqlite poller or a Workstacean adapter, selected at startup via env:
 
 ```bash
 # Default: local sqlite, persists at /sandbox/scheduler/<agent_name>/jobs.db
-python server.py
+python -m server
 
 # Workstacean: set both and restart
 export WORKSTACEAN_API_BASE=http://your-workstacean:3000
 export WORKSTACEAN_API_KEY=...
-python server.py
+python -m server
 ```
 
 Multi-fork safety: every job is namespaced by `AGENT_NAME`, so

diff --git a/a2a_auth.py b/a2a_auth.py
@@ -35,8 +35,18 @@
 # Allowed origins: None = verification disabled; list = allowlist.
 _ALLOWED_ORIGINS: list[list[str] | None] = [None]
 
-# Path prefix the guard applies to. The agent card + health are public.
-_GUARDED_PREFIX = "/a2a"
+# Path prefixes the guard applies to: the A2A JSON-RPC surface plus the operator
+# console + OpenAI-compat APIs (which drive subagents, rewrite config/SOUL,
+# schedule jobs, and run turns). The agent card, /healthz, /metrics, and the
+# static console assets live OUTSIDE these prefixes and stay public.
+_GUARDED_PREFIXES = ("/a2a", "/api/", "/v1/")
+
+# Exempt from the guard: the read-only Server-Sent-Events stream. Browsers'
+# EventSource cannot set an Authorization header, so a bearer can't be presented
+# here — and it only exposes activity/inbox events, not any action. The
+# agent-driving endpoints (/api/subagents/run, /api/config, /api/chat, /a2a, …)
+# stay guarded.
+_GUARD_EXEMPT = ("/api/events",)
 
 
 def set_bearer_token(token: str | None) -> None:
@@ -84,7 +94,10 @@ class A2AAuthMiddleware(BaseHTTPMiddleware):
-    """Enforces bearer / X-API-Key / origin on the guarded A2A path."""
+    """Enforces bearer / X-API-Key / origin on every guarded path prefix."""
 
     async def dispatch(self, request: Request, call_next):
-        if not request.url.path.startswith(_GUARDED_PREFIX):
+        path = request.url.path
+        if not any(path.startswith(p) for p in _GUARDED_PREFIXES):
+            return await call_next(request)
+        if any(path.startswith(p) for p in _GUARD_EXEMPT):
             return await call_next(request)
 
         # X-API-Key (legacy) — enforced only when configured.

diff --git a/activity/store.py b/activity/store.py
@@ -56,6 +56,8 @@ def __init__(self, db_path: str) -> None:
 
     def _connect(self) -> sqlite3.Connection:
+        """Open a new SQLite connection to ``self.path`` and return it.
+
+        Rows are returned as ``sqlite3.Row`` objects.
+        """
         db = sqlite3.connect(self.path)
+        db.execute("PRAGMA journal_mode=WAL")   # concurrent reads during writes
+        db.execute("PRAGMA busy_timeout=5000")  # wait up to 5000 ms (don't error) on lock contention
         db.row_factory = sqlite3.Row
         return db
 

diff --git a/apps/web/src/lib/api.ts b/apps/web/src/lib/api.ts
@@ -132,8 +132,27 @@ export function isDesktopWebview(): boolean {
   }
 }
 
+/**
+ * Read the operator bearer token from localStorage (`protoagent.authToken`).
+ *
+ * The token is sent on every fetch-based API + A2A call so the console of a
+ * token-configured deployment authenticates against the server guard. An empty
+ * string means "send no header", which keeps the default local/desktop case
+ * (no token) open. The `/api/events` EventSource is exempt server-side since
+ * EventSource can't set headers.
+ *
+ * @returns The stored token, or `""` when none is stored or storage access throws.
+ */
+export function authToken(): string {
+  let stored: string | null = null;
+  try {
+    stored = window.localStorage.getItem("protoagent.authToken");
+  } catch {
+    // Storage access threw — fall through to the empty default.
+  }
+  return stored || "";
+}
+
+/**
+ * Attach the operator bearer token to `headers` (mutated in place) and return it.
+ *
+ * No header is added when no token is stored. An `Authorization` header the
+ * caller already supplied is left untouched, so an explicit per-call credential
+ * is never silently overwritten by the stored token.
+ */
+function applyAuth(headers: Headers): Headers {
+  const token = authToken();
+  if (token && !headers.has("Authorization")) {
+    headers.set("Authorization", `Bearer ${token}`);
+  }
+  return headers;
+}
+
 async function request<T>(path: string, options: RequestOptions = {}): Promise<T> {
-  const headers = new Headers(options.headers);
+  const headers = applyAuth(new Headers(options.headers));
   let body: BodyInit | undefined;
   if (options.body !== undefined) {
     headers.set("Content-Type", "application/json");
@@ -578,7 +597,7 @@ export const api = {
       try {
         const res = await fetch(apiUrl("/api/chat"), {
           method: "POST",
-          headers: { "Content-Type": "application/json" },
+          headers: applyAuth(new Headers({ "Content-Type": "application/json" })),
           signal: handlers.signal,
           body: JSON.stringify({ message, session_id: sessionId }),
         });
@@ -608,7 +627,7 @@ export const api = {
     const rpcId = `web-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
     const response = await fetch(apiUrl("/a2a"), {
       method: "POST",
-      headers: { "Content-Type": "application/json", "A2A-Version": "1.0" },
+      headers: applyAuth(new Headers({ "Content-Type": "application/json", "A2A-Version": "1.0" })),
       signal: handlers.signal,
       // A2A 1.0 (a2a-sdk): the streaming RPC is `SendStreamingMessage` (0.3's
       // `message/stream` is gone → -32601 Method not found, the cause of a