Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
57edcb5
docs: tighten for server/ package + config-driven identity (post #569…
mabry1985 Jun 6, 2026
57c626e
fix(deploy): launch the server/ package, not the deleted server.py (+…
mabry1985 Jun 6, 2026
095da22
security: default-bind 127.0.0.1; containers expose 0.0.0.0 explicitl…
mabry1985 Jun 6, 2026
00cc56d
reliability: bound the LLM call (timeout + retries) + close the push …
mabry1985 Jun 6, 2026
fb7cf97
security: default-on SSRF denylist for fetch_url + re-check redirects…
mabry1985 Jun 6, 2026
0beba05
deploy: wire the existing /healthz into a HEALTHCHECK + k8s probes (#…
mabry1985 Jun 6, 2026
a1c1396
ci(release): make build-provenance attestation opt-in (fork-friendly)…
mabry1985 Jun 6, 2026
6794d11
ci: A2A live-smoke job — boot the real server vs a fake model, drive …
mabry1985 Jun 6, 2026
39f8407
ci: enforce ruff + clean dead imports/vars (lint gate) (#587)
mabry1985 Jun 6, 2026
a6cf03b
obs: audit log rotation + instance-scoping + tail reads; broaden reda…
mabry1985 Jun 6, 2026
e2cdcfe
data: WAL + busy_timeout on the telemetry/activity/inbox/beads stores…
mabry1985 Jun 6, 2026
b80798f
security: bearer-gate the operator/console + OpenAI-compat APIs (P0) …
mabry1985 Jun 6, 2026
1011a4c
obs: A2A turn-outcome metrics for /metrics alerting (#592)
mabry1985 Jun 6, 2026
d1c7605
docs: fork upstream-sync guide — merge-not-squash + CHANGELOG merge=o…
mabry1985 Jun 6, 2026
9cfbce3
feat(plugins): spawn CLI coding agents over ACP (code_with) — ADR 002…
mabry1985 Jun 6, 2026
048b9f3
feat(coding_agent): by-kind permission policy + per-call consent gate…
mabry1985 Jun 6, 2026
50ed584
feat(evals): requires_env skip + gated code_with delegation eval — AD…
mabry1985 Jun 6, 2026
bada5c2
fix(mcp): a tool error degrades to a recoverable result, not a failed…
mabry1985 Jun 6, 2026
da0740d
feat(a2a): opt-in deploy-time guard — refuse to start on a loopback c…
mabry1985 Jun 6, 2026
4afd97d
chore: release v0.16.0 (#601)
mabry1985 Jun 6, 2026
42a551e
Merge remote-tracking branch 'upstream/main' into chore/sync-upstream…
Jun 6, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,8 @@
# both sides add entries at the top. `merge=union` keeps BOTH sides instead of
# raising a conflict — a fork's entries and upstream's coexist (re-order/dedupe
# by hand at release time if needed). Confirmed pain across every downstream port.
#
# FORKS: switch this to `CHANGELOG.md merge=ours` (+ `git config merge.ours.driver
# true` per clone) so an upstream sync keeps YOUR curated changelog instead of
# splicing upstream's whole changelog back in. See docs/guides/upstream-sync.md.
CHANGELOG.md merge=union
56 changes: 56 additions & 0 deletions .github/workflows/checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,35 @@ jobs:
git clone --depth 1 \
https://github.com/protoLabsAI/release-tools /tmp/release-tools
node /tmp/release-tools/bin/verify-workspace-config.mjs --root "$GITHUB_WORKSPACE"
- name: Guard against the deleted server.py entrypoint
# ADR 0023 promoted the monolith to the server/ package (launch:
# `python -m server`). A deploy file launching the old single-file binary
# CrashLoopBackOffs. Fail CI if any deploy artifact still does. Scoped to
# deploy surfaces (excludes .github so this guard can't match itself).
run: |
if grep -rnE 'python[0-9]* +server\.py' \
--include='*.sh' --include='*.yaml' --include='*.yml' \
--include='Dockerfile*' --exclude-dir=.github . ; then
echo "::error::Stale single-file launch found — use 'python -m server' (ADR 0023)."
exit 1
fi
echo "ok: no stale single-file launches in deploy artifacts"

lint:
name: Lint (ruff)
runs-on: namespace-profile-protolabs-linux
timeout-minutes: 5
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.12"
- name: Ruff check
# Pinned so a new ruff release can't add a rule that fails the gate
# out from under a PR. Config + ignores live in pyproject.toml.
run: |
pip install ruff==0.15.10
ruff check .

tests:
name: Python tests
Expand All @@ -58,6 +87,33 @@ jobs:
. .venv/bin/activate
python -m pytest tests/ -q

live-smoke:
name: A2A live smoke (lean tier)
runs-on: namespace-profile-protolabs-linux
timeout-minutes: 10
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: pip
- name: Install lean (core) deps
# requirements-core.txt is the PRODUCTION image's dependency set (the
# `--ui none` tier). Installing it here — not the full requirements.txt —
# also guards the lean-image import gap class (e.g. the FastAPI-missing
# bug that only push-to-main caught, #426).
run: |
python -m venv .venv
. .venv/bin/activate
python -m pip install --upgrade pip
pip install -r requirements-core.txt
- name: Boot the real server vs a fake model + drive a real A2A turn
# Catches the green-but-wire-broken class unit/mock tests miss — CRLF SSE
# framing, A2A routing + version negotiation, agent-card build, lean boot.
run: |
. .venv/bin/activate
python scripts/live_smoke.py

web-e2e:
name: Web E2E smoke
runs-on: namespace-profile-protolabs-linux
Expand Down
8 changes: 8 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,15 @@ jobs:
build-args: |
VERSION=${{ steps.version.outputs.version }}

# Build-provenance attestation needs the GitHub attestations feature, which
# is unavailable on private repos without a paid plan — so a fork would get
# a noisy red error here for a step it can't use. Opt-in (fork-friendly):
# set the `ATTESTATIONS_ENABLED` repo variable to `true` to run it; forks
# leave it unset and the step is skipped. Enable without editing this file
# so upstream changes don't conflict on re-sync (same pattern as
# `RELEASE_ENABLED`). `continue-on-error` stays as a belt-and-suspenders fallback.
- name: Attest build provenance
if: vars.ATTESTATIONS_ENABLED == 'true'
continue-on-error: true
uses: actions/attest-build-provenance@v1
with:
Expand Down
36 changes: 36 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,41 @@
## [Unreleased]

## [0.16.0] - 2026-06-06

### Added
- **Eval-case gating (`requires_env`)** — an eval case can now declare
`requires_env: [VAR, …]`; when any is unset the case is **skipped** (shown
`SKIP`, excluded from the pass/fail tally) instead of run, so a case needing an
optional integration doesn't break the default board. Uses it to ship a gated
`code_with_delegation` case (ADR 0024) that verifies end-to-end coding-agent
delegation over a live A2A turn — run it with `EVAL_CODING_AGENT=1` once a
coding agent is configured. See [Eval your fork](docs/guides/evals.md).
- **Spawn CLI coding agents over ACP** — a new opt-in `coding_agent` plugin
(ADR 0024) adds a `code_with(agent, task)` tool that hands a real, repo-scoped
coding job to a purpose-built CLI coding agent (protoCLI `proto`, Claude Code,
Codex, Gemini CLI) and returns its result. protoAgent is the
[ACP](https://agentclientprotocol.com) *client* — it launches the agent as a
subprocess and drives one session over JSON-RPC 2.0 on its stdio
(`initialize` → `session/new` → `session/prompt`), accumulating the agent's
message as the answer. The ACP client is a port of ORBIS's canonical
implementation. Ships **disabled with no agents configured** — each agent gets
file + shell access in its (config-pinned, auto-allowed) workdir, so it's a
deliberate opt-in; enable with `plugins: { enabled: [coding_agent] }` and
declare agents under the `coding_agent` config section. One client (subprocess +
session) is cached per agent so follow-up calls continue the same thread.
Synchronous (final answer returned; `tool_call` titles logged).
See [the guide](docs/guides/coding-agents.md).
- **Coding-agent permission controls** (ADR 0024) — each configured agent takes a
by-kind permission policy applied to the coding agent's `session/request_permission`
requests: `auto` (allow all, default), `allowlist` (allow all but
`execute`/`delete`), or `readonly` (read-like kinds only) — overridable with
`allow_kinds` / `deny_kinds`. Plus a per-call consent gate (`confirm: true`)
that asks the operator via `ask_human` before each `code_with` call. Ships
agent recipes for protoCLI, Claude Code, Codex, and Gemini CLI. (Per-action
live HITL is deferred — pausing a blocking subprocess session mid-turn is
incompatible with LangGraph's resume model; use `readonly`/`allowlist` for
deterministic per-action control.)

## [0.15.1] - 2026-06-05

### Fixed
Expand Down
7 changes: 7 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,11 @@ USER sandbox
WORKDIR /sandbox

EXPOSE 7870

# Readiness/health: /healthz returns 200 only once the agent graph is compiled
# (503 during the model-cold-start window). start-period covers the
# frozen-sidecar / first-compile boot so a slow start isn't marked unhealthy.
HEALTHCHECK --interval=30s --timeout=3s --start-period=60s --retries=3 \
CMD curl -fsS http://localhost:7870/healthz || exit 1

CMD ["/opt/protoagent/entrypoint.sh"]
20 changes: 10 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ rename / release-pipeline wiring.

| Concern | Where it lives | What it does |
|---|---|---|
| A2A server | `a2a_handler.py` | JSON-RPC 2.0 over `/a2a`, SSE streaming, `tasks/*` lifecycle, push notifications, well-known agent card, dual token-shape parsing |
| Agent runtime | `graph/agent.py`, `server.py` | LangGraph `create_agent()` wired to the A2A handler, with streaming token capture for cost-v1 |
| A2A server | `server/a2a.py`, `a2a_executor.py` | JSON-RPC 2.0 over `/a2a`, SSE streaming, `tasks/*` lifecycle, push notifications, well-known agent card, dual token-shape parsing |
| Agent runtime | `graph/agent.py`, `server/` | LangGraph `create_agent()` wired to the A2A handler, with streaming token capture for cost-v1 |
| LLM gateway | `graph/llm.py` | OpenAI-compatible client pointed at LiteLLM — swap models by editing the gateway config, not the fork |
| Subagents | `graph/subagents/config.py` | DeerFlow-pattern delegation via a `task()` tool; one worked example ships — a `researcher` (web + memory, plan→search→synthesize→cite) |
| Starter tools | `tools/lg_tools.py`, `tools/github_tools.py` | Default-on set: 4 keyless general (`current_time`, `calculator` safe AST eval, `web_search` via DuckDuckGo, `fetch_url`) + 2 HITL (`ask_human`, `request_user_input`) + 4 GitHub read tools over the `gh` CLI + 4 notes + 5 memory + 3 scheduler + 4 beads + inbox/peer (conditional). Drop any via `tools.disabled`; add via a plugin. See [Starter tools](./docs/reference/starter-tools.md) |
Expand Down Expand Up @@ -88,7 +88,7 @@ own GHCR: [Customize & deploy](./docs/guides/customize-and-deploy.md).

```
┌──────────────┐ A2A JSON-RPC + SSE ┌─────────────────┐
│ Consumer │ ──────────────────────────▶ │ a2a_handler
│ Consumer │ ──────────────────────────▶ │ A2A handler
│ (any A2A │ │ (FastAPI) │
│ client) │ ◀──── cost-v1 DataPart ─────│ │
└──────────────┘ └────────┬────────┘
Expand Down Expand Up @@ -120,9 +120,9 @@ subagent `task()` delegation, and the structured-output protocol.
| `a2a.trace` propagation | No (it's a protocol convention, not a card extension) | Yes — reads caller's Langfuse trace context from `params.metadata["a2a.trace"]` and nests this agent's trace under it |

Declare additional extensions on the card in
`server.py::_build_agent_card` when your agent's skills actually
mutate shared state (see `effect-domain-v1` in the Workstacean
docs for when this applies).
`server/a2a.py::_build_agent_card_proto` when your agent's skills
actually mutate shared state (see `effect-domain-v1` in the
Workstacean docs for when this applies).

## Push notification support

Expand All @@ -142,7 +142,7 @@ The A2A handler supports both token shapes the spec permits:
Both produce `Authorization: Bearer shared-secret` on outgoing
webhooks. If your fork is getting 401s on callbacks, check which
shape the consumer is sending before changing anything —
`_extract_push_token` in `a2a_handler.py` reads both and the
the dual-token parser in `a2a_auth.py` reads both and the
test suite covers both.

## Observability
Expand Down Expand Up @@ -224,6 +224,6 @@ complete end-to-end example and cron setup.
## Contributing

This is a template repo — bugs and improvements to the shared
runtime (`a2a_handler.py`, `graph/agent.py`, extension support,
release pipeline) land here. Domain-specific agent logic lives
in the fork, not here.
runtime (the `server/` package, `graph/agent.py`, extension
support, release pipeline) land here. Domain-specific agent logic
lives in the fork, not here.
39 changes: 24 additions & 15 deletions TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,20 +120,29 @@ its own recursion budget.

If your agent doesn't need the subagent pattern at all, delete
the registry entry and call `create_agent_graph(config,
include_subagents=False)` in `server.py`.

## 6. Rewrite the agent card

`server.py::_build_agent_card` has a placeholder card. Replace:
include_subagents=False)` in `server/agent_init.py`.

## 6. Declare the agent card

Don't edit the card builder — its identity is config-driven
(#570). Declare your `description` + `skills` in the `a2a:`
section of `config/langgraph-config.yaml` (or contribute skills
from a plugin via `register_a2a_skill`):

```yaml
a2a:
description: "What your agent does, in one line."
skills:
- id: my_skill # what A2A callers dispatch to
name: My Skill
description: ...
```

- `name` and `description` with the agent's real surface
- `skills` — each skill is what A2A callers dispatch to. IDs
should match what your tools can actually accomplish.
- `capabilities.extensions` — declare any A2A extensions your
agent implements. `cost-v1` is declared by default because
the runtime emits it automatically; add `effect-domain-v1`
if your skills mutate shared state you want Workstacean's
planner to be aware of.
- `name` already follows `identity.name` (the setup wizard).
- `capabilities.extensions` — `cost-v1` is declared by default
(the runtime emits it automatically); add `effect-domain-v1`
in `server/a2a.py::_build_agent_card_proto` if your skills
mutate shared state Workstacean's planner should know about.

## 7. Set up the model

Expand Down Expand Up @@ -197,12 +206,12 @@ sqlite poller or a Workstacean adapter, selected at startup via env:

```bash
# Default: local sqlite, persists at /sandbox/scheduler/<agent_name>/jobs.db
python server.py
python -m server

# Workstacean: set both and restart
export WORKSTACEAN_API_BASE=http://your-workstacean:3000
export WORKSTACEAN_API_KEY=...
python server.py
python -m server
```

Multi-fork safety: every job is namespaced by `AGENT_NAME`, so
Expand Down
19 changes: 16 additions & 3 deletions a2a_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,18 @@
# Allowed origins: None = verification disabled; list = allowlist.
_ALLOWED_ORIGINS: list[list[str] | None] = [None]

# Path prefix the guard applies to. The agent card + health are public.
_GUARDED_PREFIX = "/a2a"
# Path prefixes the guard applies to: the A2A JSON-RPC surface plus the operator
# console + OpenAI-compat APIs (which drive subagents, rewrite config/SOUL,
# schedule jobs, and run turns). The agent card, /healthz, /metrics, and the
# static console assets live OUTSIDE these prefixes and stay public.
_GUARDED_PREFIXES = ("/a2a", "/api/", "/v1/")

# Exempt from the guard: the read-only Server-Sent-Events stream. Browsers'
# EventSource cannot set an Authorization header, so a bearer can't be presented
# here — and it only exposes activity/inbox events, not any action. The
# agent-driving endpoints (/api/subagents/run, /api/config, /api/chat, /a2a, …)
# stay guarded.
_GUARD_EXEMPT = ("/api/events",)


def set_bearer_token(token: str | None) -> None:
Expand Down Expand Up @@ -84,7 +94,10 @@ class A2AAuthMiddleware(BaseHTTPMiddleware):
"""Enforces bearer / X-API-Key / origin on the guarded A2A path."""

async def dispatch(self, request: Request, call_next):
if not request.url.path.startswith(_GUARDED_PREFIX):
path = request.url.path
if not any(path.startswith(p) for p in _GUARDED_PREFIXES):
return await call_next(request)
if any(path.startswith(p) for p in _GUARD_EXEMPT):
return await call_next(request)

# X-API-Key (legacy) — enforced only when configured.
Expand Down
2 changes: 2 additions & 0 deletions activity/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def __init__(self, db_path: str) -> None:

def _connect(self) -> sqlite3.Connection:
    """Open a fresh SQLite connection to the database at ``self.path``.

    The connection is switched to WAL journaling, given a 5000 ms busy
    timeout, and configured to return rows as ``sqlite3.Row``.
    """
    conn = sqlite3.connect(self.path)
    for pragma in (
        "PRAGMA journal_mode=WAL",  # concurrent reads during writes
        "PRAGMA busy_timeout=5000",  # wait (don't error) on lock contention
    ):
        conn.execute(pragma)
    conn.row_factory = sqlite3.Row
    return conn

Expand Down
25 changes: 22 additions & 3 deletions apps/web/src/lib/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,27 @@ export function isDesktopWebview(): boolean {
}
}

/**
 * Returns the operator bearer token stored in localStorage under
 * `protoagent.authToken`, with surrounding whitespace removed.
 *
 * The token is sent on every fetch-based API + A2A call so a token-configured
 * deployment's console authenticates against the server guard. An absent,
 * empty, or whitespace-only value yields `""`, which means "send no
 * Authorization header" — the default local/desktop case (no token) stays
 * open. (The `/api/events` EventSource is exempt server-side since
 * EventSource can't set headers.)
 *
 * @returns The trimmed token, or `""` when none is stored or storage cannot
 *   be read.
 */
export function authToken(): string {
  try {
    const stored = window.localStorage.getItem("protoagent.authToken");
    // Trim so a whitespace-only or padded value counts as blank instead of
    // producing a malformed `Bearer ` header.
    return stored?.trim() ?? "";
  } catch {
    // Reading storage failed (e.g. no `window`); behave as if no token is set.
    return "";
  }
}

/**
 * Adds `Authorization: Bearer <token>` to `headers` when an operator token is
 * configured; otherwise leaves them untouched.
 *
 * @param headers - Header set to decorate; mutated in place.
 * @returns The same `Headers` instance, so the call can be used inline.
 */
function applyAuth(headers: Headers): Headers {
  const token = authToken();
  if (!token) {
    return headers;
  }
  headers.set("Authorization", `Bearer ${token}`);
  return headers;
}

async function request<T>(path: string, options: RequestOptions = {}): Promise<T> {
const headers = new Headers(options.headers);
const headers = applyAuth(new Headers(options.headers));
let body: BodyInit | undefined;
if (options.body !== undefined) {
headers.set("Content-Type", "application/json");
Expand Down Expand Up @@ -578,7 +597,7 @@ export const api = {
try {
const res = await fetch(apiUrl("/api/chat"), {
method: "POST",
headers: { "Content-Type": "application/json" },
headers: applyAuth(new Headers({ "Content-Type": "application/json" })),
signal: handlers.signal,
body: JSON.stringify({ message, session_id: sessionId }),
});
Expand Down Expand Up @@ -608,7 +627,7 @@ export const api = {
const rpcId = `web-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
const response = await fetch(apiUrl("/a2a"), {
method: "POST",
headers: { "Content-Type": "application/json", "A2A-Version": "1.0" },
headers: applyAuth(new Headers({ "Content-Type": "application/json", "A2A-Version": "1.0" })),
signal: handlers.signal,
// A2A 1.0 (a2a-sdk): the streaming RPC is `SendStreamingMessage` (0.3's
// `message/stream` is gone → -32601 Method not found, the cause of a
Expand Down
Loading
Loading