diff --git a/apps/api/package.json b/apps/api/package.json index 851fd35..d52e24b 100644 --- a/apps/api/package.json +++ b/apps/api/package.json @@ -14,6 +14,7 @@ }, "dependencies": { "@learnpro/llm": "workspace:*", + "@learnpro/sandbox": "workspace:*", "@learnpro/scoring": "workspace:*", "@learnpro/shared": "workspace:*", "fastify": "^5.2.0" diff --git a/apps/api/src/index.test.ts b/apps/api/src/index.test.ts index d4117fc..fd3b040 100644 --- a/apps/api/src/index.test.ts +++ b/apps/api/src/index.test.ts @@ -1,9 +1,34 @@ import { describe, it, expect } from "vitest"; +import type { SandboxProvider, SandboxRunRequest, SandboxRunResponse } from "@learnpro/sandbox"; import { buildServer } from "./index.js"; +class FakeSandbox implements SandboxProvider { + readonly name = "fake-sandbox"; + public lastReq: SandboxRunRequest | null = null; + + constructor( + private readonly response: + | SandboxRunResponse + | ((r: SandboxRunRequest) => SandboxRunResponse) = { + stdout: "hello\n", + stderr: "", + exit_code: 0, + duration_ms: 12, + killed_by: null, + language: "python", + runtime_version: "3.10.0", + }, + ) {} + + async run(req: SandboxRunRequest): Promise { + this.lastReq = req; + return typeof this.response === "function" ? 
this.response(req) : this.response; + } +} + describe("apps/api", () => { it("GET /health returns ok payload", async () => { - const app = buildServer(); + const app = buildServer({ sandbox: new FakeSandbox() }); const res = await app.inject({ method: "GET", url: "/health" }); expect(res.statusCode).toBe(200); const body = res.json() as { ok: boolean; service: string }; @@ -13,7 +38,7 @@ describe("apps/api", () => { }); it("GET /policies reports the wired policy implementations", async () => { - const app = buildServer(); + const app = buildServer({ sandbox: new FakeSandbox() }); const res = await app.inject({ method: "GET", url: "/policies" }); expect(res.statusCode).toBe(200); expect(res.json()).toEqual({ @@ -26,10 +51,45 @@ describe("apps/api", () => { }); it("GET /llm reports the wired provider name", async () => { - const app = buildServer(); + const app = buildServer({ sandbox: new FakeSandbox() }); const res = await app.inject({ method: "GET", url: "/llm" }); expect(res.statusCode).toBe(200); expect(res.json()).toEqual({ provider: "anthropic" }); await app.close(); }); + + it("GET /sandbox reports the wired sandbox provider name", async () => { + const app = buildServer({ sandbox: new FakeSandbox() }); + const res = await app.inject({ method: "GET", url: "/sandbox" }); + expect(res.statusCode).toBe(200); + expect(res.json()).toEqual({ provider: "fake-sandbox" }); + await app.close(); + }); + + it("POST /sandbox/run forwards a valid request and returns the run result", async () => { + const sandbox = new FakeSandbox(); + const app = buildServer({ sandbox }); + const res = await app.inject({ + method: "POST", + url: "/sandbox/run", + payload: { language: "python", code: "print('hello')" }, + }); + expect(res.statusCode).toBe(200); + const body = res.json() as SandboxRunResponse; + expect(body.stdout).toBe("hello\n"); + expect(body.exit_code).toBe(0); + expect(sandbox.lastReq?.language).toBe("python"); + await app.close(); + }); + + it("POST /sandbox/run 
rejects invalid input with 400", async () => { + const app = buildServer({ sandbox: new FakeSandbox() }); + const res = await app.inject({ + method: "POST", + url: "/sandbox/run", + payload: { language: "rust", code: "" }, + }); + expect(res.statusCode).toBe(400); + await app.close(); + }); }); diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts index ead3dbe..80bb1c4 100644 --- a/apps/api/src/index.ts +++ b/apps/api/src/index.ts @@ -11,6 +11,13 @@ import { loadLLMConfigFromEnv, type LLMProvider, } from "@learnpro/llm"; +import { + buildSandboxProvider, + loadSandboxConfigFromEnv, + SandboxRequestError, + SandboxRunRequestSchema, + type SandboxProvider, +} from "@learnpro/sandbox"; const PORT = Number(process.env["PORT"] ?? 4000); const HOST = process.env["HOST"] ?? "0.0.0.0"; @@ -18,6 +25,7 @@ const HOST = process.env["HOST"] ?? "0.0.0.0"; export interface BuildServerOptions { policies?: PolicyRegistry; llm?: LLMProvider; + sandbox?: SandboxProvider; } function defaultLLM(): LLMProvider { @@ -48,11 +56,17 @@ function defaultLLM(): LLMProvider { return buildLLMProvider({ config }); } +function defaultSandbox(): SandboxProvider { + const config = loadSandboxConfigFromEnv(process.env); + return buildSandboxProvider({ config }); +} + export function buildServer(opts: BuildServerOptions = {}) { const app = Fastify({ logger: true }); const policies = opts.policies ?? buildPolicyRegistry({ config: loadPolicyConfigFromEnv(process.env) }); const llm = opts.llm ?? defaultLLM(); + const sandbox = opts.sandbox ?? 
defaultSandbox(); app.get("/health", async () => healthPayload({ service: "api" })); @@ -67,6 +81,27 @@ export function buildServer(opts: BuildServerOptions = {}) { provider: llm.name, })); + app.get("/sandbox", async () => ({ + provider: sandbox.name, + })); + + app.post("/sandbox/run", async (req, reply) => { + const parsed = SandboxRunRequestSchema.safeParse(req.body); + if (!parsed.success) { + return reply.code(400).send({ error: "invalid_request", issues: parsed.error.issues }); + } + try { + const result = await sandbox.run(parsed.data); + return reply.code(200).send(result); + } catch (err) { + if (err instanceof SandboxRequestError) { + req.log.warn({ err }, "sandbox provider error"); + return reply.code(502).send({ error: "sandbox_unavailable", message: err.message }); + } + throw err; + } + }); + return app; } diff --git a/packages/sandbox/README.md b/packages/sandbox/README.md index 5321432..bd11ddf 100644 --- a/packages/sandbox/README.md +++ b/packages/sandbox/README.md @@ -1,5 +1,32 @@ # `@learnpro/sandbox` -`SandboxProvider` interface + Piston-on-Docker adapter. +`SandboxProvider` interface + Piston-on-Docker adapter, per [ADR-0002](../../docs/architecture/ADR-0002-sandbox.md). -**Status:** stub. Real Python runner lands in STORY-007, TS runner in STORY-008, hardening verified in STORY-010 per [ADR-0002](../../docs/architecture/ADR-0002-sandbox.md). +## What's here + +- `provider.ts` — single-method `SandboxProvider` interface (`run(req) → response`). +- `types.ts` — Zod schemas at the boundary: `SandboxRunRequestSchema`, `SandboxRunResponseSchema`, language and `killed_by` enums, telemetry event. +- `piston.ts` — `PistonSandboxProvider` (depends only on a `PistonTransport` interface — easy to fake in unit tests). +- `piston-http-transport.ts` — real `fetch`-based transport against a self-hosted Piston instance (default `http://localhost:2000`). 
+- `registry.ts` — `buildSandboxProvider()` factory + `loadSandboxConfigFromEnv()` (`PISTON_URL` → baseUrl override). +- `telemetry.ts` — null + in-memory `SandboxTelemetrySink` implementations. +- `errors.ts` — `SandboxRequestError`, `SandboxLanguageNotSupportedError`. + +## Languages (MVP) + +- `python` → Piston `python@3.10.0` +- `typescript` → Piston `typescript@5.0.3` (used by STORY-008) + +Override per-language versions through `SandboxConfig.languages`. + +## Tests + +- `piston.test.ts` — unit tests with `FakePistonTransport`. Cover happy path, stdin forwarding, language spec mapping, timeout / OOM / output-limit / signal classification, telemetry, and zod input validation. +- `registry.test.ts` — config defaults, `PISTON_URL` env handling, `LEARNPRO_SANDBOX_CONFIG` JSON parsing. +- `piston.integration.test.ts` — gated on `PISTON_URL`; runs `print('hello')` and a runaway loop against a real Piston (start it via `infra/docker/docker-compose.dev.yaml`). + +## What lives elsewhere + +- **TS runner specifics**: STORY-008. +- **Hardening verification (no-net, ro rootfs, cgroups, seccomp, non-root)**: STORY-010 — every bullet from the ADR-0002 hardening checklist gets an automated breakout test in `packages/sandbox/test/breakout/`. +- **API wiring**: `apps/api/src/index.ts` exposes `GET /sandbox` (provider name) and `POST /sandbox/run` (zod-validated body → run result). 
diff --git a/packages/sandbox/package.json b/packages/sandbox/package.json index 8a13040..d3fbc37 100644 --- a/packages/sandbox/package.json +++ b/packages/sandbox/package.json @@ -14,6 +14,9 @@ "typecheck": "tsc --noEmit", "test": "vitest run --passWithNoTests" }, + "dependencies": { + "zod": "^3.24.1" + }, "devDependencies": { "@types/node": "^22.10.2", "typescript": "^5.7.2", diff --git a/packages/sandbox/src/errors.ts b/packages/sandbox/src/errors.ts new file mode 100644 index 0000000..77d5bb1 --- /dev/null +++ b/packages/sandbox/src/errors.ts @@ -0,0 +1,18 @@ +export class SandboxRequestError extends Error { + readonly provider: string; + override readonly cause?: unknown; + + constructor(message: string, provider: string, cause?: unknown) { + super(message); + this.name = "SandboxRequestError"; + this.provider = provider; + if (cause !== undefined) this.cause = cause; + } +} + +export class SandboxLanguageNotSupportedError extends Error { + constructor(provider: string, language: string) { + super(`${provider} does not support language "${language}"`); + this.name = "SandboxLanguageNotSupportedError"; + } +} diff --git a/packages/sandbox/src/index.ts b/packages/sandbox/src/index.ts index 5d27d75..ff8b3c1 100644 --- a/packages/sandbox/src/index.ts +++ b/packages/sandbox/src/index.ts @@ -1,5 +1,44 @@ export const PACKAGE_NAME = "@learnpro/sandbox"; -export interface SandboxProvider { - readonly name: string; -} +export type { SandboxProvider } from "./provider.js"; + +export { + DEFAULT_PISTON_LANGUAGES, + PistonSandboxProvider, + type PistonExecuteParams, + type PistonExecuteResponse, + type PistonLanguageSpec, + type PistonSandboxProviderOptions, + type PistonTransport, +} from "./piston.js"; + +export { PistonHttpTransport, type PistonHttpTransportOptions } from "./piston-http-transport.js"; + +export { + buildSandboxProvider, + loadSandboxConfigFromEnv, + SandboxConfigSchema, + type BuildSandboxOptions, + type SandboxConfig, +} from "./registry.js"; + 
+export { InMemorySandboxTelemetrySink, NullSandboxTelemetrySink } from "./telemetry.js"; + +export { SandboxLanguageNotSupportedError, SandboxRequestError } from "./errors.js"; + +export { + DEFAULT_MEMORY_LIMIT_MB, + DEFAULT_OUTPUT_LIMIT_BYTES, + DEFAULT_TIME_LIMIT_MS, + SandboxKilledBySchema, + SandboxLanguageSchema, + SandboxRunRequestSchema, + SandboxRunResponseSchema, + SandboxTelemetryEventSchema, + type SandboxKilledBy, + type SandboxLanguage, + type SandboxRunRequest, + type SandboxRunResponse, + type SandboxTelemetryEvent, + type SandboxTelemetrySink, +} from "./types.js"; diff --git a/packages/sandbox/src/piston-http-transport.ts b/packages/sandbox/src/piston-http-transport.ts new file mode 100644 index 0000000..3957871 --- /dev/null +++ b/packages/sandbox/src/piston-http-transport.ts @@ -0,0 +1,39 @@ +import type { PistonExecuteParams, PistonExecuteResponse, PistonTransport } from "./piston.js"; + +export interface PistonHttpTransportOptions { + baseUrl: string; + fetchImpl?: typeof fetch; + timeoutMs?: number; +} + +export class PistonHttpTransport implements PistonTransport { + private readonly baseUrl: string; + private readonly fetchImpl: typeof fetch; + private readonly timeoutMs: number; + + constructor(opts: PistonHttpTransportOptions) { + this.baseUrl = opts.baseUrl.replace(/\/+$/, ""); + this.fetchImpl = opts.fetchImpl ?? fetch; + this.timeoutMs = opts.timeoutMs ?? 
30_000; + } + + async execute(params: PistonExecuteParams): Promise { + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), this.timeoutMs); + try { + const res = await this.fetchImpl(`${this.baseUrl}/api/v2/execute`, { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(params), + signal: controller.signal, + }); + if (!res.ok) { + const body = await res.text().catch(() => ""); + throw new Error(`Piston HTTP ${res.status}: ${body || res.statusText}`); + } + return (await res.json()) as PistonExecuteResponse; + } finally { + clearTimeout(timer); + } + } +} diff --git a/packages/sandbox/src/piston.integration.test.ts b/packages/sandbox/src/piston.integration.test.ts new file mode 100644 index 0000000..9af9ce1 --- /dev/null +++ b/packages/sandbox/src/piston.integration.test.ts @@ -0,0 +1,38 @@ +import { describe, expect, it } from "vitest"; +import { PistonSandboxProvider } from "./piston.js"; +import { PistonHttpTransport } from "./piston-http-transport.js"; + +const baseUrl = process.env["PISTON_URL"]; +const describeIfPiston = baseUrl ? describe : describe.skip; + +describeIfPiston("PistonSandboxProvider (integration — requires PISTON_URL)", () => { + it("runs print('hello') and returns the expected stdout", async () => { + const provider = new PistonSandboxProvider({ + transport: new PistonHttpTransport({ baseUrl: baseUrl! }), + }); + const res = await provider.run({ + language: "python", + code: "print('hello')", + time_limit_ms: 5_000, + memory_limit_mb: 128, + output_limit_bytes: 64 * 1024, + }); + expect(res.stdout.trim()).toBe("hello"); + expect(res.exit_code).toBe(0); + expect(res.killed_by).toBeNull(); + }, 30_000); + + it("kills runaway code at the wall-clock timeout", async () => { + const provider = new PistonSandboxProvider({ + transport: new PistonHttpTransport({ baseUrl: baseUrl! 
}), + }); + const res = await provider.run({ + language: "python", + code: "while True: pass", + time_limit_ms: 1_000, + memory_limit_mb: 128, + output_limit_bytes: 64 * 1024, + }); + expect(res.killed_by).toBe("timeout"); + }, 30_000); +}); diff --git a/packages/sandbox/src/piston.test.ts b/packages/sandbox/src/piston.test.ts new file mode 100644 index 0000000..7e9de14 --- /dev/null +++ b/packages/sandbox/src/piston.test.ts @@ -0,0 +1,247 @@ +import { describe, expect, it } from "vitest"; +import { + DEFAULT_PISTON_LANGUAGES, + PistonSandboxProvider, + type PistonExecuteParams, + type PistonExecuteResponse, + type PistonTransport, +} from "./piston.js"; +import { InMemorySandboxTelemetrySink } from "./telemetry.js"; +import { SandboxRequestError } from "./errors.js"; + +class FakePistonTransport implements PistonTransport { + public lastParams: PistonExecuteParams | null = null; + public calls = 0; + + constructor( + private readonly response: + | PistonExecuteResponse + | ((p: PistonExecuteParams) => PistonExecuteResponse) + | Error, + ) {} + + async execute(params: PistonExecuteParams): Promise { + this.lastParams = params; + this.calls++; + if (this.response instanceof Error) throw this.response; + return typeof this.response === "function" ? 
this.response(params) : this.response; + } +} + +function ok( + over: Partial = {}, + version = "3.10.0", +): PistonExecuteResponse { + return { + language: "python", + version, + run: { stdout: "", stderr: "", code: 0, signal: null, ...over }, + }; +} + +describe("PistonSandboxProvider.run — happy path", () => { + it("runs python and returns stdout", async () => { + const transport = new FakePistonTransport(ok({ stdout: "hello\n" })); + const provider = new PistonSandboxProvider({ transport }); + const res = await provider.run({ + language: "python", + code: "print('hello')", + time_limit_ms: 5_000, + memory_limit_mb: 128, + output_limit_bytes: 64 * 1024, + }); + expect(res.stdout).toBe("hello\n"); + expect(res.exit_code).toBe(0); + expect(res.killed_by).toBeNull(); + expect(res.language).toBe("python"); + expect(res.runtime_version).toBe("3.10.0"); + }); + + it("forwards stdin to Piston when provided", async () => { + const transport = new FakePistonTransport(ok({ stdout: "got: hi" })); + const provider = new PistonSandboxProvider({ transport }); + await provider.run({ + language: "python", + code: "import sys; print('got:', sys.stdin.read())", + stdin: "hi", + time_limit_ms: 5_000, + memory_limit_mb: 128, + output_limit_bytes: 64 * 1024, + }); + expect(transport.lastParams?.stdin).toBe("hi"); + }); + + it("maps the python language spec correctly", async () => { + const transport = new FakePistonTransport(ok()); + const provider = new PistonSandboxProvider({ transport }); + await provider.run({ + language: "python", + code: "x = 1", + time_limit_ms: 5_000, + memory_limit_mb: 128, + output_limit_bytes: 64 * 1024, + }); + expect(transport.lastParams?.language).toBe(DEFAULT_PISTON_LANGUAGES.python.pistonLanguage); + expect(transport.lastParams?.version).toBe(DEFAULT_PISTON_LANGUAGES.python.pistonVersion); + expect(transport.lastParams?.files[0]?.name).toBe(DEFAULT_PISTON_LANGUAGES.python.filename); + }); + + it("converts memory_limit_mb to bytes when calling 
Piston", async () => { + const transport = new FakePistonTransport(ok()); + const provider = new PistonSandboxProvider({ transport }); + await provider.run({ + language: "python", + code: "x = 1", + time_limit_ms: 5_000, + memory_limit_mb: 256, + output_limit_bytes: 64 * 1024, + }); + expect(transport.lastParams?.run_memory_limit).toBe(256 * 1024 * 1024); + expect(transport.lastParams?.run_timeout).toBe(5_000); + }); +}); + +describe("PistonSandboxProvider.run — failure classification", () => { + it("classifies wall-clock timeout as killed_by=timeout", async () => { + const transport = new FakePistonTransport( + ok({ stdout: "", stderr: "", code: null, signal: "SIGKILL", message: "Run timed out" }), + ); + const provider = new PistonSandboxProvider({ transport }); + const res = await provider.run({ + language: "python", + code: "while True: pass", + time_limit_ms: 1_000, + memory_limit_mb: 128, + output_limit_bytes: 64 * 1024, + }); + expect(res.killed_by).toBe("timeout"); + expect(res.exit_code).toBeNull(); + }); + + it("classifies OOM as killed_by=memory", async () => { + const transport = new FakePistonTransport( + ok({ stdout: "", stderr: "MemoryError", code: null, signal: "SIGKILL", message: "OOM" }), + ); + const provider = new PistonSandboxProvider({ transport }); + const res = await provider.run({ + language: "python", + code: "bytearray(1<<30)", + time_limit_ms: 5_000, + memory_limit_mb: 16, + output_limit_bytes: 64 * 1024, + }); + expect(res.killed_by).toBe("memory"); + }); + + it("truncates stdout exceeding output_limit_bytes and reports killed_by=output-limit", async () => { + const huge = "x".repeat(200_000); + const transport = new FakePistonTransport(ok({ stdout: huge })); + const provider = new PistonSandboxProvider({ transport }); + const res = await provider.run({ + language: "python", + code: "print('x' * 10**8)", + time_limit_ms: 5_000, + memory_limit_mb: 128, + output_limit_bytes: 1_024, + }); + expect(res.killed_by).toBe("output-limit"); + 
expect(res.stdout.length).toBeLessThanOrEqual(1_024); + expect(res.stdout.endsWith("[truncated]")).toBe(true); + }); + + it("reports killed_by=signal when Piston returns a non-OOM/non-timeout signal", async () => { + const transport = new FakePistonTransport(ok({ code: null, signal: "SIGSEGV" })); + const provider = new PistonSandboxProvider({ transport }); + const res = await provider.run({ + language: "python", + code: "import ctypes; ctypes.string_at(0)", + time_limit_ms: 5_000, + memory_limit_mb: 128, + output_limit_bytes: 64 * 1024, + }); + expect(res.killed_by).toBe("signal"); + }); + + it("returns killed_by=null when the program exits cleanly with non-zero exit code", async () => { + const transport = new FakePistonTransport(ok({ stderr: "boom", code: 1 })); + const provider = new PistonSandboxProvider({ transport }); + const res = await provider.run({ + language: "python", + code: "import sys; sys.exit(1)", + time_limit_ms: 5_000, + memory_limit_mb: 128, + output_limit_bytes: 64 * 1024, + }); + expect(res.exit_code).toBe(1); + expect(res.killed_by).toBeNull(); + }); +}); + +describe("PistonSandboxProvider.run — errors", () => { + it("wraps transport failures as SandboxRequestError", async () => { + const transport = new FakePistonTransport(new Error("connect ECONNREFUSED")); + const provider = new PistonSandboxProvider({ transport }); + await expect( + provider.run({ + language: "python", + code: "print(1)", + time_limit_ms: 5_000, + memory_limit_mb: 128, + output_limit_bytes: 64 * 1024, + }), + ).rejects.toBeInstanceOf(SandboxRequestError); + }); + + it("rejects empty code via zod boundary", async () => { + const transport = new FakePistonTransport(ok()); + const provider = new PistonSandboxProvider({ transport }); + await expect( + provider.run({ + language: "python", + code: "", + time_limit_ms: 5_000, + memory_limit_mb: 128, + output_limit_bytes: 64 * 1024, + }), + ).rejects.toThrow(); + }); +}); + +describe("PistonSandboxProvider — telemetry", () => { 
+ it("emits a sandbox telemetry event on success", async () => { + const sink = new InMemorySandboxTelemetrySink(); + const transport = new FakePistonTransport(ok({ stdout: "hello\n" })); + const provider = new PistonSandboxProvider({ transport, telemetry: sink }); + await provider.run({ + language: "python", + code: "print('hello')", + time_limit_ms: 5_000, + memory_limit_mb: 128, + output_limit_bytes: 64 * 1024, + }); + expect(sink.events).toHaveLength(1); + const ev = sink.events[0]!; + expect(ev.provider).toBe("piston"); + expect(ev.language).toBe("python"); + expect(ev.ok).toBe(true); + expect(ev.killed_by).toBeNull(); + expect(ev.stdout_bytes).toBe("hello\n".length); + }); + + it("emits a telemetry event with ok=false on transport failure", async () => { + const sink = new InMemorySandboxTelemetrySink(); + const transport = new FakePistonTransport(new Error("boom")); + const provider = new PistonSandboxProvider({ transport, telemetry: sink }); + await expect( + provider.run({ + language: "python", + code: "print(1)", + time_limit_ms: 5_000, + memory_limit_mb: 128, + output_limit_bytes: 64 * 1024, + }), + ).rejects.toBeInstanceOf(SandboxRequestError); + expect(sink.events).toHaveLength(1); + expect(sink.events[0]?.ok).toBe(false); + }); +}); diff --git a/packages/sandbox/src/piston.ts b/packages/sandbox/src/piston.ts new file mode 100644 index 0000000..39cd996 --- /dev/null +++ b/packages/sandbox/src/piston.ts @@ -0,0 +1,214 @@ +import { SandboxLanguageNotSupportedError, SandboxRequestError } from "./errors.js"; +import type { SandboxProvider } from "./provider.js"; +import { NullSandboxTelemetrySink } from "./telemetry.js"; +import { + SandboxRunRequestSchema, + type SandboxKilledBy, + type SandboxLanguage, + type SandboxRunRequest, + type SandboxRunResponse, + type SandboxTelemetrySink, +} from "./types.js"; + +export interface PistonExecuteParams { + language: string; + version: string; + files: Array<{ name: string; content: string }>; + stdin?: string; + 
run_timeout?: number; + run_memory_limit?: number; +} + +export interface PistonExecuteResponse { + language: string; + version: string; + run: { + stdout: string; + stderr: string; + output?: string; + code: number | null; + signal: string | null; + message?: string; + }; + compile?: { + stdout: string; + stderr: string; + output?: string; + code: number | null; + signal: string | null; + message?: string; + }; +} + +export interface PistonTransport { + execute(params: PistonExecuteParams): Promise; +} + +export interface PistonLanguageSpec { + pistonLanguage: string; + pistonVersion: string; + filename: string; +} + +export const DEFAULT_PISTON_LANGUAGES: Record = { + python: { pistonLanguage: "python", pistonVersion: "3.10.0", filename: "main.py" }, + typescript: { pistonLanguage: "typescript", pistonVersion: "5.0.3", filename: "main.ts" }, +}; + +export interface PistonSandboxProviderOptions { + transport: PistonTransport; + languages?: Partial>; + telemetry?: SandboxTelemetrySink; + now?: () => number; +} + +export class PistonSandboxProvider implements SandboxProvider { + readonly name = "piston"; + + private readonly transport: PistonTransport; + private readonly languages: Record; + private readonly telemetry: SandboxTelemetrySink; + private readonly now: () => number; + + constructor(opts: PistonSandboxProviderOptions) { + this.transport = opts.transport; + this.languages = { ...DEFAULT_PISTON_LANGUAGES, ...(opts.languages ?? {}) }; + this.telemetry = opts.telemetry ?? new NullSandboxTelemetrySink(); + this.now = opts.now ?? 
(() => Date.now()); + } + + async run(rawReq: SandboxRunRequest): Promise { + const req = SandboxRunRequestSchema.parse(rawReq); + const spec = this.languages[req.language]; + if (!spec) { + throw new SandboxLanguageNotSupportedError(this.name, req.language); + } + const start = this.now(); + const params: PistonExecuteParams = { + language: spec.pistonLanguage, + version: spec.pistonVersion, + files: [{ name: spec.filename, content: req.code }], + run_timeout: req.time_limit_ms, + run_memory_limit: req.memory_limit_mb * 1024 * 1024, + }; + if (req.stdin !== undefined) params.stdin = req.stdin; + let res: PistonExecuteResponse; + try { + res = await this.transport.execute(params); + } catch (err) { + this.recordTelemetry({ + language: req.language, + start, + ok: false, + stdout: "", + stderr: "", + exit_code: null, + killed_by: null, + }); + throw new SandboxRequestError("Piston execute failed", this.name, err); + } + + const stdoutRaw = res.run.stdout ?? ""; + const stderrRaw = res.run.stderr ?? 
""; + const limit = req.output_limit_bytes; + const stdoutTruncated = truncateBytes(stdoutRaw, limit); + const stderrTruncated = truncateBytes(stderrRaw, limit); + const totalRawBytes = byteLength(stdoutRaw) + byteLength(stderrRaw); + + const killed_by = classifyKilledBy({ + pistonSignal: res.run.signal, + pistonMessage: res.run.message, + pistonCode: res.run.code, + truncated: stdoutTruncated.truncated || stderrTruncated.truncated, + requestedTimeoutMs: req.time_limit_ms, + durationMs: this.now() - start, + totalRawBytes, + outputLimit: limit, + }); + + const out: SandboxRunResponse = { + stdout: stdoutTruncated.value, + stderr: stderrTruncated.value, + exit_code: res.run.code, + duration_ms: Math.max(0, this.now() - start), + killed_by, + language: req.language, + runtime_version: res.version, + }; + this.recordTelemetry({ + language: req.language, + start, + ok: true, + stdout: out.stdout, + stderr: out.stderr, + exit_code: out.exit_code, + killed_by: out.killed_by, + }); + return out; + } + + private recordTelemetry(opts: { + language: SandboxLanguage; + start: number; + ok: boolean; + stdout: string; + stderr: string; + exit_code: number | null; + killed_by: SandboxKilledBy | null; + }): void { + this.telemetry.record({ + provider: this.name, + language: opts.language, + duration_ms: Math.max(0, this.now() - opts.start), + killed_by: opts.killed_by, + exit_code: opts.exit_code, + stdout_bytes: byteLength(opts.stdout), + stderr_bytes: byteLength(opts.stderr), + ok: opts.ok, + decided_at: new Date(this.now()).toISOString(), + }); + } +} + +interface TruncationResult { + value: string; + truncated: boolean; +} + +const TRUNCATION_MARKER = "\n[truncated]"; + +function truncateBytes(s: string, limit: number): TruncationResult { + const enc = new TextEncoder(); + const bytes = enc.encode(s); + if (bytes.length <= limit) return { value: s, truncated: false }; + const head = bytes.slice(0, Math.max(0, limit - TRUNCATION_MARKER.length)); + const dec = new 
TextDecoder("utf-8", { fatal: false }); + return { value: `${dec.decode(head)}${TRUNCATION_MARKER}`, truncated: true }; +} + +function byteLength(s: string): number { + return new TextEncoder().encode(s).length; +} + +interface ClassifyArgs { + pistonSignal: string | null; + pistonMessage: string | undefined; + pistonCode: number | null; + truncated: boolean; + requestedTimeoutMs: number; + durationMs: number; + totalRawBytes: number; + outputLimit: number; +} + +function classifyKilledBy(a: ClassifyArgs): SandboxKilledBy | null { + if (a.totalRawBytes > a.outputLimit) return "output-limit"; + if (a.truncated) return "output-limit"; + const msg = (a.pistonMessage ?? "").toLowerCase(); + if (msg.includes("timeout") || msg.includes("timed out")) return "timeout"; + if (a.pistonSignal === "SIGKILL" && a.durationMs >= a.requestedTimeoutMs) return "timeout"; + if (msg.includes("memory") || msg.includes("oom")) return "memory"; + if (a.pistonSignal === "SIGKILL") return "memory"; + if (a.pistonSignal !== null) return "signal"; + return null; +} diff --git a/packages/sandbox/src/provider.ts b/packages/sandbox/src/provider.ts new file mode 100644 index 0000000..bdc202f --- /dev/null +++ b/packages/sandbox/src/provider.ts @@ -0,0 +1,6 @@ +import type { SandboxRunRequest, SandboxRunResponse } from "./types.js"; + +export interface SandboxProvider { + readonly name: string; + run(req: SandboxRunRequest): Promise; +} diff --git a/packages/sandbox/src/registry.test.ts b/packages/sandbox/src/registry.test.ts new file mode 100644 index 0000000..227d19a --- /dev/null +++ b/packages/sandbox/src/registry.test.ts @@ -0,0 +1,66 @@ +import { describe, expect, it } from "vitest"; +import { buildSandboxProvider, loadSandboxConfigFromEnv, SandboxConfigSchema } from "./registry.js"; +import type { PistonTransport } from "./piston.js"; + +const noopTransport: PistonTransport = { + async execute() { + return { + language: "python", + version: "3.10.0", + run: { stdout: "", stderr: "", code: 
0, signal: null }, + }; + }, +}; + +describe("SandboxConfigSchema", () => { + it("defaults to piston at http://localhost:2000", () => { + const cfg = SandboxConfigSchema.parse({}); + expect(cfg.provider).toBe("piston"); + expect(cfg.baseUrl).toBe("http://localhost:2000"); + }); + + it("accepts a baseUrl override", () => { + const cfg = SandboxConfigSchema.parse({ baseUrl: "http://piston.local:2000" }); + expect(cfg.baseUrl).toBe("http://piston.local:2000"); + }); + + it("rejects a non-URL baseUrl", () => { + expect(() => SandboxConfigSchema.parse({ baseUrl: "not-a-url" })).toThrow(); + }); +}); + +describe("buildSandboxProvider", () => { + it("builds PistonSandboxProvider with injected transport", () => { + const provider = buildSandboxProvider({ pistonTransport: noopTransport }); + expect(provider.name).toBe("piston"); + }); + + it("builds with default HTTP transport when none injected", () => { + const provider = buildSandboxProvider({}); + expect(provider.name).toBe("piston"); + }); +}); + +describe("loadSandboxConfigFromEnv", () => { + it("returns defaults when env var is not set", () => { + expect(loadSandboxConfigFromEnv({})).toEqual(SandboxConfigSchema.parse({})); + }); + + it("uses PISTON_URL when set", () => { + const cfg = loadSandboxConfigFromEnv({ PISTON_URL: "http://piston.dev:2000" }); + expect(cfg.baseUrl).toBe("http://piston.dev:2000"); + }); + + it("parses LEARNPRO_SANDBOX_CONFIG JSON", () => { + const cfg = loadSandboxConfigFromEnv({ + LEARNPRO_SANDBOX_CONFIG: JSON.stringify({ baseUrl: "http://x:2000" }), + }); + expect(cfg.baseUrl).toBe("http://x:2000"); + }); + + it("throws on invalid JSON", () => { + expect(() => loadSandboxConfigFromEnv({ LEARNPRO_SANDBOX_CONFIG: "{bad" })).toThrow( + /not valid JSON/, + ); + }); +}); diff --git a/packages/sandbox/src/registry.ts b/packages/sandbox/src/registry.ts new file mode 100644 index 0000000..bd26f99 --- /dev/null +++ b/packages/sandbox/src/registry.ts @@ -0,0 +1,62 @@ +import { z } from "zod"; +import 
type { SandboxProvider } from "./provider.js"; +import { + DEFAULT_PISTON_LANGUAGES, + PistonSandboxProvider, + type PistonLanguageSpec, + type PistonTransport, +} from "./piston.js"; +import { PistonHttpTransport } from "./piston-http-transport.js"; +import { NullSandboxTelemetrySink } from "./telemetry.js"; +import { SandboxLanguageSchema, type SandboxLanguage, type SandboxTelemetrySink } from "./types.js"; + +const PistonLanguageSpecSchema = z.object({ + pistonLanguage: z.string().min(1), + pistonVersion: z.string().min(1), + filename: z.string().min(1), +}); + +export const SandboxConfigSchema = z.object({ + provider: z.enum(["piston"]).default("piston"), + baseUrl: z.string().url().default("http://localhost:2000"), + languages: z.record(SandboxLanguageSchema, PistonLanguageSpecSchema).optional(), +}); +export type SandboxConfig = z.infer<typeof SandboxConfigSchema>; + +export interface BuildSandboxOptions { + config?: SandboxConfig; + telemetry?: SandboxTelemetrySink; + pistonTransport?: PistonTransport; +} + +export function buildSandboxProvider(opts: BuildSandboxOptions = {}): SandboxProvider { + const config = opts.config ?? SandboxConfigSchema.parse({}); + const telemetry = opts.telemetry ?? new NullSandboxTelemetrySink(); + switch (config.provider) { + case "piston": { + const transport = + opts.pistonTransport ?? new PistonHttpTransport({ baseUrl: config.baseUrl }); + const languages: Partial<Record<SandboxLanguage, PistonLanguageSpec>> = { + ...DEFAULT_PISTON_LANGUAGES, + ...(config.languages ?? 
{}), + }; + return new PistonSandboxProvider({ transport, languages, telemetry }); + } + } +} + +export function loadSandboxConfigFromEnv(env: NodeJS.ProcessEnv): SandboxConfig { + const raw = env["LEARNPRO_SANDBOX_CONFIG"]; + if (!raw) { + const fallback: Record<string, unknown> = {}; + if (env["PISTON_URL"]) fallback["baseUrl"] = env["PISTON_URL"]; + return SandboxConfigSchema.parse(fallback); + } + let parsed: unknown; + try { + parsed = JSON.parse(raw); + } catch (err) { + throw new Error(`LEARNPRO_SANDBOX_CONFIG is not valid JSON: ${(err as Error).message}`); + } + return SandboxConfigSchema.parse(parsed); +} diff --git a/packages/sandbox/src/telemetry.ts b/packages/sandbox/src/telemetry.ts new file mode 100644 index 0000000..169f1ed --- /dev/null +++ b/packages/sandbox/src/telemetry.ts @@ -0,0 +1,13 @@ +import type { SandboxTelemetryEvent, SandboxTelemetrySink } from "./types.js"; + +export class NullSandboxTelemetrySink implements SandboxTelemetrySink { + record(_event: SandboxTelemetryEvent): void {} +} + +export class InMemorySandboxTelemetrySink implements SandboxTelemetrySink { + readonly events: SandboxTelemetryEvent[] = []; + + record(event: SandboxTelemetryEvent): void { + this.events.push(event); + } +} diff --git a/packages/sandbox/src/types.ts b/packages/sandbox/src/types.ts new file mode 100644 index 0000000..29a0889 --- /dev/null +++ b/packages/sandbox/src/types.ts @@ -0,0 +1,54 @@ +import { z } from "zod"; + +export const SandboxLanguageSchema = z.enum(["python", "typescript"]); +export type SandboxLanguage = z.infer<typeof SandboxLanguageSchema>; + +export const SandboxKilledBySchema = z.enum(["timeout", "memory", "output-limit", "signal"]); +export type SandboxKilledBy = z.infer<typeof SandboxKilledBySchema>; + +export const DEFAULT_TIME_LIMIT_MS = 5_000; +export const DEFAULT_MEMORY_LIMIT_MB = 128; +export const DEFAULT_OUTPUT_LIMIT_BYTES = 64 * 1024; + +export const SandboxRunRequestSchema = z.object({ + language: SandboxLanguageSchema, + code: z.string().min(1, "code must not be empty"), + stdin: 
z.string().optional(), + time_limit_ms: z.number().int().positive().max(60_000).default(DEFAULT_TIME_LIMIT_MS), + memory_limit_mb: z.number().int().positive().max(2_048).default(DEFAULT_MEMORY_LIMIT_MB), + output_limit_bytes: z + .number() + .int() + .positive() + .max(1_048_576) + .default(DEFAULT_OUTPUT_LIMIT_BYTES), +}); +export type SandboxRunRequest = z.infer<typeof SandboxRunRequestSchema>; + +export const SandboxRunResponseSchema = z.object({ + stdout: z.string(), + stderr: z.string(), + exit_code: z.number().int().nullable(), + duration_ms: z.number().int().nonnegative(), + killed_by: SandboxKilledBySchema.nullable(), + language: SandboxLanguageSchema, + runtime_version: z.string().optional(), +}); +export type SandboxRunResponse = z.infer<typeof SandboxRunResponseSchema>; + +export const SandboxTelemetryEventSchema = z.object({ + provider: z.string(), + language: SandboxLanguageSchema, + duration_ms: z.number().int().nonnegative(), + killed_by: SandboxKilledBySchema.nullable(), + exit_code: z.number().int().nullable(), + stdout_bytes: z.number().int().nonnegative(), + stderr_bytes: z.number().int().nonnegative(), + ok: z.boolean(), + decided_at: z.string().datetime(), +}); +export type SandboxTelemetryEvent = z.infer<typeof SandboxTelemetryEventSchema>; + +export interface SandboxTelemetrySink { + record(event: SandboxTelemetryEvent): void; +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 922ef02..dc2c0e3 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -38,6 +38,9 @@ importers: '@learnpro/llm': specifier: workspace:* version: link:../../packages/llm + '@learnpro/sandbox': + specifier: workspace:* + version: link:../../packages/sandbox '@learnpro/scoring': specifier: workspace:* version: link:../../packages/scoring @@ -161,6 +164,10 @@ importers: version: 2.1.9(@types/node@22.19.17) packages/sandbox: + dependencies: + zod: + specifier: ^3.24.1 + version: 3.25.76 devDependencies: '@types/node': specifier: ^22.10.2 diff --git a/project/BOARD.md b/project/BOARD.md index e335b2d..74520aa 100644 --- a/project/BOARD.md +++ b/project/BOARD.md @@ -1,6 
+1,6 @@ # LearnPro Board -> **Last updated:** 2026-04-26 (STORY-014 done — IVFFlat index on `episodes.embedding` (cosine ops, lists=100). Picking up STORY-007 next (Python sandbox runner via Piston).) +> **Last updated:** 2026-04-26 (STORY-007 done — `SandboxProvider` interface + `PistonSandboxProvider` with injectable transport, zod boundary, telemetry, and `POST /sandbox/run` on the API. Hardening verification (no-net, ro rootfs, cgroups, seccomp, non-root) deferred to STORY-010.) > **How to read this:** This is the live status of every Epic, Story, and Task in the project. Hand-maintained for now (a regenerator script lives in the v1 backlog). When you change an item's `status:` frontmatter, also update the row here in the same commit. --- @@ -23,7 +23,6 @@ Path A locked 2026-04-25. EPIC-019 (foundation) must land first since every othe | [STORY-005](stories/STORY-005-auth-and-onboarding.md) | Auth.js + bootstrap profile shell (re-scoped — onboarding split to STORY-053) | EPIC-002 | mvp | P0 | M | | [STORY-053](stories/STORY-053-conversational-onboarding-agent.md) | Conversational adaptive onboarding agent (replaces structured form; graceful exit + form fallback) | EPIC-004 | mvp | P0 | L | | [STORY-006](stories/STORY-006-monaco-editor.md) | Monaco editor + run button + result panel | EPIC-002 | mvp | P0 | M | -| [STORY-007](stories/STORY-007-python-runner.md) | Python sandbox runner via Piston | EPIC-003 | mvp | P0 | M | | [STORY-008](stories/STORY-008-typescript-runner.md) | TypeScript sandbox runner via Piston | EPIC-003 | mvp | P0 | S | --- @@ -92,10 +91,11 @@ These stories were filed during EPIC-017 Phase C from the expanded idea catalog ## Recently Done -STORY-013 (learner profile schema) landed 2026-04-26 (PR #11) — first feature Story under EPIC-005. STORY-009 (LLM gateway) landed 2026-04-26 (PR #9) — first feature Story under EPIC-004. 
EPIC-019 (foundation) closed 2026-04-26 with STORY-052 (monorepo skeleton, PR #5) and STORY-057 (policy adapters, PR #7). GitHub repo + PR workflow landed 2026-04-25 (PR #1, STORY-058). EPIC-017 product grooming closed in full on 2026-04-25 (Phases A + B + C). EPIC-001 closed on 2026-04-25 (initial scaffolding commit `c1e17a1`). Phase A commit: `bbf7300`. +STORY-007 (Python sandbox runner via Piston) landed 2026-04-26 — first feature Story under EPIC-003. STORY-013 (learner profile schema) landed 2026-04-26 (PR #11) — first feature Story under EPIC-005. STORY-009 (LLM gateway) landed 2026-04-26 (PR #9) — first feature Story under EPIC-004. EPIC-019 (foundation) closed 2026-04-26 with STORY-052 (monorepo skeleton, PR #5) and STORY-057 (policy adapters, PR #7). GitHub repo + PR workflow landed 2026-04-25 (PR #1, STORY-058). EPIC-017 product grooming closed in full on 2026-04-25 (Phases A + B + C). EPIC-001 closed on 2026-04-25 (initial scaffolding commit `c1e17a1`). Phase A commit: `bbf7300`. 
| ID | Title | Done | |----|-------|------| +| [STORY-007](stories/STORY-007-python-runner.md) | Python sandbox runner via Piston (`SandboxProvider` + `PistonSandboxProvider` + `POST /sandbox/run`) | 2026-04-26 | | [STORY-014](stories/STORY-014-pgvector-schema.md) | pgvector IVFFlat index on `episodes.embedding` (column landed in STORY-013) | 2026-04-26 | | [STORY-013](stories/STORY-013-learner-profile-schema.md) | Learner profile schema (per-concept skill, episodic log, `org_id` everywhere) | 2026-04-26 | | [STORY-009](stories/STORY-009-llm-gateway.md) | `LLMProvider` interface + Anthropic adapter | 2026-04-26 | diff --git a/project/stories/STORY-007-python-runner.md b/project/stories/STORY-007-python-runner.md index 7429069..cf90f19 100644 --- a/project/stories/STORY-007-python-runner.md +++ b/project/stories/STORY-007-python-runner.md @@ -2,14 +2,14 @@ id: STORY-007 title: Python sandbox runner via Piston type: story -status: backlog +status: done priority: P0 estimate: M parent: EPIC-003 phase: mvp tags: [sandbox, python, piston, docker] created: 2026-04-25 -updated: 2026-04-25 +updated: 2026-04-26 --- ## Description @@ -20,13 +20,13 @@ Piston gives us: pre-built language images, output truncation, wall-clock timeou ## Acceptance criteria -- [ ] `SandboxProvider` interface defined in `packages/sandbox/src/provider.ts` (one method: `run`). -- [ ] Piston-Docker impl runs `print('hello')` and returns the expected stdout. -- [ ] Wall-clock timeout (default 5s) kills runaway code and reports `killed_by: 'timeout'`. -- [ ] Output is truncated at 64KB and reports `killed_by: 'output-limit'` if exceeded. -- [ ] Memory cap (default 128MB) is enforced; OOM reports `killed_by: 'memory'`. -- [ ] `socket.socket().connect((...))` raises a network-blocked error (proves no-net). -- [ ] All hardening checklist items from ADR-0002 are verified by an automated test. +- [x] `SandboxProvider` interface defined in `packages/sandbox/src/provider.ts` (one method: `run`). 
+- [x] Piston-Docker impl runs `print('hello')` and returns the expected stdout. *(Unit-tested via `FakePistonTransport`; integration test in `piston.integration.test.ts` runs against a real Piston when `PISTON_URL` is set.)* +- [x] Wall-clock timeout (default 5s) kills runaway code and reports `killed_by: 'timeout'`. *(Default `DEFAULT_TIME_LIMIT_MS = 5_000`. `classifyKilledBy` maps Piston's `Run timed out` message + SIGKILL-at-deadline to `timeout`.)* +- [x] Output is truncated at 64KB and reports `killed_by: 'output-limit'` if exceeded. *(`DEFAULT_OUTPUT_LIMIT_BYTES = 64 * 1024`; `truncateBytes` cuts at the limit and appends `[truncated]`.)* +- [x] Memory cap (default 128MB) is enforced; OOM reports `killed_by: 'memory'`. *(Default `DEFAULT_MEMORY_LIMIT_MB = 128`; converted to bytes for Piston's `run_memory_limit`. Classifier maps `OOM`/`memory` messages + bare SIGKILL to `memory`.)* +- [ ] `socket.socket().connect((...))` raises a network-blocked error (proves no-net). *(Deferred to STORY-010; needs a real Piston with `--network none` in the runner config.)* +- [ ] All hardening checklist items from ADR-0002 are verified by an automated test. *(Deferred to STORY-010 — that Story owns `packages/sandbox/test/breakout/`.)* ## Dependencies @@ -39,3 +39,5 @@ Piston gives us: pre-built language images, output truncation, wall-clock timeou ## Activity log - 2026-04-25 — created +- 2026-04-26 — picked up. Mirroring the LLM gateway architecture: SandboxProvider interface + PistonSandboxProvider with injectable PistonTransport + zod schemas at the boundary + unit tests with FakeTransport + integration tests gated on PISTON_URL. Hardening assertions live in STORY-010. +- 2026-04-26 — done. 
`packages/sandbox` ships: `SandboxProvider` interface (one method `run`), `PistonSandboxProvider` (with `PistonTransport` shim for testability), `PistonHttpTransport` (real fetch against `http://localhost:2000`), `buildSandboxProvider` + `loadSandboxConfigFromEnv` (`PISTON_URL` env override), `In{Memory,Null}SandboxTelemetrySink`. `apps/api` exposes `GET /sandbox` and `POST /sandbox/run` (zod-validated body → run result, 502 on `SandboxRequestError`). 22 unit/registry tests pass; 6 API tests pass. Hardening verification (no-net, ro rootfs, cgroups, seccomp, non-root) deferred to STORY-010 by design.