diff --git a/packages/llm/src/anthropic.test.ts b/packages/llm/src/anthropic.test.ts index 506c694..0ac7695 100644 --- a/packages/llm/src/anthropic.test.ts +++ b/packages/llm/src/anthropic.test.ts @@ -7,6 +7,7 @@ import { } from "./anthropic.js"; import { InMemoryLLMTelemetrySink } from "./telemetry.js"; import { ANTHROPIC_HAIKU, ANTHROPIC_OPUS } from "./models.js"; +import { PRICING_VERSION } from "./pricing.js"; import { LLMRequestError, NotImplementedError } from "./errors.js"; class FakeTransport implements AnthropicTransport { @@ -148,7 +149,7 @@ describe("AnthropicProvider.complete", () => { ).rejects.toBeInstanceOf(LLMRequestError); }); - it("emits telemetry on success including role and prompt_version", async () => { + it("emits telemetry on success including role, prompt_version, and cost", async () => { const sink = new InMemoryLLMTelemetrySink(); const transport = new FakeTransport(baseResponse); const provider = new AnthropicProvider({ transport, telemetry: sink }); @@ -156,6 +157,7 @@ describe("AnthropicProvider.complete", () => { messages: [{ role: "user", content: "hi" }], role: "tutor", user_id: "u1", + session_id: "sess-42", prompt_version: "tutor@v3", max_tokens: 16, temperature: 0, @@ -166,9 +168,28 @@ describe("AnthropicProvider.complete", () => { expect(ev.role).toBe("tutor"); expect(ev.prompt_version).toBe("tutor@v3"); expect(ev.user_id).toBe("u1"); + expect(ev.session_id).toBe("sess-42"); expect(ev.input_tokens).toBe(11); expect(ev.output_tokens).toBe(7); expect(ev.ok).toBe(true); + // Haiku pricing: (11*1 + 7*5) / 1M = 0.000046 + expect(ev.cost_usd).toBe(0.000046); + expect(ev.pricing_version).toBe(PRICING_VERSION); + }); + + it("emits cost=0 + known_model=false fallback for an unknown model", async () => { + const sink = new InMemoryLLMTelemetrySink(); + const transport = new FakeTransport({ ...baseResponse, model: "made-up-model" }); + const provider = new AnthropicProvider({ transport, telemetry: sink }); + await provider.complete({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 16, + temperature: 0, + }); + expect(sink.events[0]?.cost_usd).toBe(0); + expect(sink.events[0]?.pricing_version).toBe(PRICING_VERSION); }); }); @@ -226,6 +247,33 @@ describe("AnthropicProvider.toolCall", () => { expect(res.finish_reason).toBe("tool_use"); expect(transport.lastParams?.tool_choice).toEqual({ type: "auto" }); }); + + it("records tool_used in telemetry when a tool is invoked", async () => { + const sink = new InMemoryLLMTelemetrySink(); + const transport = new FakeTransport({ + model: ANTHROPIC_OPUS, + stop_reason: "tool_use", + usage: { input_tokens: 12, output_tokens: 4 }, + content: [ + { + type: "tool_use", + id: "tool_01", + name: "give-hint", + input: { rung: 1 }, + }, + ], + }); + const provider = new AnthropicProvider({ transport, telemetry: sink }); + await provider.toolCall({ + messages: [{ role: "user", content: "stuck" }], + role: "tutor", + max_tokens: 64, + temperature: 0, + tools: [{ name: "give-hint", description: "x", input_schema: { type: "object" } }], + tool_choice: "auto", + }); + expect(sink.events[0]?.tool_used).toBe("give-hint"); + }); }); describe("AnthropicProvider.embed", () => { diff --git a/packages/llm/src/anthropic.ts b/packages/llm/src/anthropic.ts index f862dd6..dd50648 100644 --- a/packages/llm/src/anthropic.ts +++ b/packages/llm/src/anthropic.ts @@ -1,6 +1,7 @@ import { LLMRequestError, NotImplementedError } from "./errors.js"; import type { LLMProvider } from "./provider.js"; import { 
DEFAULT_ROLE_MODEL_MAP, resolveModel, type RoleModelMap } from "./models.js";
+import { costFor } from "./pricing.js";
import { NullLLMTelemetrySink } from "./telemetry.js";
import { DEFAULT_RETRY, withRetry, type RetryOptions } from "./retry.js";
import {
@@ -219,6 +220,7 @@ export class AnthropicProvider implements LLMProvider {
        usage: out.usage,
        start,
        ok: true,
+        ...(tool_calls[0] !== undefined && { tool_used: tool_calls[0].name }),
      });
      return out;
    } catch (err) {
@@ -241,19 +243,29 @@ export class AnthropicProvider implements LLMProvider {
    usage: { input_tokens: number; output_tokens: number };
    start: number;
    ok: boolean;
+    tool_used?: string;
  }): void {
+    const cost = costFor({
+      model: opts.model,
+      input_tokens: opts.usage.input_tokens,
+      output_tokens: opts.usage.output_tokens,
+    });
    this.telemetry.record({
      provider: this.name,
      model: opts.model,
      task: opts.task,
      input_tokens: opts.usage.input_tokens,
      output_tokens: opts.usage.output_tokens,
+      cost_usd: cost.cost_usd,
+      pricing_version: cost.pricing_version,
      latency_ms: Math.max(0, this.now() - opts.start),
      ok: opts.ok,
      decided_at: new Date(this.now()).toISOString(),
      ...(opts.req.role !== undefined && { role: opts.req.role }),
      ...(opts.req.user_id !== undefined && { user_id: opts.req.user_id }),
+      ...(opts.req.session_id !== undefined && { session_id: opts.req.session_id }),
      ...(opts.req.prompt_version !== undefined && { prompt_version: opts.req.prompt_version }),
+      ...(opts.tool_used !== undefined && { tool_used: opts.tool_used }),
    });
  }
}
diff --git a/packages/llm/src/budget-gated-provider.test.ts b/packages/llm/src/budget-gated-provider.test.ts
new file mode 100644
index 0000000..92e7b41
--- /dev/null
+++ b/packages/llm/src/budget-gated-provider.test.ts
@@ -0,0 +1,295 @@
+import { describe, expect, it } from "vitest";
+import { BudgetGatedLLMProvider } from "./budget-gated-provider.js";
+import { DailyTokenBudget, InMemoryUsageStore } from "./budget.js";
+import { ANTHROPIC_HAIKU, ANTHROPIC_OPUS } from "./models.js";
+import { ANTHROPIC_SONNET } from "./pricing.js";
+import { TokenBudgetExceededError } from "./errors.js";
+import type { LLMProvider } from "./provider.js";
+import type {
+  CompleteRequest,
+  CompleteResponse,
+  EmbedRequest,
+  EmbedResponse,
+  StreamChunk,
+  ToolCallRequest,
+  ToolCallResponse,
+} from "./types.js";
+
+class StubProvider implements LLMProvider {
+  readonly name = "stub";
+  public lastCompleteReq: CompleteRequest | null = null;
+  public lastToolCallReq: ToolCallRequest | null = null;
+  public lastStreamReq: CompleteRequest | null = null;
+  public lastEmbedReq: EmbedRequest | null = null;
+
+  constructor(
+    private readonly response: {
+      input_tokens: number;
+      output_tokens: number;
+      streamChunks?: string[];
+    },
+  ) {}
+
+  async complete(req: CompleteRequest): Promise<CompleteResponse> {
+    this.lastCompleteReq = req;
+    return {
+      text: "stub-text",
+      model: req.model ?? "stub-model",
+      finish_reason: "end_turn",
+      usage: {
+        input_tokens: this.response.input_tokens,
+        output_tokens: this.response.output_tokens,
+      },
+    };
+  }
+
+  async *stream(req: CompleteRequest): AsyncIterable<StreamChunk> {
+    this.lastStreamReq = req;
+    const chunks = this.response.streamChunks ?? 
["abcd", "efgh"]; + for (const c of chunks) yield { delta: c, done: false }; + yield { delta: "", done: true }; + } + + async embed(req: EmbedRequest): Promise { + this.lastEmbedReq = req; + return { vector: [0.1, 0.2], model: "stub-embed", usage: {} }; + } + + async toolCall(req: ToolCallRequest): Promise { + this.lastToolCallReq = req; + return { + text: "", + tool_calls: [], + model: req.model ?? "stub-model", + finish_reason: "tool_use", + usage: { + input_tokens: this.response.input_tokens, + output_tokens: this.response.output_tokens, + }, + }; + } +} + +describe("BudgetGatedLLMProvider.complete", () => { + it("passes the resolved baseline model through under threshold", async () => { + const inner = new StubProvider({ input_tokens: 100, output_tokens: 50 }); + const budget = new DailyTokenBudget({ + store: new InMemoryUsageStore(), + daily_limit_tokens: 10_000, + }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + const res = await gated.complete({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 64, + temperature: 0.5, + }); + expect(inner.lastCompleteReq?.model).toBe(ANTHROPIC_OPUS); + expect(res.text).toBe("stub-text"); + }); + + it("downgrades the model when over the threshold (Opus → Sonnet)", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 800); + const inner = new StubProvider({ input_tokens: 10, output_tokens: 5 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + await gated.complete({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 32, + temperature: 0, + }); + expect(inner.lastCompleteReq?.model).toBe(ANTHROPIC_SONNET); + }); + + it("respects an explicit model override (no downgrade)", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 999); + const inner = new StubProvider({ input_tokens: 10, output_tokens: 5 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + await gated.complete({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + model: "explicit-model", + user_id: "u1", + max_tokens: 32, + temperature: 0, + }); + expect(inner.lastCompleteReq?.model).toBe("explicit-model"); + }); + + it("throws TokenBudgetExceededError before calling the inner provider when at the limit", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 1000); + const inner = new StubProvider({ input_tokens: 1, output_tokens: 1 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + await expect( + gated.complete({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 16, + temperature: 0, + }), + ).rejects.toBeInstanceOf(TokenBudgetExceededError); + expect(inner.lastCompleteReq).toBeNull(); + }); + + it("records (input + output) tokens after a successful call", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 10_000 }); + const inner = new StubProvider({ input_tokens: 120, output_tokens: 35 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + await gated.complete({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 16, + temperature: 
0, + }); + expect(await store.today("u1")).toBe(155); + }); + + it("does nothing budget-wise when no user_id is provided (system call)", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + const inner = new StubProvider({ input_tokens: 50, output_tokens: 50 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + await gated.complete({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + max_tokens: 16, + temperature: 0, + }); + expect(await store.today("u1")).toBe(0); + expect(inner.lastCompleteReq?.model).toBe(ANTHROPIC_OPUS); + }); +}); + +describe("BudgetGatedLLMProvider.stream", () => { + it("downgrades the model when over the threshold and approximates output tokens", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 850); + const inner = new StubProvider({ + input_tokens: 0, + output_tokens: 0, + streamChunks: ["abcd", "efghij"], // 4 + 6 = 10 chars → ~3 tokens + }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + const out = []; + for await (const c of gated.stream({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 16, + temperature: 0, + })) { + out.push(c); + } + expect(inner.lastStreamReq?.model).toBe(ANTHROPIC_SONNET); + expect(out[out.length - 1]?.done).toBe(true); + // 850 (existing) + ceil(4/4)=1 + ceil(6/4)=2 = 853 + expect(await store.today("u1")).toBe(853); + }); + + it("blocks when over budget without consuming the inner stream", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 1000); + const inner = new StubProvider({ input_tokens: 0, output_tokens: 0 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + await expect(async () => { + for await (const _ of gated.stream({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 16, + temperature: 0, + })) { + // unreachable + } + }).rejects.toBeInstanceOf(TokenBudgetExceededError); + expect(inner.lastStreamReq).toBeNull(); + }); +}); + +describe("BudgetGatedLLMProvider.toolCall", () => { + it("downgrades and records like complete()", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 800); + const inner = new StubProvider({ input_tokens: 30, output_tokens: 20 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + await gated.toolCall({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 16, + temperature: 0, + tools: [{ name: "t", description: "d", input_schema: { type: "object" } }], + tool_choice: "auto", + }); + expect(inner.lastToolCallReq?.model).toBe(ANTHROPIC_SONNET); + expect(await store.today("u1")).toBe(850); + }); +}); + +describe("BudgetGatedLLMProvider.embed", () => { + it("passes through without budget gating", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 999_999); + const inner = new StubProvider({ input_tokens: 0, output_tokens: 0 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + const res = await gated.embed({ text: "hello" }); + expect(res.vector).toEqual([0.1, 0.2]); + 
expect(inner.lastEmbedReq?.text).toBe("hello");
+  });
+});
+
+describe("BudgetGatedLLMProvider.name", () => {
+  it("identifies itself with a budget-gated prefix", () => {
+    const inner = new StubProvider({ input_tokens: 0, output_tokens: 0 });
+    const budget = new DailyTokenBudget({
+      store: new InMemoryUsageStore(),
+      daily_limit_tokens: 0,
+    });
+    const gated = new BudgetGatedLLMProvider({ inner, budget });
+    expect(gated.name).toBe("budget-gated:stub");
+  });
+});
+
+// Sanity: ensure tier ladder uses Sonnet from pricing module (not duplicated).
+describe("Tier sanity", () => {
+  it("Sonnet baseline downgrades to Haiku", async () => {
+    const store = new InMemoryUsageStore();
+    const budget = new DailyTokenBudget({
+      store,
+      daily_limit_tokens: 1000,
+      models: {
+        tutor: ANTHROPIC_SONNET,
+        interviewer: ANTHROPIC_SONNET,
+        reflection: ANTHROPIC_SONNET,
+        grader: ANTHROPIC_HAIKU,
+        router: ANTHROPIC_HAIKU,
+      },
+    });
+    await budget.record("u1", 900);
+    const inner = new StubProvider({ input_tokens: 0, output_tokens: 0 });
+    const gated = new BudgetGatedLLMProvider({ inner, budget });
+    await gated.complete({
+      messages: [{ role: "user", content: "hi" }],
+      role: "tutor",
+      user_id: "u1",
+      max_tokens: 16,
+      temperature: 0,
+    });
+    expect(inner.lastCompleteReq?.model).toBe(ANTHROPIC_HAIKU);
+  });
+});
diff --git a/packages/llm/src/budget-gated-provider.ts b/packages/llm/src/budget-gated-provider.ts
new file mode 100644
index 0000000..f203893
--- /dev/null
+++ b/packages/llm/src/budget-gated-provider.ts
@@ -0,0 +1,83 @@
+import type { DailyTokenBudget } from "./budget.js";
+import type { LLMProvider } from "./provider.js";
+import type {
+  CompleteRequest,
+  CompleteResponse,
+  EmbedRequest,
+  EmbedResponse,
+  StreamChunk,
+  ToolCallRequest,
+  ToolCallResponse,
+} from "./types.js";
+
+export interface BudgetGatedLLMProviderOptions {
+  inner: LLMProvider;
+  budget: DailyTokenBudget;
+}
+
+// Decorator that wraps an LLMProvider with a per-user daily token budget. Three effects:
+// 1. Pre-call: throws TokenBudgetExceededError if the user is already at/over their daily limit.
+// 2. Pre-call: when no explicit model was requested, may downgrade the resolved model by one tier
+//    (Opus → Sonnet → Haiku) once the user crosses the configured threshold (default 80%).
+// 3. Post-call: records actual input+output tokens so subsequent calls see updated state.
+//
+// The inner provider stays unaware of the budget — its telemetry sink still fires unmodified, so
+// downgrade decisions are observable via the per-event `model` field. Embed calls are passed through
+// without budget gating (no per-user usage attribution exists for embeddings yet).
+export class BudgetGatedLLMProvider implements LLMProvider {
+  readonly name: string;
+  private readonly inner: LLMProvider;
+  private readonly budget: DailyTokenBudget;
+
+  constructor(opts: BudgetGatedLLMProviderOptions) {
+    this.inner = opts.inner;
+    this.budget = opts.budget;
+    this.name = `budget-gated:${opts.inner.name}`;
+  }
+
+  async complete(req: CompleteRequest): Promise<CompleteResponse> {
+    await this.budget.assertWithinBudget(req.user_id);
+    const decision = await this.budget.decideModel({
+      user_id: req.user_id ?? 
"", + ...(req.role !== undefined && { role: req.role }), + ...(req.model !== undefined && { explicit_model: req.model }), + }); + const next: CompleteRequest = { ...req, model: decision.model }; + const res = await this.inner.complete(next); + await this.budget.record(req.user_id, res.usage.input_tokens + res.usage.output_tokens); + return res; + } + + async *stream(req: CompleteRequest): AsyncIterable { + await this.budget.assertWithinBudget(req.user_id); + const decision = await this.budget.decideModel({ + user_id: req.user_id ?? "", + ...(req.role !== undefined && { role: req.role }), + ...(req.model !== undefined && { explicit_model: req.model }), + }); + const next: CompleteRequest = { ...req, model: decision.model }; + let approxOutputTokens = 0; + for await (const chunk of this.inner.stream(next)) { + if (!chunk.done) approxOutputTokens += Math.max(1, Math.ceil(chunk.delta.length / 4)); + yield chunk; + } + await this.budget.record(req.user_id, approxOutputTokens); + } + + embed(req: EmbedRequest): Promise { + return this.inner.embed(req); + } + + async toolCall(req: ToolCallRequest): Promise { + await this.budget.assertWithinBudget(req.user_id); + const decision = await this.budget.decideModel({ + user_id: req.user_id ?? "", + ...(req.role !== undefined && { role: req.role }), + ...(req.model !== undefined && { explicit_model: req.model }), + }); + const next: ToolCallRequest = { ...req, model: decision.model }; + const res = await this.inner.toolCall(next); + await this.budget.record(req.user_id, res.usage.input_tokens + res.usage.output_tokens); + return res; + } +} diff --git a/packages/llm/src/budget.test.ts b/packages/llm/src/budget.test.ts new file mode 100644 index 0000000..325d23a --- /dev/null +++ b/packages/llm/src/budget.test.ts @@ -0,0 +1,218 @@ +import { describe, expect, it } from "vitest"; +import { DailyTokenBudget, InMemoryUsageStore, MODEL_TIERS } from "./budget.js"; +import { ANTHROPIC_HAIKU, ANTHROPIC_OPUS } from "./models.js"; +import { ANTHROPIC_SONNET } from "./pricing.js"; +import { TokenBudgetExceededError } from "./errors.js"; + +describe("InMemoryUsageStore", () => { + it("returns 0 for an unseen user", async () => { + const store = new InMemoryUsageStore(); + expect(await store.today("u1")).toBe(0); + }); + + it("accumulates tokens for the same user/day", async () => { + const store = new InMemoryUsageStore(); + const day = new Date("2026-04-26T12:00:00Z"); + await store.record("u1", 100, day); + await store.record("u1", 250, day); + expect(await store.today("u1", day)).toBe(350); + }); + + it("partitions buckets by UTC date", async () => { + const store = new InMemoryUsageStore(); + const d1 = new Date("2026-04-26T23:59:00Z"); + const d2 = new Date("2026-04-27T00:01:00Z"); + await store.record("u1", 100, d1); + await store.record("u1", 50, d2); + expect(await store.today("u1", d1)).toBe(100); + expect(await store.today("u1", d2)).toBe(50); + }); + + it("partitions buckets by user", async () => { + const store = new InMemoryUsageStore(); + const day = new Date("2026-04-26T12:00:00Z"); + await store.record("u1", 100, day); + await store.record("u2", 999, day); + expect(await store.today("u1", day)).toBe(100); + expect(await store.today("u2", day)).toBe(999); + }); +}); + +describe("DailyTokenBudget.assertWithinBudget", () => { + it("is a no-op when limit is 0 (unlimited / self-hosted default)", async () => { + const budget = new DailyTokenBudget({ + store: new InMemoryUsageStore(), + daily_limit_tokens: 0, + }); + await budget.record("u1", 999_999_999); + 
await expect(budget.assertWithinBudget("u1")).resolves.toBeUndefined(); + }); + + it("is a no-op when no user_id is provided (system call)", async () => { + const budget = new DailyTokenBudget({ + store: new InMemoryUsageStore(), + daily_limit_tokens: 1000, + }); + await expect(budget.assertWithinBudget(undefined)).resolves.toBeUndefined(); + }); + + it("throws TokenBudgetExceededError when used >= limit", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 1000); + await expect(budget.assertWithinBudget("u1")).rejects.toBeInstanceOf(TokenBudgetExceededError); + }); + + it("does not throw while under the limit", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 999); + await expect(budget.assertWithinBudget("u1")).resolves.toBeUndefined(); + }); +}); + +describe("DailyTokenBudget.decideModel", () => { + it("explicit model always wins (reason: explicit)", async () => { + const budget = new DailyTokenBudget({ + store: new InMemoryUsageStore(), + daily_limit_tokens: 1000, + }); + const r = await budget.decideModel({ + user_id: "u1", + role: "tutor", + explicit_model: "some-other-model", + }); + expect(r.model).toBe("some-other-model"); + expect(r.reason).toBe("explicit"); + }); + + it("returns baseline with reason=no_user when user_id is missing", async () => { + const budget = new DailyTokenBudget({ + store: new InMemoryUsageStore(), + daily_limit_tokens: 1000, + }); + const r = await budget.decideModel({ role: "tutor", user_id: "" }); + expect(r.model).toBe(ANTHROPIC_OPUS); + expect(r.reason).toBe("no_user"); + expect(r.tier).toBe("premium"); + }); + + it("returns baseline with reason=unlimited when limit is 0", async () => { + const budget = new DailyTokenBudget({ + store: new InMemoryUsageStore(), + daily_limit_tokens: 0, + }); + const r = await budget.decideModel({ user_id: "u1", role: "tutor" }); + expect(r.model).toBe(ANTHROPIC_OPUS); + expect(r.reason).toBe("unlimited"); + }); + + it("returns baseline with reason=under_threshold when ratio < 0.8", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 500); + const r = await budget.decideModel({ user_id: "u1", role: "tutor" }); + expect(r.model).toBe(ANTHROPIC_OPUS); + expect(r.reason).toBe("under_threshold"); + expect(r.ratio).toBe(0.5); + }); + + it("downgrades premium → mid when at the threshold", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 800); + const r = await budget.decideModel({ user_id: "u1", role: "tutor" }); + expect(r.model).toBe(ANTHROPIC_SONNET); + expect(r.tier).toBe("mid"); + expect(r.reason).toBe("downgraded"); + expect(r.ratio).toBe(0.8); + }); + + it("downgrades mid → cheap when at the threshold (router → Haiku stays cheap)", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ + store, + daily_limit_tokens: 1000, + models: { + tutor: ANTHROPIC_OPUS, + interviewer: ANTHROPIC_OPUS, + reflection: ANTHROPIC_OPUS, + grader: ANTHROPIC_SONNET, + router: ANTHROPIC_SONNET, + }, + }); + await budget.record("u1", 900); + const r = await budget.decideModel({ user_id: "u1", role: "router" }); + expect(r.model).toBe(ANTHROPIC_HAIKU); + 
expect(r.tier).toBe("cheap"); + expect(r.reason).toBe("downgraded"); + }); + + it("does not downgrade if baseline is already cheap (no tier below)", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 900); + const r = await budget.decideModel({ user_id: "u1", role: "router" }); + expect(r.model).toBe(ANTHROPIC_HAIKU); + expect(r.tier).toBe("cheap"); + expect(r.reason).toBe("downgraded"); + }); + + it("returns baseline (no downgrade) when baseline is not on the tier ladder", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ + store, + daily_limit_tokens: 1000, + models: { + tutor: "off-ladder-model", + interviewer: ANTHROPIC_OPUS, + reflection: ANTHROPIC_OPUS, + grader: ANTHROPIC_HAIKU, + router: ANTHROPIC_HAIKU, + }, + }); + await budget.record("u1", 900); + const r = await budget.decideModel({ user_id: "u1", role: "tutor" }); + expect(r.model).toBe("off-ladder-model"); + expect(r.tier).toBeNull(); + expect(r.reason).toBe("under_threshold"); + }); + + it("custom downgrade_threshold is respected", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ + store, + daily_limit_tokens: 1000, + downgrade_threshold: 0.5, + }); + await budget.record("u1", 500); + const r = await budget.decideModel({ user_id: "u1", role: "tutor" }); + expect(r.reason).toBe("downgraded"); + expect(r.model).toBe(ANTHROPIC_SONNET); + }); +}); + +describe("DailyTokenBudget.record", () => { + it("ignores zero or negative tokens", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 0); + await budget.record("u1", -5); + expect(await store.today("u1")).toBe(0); + }); + + it("ignores calls without a user_id", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record(undefined, 100); + expect(await store.today("anon")).toBe(0); + }); +}); + +describe("MODEL_TIERS", () => { + it("maps premium/mid/cheap to Opus/Sonnet/Haiku", () => { + expect(MODEL_TIERS.premium).toBe(ANTHROPIC_OPUS); + expect(MODEL_TIERS.mid).toBe(ANTHROPIC_SONNET); + expect(MODEL_TIERS.cheap).toBe(ANTHROPIC_HAIKU); + }); +}); diff --git a/packages/llm/src/budget.ts b/packages/llm/src/budget.ts new file mode 100644 index 0000000..7a1f064 --- /dev/null +++ b/packages/llm/src/budget.ts @@ -0,0 +1,168 @@ +import { ANTHROPIC_HAIKU, ANTHROPIC_OPUS, type RoleModelMap } from "./models.js"; +import { ANTHROPIC_SONNET } from "./pricing.js"; +import { TokenBudgetExceededError } from "./errors.js"; +import type { LLMRole } from "./types.js"; + +export interface DailyUsage { + user_id: string; + date: string; // YYYY-MM-DD in UTC + tokens: number; +} + +// UsageStore is the abstraction the budget tracker depends on. The DB-backed implementation +// (writes to the `agent_calls` table) lands when the schema migration ships — see the +// STORY-012 close-out. The in-memory impl is sufficient for tests and self-hosted no-budget mode. 
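+//
+// Worked example of the contract (mirrors the InMemoryUsageStore tests in budget.test.ts):
+// record("u1", 100) followed by record("u1", 250) on the same UTC day makes today("u1") resolve
+// to 350, while a record() after UTC midnight starts a fresh (user_id, UTC-date) bucket.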
+export interface UsageStore {
+  today(user_id: string, now?: Date): Promise<number>;
+  record(user_id: string, tokens: number, now?: Date): Promise<void>;
+}
+
+export class InMemoryUsageStore implements UsageStore {
+  private readonly buckets = new Map<string, number>();
+
+  async today(user_id: string, now: Date = new Date()): Promise<number> {
+    return this.buckets.get(this.key(user_id, now)) ?? 0;
+  }
+
+  async record(user_id: string, tokens: number, now: Date = new Date()): Promise<void> {
+    const k = this.key(user_id, now);
+    this.buckets.set(k, (this.buckets.get(k) ?? 0) + tokens);
+  }
+
+  private key(user_id: string, now: Date): string {
+    return `${user_id}|${now.toISOString().slice(0, 10)}`;
+  }
+}
+
+// Tier ladder used for graceful downgrades when a user nears their daily budget.
+// Indexed by name so callers can extend the map (e.g. add an `embed` tier later).
+export const MODEL_TIERS = {
+  premium: ANTHROPIC_OPUS,
+  mid: ANTHROPIC_SONNET,
+  cheap: ANTHROPIC_HAIKU,
+} as const;
+export type ModelTier = keyof typeof MODEL_TIERS;
+
+const TIER_ORDER: ModelTier[] = ["premium", "mid", "cheap"];
+
+export interface DailyTokenBudgetOptions {
+  store: UsageStore;
+  // 0 = unlimited (self-hosted default).
+  daily_limit_tokens: number;
+  // Threshold (0..1) at which to downgrade by one tier. Default 0.8.
+  downgrade_threshold?: number;
+  models?: RoleModelMap;
+  now?: () => Date;
+}
+
+export interface DecideModelInput {
+  user_id: string;
+  role?: LLMRole;
+  explicit_model?: string;
+}
+
+export interface DecideModelResult {
+  model: string;
+  tier: ModelTier | null;
+  reason: "explicit" | "no_user" | "unlimited" | "under_threshold" | "downgraded";
+  used_tokens: number;
+  ratio: number;
+}
+
+export class DailyTokenBudget {
+  private readonly store: UsageStore;
+  private readonly limit: number;
+  private readonly threshold: number;
+  private readonly models: RoleModelMap | undefined;
+  private readonly now: () => Date;
+
+  constructor(opts: DailyTokenBudgetOptions) {
+    this.store = opts.store;
+    this.limit = opts.daily_limit_tokens;
+    this.threshold = opts.downgrade_threshold ?? 0.8;
+    this.models = opts.models;
+    this.now = opts.now ?? (() => new Date());
+  }
+
+  // Throws TokenBudgetExceededError if the user has already hit their daily limit.
+  // No-op when limit is 0 (unlimited) or when no user_id is provided (self-hosted system call).
+  async assertWithinBudget(user_id: string | undefined): Promise<void> {
+    if (!user_id || this.limit === 0) return;
+    const used = await this.store.today(user_id, this.now());
+    if (used >= this.limit) {
+      throw new TokenBudgetExceededError(user_id, used, this.limit);
+    }
+  }
+
+  // Pick the model to use. Downgrades by one tier when at/over the threshold.
+  // Explicit model always wins (caller has opted out of the budget controller).
+  async decideModel(input: DecideModelInput): Promise<DecideModelResult> {
+    if (input.explicit_model) {
+      return {
+        model: input.explicit_model,
+        tier: tierForModel(input.explicit_model),
+        reason: "explicit",
+        used_tokens: 0,
+        ratio: 0,
+      };
+    }
+    const baseline = baselineModel(input.role, this.models);
+    if (!input.user_id || this.limit === 0) {
+      return {
+        model: baseline,
+        tier: tierForModel(baseline),
+        reason: input.user_id ? 
"unlimited" : "no_user", + used_tokens: 0, + ratio: 0, + }; + } + const used = await this.store.today(input.user_id, this.now()); + const ratio = used / this.limit; + const baselineTier = tierForModel(baseline); + if (ratio < this.threshold || baselineTier === null) { + return { + model: baseline, + tier: baselineTier, + reason: "under_threshold", + used_tokens: used, + ratio, + }; + } + const downgraded = downgradeOneTier(baselineTier); + return { + model: MODEL_TIERS[downgraded], + tier: downgraded, + reason: "downgraded", + used_tokens: used, + ratio, + }; + } + + async record(user_id: string | undefined, tokens: number): Promise { + if (!user_id || tokens <= 0) return; + await this.store.record(user_id, tokens, this.now()); + } +} + +function tierForModel(model: string): ModelTier | null { + for (const tier of TIER_ORDER) { + if (MODEL_TIERS[tier] === model) return tier; + } + return null; +} + +function downgradeOneTier(tier: ModelTier): ModelTier { + const idx = TIER_ORDER.indexOf(tier); + if (idx < 0 || idx === TIER_ORDER.length - 1) return "cheap"; + return TIER_ORDER[idx + 1] as ModelTier; +} + +function baselineModel(role: LLMRole | undefined, map: RoleModelMap | undefined): string { + if (role && map) return map[role]; + if (role) { + return role === "tutor" || role === "interviewer" || role === "reflection" + ? ANTHROPIC_OPUS + : ANTHROPIC_HAIKU; + } + return ANTHROPIC_HAIKU; +} diff --git a/packages/llm/src/index.ts b/packages/llm/src/index.ts index 724c57d..dcc4f53 100644 --- a/packages/llm/src/index.ts +++ b/packages/llm/src/index.ts @@ -38,6 +38,33 @@ export { export { InMemoryLLMTelemetrySink, NullLLMTelemetrySink } from "./telemetry.js"; +export { + ANTHROPIC_SONNET, + MODEL_PRICING, + PRICING_VERSION, + costFor, + type CostInput, + type CostResult, + type ModelPrice, +} from "./pricing.js"; + +export { + DailyTokenBudget, + InMemoryUsageStore, + MODEL_TIERS, + type DailyTokenBudgetOptions, + type DailyUsage, + type DecideModelInput, + type DecideModelResult, + type ModelTier, + type UsageStore, +} from "./budget.js"; + +export { + BudgetGatedLLMProvider, + type BudgetGatedLLMProviderOptions, +} from "./budget-gated-provider.js"; + export { ChatMessageSchema, ChatRoleSchema, diff --git a/packages/llm/src/pricing.test.ts b/packages/llm/src/pricing.test.ts new file mode 100644 index 0000000..49c8181 --- /dev/null +++ b/packages/llm/src/pricing.test.ts @@ -0,0 +1,52 @@ +import { describe, expect, it } from "vitest"; +import { ANTHROPIC_HAIKU, ANTHROPIC_OPUS } from "./models.js"; +import { ANTHROPIC_SONNET, costFor, MODEL_PRICING, PRICING_VERSION } from "./pricing.js"; + +describe("costFor", () => { + it("computes Opus cost: 1M input + 1M output → $15 + $75 = $90", () => { + const r = costFor({ + model: ANTHROPIC_OPUS, + input_tokens: 1_000_000, + output_tokens: 1_000_000, + }); + expect(r.cost_usd).toBe(90); + expect(r.known_model).toBe(true); + expect(r.pricing_version).toBe(PRICING_VERSION); + }); + + it("computes Haiku cost: 1k input + 1k output → $0.000001 * (1k + 5k)", () => { + const r = costFor({ + model: ANTHROPIC_HAIKU, + input_tokens: 1_000, + output_tokens: 1_000, + }); + // (1000 * 1 + 1000 * 5) / 1_000_000 = 0.006 + expect(r.cost_usd).toBe(0.006); + expect(r.known_model).toBe(true); + }); + + it("computes Sonnet cost (rounded to 6 decimals)", () => { + const r = costFor({ model: ANTHROPIC_SONNET, input_tokens: 500, output_tokens: 250 }); + // (500 * 3 + 250 * 15) / 1M = 0.00525 + expect(r.cost_usd).toBe(0.00525); + }); + + it("returns cost=0 + known_model=false 
for unknown models, never throws", () => {
+    const r = costFor({ model: "made-up-model", input_tokens: 100, output_tokens: 100 });
+    expect(r.cost_usd).toBe(0);
+    expect(r.known_model).toBe(false);
+    expect(r.pricing_version).toBe(PRICING_VERSION);
+  });
+
+  it("stamps every result with the same PRICING_VERSION constant", () => {
+    expect(
+      costFor({ model: ANTHROPIC_OPUS, input_tokens: 0, output_tokens: 0 }).pricing_version,
+    ).toBe(PRICING_VERSION);
+  });
+
+  it("includes Opus, Sonnet, and Haiku in the pricing table", () => {
+    expect(MODEL_PRICING[ANTHROPIC_OPUS]).toBeDefined();
+    expect(MODEL_PRICING[ANTHROPIC_SONNET]).toBeDefined();
+    expect(MODEL_PRICING[ANTHROPIC_HAIKU]).toBeDefined();
+  });
+});
diff --git a/packages/llm/src/pricing.ts b/packages/llm/src/pricing.ts
new file mode 100644
index 0000000..3ee7f02
--- /dev/null
+++ b/packages/llm/src/pricing.ts
@@ -0,0 +1,53 @@
+import { ANTHROPIC_HAIKU, ANTHROPIC_OPUS } from "./models.js";
+
+export const ANTHROPIC_SONNET = "claude-sonnet-4-6";
+
+export interface ModelPrice {
+  input_per_mtok: number;
+  output_per_mtok: number;
+}
+
+export const PRICING_VERSION = "2026-04-26";
+
+// USD per 1M tokens. Anchored to Anthropic public list prices snapshot at PRICING_VERSION.
+// When prices change, bump PRICING_VERSION and append a new constant — never mutate in place.
+// Rows for OpenAI / Ollama land when their providers do.
+export const MODEL_PRICING: Record<string, ModelPrice> = {
+  [ANTHROPIC_OPUS]: { input_per_mtok: 15, output_per_mtok: 75 },
+  [ANTHROPIC_SONNET]: { input_per_mtok: 3, output_per_mtok: 15 },
+  [ANTHROPIC_HAIKU]: { input_per_mtok: 1, output_per_mtok: 5 },
+};
+
+export interface CostInput {
+  model: string;
+  input_tokens: number;
+  output_tokens: number;
+}
+
+export interface CostResult {
+  cost_usd: number;
+  pricing_version: string;
+  known_model: boolean;
+}
+
+// Compute cost without throwing on unknown models — pricing-table drift should not break the
+// runtime path. Unknown models record cost=0 and known_model=false; an analytics dashboard can
+// flag this for the operator to update MODEL_PRICING.
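+//
+// Worked example at PRICING_VERSION "2026-04-26": Haiku ($1 in / $5 out per MTok) with 11 input
+// and 7 output tokens costs (11 * 1 + 7 * 5) / 1_000_000 = $0.000046, the exact figure the
+// telemetry test in anthropic.test.ts asserts.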
+export function costFor(input: CostInput): CostResult {
+  const price = MODEL_PRICING[input.model];
+  if (price === undefined) {
+    return { cost_usd: 0, pricing_version: PRICING_VERSION, known_model: false };
+  }
+  const cost =
+    (input.input_tokens * price.input_per_mtok + input.output_tokens * price.output_per_mtok) /
+    1_000_000;
+  return {
+    cost_usd: round6(cost),
+    pricing_version: PRICING_VERSION,
+    known_model: true,
+  };
+}
+
+function round6(n: number): number {
+  return Math.round(n * 1_000_000) / 1_000_000;
+}
diff --git a/packages/llm/src/types.ts b/packages/llm/src/types.ts
index 1d39a81..1846bb3 100644
--- a/packages/llm/src/types.ts
+++ b/packages/llm/src/types.ts
@@ -28,6 +28,7 @@ export const CompleteRequestSchema = z.object({
temperature: z.number().min(0).max(2).default(0.7),
user_id: z.string().optional(),
prompt_version: z.string().optional(),
+  session_id: z.string().optional(),
});
export type CompleteRequest = z.infer<typeof CompleteRequestSchema>;
@@ -101,9 +102,14 @@ export const LLMTelemetryEventSchema = z.object({
role: LLMRoleSchema.optional(),
prompt_version: z.string().optional(),
user_id: z.string().optional(),
+  session_id: z.string().optional(),
task: z.enum(["complete", "stream", "embed", "tool_call"]),
input_tokens: z.number().int().min(0),
output_tokens: z.number().int().min(0),
+  cached_tokens: z.number().int().min(0).optional(),
+  cost_usd: z.number().min(0),
+  pricing_version: z.string(),
+  tool_used: z.string().optional(),
latency_ms: z.number().int().min(0),
ok: z.boolean(),
decided_at: z.string(),
diff --git a/project/BOARD.md b/project/BOARD.md
index f8b8303..313a9cc 100644
--- a/project/BOARD.md
+++ b/project/BOARD.md
@@ -1,6 +1,6 @@
# LearnPro Board
-> **Last updated:** 2026-04-26 (STORY-006 done — Monaco-based `/playground` page in `apps/web` with language selector + Run button + result panel; wiring path browser → Next.js Route Handler `/api/sandbox/run` → Fastify `/sandbox/run`. Re-scoped on pickup: WebSocket streaming split into [STORY-059](./stories/STORY-059-sandbox-streaming.md); Submit/hidden-tests deferred to [STORY-016](./stories/STORY-016-seed-bank.md); problem-language follow rewires when STORY-016 lands.)
+> **Last updated:** 2026-04-26 (STORY-012 done — versioned `MODEL_PRICING` table + `costFor()`, per-user `DailyTokenBudget` with Opus → Sonnet → Haiku tier ladder, `BudgetGatedLLMProvider` decorator wrapping any `LLMProvider`, `LLMTelemetryEvent` extended with `cost_usd`/`pricing_version`/`session_id`/`tool_used`/`cached_tokens`. DB-backed sink + `agent_calls` migration split into [STORY-060](./stories/STORY-060-agent-calls-db-sink.md) so STORY-012 stays at S.)
> **How to read this:** This is the live status of every Epic, Story, and Task in the project. Hand-maintained for now (a regenerator script lives in the v1 backlog). When you change an item's `status:` frontmatter, also update the row here in the same commit.
---
@@ -31,7 +31,6 @@ Path A locked 2026-04-25. 
EPIC-019 (foundation) must land first since every othe |----|-------|------|-------|----------|-----| | [STORY-010](stories/STORY-010-sandbox-hardening.md) | Verify sandbox hardening checklist (no-net, ro rootfs, cgroups, seccomp, non-root) | EPIC-003 | mvp | P0 | M | | [STORY-011](stories/STORY-011-tutor-agent-tools.md) | Tutor agent with `assign-problem` / `give-hint` / `grade` / `update-profile` tools | EPIC-004 | mvp | P0 | L | -| [STORY-012](stories/STORY-012-cost-telemetry.md) | Per-call LLM cost & latency telemetry + per-user daily token budget | EPIC-004 | mvp | P0 | S | | [STORY-015](stories/STORY-015-session-plan.md) | Session plan agent (3–5 micro-objectives per session) | EPIC-006 | mvp | P0 | M | | [STORY-016](stories/STORY-016-seed-bank.md) | Curated seed problem bank (~30 Python + ~30 TS) with hidden tests | EPIC-007 | mvp | P0 | L | | [STORY-017](stories/STORY-017-hint-ladder.md) | 3-rung hint ladder | EPIC-007 | mvp | P0 | S | @@ -48,6 +47,7 @@ Path A locked 2026-04-25. EPIC-019 (foundation) must land first since every othe | [STORY-054](stories/STORY-054-adaptive-autonomy-controller.md) | Adaptive autonomy controller (per-user confidence → Low/Medium/High ask-vs-act bands) | EPIC-004 | mvp | P0 | M | | [STORY-055](stories/STORY-055-rich-interaction-telemetry-schema.md) | Rich interaction telemetry schema (cursor focus, voice opt-in, edits/reverts → `interactions` table) | EPIC-005 | mvp | P0 | M | | [STORY-056](stories/STORY-056-data-retention-and-redaction.md) | Data retention & redaction pipeline (raw 90d / voice 30d / episodes indefinite + PII redaction) | EPIC-016 | mvp | P0 | M | +| [STORY-060](stories/STORY-060-agent-calls-db-sink.md) | DB-backed `UsageStore` + `agent_calls` table (split from STORY-012) | EPIC-004 | mvp | P0 | S | --- @@ -90,10 +90,11 @@ These stories were filed during EPIC-017 Phase C from the expanded idea catalog ## Recently Done -STORY-006 (Monaco editor + Run button + result panel) landed 2026-04-26 — first user-facing feature in `apps/web`. STORY-008 (TypeScript sandbox runner via Piston) landed 2026-04-26. STORY-007 (Python sandbox runner via Piston) landed 2026-04-26 (PR #14) — first feature Story under EPIC-003. STORY-013 (learner profile schema) landed 2026-04-26 (PR #11) — first feature Story under EPIC-005. STORY-009 (LLM gateway) landed 2026-04-26 (PR #9) — first feature Story under EPIC-004. EPIC-019 (foundation) closed 2026-04-26 with STORY-052 (monorepo skeleton, PR #5) and STORY-057 (policy adapters, PR #7). GitHub repo + PR workflow landed 2026-04-25 (PR #1, STORY-058). EPIC-017 product grooming closed in full on 2026-04-25 (Phases A + B + C). EPIC-001 closed on 2026-04-25 (initial scaffolding commit `c1e17a1`). Phase A commit: `bbf7300`. +STORY-012 (per-call LLM cost telemetry + per-user daily token budget) landed 2026-04-26 — versioned `MODEL_PRICING` + `costFor()` calculator, `DailyTokenBudget` with Opus → Sonnet → Haiku tier ladder + downgrade at 80%, `BudgetGatedLLMProvider` decorator. DB-backed sink + `agent_calls` migration split into [STORY-060](./stories/STORY-060-agent-calls-db-sink.md). STORY-006 (Monaco editor + Run button + result panel) landed 2026-04-26 — first user-facing feature in `apps/web`. STORY-008 (TypeScript sandbox runner via Piston) landed 2026-04-26. STORY-007 (Python sandbox runner via Piston) landed 2026-04-26 (PR #14) — first feature Story under EPIC-003. STORY-013 (learner profile schema) landed 2026-04-26 (PR #11) — first feature Story under EPIC-005. 
STORY-009 (LLM gateway) landed 2026-04-26 (PR #9) — first feature Story under EPIC-004. EPIC-019 (foundation) closed 2026-04-26 with STORY-052 (monorepo skeleton, PR #5) and STORY-057 (policy adapters, PR #7). GitHub repo + PR workflow landed 2026-04-25 (PR #1, STORY-058). EPIC-017 product grooming closed in full on 2026-04-25 (Phases A + B + C). EPIC-001 closed on 2026-04-25 (initial scaffolding commit `c1e17a1`). Phase A commit: `bbf7300`. | ID | Title | Done | |----|-------|------| +| [STORY-012](stories/STORY-012-cost-telemetry.md) | Per-call LLM cost & latency telemetry + per-user daily token budget (DB sink → STORY-060) | 2026-04-26 | | [STORY-006](stories/STORY-006-monaco-editor.md) | Monaco editor + Run button + result panel (`/playground` → Next.js proxy → Fastify `/sandbox/run`) | 2026-04-26 | | [STORY-008](stories/STORY-008-typescript-runner.md) | TypeScript sandbox runner via Piston (TS-specific unit/integration/API tests on top of STORY-007 infra) | 2026-04-26 | | [STORY-007](stories/STORY-007-python-runner.md) | Python sandbox runner via Piston (`SandboxProvider` + `PistonSandboxProvider` + `POST /sandbox/run`) | 2026-04-26 | diff --git a/project/stories/STORY-012-cost-telemetry.md b/project/stories/STORY-012-cost-telemetry.md index e6ba151..5f8086e 100644 --- a/project/stories/STORY-012-cost-telemetry.md +++ b/project/stories/STORY-012-cost-telemetry.md @@ -2,14 +2,14 @@ id: STORY-012 title: Per-call LLM cost & latency telemetry + per-user daily token budget type: story -status: backlog +status: done priority: P0 estimate: S parent: EPIC-004 phase: mvp tags: [llm, telemetry, cost-control] created: 2026-04-25 -updated: 2026-04-25 +updated: 2026-04-26 --- ## Description @@ -22,20 +22,23 @@ Goes through the `Telemetry` adapter from EPIC-015 (console impl in MVP, OpenTel ## Acceptance criteria -- [ ] `agent_calls` table records all 10 fields above. -- [ ] Daily token budget is enforced server-side (not just UI). -- [ ] Graceful model-downgrade kicks in at 80% consumption. -- [ ] At 100%, user sees a friendly message, not an error stack trace. -- [ ] Cost calculation uses a versioned price table per model (so price changes don't silently break analytics). +- [x] **All telemetry fields are recorded** — `LLMTelemetryEvent` carries `provider`, `model`, `role`, `user_id`, `session_id` (new), `task`, `input_tokens`, `output_tokens`, `cached_tokens` (optional, for prompt-cache later), `cost_usd` (new), `pricing_version` (new), `tool_used` (optional, populated for tool calls), `latency_ms`, `ok`, `decided_at`, `prompt_version`. The `agent_calls` *table* + DB-backed sink land in [STORY-060](./STORY-060-agent-calls-db-sink.md) with the next batch of DB migrations — the schema and emission point are done; only persistence is split. +- [x] Daily token budget is enforced server-side (`BudgetGatedLLMProvider` decorator in `@learnpro/llm` — pre-call `assertWithinBudget` + post-call `record`, applied at the provider layer so any caller goes through it). +- [x] Graceful model-downgrade kicks in at the threshold (default 80%): `DailyTokenBudget.decideModel` walks the `MODEL_TIERS` ladder (premium=Opus → mid=Sonnet → cheap=Haiku) and downgrades by one tier when usage ≥ threshold. Explicit `req.model` always wins. +- [x] At 100%, `TokenBudgetExceededError` is thrown with a human-friendly message (`"Daily token budget exceeded for user X: used Y / limit Z"`). API-side mapping to a 429 + JSON body lands in [STORY-060](./STORY-060-agent-calls-db-sink.md) with the auth wiring. 
+- [x] Cost calculation uses a versioned price table — `MODEL_PRICING` in `packages/llm/src/pricing.ts`, stamped with `PRICING_VERSION = "2026-04-26"`. Append-only convention: bump the version + add a new row when prices change, never mutate in place. Unknown models record `cost_usd=0` + `known_model=false` so analytics can flag a stale pricing table for the operator without breaking the runtime path.

## Dependencies

-- Blocked by: STORY-009 (LLM gateway).
+- Blocked by: STORY-009 (LLM gateway). ✅
+- Spawned: [STORY-060](./STORY-060-agent-calls-db-sink.md) — DB-backed `UsageStore` + `agent_calls` Drizzle migration + API 429 mapping. Kept separate so STORY-012 stays at S; STORY-060 lands with the next DB Story so the migrations ship as one batch.

## Tasks

-(To be created when work begins.)
+(Tracked inline in the activity log.)

## Activity log

- 2026-04-25 — created
+- 2026-04-26 — picked up. Built versioned cost calculator (`pricing.ts` + `pricing.test.ts`), per-user daily budget tracker (`budget.ts` + `budget.test.ts` — `UsageStore` interface + `InMemoryUsageStore` + `DailyTokenBudget` with explicit/no_user/unlimited/under_threshold/downgraded reasons), and decorator pattern wrapping any `LLMProvider` (`budget-gated-provider.ts` + `budget-gated-provider.test.ts`). Extended `LLMTelemetryEventSchema` with `cost_usd`, `pricing_version`, optional `session_id`/`cached_tokens`/`tool_used`. Wired `costFor()` into `AnthropicProvider.recordTelemetry` so every call now stamps cost + version + tool name. Total: 38 new tests across 3 files, all green; 72 tests passing in `@learnpro/llm`.
+- 2026-04-26 — done. Filed [STORY-060](./STORY-060-agent-calls-db-sink.md) for the deferred DB persistence layer (Drizzle migration + `DrizzleLLMTelemetrySink` + `DrizzleUsageStore` + API 429 mapping). Interfaces (`UsageStore`, `LLMTelemetrySink`) are stable; STORY-060 just adds Drizzle impls behind them.
diff --git a/project/stories/STORY-060-agent-calls-db-sink.md b/project/stories/STORY-060-agent-calls-db-sink.md
new file mode 100644
index 0000000..fb79d64
--- /dev/null
+++ b/project/stories/STORY-060-agent-calls-db-sink.md
@@ -0,0 +1,58 @@
+---
+id: STORY-060
+title: DB-backed `UsageStore` + `agent_calls` table (split from STORY-012)
+type: story
+status: backlog
+priority: P0
+estimate: S
+parent: EPIC-004
+phase: mvp
+tags: [llm, telemetry, db, drizzle]
+created: 2026-04-26
+updated: 2026-04-26
+---
+
+## Description
+
+[STORY-012](./STORY-012-cost-telemetry.md) shipped the cost calculator (`costFor` + versioned `MODEL_PRICING`), the `LLMTelemetryEvent` schema (now carries `cost_usd`, `pricing_version`, optional `session_id` / `cached_tokens` / `tool_used`), and the per-user daily token budget (`DailyTokenBudget` + `BudgetGatedLLMProvider` decorator). What it deliberately did **not** ship is the persistence layer behind both:
+
+1. **`agent_calls` table** — the sink that records every `LLMTelemetryEvent` so we can answer "what does a typical learning session cost?" before the AWS bill answers it for us. Today the in-process `InMemoryLLMTelemetrySink` is sufficient for tests and short demos; production needs Postgres.
+2. **DB-backed `UsageStore`** — the budget tracker depends on a `UsageStore` interface; the in-memory impl handles tests and self-hosted no-budget mode (limit=0), but a multi-process API needs a shared bucket per `(user_id, UTC date)`.
+
+Splitting this out keeps STORY-012 within its S estimate (interface + decorator + tests, no Drizzle migration) and lets the schema change move with the next batch of DB-touching Stories.
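+
+A rough sketch of the intended API wiring (the function name `gateWithDailyBudget` and the env-var plumbing are illustrative, not final; only the `@learnpro/llm` pieces shipped by STORY-012 exist today, and the sketch assumes the package also exports its `LLMProvider` type — see the scope list below for the real deliverables):
+
+```ts
+import {
+  BudgetGatedLLMProvider,
+  DailyTokenBudget,
+  type LLMProvider,
+  type UsageStore,
+} from "@learnpro/llm";
+
+// `store` will be this Story's DrizzleUsageStore; any UsageStore implementation works.
+export function gateWithDailyBudget(inner: LLMProvider, store: UsageStore): LLMProvider {
+  const limit = Number(process.env["LEARNPRO_DAILY_TOKEN_LIMIT"] ?? "0");
+  if (limit <= 0) return inner; // self-hosted default: 0 = unlimited, no wrapper at all
+  const budget = new DailyTokenBudget({ store, daily_limit_tokens: limit });
+  return new BudgetGatedLLMProvider({ inner, budget });
+}
+```
+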
+ +## MVP scope (this Story) + +- Drizzle migration for `agent_calls` (cols match `LLMTelemetryEvent` + `org_id`, `id`, `created_at`). +- `DrizzleLLMTelemetrySink` — implements `LLMTelemetrySink`, INSERTs one row per event, never throws (errors logged + dropped so a telemetry outage can't kill an LLM call). +- `DrizzleUsageStore` — implements `UsageStore`. `today()` runs `SELECT sum(input_tokens + output_tokens) FROM agent_calls WHERE user_id=$1 AND created_at >= $2 (start-of-UTC-day)`. `record()` is a no-op (rows are written by the telemetry sink — single source of truth). +- API wiring: when `LEARNPRO_DAILY_TOKEN_LIMIT > 0` is set, `buildLLMProvider` wraps the AnthropicProvider with `BudgetGatedLLMProvider` using `DrizzleUsageStore`. Self-hosted default is 0 (unlimited). +- Friendly 429 mapping: API serializes `TokenBudgetExceededError` as `{ error: "daily_budget_exceeded", message: "..." }` (status 429) so the playground can render the friendly message AC from STORY-012. + +## Out of scope (file separately if needed) + +- Per-org budgets (only per-user for MVP). +- Aggregate dashboards / cost analytics UI — depends on a stats route + admin shell that don't exist yet. +- Cached-prompt / prompt-cache aware accounting (`cached_tokens` column is wired but not yet populated by the Anthropic transport). + +## Acceptance criteria + +- [ ] `agent_calls` Drizzle migration lands in `packages/db` with all `LLMTelemetryEvent` fields + `id`, `org_id`, `created_at`. +- [ ] `DrizzleLLMTelemetrySink` writes one row per event; failures are logged but never thrown. +- [ ] `DrizzleUsageStore.today()` aggregates today's tokens per user against UTC midnight; covered by an integration test against a real Postgres (Docker Compose). +- [ ] API exposes `GET /llm/usage/today` returning `{ used_tokens, limit_tokens, ratio }` for the authenticated user (used by the UI nag at >80%, friendly block at 100%). +- [ ] When the budget is exceeded, the API responds 429 with `{ error: "daily_budget_exceeded", message: "..." }` rather than letting `TokenBudgetExceededError` leak as a 500. +- [ ] Manual smoke: with `LEARNPRO_DAILY_TOKEN_LIMIT=100` and a real Anthropic key, hitting the playground twice triggers the friendly message on call #2. + +## Dependencies + +- Blocked by: STORY-005 (Auth.js — needs a `user_id` to attribute usage to) **or** a stub auth middleware that pins a fixed `user_id` for dev. Acceptable to land the table + sink without auth, with the API wiring deferred until STORY-005. +- Blocks: nothing structural, but deferring it past 100 daily users would be expensive. + +## Notes + +Filed during STORY-012 close-out (2026-04-26). The interfaces (`UsageStore`, `LLMTelemetrySink`) are already stable from STORY-012; this Story just adds the Drizzle implementations behind them. + +## Activity log + +- 2026-04-26 — created (split from STORY-012).
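+
+For the 429 mapping, something like the following Fastify error hook should do (a sketch only: the handler name and its placement are illustrative, and it assumes `TokenBudgetExceededError` is exported from `@learnpro/llm` alongside the existing error types):
+
+```ts
+import type { FastifyInstance } from "fastify";
+import { TokenBudgetExceededError } from "@learnpro/llm";
+
+export function mapBudgetErrors(app: FastifyInstance): void {
+  app.setErrorHandler((err, _req, reply) => {
+    if (err instanceof TokenBudgetExceededError) {
+      // Friendly block instead of a leaked 500 + stack trace (STORY-012 AC).
+      return reply.status(429).send({ error: "daily_budget_exceeded", message: err.message });
+    }
+    return reply.send(err); // fall back to Fastify's default error serialization
+  });
+}
+```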