diff --git a/packages/llm/src/anthropic.test.ts b/packages/llm/src/anthropic.test.ts index 506c694..0ac7695 100644 --- a/packages/llm/src/anthropic.test.ts +++ b/packages/llm/src/anthropic.test.ts @@ -7,6 +7,7 @@ import { } from "./anthropic.js"; import { InMemoryLLMTelemetrySink } from "./telemetry.js"; import { ANTHROPIC_HAIKU, ANTHROPIC_OPUS } from "./models.js"; +import { PRICING_VERSION } from "./pricing.js"; import { LLMRequestError, NotImplementedError } from "./errors.js"; class FakeTransport implements AnthropicTransport { @@ -148,7 +149,7 @@ describe("AnthropicProvider.complete", () => { ).rejects.toBeInstanceOf(LLMRequestError); }); - it("emits telemetry on success including role and prompt_version", async () => { + it("emits telemetry on success including role, prompt_version, and cost", async () => { const sink = new InMemoryLLMTelemetrySink(); const transport = new FakeTransport(baseResponse); const provider = new AnthropicProvider({ transport, telemetry: sink }); @@ -156,6 +157,7 @@ describe("AnthropicProvider.complete", () => { messages: [{ role: "user", content: "hi" }], role: "tutor", user_id: "u1", + session_id: "sess-42", prompt_version: "tutor@v3", max_tokens: 16, temperature: 0, @@ -166,9 +168,28 @@ describe("AnthropicProvider.complete", () => { expect(ev.role).toBe("tutor"); expect(ev.prompt_version).toBe("tutor@v3"); expect(ev.user_id).toBe("u1"); + expect(ev.session_id).toBe("sess-42"); expect(ev.input_tokens).toBe(11); expect(ev.output_tokens).toBe(7); expect(ev.ok).toBe(true); + // Haiku pricing: (11*1 + 7*5) / 1M = 0.000046 + expect(ev.cost_usd).toBe(0.000046); + expect(ev.pricing_version).toBe(PRICING_VERSION); + }); + + it("emits cost=0 + known_model=false fallback for an unknown model", async () => { + const sink = new InMemoryLLMTelemetrySink(); + const transport = new FakeTransport({ ...baseResponse, model: "made-up-model" }); + const provider = new AnthropicProvider({ transport, telemetry: sink }); + await provider.complete({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 16, + temperature: 0, + }); + expect(sink.events[0]?.cost_usd).toBe(0); + expect(sink.events[0]?.pricing_version).toBe(PRICING_VERSION); }); }); @@ -226,6 +247,33 @@ describe("AnthropicProvider.toolCall", () => { expect(res.finish_reason).toBe("tool_use"); expect(transport.lastParams?.tool_choice).toEqual({ type: "auto" }); }); + + it("records tool_used in telemetry when a tool is invoked", async () => { + const sink = new InMemoryLLMTelemetrySink(); + const transport = new FakeTransport({ + model: ANTHROPIC_OPUS, + stop_reason: "tool_use", + usage: { input_tokens: 12, output_tokens: 4 }, + content: [ + { + type: "tool_use", + id: "tool_01", + name: "give-hint", + input: { rung: 1 }, + }, + ], + }); + const provider = new AnthropicProvider({ transport, telemetry: sink }); + await provider.toolCall({ + messages: [{ role: "user", content: "stuck" }], + role: "tutor", + max_tokens: 64, + temperature: 0, + tools: [{ name: "give-hint", description: "x", input_schema: { type: "object" } }], + tool_choice: "auto", + }); + expect(sink.events[0]?.tool_used).toBe("give-hint"); + }); }); describe("AnthropicProvider.embed", () => { diff --git a/packages/llm/src/anthropic.ts b/packages/llm/src/anthropic.ts index f862dd6..dd50648 100644 --- a/packages/llm/src/anthropic.ts +++ b/packages/llm/src/anthropic.ts @@ -1,6 +1,7 @@ import { LLMRequestError, NotImplementedError } from "./errors.js"; import type { LLMProvider } from "./provider.js"; import { 
DEFAULT_ROLE_MODEL_MAP, resolveModel, type RoleModelMap } from "./models.js";
+import { costFor } from "./pricing.js";
import { NullLLMTelemetrySink } from "./telemetry.js";
import { DEFAULT_RETRY, withRetry, type RetryOptions } from "./retry.js";
import {
@@ -219,6 +220,7 @@ export class AnthropicProvider implements LLMProvider {
        usage: out.usage,
        start,
        ok: true,
+        ...(tool_calls[0] !== undefined && { tool_used: tool_calls[0].name }),
      });
      return out;
    } catch (err) {
@@ -241,19 +243,29 @@ export class AnthropicProvider implements LLMProvider {
    usage: { input_tokens: number; output_tokens: number };
    start: number;
    ok: boolean;
+    tool_used?: string;
  }): void {
+    const cost = costFor({
+      model: opts.model,
+      input_tokens: opts.usage.input_tokens,
+      output_tokens: opts.usage.output_tokens,
+    });
    this.telemetry.record({
      provider: this.name,
      model: opts.model,
      task: opts.task,
      input_tokens: opts.usage.input_tokens,
      output_tokens: opts.usage.output_tokens,
+      cost_usd: cost.cost_usd,
+      pricing_version: cost.pricing_version,
      latency_ms: Math.max(0, this.now() - opts.start),
      ok: opts.ok,
      decided_at: new Date(this.now()).toISOString(),
      ...(opts.req.role !== undefined && { role: opts.req.role }),
      ...(opts.req.user_id !== undefined && { user_id: opts.req.user_id }),
+      ...(opts.req.session_id !== undefined && { session_id: opts.req.session_id }),
      ...(opts.req.prompt_version !== undefined && { prompt_version: opts.req.prompt_version }),
+      ...(opts.tool_used !== undefined && { tool_used: opts.tool_used }),
    });
  }
}
diff --git a/packages/llm/src/budget-gated-provider.test.ts b/packages/llm/src/budget-gated-provider.test.ts
new file mode 100644
index 0000000..92e7b41
--- /dev/null
+++ b/packages/llm/src/budget-gated-provider.test.ts
@@ -0,0 +1,295 @@
+import { describe, expect, it } from "vitest";
+import { BudgetGatedLLMProvider } from "./budget-gated-provider.js";
+import { DailyTokenBudget, InMemoryUsageStore } from "./budget.js";
+import { ANTHROPIC_HAIKU, ANTHROPIC_OPUS } from "./models.js";
+import { ANTHROPIC_SONNET } from "./pricing.js";
+import { TokenBudgetExceededError } from "./errors.js";
+import type { LLMProvider } from "./provider.js";
+import type {
+  CompleteRequest,
+  CompleteResponse,
+  EmbedRequest,
+  EmbedResponse,
+  StreamChunk,
+  ToolCallRequest,
+  ToolCallResponse,
+} from "./types.js";
+
+class StubProvider implements LLMProvider {
+  readonly name = "stub";
+  public lastCompleteReq: CompleteRequest | null = null;
+  public lastToolCallReq: ToolCallRequest | null = null;
+  public lastStreamReq: CompleteRequest | null = null;
+  public lastEmbedReq: EmbedRequest | null = null;
+
+  constructor(
+    private readonly response: {
+      input_tokens: number;
+      output_tokens: number;
+      streamChunks?: string[];
+    },
+  ) {}
+
+  async complete(req: CompleteRequest): Promise<CompleteResponse> {
+    this.lastCompleteReq = req;
+    return {
+      text: "stub-text",
+      model: req.model ?? "stub-model",
+      finish_reason: "end_turn",
+      usage: {
+        input_tokens: this.response.input_tokens,
+        output_tokens: this.response.output_tokens,
+      },
+    };
+  }
+
+  async *stream(req: CompleteRequest): AsyncIterable<StreamChunk> {
+    this.lastStreamReq = req;
+    const chunks = this.response.streamChunks ?? 
["abcd", "efgh"]; + for (const c of chunks) yield { delta: c, done: false }; + yield { delta: "", done: true }; + } + + async embed(req: EmbedRequest): Promise { + this.lastEmbedReq = req; + return { vector: [0.1, 0.2], model: "stub-embed", usage: {} }; + } + + async toolCall(req: ToolCallRequest): Promise { + this.lastToolCallReq = req; + return { + text: "", + tool_calls: [], + model: req.model ?? "stub-model", + finish_reason: "tool_use", + usage: { + input_tokens: this.response.input_tokens, + output_tokens: this.response.output_tokens, + }, + }; + } +} + +describe("BudgetGatedLLMProvider.complete", () => { + it("passes the resolved baseline model through under threshold", async () => { + const inner = new StubProvider({ input_tokens: 100, output_tokens: 50 }); + const budget = new DailyTokenBudget({ + store: new InMemoryUsageStore(), + daily_limit_tokens: 10_000, + }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + const res = await gated.complete({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 64, + temperature: 0.5, + }); + expect(inner.lastCompleteReq?.model).toBe(ANTHROPIC_OPUS); + expect(res.text).toBe("stub-text"); + }); + + it("downgrades the model when over the threshold (Opus → Sonnet)", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 800); + const inner = new StubProvider({ input_tokens: 10, output_tokens: 5 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + await gated.complete({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 32, + temperature: 0, + }); + expect(inner.lastCompleteReq?.model).toBe(ANTHROPIC_SONNET); + }); + + it("respects an explicit model override (no downgrade)", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 999); + const inner = new StubProvider({ input_tokens: 10, output_tokens: 5 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + await gated.complete({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + model: "explicit-model", + user_id: "u1", + max_tokens: 32, + temperature: 0, + }); + expect(inner.lastCompleteReq?.model).toBe("explicit-model"); + }); + + it("throws TokenBudgetExceededError before calling the inner provider when at the limit", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 1000); + const inner = new StubProvider({ input_tokens: 1, output_tokens: 1 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + await expect( + gated.complete({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 16, + temperature: 0, + }), + ).rejects.toBeInstanceOf(TokenBudgetExceededError); + expect(inner.lastCompleteReq).toBeNull(); + }); + + it("records (input + output) tokens after a successful call", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 10_000 }); + const inner = new StubProvider({ input_tokens: 120, output_tokens: 35 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + await gated.complete({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 16, + temperature: 
0, + }); + expect(await store.today("u1")).toBe(155); + }); + + it("does nothing budget-wise when no user_id is provided (system call)", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + const inner = new StubProvider({ input_tokens: 50, output_tokens: 50 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + await gated.complete({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + max_tokens: 16, + temperature: 0, + }); + expect(await store.today("u1")).toBe(0); + expect(inner.lastCompleteReq?.model).toBe(ANTHROPIC_OPUS); + }); +}); + +describe("BudgetGatedLLMProvider.stream", () => { + it("downgrades the model when over the threshold and approximates output tokens", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 850); + const inner = new StubProvider({ + input_tokens: 0, + output_tokens: 0, + streamChunks: ["abcd", "efghij"], // 4 + 6 = 10 chars → ~3 tokens + }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + const out = []; + for await (const c of gated.stream({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 16, + temperature: 0, + })) { + out.push(c); + } + expect(inner.lastStreamReq?.model).toBe(ANTHROPIC_SONNET); + expect(out[out.length - 1]?.done).toBe(true); + // 850 (existing) + ceil(4/4)=1 + ceil(6/4)=2 = 853 + expect(await store.today("u1")).toBe(853); + }); + + it("blocks when over budget without consuming the inner stream", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 1000); + const inner = new StubProvider({ input_tokens: 0, output_tokens: 0 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + await expect(async () => { + for await (const _ of gated.stream({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 16, + temperature: 0, + })) { + // unreachable + } + }).rejects.toBeInstanceOf(TokenBudgetExceededError); + expect(inner.lastStreamReq).toBeNull(); + }); +}); + +describe("BudgetGatedLLMProvider.toolCall", () => { + it("downgrades and records like complete()", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 800); + const inner = new StubProvider({ input_tokens: 30, output_tokens: 20 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + await gated.toolCall({ + messages: [{ role: "user", content: "hi" }], + role: "tutor", + user_id: "u1", + max_tokens: 16, + temperature: 0, + tools: [{ name: "t", description: "d", input_schema: { type: "object" } }], + tool_choice: "auto", + }); + expect(inner.lastToolCallReq?.model).toBe(ANTHROPIC_SONNET); + expect(await store.today("u1")).toBe(850); + }); +}); + +describe("BudgetGatedLLMProvider.embed", () => { + it("passes through without budget gating", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 999_999); + const inner = new StubProvider({ input_tokens: 0, output_tokens: 0 }); + const gated = new BudgetGatedLLMProvider({ inner, budget }); + const res = await gated.embed({ text: "hello" }); + expect(res.vector).toEqual([0.1, 0.2]); + 
expect(inner.lastEmbedReq?.text).toBe("hello");
+  });
+});
+
+describe("BudgetGatedLLMProvider.name", () => {
+  it("identifies itself with a budget-gated prefix", () => {
+    const inner = new StubProvider({ input_tokens: 0, output_tokens: 0 });
+    const budget = new DailyTokenBudget({
+      store: new InMemoryUsageStore(),
+      daily_limit_tokens: 0,
+    });
+    const gated = new BudgetGatedLLMProvider({ inner, budget });
+    expect(gated.name).toBe("budget-gated:stub");
+  });
+});
+
+// Sanity: ensure tier ladder uses Sonnet from pricing module (not duplicated).
+describe("Tier sanity", () => {
+  it("Sonnet baseline downgrades to Haiku", async () => {
+    const store = new InMemoryUsageStore();
+    const budget = new DailyTokenBudget({
+      store,
+      daily_limit_tokens: 1000,
+      models: {
+        tutor: ANTHROPIC_SONNET,
+        interviewer: ANTHROPIC_SONNET,
+        reflection: ANTHROPIC_SONNET,
+        grader: ANTHROPIC_HAIKU,
+        router: ANTHROPIC_HAIKU,
+      },
+    });
+    await budget.record("u1", 900);
+    const inner = new StubProvider({ input_tokens: 0, output_tokens: 0 });
+    const gated = new BudgetGatedLLMProvider({ inner, budget });
+    await gated.complete({
+      messages: [{ role: "user", content: "hi" }],
+      role: "tutor",
+      user_id: "u1",
+      max_tokens: 16,
+      temperature: 0,
+    });
+    expect(inner.lastCompleteReq?.model).toBe(ANTHROPIC_HAIKU);
+  });
+});
diff --git a/packages/llm/src/budget-gated-provider.ts b/packages/llm/src/budget-gated-provider.ts
new file mode 100644
index 0000000..f203893
--- /dev/null
+++ b/packages/llm/src/budget-gated-provider.ts
@@ -0,0 +1,83 @@
+import type { DailyTokenBudget } from "./budget.js";
+import type { LLMProvider } from "./provider.js";
+import type {
+  CompleteRequest,
+  CompleteResponse,
+  EmbedRequest,
+  EmbedResponse,
+  StreamChunk,
+  ToolCallRequest,
+  ToolCallResponse,
+} from "./types.js";
+
+export interface BudgetGatedLLMProviderOptions {
+  inner: LLMProvider;
+  budget: DailyTokenBudget;
+}
+
+// Decorator that wraps an LLMProvider with a per-user daily token budget. Three effects:
+// 1. Pre-call: throws TokenBudgetExceededError if the user is already at/over their daily limit.
+// 2. Pre-call: when no explicit model was requested, may downgrade the resolved model by one tier
+//    (Opus → Sonnet → Haiku) once the user crosses the configured threshold (default 80%).
+// 3. Post-call: records actual input+output tokens so subsequent calls see updated state.
+//
+// The inner provider stays unaware of the budget — its telemetry sink still fires unmodified, so
+// downgrade decisions are observable via the per-event `model` field. Embed calls are passed through
+// without budget gating (no per-user usage attribution exists for embeddings yet).
+export class BudgetGatedLLMProvider implements LLMProvider {
+  readonly name: string;
+  private readonly inner: LLMProvider;
+  private readonly budget: DailyTokenBudget;
+
+  constructor(opts: BudgetGatedLLMProviderOptions) {
+    this.inner = opts.inner;
+    this.budget = opts.budget;
+    this.name = `budget-gated:${opts.inner.name}`;
+  }
+
+  async complete(req: CompleteRequest): Promise<CompleteResponse> {
+    await this.budget.assertWithinBudget(req.user_id);
+    const decision = await this.budget.decideModel({
+      user_id: req.user_id ?? 
"", + ...(req.role !== undefined && { role: req.role }), + ...(req.model !== undefined && { explicit_model: req.model }), + }); + const next: CompleteRequest = { ...req, model: decision.model }; + const res = await this.inner.complete(next); + await this.budget.record(req.user_id, res.usage.input_tokens + res.usage.output_tokens); + return res; + } + + async *stream(req: CompleteRequest): AsyncIterable { + await this.budget.assertWithinBudget(req.user_id); + const decision = await this.budget.decideModel({ + user_id: req.user_id ?? "", + ...(req.role !== undefined && { role: req.role }), + ...(req.model !== undefined && { explicit_model: req.model }), + }); + const next: CompleteRequest = { ...req, model: decision.model }; + let approxOutputTokens = 0; + for await (const chunk of this.inner.stream(next)) { + if (!chunk.done) approxOutputTokens += Math.max(1, Math.ceil(chunk.delta.length / 4)); + yield chunk; + } + await this.budget.record(req.user_id, approxOutputTokens); + } + + embed(req: EmbedRequest): Promise { + return this.inner.embed(req); + } + + async toolCall(req: ToolCallRequest): Promise { + await this.budget.assertWithinBudget(req.user_id); + const decision = await this.budget.decideModel({ + user_id: req.user_id ?? "", + ...(req.role !== undefined && { role: req.role }), + ...(req.model !== undefined && { explicit_model: req.model }), + }); + const next: ToolCallRequest = { ...req, model: decision.model }; + const res = await this.inner.toolCall(next); + await this.budget.record(req.user_id, res.usage.input_tokens + res.usage.output_tokens); + return res; + } +} diff --git a/packages/llm/src/budget.test.ts b/packages/llm/src/budget.test.ts new file mode 100644 index 0000000..325d23a --- /dev/null +++ b/packages/llm/src/budget.test.ts @@ -0,0 +1,218 @@ +import { describe, expect, it } from "vitest"; +import { DailyTokenBudget, InMemoryUsageStore, MODEL_TIERS } from "./budget.js"; +import { ANTHROPIC_HAIKU, ANTHROPIC_OPUS } from "./models.js"; +import { ANTHROPIC_SONNET } from "./pricing.js"; +import { TokenBudgetExceededError } from "./errors.js"; + +describe("InMemoryUsageStore", () => { + it("returns 0 for an unseen user", async () => { + const store = new InMemoryUsageStore(); + expect(await store.today("u1")).toBe(0); + }); + + it("accumulates tokens for the same user/day", async () => { + const store = new InMemoryUsageStore(); + const day = new Date("2026-04-26T12:00:00Z"); + await store.record("u1", 100, day); + await store.record("u1", 250, day); + expect(await store.today("u1", day)).toBe(350); + }); + + it("partitions buckets by UTC date", async () => { + const store = new InMemoryUsageStore(); + const d1 = new Date("2026-04-26T23:59:00Z"); + const d2 = new Date("2026-04-27T00:01:00Z"); + await store.record("u1", 100, d1); + await store.record("u1", 50, d2); + expect(await store.today("u1", d1)).toBe(100); + expect(await store.today("u1", d2)).toBe(50); + }); + + it("partitions buckets by user", async () => { + const store = new InMemoryUsageStore(); + const day = new Date("2026-04-26T12:00:00Z"); + await store.record("u1", 100, day); + await store.record("u2", 999, day); + expect(await store.today("u1", day)).toBe(100); + expect(await store.today("u2", day)).toBe(999); + }); +}); + +describe("DailyTokenBudget.assertWithinBudget", () => { + it("is a no-op when limit is 0 (unlimited / self-hosted default)", async () => { + const budget = new DailyTokenBudget({ + store: new InMemoryUsageStore(), + daily_limit_tokens: 0, + }); + await budget.record("u1", 999_999_999); + 
await expect(budget.assertWithinBudget("u1")).resolves.toBeUndefined(); + }); + + it("is a no-op when no user_id is provided (system call)", async () => { + const budget = new DailyTokenBudget({ + store: new InMemoryUsageStore(), + daily_limit_tokens: 1000, + }); + await expect(budget.assertWithinBudget(undefined)).resolves.toBeUndefined(); + }); + + it("throws TokenBudgetExceededError when used >= limit", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 1000); + await expect(budget.assertWithinBudget("u1")).rejects.toBeInstanceOf(TokenBudgetExceededError); + }); + + it("does not throw while under the limit", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 999); + await expect(budget.assertWithinBudget("u1")).resolves.toBeUndefined(); + }); +}); + +describe("DailyTokenBudget.decideModel", () => { + it("explicit model always wins (reason: explicit)", async () => { + const budget = new DailyTokenBudget({ + store: new InMemoryUsageStore(), + daily_limit_tokens: 1000, + }); + const r = await budget.decideModel({ + user_id: "u1", + role: "tutor", + explicit_model: "some-other-model", + }); + expect(r.model).toBe("some-other-model"); + expect(r.reason).toBe("explicit"); + }); + + it("returns baseline with reason=no_user when user_id is missing", async () => { + const budget = new DailyTokenBudget({ + store: new InMemoryUsageStore(), + daily_limit_tokens: 1000, + }); + const r = await budget.decideModel({ role: "tutor", user_id: "" }); + expect(r.model).toBe(ANTHROPIC_OPUS); + expect(r.reason).toBe("no_user"); + expect(r.tier).toBe("premium"); + }); + + it("returns baseline with reason=unlimited when limit is 0", async () => { + const budget = new DailyTokenBudget({ + store: new InMemoryUsageStore(), + daily_limit_tokens: 0, + }); + const r = await budget.decideModel({ user_id: "u1", role: "tutor" }); + expect(r.model).toBe(ANTHROPIC_OPUS); + expect(r.reason).toBe("unlimited"); + }); + + it("returns baseline with reason=under_threshold when ratio < 0.8", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 500); + const r = await budget.decideModel({ user_id: "u1", role: "tutor" }); + expect(r.model).toBe(ANTHROPIC_OPUS); + expect(r.reason).toBe("under_threshold"); + expect(r.ratio).toBe(0.5); + }); + + it("downgrades premium → mid when at the threshold", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 800); + const r = await budget.decideModel({ user_id: "u1", role: "tutor" }); + expect(r.model).toBe(ANTHROPIC_SONNET); + expect(r.tier).toBe("mid"); + expect(r.reason).toBe("downgraded"); + expect(r.ratio).toBe(0.8); + }); + + it("downgrades mid → cheap when at the threshold (router → Haiku stays cheap)", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ + store, + daily_limit_tokens: 1000, + models: { + tutor: ANTHROPIC_OPUS, + interviewer: ANTHROPIC_OPUS, + reflection: ANTHROPIC_OPUS, + grader: ANTHROPIC_SONNET, + router: ANTHROPIC_SONNET, + }, + }); + await budget.record("u1", 900); + const r = await budget.decideModel({ user_id: "u1", role: "router" }); + expect(r.model).toBe(ANTHROPIC_HAIKU); + 
expect(r.tier).toBe("cheap"); + expect(r.reason).toBe("downgraded"); + }); + + it("does not downgrade if baseline is already cheap (no tier below)", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 900); + const r = await budget.decideModel({ user_id: "u1", role: "router" }); + expect(r.model).toBe(ANTHROPIC_HAIKU); + expect(r.tier).toBe("cheap"); + expect(r.reason).toBe("downgraded"); + }); + + it("returns baseline (no downgrade) when baseline is not on the tier ladder", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ + store, + daily_limit_tokens: 1000, + models: { + tutor: "off-ladder-model", + interviewer: ANTHROPIC_OPUS, + reflection: ANTHROPIC_OPUS, + grader: ANTHROPIC_HAIKU, + router: ANTHROPIC_HAIKU, + }, + }); + await budget.record("u1", 900); + const r = await budget.decideModel({ user_id: "u1", role: "tutor" }); + expect(r.model).toBe("off-ladder-model"); + expect(r.tier).toBeNull(); + expect(r.reason).toBe("under_threshold"); + }); + + it("custom downgrade_threshold is respected", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ + store, + daily_limit_tokens: 1000, + downgrade_threshold: 0.5, + }); + await budget.record("u1", 500); + const r = await budget.decideModel({ user_id: "u1", role: "tutor" }); + expect(r.reason).toBe("downgraded"); + expect(r.model).toBe(ANTHROPIC_SONNET); + }); +}); + +describe("DailyTokenBudget.record", () => { + it("ignores zero or negative tokens", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record("u1", 0); + await budget.record("u1", -5); + expect(await store.today("u1")).toBe(0); + }); + + it("ignores calls without a user_id", async () => { + const store = new InMemoryUsageStore(); + const budget = new DailyTokenBudget({ store, daily_limit_tokens: 1000 }); + await budget.record(undefined, 100); + expect(await store.today("anon")).toBe(0); + }); +}); + +describe("MODEL_TIERS", () => { + it("maps premium/mid/cheap to Opus/Sonnet/Haiku", () => { + expect(MODEL_TIERS.premium).toBe(ANTHROPIC_OPUS); + expect(MODEL_TIERS.mid).toBe(ANTHROPIC_SONNET); + expect(MODEL_TIERS.cheap).toBe(ANTHROPIC_HAIKU); + }); +}); diff --git a/packages/llm/src/budget.ts b/packages/llm/src/budget.ts new file mode 100644 index 0000000..7a1f064 --- /dev/null +++ b/packages/llm/src/budget.ts @@ -0,0 +1,168 @@ +import { ANTHROPIC_HAIKU, ANTHROPIC_OPUS, type RoleModelMap } from "./models.js"; +import { ANTHROPIC_SONNET } from "./pricing.js"; +import { TokenBudgetExceededError } from "./errors.js"; +import type { LLMRole } from "./types.js"; + +export interface DailyUsage { + user_id: string; + date: string; // YYYY-MM-DD in UTC + tokens: number; +} + +// UsageStore is the abstraction the budget tracker depends on. The DB-backed implementation +// (writes to the `agent_calls` table) lands when the schema migration ships — see the +// STORY-012 close-out. The in-memory impl is sufficient for tests and self-hosted no-budget mode. 
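+//
+// Worked example of the contract (mirrors the InMemoryUsageStore tests in budget.test.ts):
+// record("u1", 100) followed by record("u1", 250) on the same UTC day makes today("u1") resolve
+// to 350, while a record() after UTC midnight starts a fresh (user_id, UTC-date) bucket.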
+export interface UsageStore {
+  today(user_id: string, now?: Date): Promise<number>;
+  record(user_id: string, tokens: number, now?: Date): Promise<void>;
+}
+
+export class InMemoryUsageStore implements UsageStore {
+  private readonly buckets = new Map<string, number>();
+
+  async today(user_id: string, now: Date = new Date()): Promise<number> {
+    return this.buckets.get(this.key(user_id, now)) ?? 0;
+  }
+
+  async record(user_id: string, tokens: number, now: Date = new Date()): Promise<void> {
+    const k = this.key(user_id, now);
+    this.buckets.set(k, (this.buckets.get(k) ?? 0) + tokens);
+  }
+
+  private key(user_id: string, now: Date): string {
+    return `${user_id}|${now.toISOString().slice(0, 10)}`;
+  }
+}
+
+// Tier ladder used for graceful downgrades when a user nears their daily budget.
+// Indexed by name so callers can extend the map (e.g. add an `embed` tier later).
+export const MODEL_TIERS = {
+  premium: ANTHROPIC_OPUS,
+  mid: ANTHROPIC_SONNET,
+  cheap: ANTHROPIC_HAIKU,
+} as const;
+export type ModelTier = keyof typeof MODEL_TIERS;
+
+const TIER_ORDER: ModelTier[] = ["premium", "mid", "cheap"];
+
+export interface DailyTokenBudgetOptions {
+  store: UsageStore;
+  // 0 = unlimited (self-hosted default).
+  daily_limit_tokens: number;
+  // Threshold (0..1) at which to downgrade by one tier. Default 0.8.
+  downgrade_threshold?: number;
+  models?: RoleModelMap;
+  now?: () => Date;
+}
+
+export interface DecideModelInput {
+  user_id: string;
+  role?: LLMRole;
+  explicit_model?: string;
+}
+
+export interface DecideModelResult {
+  model: string;
+  tier: ModelTier | null;
+  reason: "explicit" | "no_user" | "unlimited" | "under_threshold" | "downgraded";
+  used_tokens: number;
+  ratio: number;
+}
+
+export class DailyTokenBudget {
+  private readonly store: UsageStore;
+  private readonly limit: number;
+  private readonly threshold: number;
+  private readonly models: RoleModelMap | undefined;
+  private readonly now: () => Date;
+
+  constructor(opts: DailyTokenBudgetOptions) {
+    this.store = opts.store;
+    this.limit = opts.daily_limit_tokens;
+    this.threshold = opts.downgrade_threshold ?? 0.8;
+    this.models = opts.models;
+    this.now = opts.now ?? (() => new Date());
+  }
+
+  // Throws TokenBudgetExceededError if the user has already hit their daily limit.
+  // No-op when limit is 0 (unlimited) or when no user_id is provided (self-hosted system call).
+  async assertWithinBudget(user_id: string | undefined): Promise<void> {
+    if (!user_id || this.limit === 0) return;
+    const used = await this.store.today(user_id, this.now());
+    if (used >= this.limit) {
+      throw new TokenBudgetExceededError(user_id, used, this.limit);
+    }
+  }
+
+  // Pick the model to use. Downgrades by one tier when at/over the threshold.
+  // Explicit model always wins (caller has opted out of the budget controller).
+  async decideModel(input: DecideModelInput): Promise<DecideModelResult> {
+    if (input.explicit_model) {
+      return {
+        model: input.explicit_model,
+        tier: tierForModel(input.explicit_model),
+        reason: "explicit",
+        used_tokens: 0,
+        ratio: 0,
+      };
+    }
+    const baseline = baselineModel(input.role, this.models);
+    if (!input.user_id || this.limit === 0) {
+      return {
+        model: baseline,
+        tier: tierForModel(baseline),
+        reason: input.user_id ? 
"unlimited" : "no_user", + used_tokens: 0, + ratio: 0, + }; + } + const used = await this.store.today(input.user_id, this.now()); + const ratio = used / this.limit; + const baselineTier = tierForModel(baseline); + if (ratio < this.threshold || baselineTier === null) { + return { + model: baseline, + tier: baselineTier, + reason: "under_threshold", + used_tokens: used, + ratio, + }; + } + const downgraded = downgradeOneTier(baselineTier); + return { + model: MODEL_TIERS[downgraded], + tier: downgraded, + reason: "downgraded", + used_tokens: used, + ratio, + }; + } + + async record(user_id: string | undefined, tokens: number): Promise { + if (!user_id || tokens <= 0) return; + await this.store.record(user_id, tokens, this.now()); + } +} + +function tierForModel(model: string): ModelTier | null { + for (const tier of TIER_ORDER) { + if (MODEL_TIERS[tier] === model) return tier; + } + return null; +} + +function downgradeOneTier(tier: ModelTier): ModelTier { + const idx = TIER_ORDER.indexOf(tier); + if (idx < 0 || idx === TIER_ORDER.length - 1) return "cheap"; + return TIER_ORDER[idx + 1] as ModelTier; +} + +function baselineModel(role: LLMRole | undefined, map: RoleModelMap | undefined): string { + if (role && map) return map[role]; + if (role) { + return role === "tutor" || role === "interviewer" || role === "reflection" + ? ANTHROPIC_OPUS + : ANTHROPIC_HAIKU; + } + return ANTHROPIC_HAIKU; +} diff --git a/packages/llm/src/index.ts b/packages/llm/src/index.ts index 724c57d..dcc4f53 100644 --- a/packages/llm/src/index.ts +++ b/packages/llm/src/index.ts @@ -38,6 +38,33 @@ export { export { InMemoryLLMTelemetrySink, NullLLMTelemetrySink } from "./telemetry.js"; +export { + ANTHROPIC_SONNET, + MODEL_PRICING, + PRICING_VERSION, + costFor, + type CostInput, + type CostResult, + type ModelPrice, +} from "./pricing.js"; + +export { + DailyTokenBudget, + InMemoryUsageStore, + MODEL_TIERS, + type DailyTokenBudgetOptions, + type DailyUsage, + type DecideModelInput, + type DecideModelResult, + type ModelTier, + type UsageStore, +} from "./budget.js"; + +export { + BudgetGatedLLMProvider, + type BudgetGatedLLMProviderOptions, +} from "./budget-gated-provider.js"; + export { ChatMessageSchema, ChatRoleSchema, diff --git a/packages/llm/src/pricing.test.ts b/packages/llm/src/pricing.test.ts new file mode 100644 index 0000000..49c8181 --- /dev/null +++ b/packages/llm/src/pricing.test.ts @@ -0,0 +1,52 @@ +import { describe, expect, it } from "vitest"; +import { ANTHROPIC_HAIKU, ANTHROPIC_OPUS } from "./models.js"; +import { ANTHROPIC_SONNET, costFor, MODEL_PRICING, PRICING_VERSION } from "./pricing.js"; + +describe("costFor", () => { + it("computes Opus cost: 1M input + 1M output → $15 + $75 = $90", () => { + const r = costFor({ + model: ANTHROPIC_OPUS, + input_tokens: 1_000_000, + output_tokens: 1_000_000, + }); + expect(r.cost_usd).toBe(90); + expect(r.known_model).toBe(true); + expect(r.pricing_version).toBe(PRICING_VERSION); + }); + + it("computes Haiku cost: 1k input + 1k output → $0.000001 * (1k + 5k)", () => { + const r = costFor({ + model: ANTHROPIC_HAIKU, + input_tokens: 1_000, + output_tokens: 1_000, + }); + // (1000 * 1 + 1000 * 5) / 1_000_000 = 0.006 + expect(r.cost_usd).toBe(0.006); + expect(r.known_model).toBe(true); + }); + + it("computes Sonnet cost (rounded to 6 decimals)", () => { + const r = costFor({ model: ANTHROPIC_SONNET, input_tokens: 500, output_tokens: 250 }); + // (500 * 3 + 250 * 15) / 1M = 0.00525 + expect(r.cost_usd).toBe(0.00525); + }); + + it("returns cost=0 + known_model=false 
for unknown models, never throws", () => {
+    const r = costFor({ model: "made-up-model", input_tokens: 100, output_tokens: 100 });
+    expect(r.cost_usd).toBe(0);
+    expect(r.known_model).toBe(false);
+    expect(r.pricing_version).toBe(PRICING_VERSION);
+  });
+
+  it("stamps every result with the same PRICING_VERSION constant", () => {
+    expect(
+      costFor({ model: ANTHROPIC_OPUS, input_tokens: 0, output_tokens: 0 }).pricing_version,
+    ).toBe(PRICING_VERSION);
+  });
+
+  it("includes Opus, Sonnet, and Haiku in the pricing table", () => {
+    expect(MODEL_PRICING[ANTHROPIC_OPUS]).toBeDefined();
+    expect(MODEL_PRICING[ANTHROPIC_SONNET]).toBeDefined();
+    expect(MODEL_PRICING[ANTHROPIC_HAIKU]).toBeDefined();
+  });
+});
diff --git a/packages/llm/src/pricing.ts b/packages/llm/src/pricing.ts
new file mode 100644
index 0000000..3ee7f02
--- /dev/null
+++ b/packages/llm/src/pricing.ts
@@ -0,0 +1,53 @@
+import { ANTHROPIC_HAIKU, ANTHROPIC_OPUS } from "./models.js";
+
+export const ANTHROPIC_SONNET = "claude-sonnet-4-6";
+
+export interface ModelPrice {
+  input_per_mtok: number;
+  output_per_mtok: number;
+}
+
+export const PRICING_VERSION = "2026-04-26";
+
+// USD per 1M tokens. Anchored to Anthropic public list prices snapshot at PRICING_VERSION.
+// When prices change, bump PRICING_VERSION and append a new constant — never mutate in place.
+// Rows for OpenAI / Ollama land when their providers do.
+export const MODEL_PRICING: Record<string, ModelPrice> = {
+  [ANTHROPIC_OPUS]: { input_per_mtok: 15, output_per_mtok: 75 },
+  [ANTHROPIC_SONNET]: { input_per_mtok: 3, output_per_mtok: 15 },
+  [ANTHROPIC_HAIKU]: { input_per_mtok: 1, output_per_mtok: 5 },
+};
+
+export interface CostInput {
+  model: string;
+  input_tokens: number;
+  output_tokens: number;
+}
+
+export interface CostResult {
+  cost_usd: number;
+  pricing_version: string;
+  known_model: boolean;
+}
+
+// Compute cost without throwing on unknown models — pricing-table drift should not break the
+// runtime path. Unknown models record cost=0 and known_model=false; an analytics dashboard can
+// flag this for the operator to update MODEL_PRICING.
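+//
+// Worked example at PRICING_VERSION "2026-04-26": Haiku ($1 in / $5 out per MTok) with 11 input
+// and 7 output tokens costs (11 * 1 + 7 * 5) / 1_000_000 = $0.000046, the exact figure the
+// telemetry test in anthropic.test.ts asserts.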
+export function costFor(input: CostInput): CostResult {
+  const price = MODEL_PRICING[input.model];
+  if (price === undefined) {
+    return { cost_usd: 0, pricing_version: PRICING_VERSION, known_model: false };
+  }
+  const cost =
+    (input.input_tokens * price.input_per_mtok + input.output_tokens * price.output_per_mtok) /
+    1_000_000;
+  return {
+    cost_usd: round6(cost),
+    pricing_version: PRICING_VERSION,
+    known_model: true,
+  };
+}
+
+function round6(n: number): number {
+  return Math.round(n * 1_000_000) / 1_000_000;
+}
diff --git a/packages/llm/src/types.ts b/packages/llm/src/types.ts
index 1d39a81..1846bb3 100644
--- a/packages/llm/src/types.ts
+++ b/packages/llm/src/types.ts
@@ -28,6 +28,7 @@ export const CompleteRequestSchema = z.object({
temperature: z.number().min(0).max(2).default(0.7),
user_id: z.string().optional(),
prompt_version: z.string().optional(),
+  session_id: z.string().optional(),
});
export type CompleteRequest = z.infer<typeof CompleteRequestSchema>;
@@ -101,9 +102,14 @@ export const LLMTelemetryEventSchema = z.object({
role: LLMRoleSchema.optional(),
prompt_version: z.string().optional(),
user_id: z.string().optional(),
+  session_id: z.string().optional(),
task: z.enum(["complete", "stream", "embed", "tool_call"]),
input_tokens: z.number().int().min(0),
output_tokens: z.number().int().min(0),
+  cached_tokens: z.number().int().min(0).optional(),
+  cost_usd: z.number().min(0),
+  pricing_version: z.string(),
+  tool_used: z.string().optional(),
latency_ms: z.number().int().min(0),
ok: z.boolean(),
decided_at: z.string(),
diff --git a/project/BOARD.md b/project/BOARD.md
index f8b8303..313a9cc 100644
--- a/project/BOARD.md
+++ b/project/BOARD.md
@@ -1,6 +1,6 @@
# LearnPro Board
-> **Last updated:** 2026-04-26 (STORY-006 done — Monaco-based `/playground` page in `apps/web` with language selector + Run button + result panel; wiring path browser → Next.js Route Handler `/api/sandbox/run` → Fastify `/sandbox/run`. Re-scoped on pickup: WebSocket streaming split into [STORY-059](./stories/STORY-059-sandbox-streaming.md); Submit/hidden-tests deferred to [STORY-016](./stories/STORY-016-seed-bank.md); problem-language follow rewires when STORY-016 lands.)
+> **Last updated:** 2026-04-26 (STORY-012 done — versioned `MODEL_PRICING` table + `costFor()`, per-user `DailyTokenBudget` with Opus → Sonnet → Haiku tier ladder, `BudgetGatedLLMProvider` decorator wrapping any `LLMProvider`, `LLMTelemetryEvent` extended with `cost_usd`/`pricing_version`/`session_id`/`tool_used`/`cached_tokens`. DB-backed sink + `agent_calls` migration split into [STORY-060](./stories/STORY-060-agent-calls-db-sink.md) so STORY-012 stays at S.)
> **How to read this:** This is the live status of every Epic, Story, and Task in the project. Hand-maintained for now (a regenerator script lives in the v1 backlog). When you change an item's `status:` frontmatter, also update the row here in the same commit.
---
@@ -31,7 +31,6 @@ Path A locked 2026-04-25. 
EPIC-019 (foundation) must land first since every othe |----|-------|------|-------|----------|-----| | [STORY-010](stories/STORY-010-sandbox-hardening.md) | Verify sandbox hardening checklist (no-net, ro rootfs, cgroups, seccomp, non-root) | EPIC-003 | mvp | P0 | M | | [STORY-011](stories/STORY-011-tutor-agent-tools.md) | Tutor agent with `assign-problem` / `give-hint` / `grade` / `update-profile` tools | EPIC-004 | mvp | P0 | L | -| [STORY-012](stories/STORY-012-cost-telemetry.md) | Per-call LLM cost & latency telemetry + per-user daily token budget | EPIC-004 | mvp | P0 | S | | [STORY-015](stories/STORY-015-session-plan.md) | Session plan agent (3–5 micro-objectives per session) | EPIC-006 | mvp | P0 | M | | [STORY-016](stories/STORY-016-seed-bank.md) | Curated seed problem bank (~30 Python + ~30 TS) with hidden tests | EPIC-007 | mvp | P0 | L | | [STORY-017](stories/STORY-017-hint-ladder.md) | 3-rung hint ladder | EPIC-007 | mvp | P0 | S | @@ -48,6 +47,7 @@ Path A locked 2026-04-25. EPIC-019 (foundation) must land first since every othe | [STORY-054](stories/STORY-054-adaptive-autonomy-controller.md) | Adaptive autonomy controller (per-user confidence → Low/Medium/High ask-vs-act bands) | EPIC-004 | mvp | P0 | M | | [STORY-055](stories/STORY-055-rich-interaction-telemetry-schema.md) | Rich interaction telemetry schema (cursor focus, voice opt-in, edits/reverts → `interactions` table) | EPIC-005 | mvp | P0 | M | | [STORY-056](stories/STORY-056-data-retention-and-redaction.md) | Data retention & redaction pipeline (raw 90d / voice 30d / episodes indefinite + PII redaction) | EPIC-016 | mvp | P0 | M | +| [STORY-060](stories/STORY-060-agent-calls-db-sink.md) | DB-backed `UsageStore` + `agent_calls` table (split from STORY-012) | EPIC-004 | mvp | P0 | S | --- @@ -90,10 +90,11 @@ These stories were filed during EPIC-017 Phase C from the expanded idea catalog ## Recently Done -STORY-006 (Monaco editor + Run button + result panel) landed 2026-04-26 — first user-facing feature in `apps/web`. STORY-008 (TypeScript sandbox runner via Piston) landed 2026-04-26. STORY-007 (Python sandbox runner via Piston) landed 2026-04-26 (PR #14) — first feature Story under EPIC-003. STORY-013 (learner profile schema) landed 2026-04-26 (PR #11) — first feature Story under EPIC-005. STORY-009 (LLM gateway) landed 2026-04-26 (PR #9) — first feature Story under EPIC-004. EPIC-019 (foundation) closed 2026-04-26 with STORY-052 (monorepo skeleton, PR #5) and STORY-057 (policy adapters, PR #7). GitHub repo + PR workflow landed 2026-04-25 (PR #1, STORY-058). EPIC-017 product grooming closed in full on 2026-04-25 (Phases A + B + C). EPIC-001 closed on 2026-04-25 (initial scaffolding commit `c1e17a1`). Phase A commit: `bbf7300`. +STORY-012 (per-call LLM cost telemetry + per-user daily token budget) landed 2026-04-26 — versioned `MODEL_PRICING` + `costFor()` calculator, `DailyTokenBudget` with Opus → Sonnet → Haiku tier ladder + downgrade at 80%, `BudgetGatedLLMProvider` decorator. DB-backed sink + `agent_calls` migration split into [STORY-060](./stories/STORY-060-agent-calls-db-sink.md). STORY-006 (Monaco editor + Run button + result panel) landed 2026-04-26 — first user-facing feature in `apps/web`. STORY-008 (TypeScript sandbox runner via Piston) landed 2026-04-26. STORY-007 (Python sandbox runner via Piston) landed 2026-04-26 (PR #14) — first feature Story under EPIC-003. STORY-013 (learner profile schema) landed 2026-04-26 (PR #11) — first feature Story under EPIC-005. 
STORY-009 (LLM gateway) landed 2026-04-26 (PR #9) — first feature Story under EPIC-004. EPIC-019 (foundation) closed 2026-04-26 with STORY-052 (monorepo skeleton, PR #5) and STORY-057 (policy adapters, PR #7). GitHub repo + PR workflow landed 2026-04-25 (PR #1, STORY-058). EPIC-017 product grooming closed in full on 2026-04-25 (Phases A + B + C). EPIC-001 closed on 2026-04-25 (initial scaffolding commit `c1e17a1`). Phase A commit: `bbf7300`. | ID | Title | Done | |----|-------|------| +| [STORY-012](stories/STORY-012-cost-telemetry.md) | Per-call LLM cost & latency telemetry + per-user daily token budget (DB sink → STORY-060) | 2026-04-26 | | [STORY-006](stories/STORY-006-monaco-editor.md) | Monaco editor + Run button + result panel (`/playground` → Next.js proxy → Fastify `/sandbox/run`) | 2026-04-26 | | [STORY-008](stories/STORY-008-typescript-runner.md) | TypeScript sandbox runner via Piston (TS-specific unit/integration/API tests on top of STORY-007 infra) | 2026-04-26 | | [STORY-007](stories/STORY-007-python-runner.md) | Python sandbox runner via Piston (`SandboxProvider` + `PistonSandboxProvider` + `POST /sandbox/run`) | 2026-04-26 | diff --git a/project/stories/STORY-012-cost-telemetry.md b/project/stories/STORY-012-cost-telemetry.md index e6ba151..5f8086e 100644 --- a/project/stories/STORY-012-cost-telemetry.md +++ b/project/stories/STORY-012-cost-telemetry.md @@ -2,14 +2,14 @@ id: STORY-012 title: Per-call LLM cost & latency telemetry + per-user daily token budget type: story -status: backlog +status: done priority: P0 estimate: S parent: EPIC-004 phase: mvp tags: [llm, telemetry, cost-control] created: 2026-04-25 -updated: 2026-04-25 +updated: 2026-04-26 --- ## Description @@ -22,20 +22,23 @@ Goes through the `Telemetry` adapter from EPIC-015 (console impl in MVP, OpenTel ## Acceptance criteria -- [ ] `agent_calls` table records all 10 fields above. -- [ ] Daily token budget is enforced server-side (not just UI). -- [ ] Graceful model-downgrade kicks in at 80% consumption. -- [ ] At 100%, user sees a friendly message, not an error stack trace. -- [ ] Cost calculation uses a versioned price table per model (so price changes don't silently break analytics). +- [x] **All telemetry fields are recorded** — `LLMTelemetryEvent` carries `provider`, `model`, `role`, `user_id`, `session_id` (new), `task`, `input_tokens`, `output_tokens`, `cached_tokens` (optional, for prompt-cache later), `cost_usd` (new), `pricing_version` (new), `tool_used` (optional, populated for tool calls), `latency_ms`, `ok`, `decided_at`, `prompt_version`. The `agent_calls` *table* + DB-backed sink land in [STORY-060](./STORY-060-agent-calls-db-sink.md) with the next batch of DB migrations — the schema and emission point are done; only persistence is split. +- [x] Daily token budget is enforced server-side (`BudgetGatedLLMProvider` decorator in `@learnpro/llm` — pre-call `assertWithinBudget` + post-call `record`, applied at the provider layer so any caller goes through it). +- [x] Graceful model-downgrade kicks in at the threshold (default 80%): `DailyTokenBudget.decideModel` walks the `MODEL_TIERS` ladder (premium=Opus → mid=Sonnet → cheap=Haiku) and downgrades by one tier when usage ≥ threshold. Explicit `req.model` always wins. +- [x] At 100%, `TokenBudgetExceededError` is thrown with a human-friendly message (`"Daily token budget exceeded for user X: used Y / limit Z"`). API-side mapping to a 429 + JSON body lands in [STORY-060](./STORY-060-agent-calls-db-sink.md) with the auth wiring. 
+- [x] Cost calculation uses a versioned price table — `MODEL_PRICING` in `packages/llm/src/pricing.ts`, stamped with `PRICING_VERSION = "2026-04-26"`. Append-only convention: bump the version + add a new row when prices change, never mutate in place. Unknown models record `cost_usd=0` + `known_model=false` so analytics can flag a stale pricing table for the operator without breaking the runtime path.

## Dependencies

-- Blocked by: STORY-009 (LLM gateway).
+- Blocked by: STORY-009 (LLM gateway). ✅
+- Spawned: [STORY-060](./STORY-060-agent-calls-db-sink.md) — DB-backed `UsageStore` + `agent_calls` Drizzle migration + API 429 mapping. Kept separate so STORY-012 stays at S; STORY-060 lands with the next DB Story so the migrations ship as one batch.

## Tasks

-(To be created when work begins.)
+(Tracked inline in the activity log.)

## Activity log

- 2026-04-25 — created
+- 2026-04-26 — picked up. Built versioned cost calculator (`pricing.ts` + `pricing.test.ts`), per-user daily budget tracker (`budget.ts` + `budget.test.ts` — `UsageStore` interface + `InMemoryUsageStore` + `DailyTokenBudget` with explicit/no_user/unlimited/under_threshold/downgraded reasons), and decorator pattern wrapping any `LLMProvider` (`budget-gated-provider.ts` + `budget-gated-provider.test.ts`). Extended `LLMTelemetryEventSchema` with `cost_usd`, `pricing_version`, optional `session_id`/`cached_tokens`/`tool_used`. Wired `costFor()` into `AnthropicProvider.recordTelemetry` so every call now stamps cost + version + tool name. Total: 38 new tests across 3 files, all green; 72 tests passing in `@learnpro/llm`.
+- 2026-04-26 — done. Filed [STORY-060](./STORY-060-agent-calls-db-sink.md) for the deferred DB persistence layer (Drizzle migration + `DrizzleLLMTelemetrySink` + `DrizzleUsageStore` + API 429 mapping). Interfaces (`UsageStore`, `LLMTelemetrySink`) are stable; STORY-060 just adds Drizzle impls behind them.
diff --git a/project/stories/STORY-060-agent-calls-db-sink.md b/project/stories/STORY-060-agent-calls-db-sink.md
new file mode 100644
index 0000000..fb79d64
--- /dev/null
+++ b/project/stories/STORY-060-agent-calls-db-sink.md
@@ -0,0 +1,58 @@
+---
+id: STORY-060
+title: DB-backed `UsageStore` + `agent_calls` table (split from STORY-012)
+type: story
+status: backlog
+priority: P0
+estimate: S
+parent: EPIC-004
+phase: mvp
+tags: [llm, telemetry, db, drizzle]
+created: 2026-04-26
+updated: 2026-04-26
+---
+
+## Description
+
+[STORY-012](./STORY-012-cost-telemetry.md) shipped the cost calculator (`costFor` + versioned `MODEL_PRICING`), the `LLMTelemetryEvent` schema (now carries `cost_usd`, `pricing_version`, optional `session_id` / `cached_tokens` / `tool_used`), and the per-user daily token budget (`DailyTokenBudget` + `BudgetGatedLLMProvider` decorator). What it deliberately did **not** ship is the persistence layer behind both:
+
+1. **`agent_calls` table** — the sink that records every `LLMTelemetryEvent` so we can answer "what does a typical learning session cost?" before the AWS bill answers it for us. Today the in-process `InMemoryLLMTelemetrySink` is sufficient for tests and short demos; production needs Postgres.
+2. **DB-backed `UsageStore`** — the budget tracker depends on a `UsageStore` interface; the in-memory impl handles tests and self-hosted no-budget mode (limit=0), but a multi-process API needs a shared bucket per `(user_id, UTC date)`.
+
+Splitting this out keeps STORY-012 within its S estimate (interface + decorator + tests, no Drizzle migration) and lets the schema change move with the next batch of DB-touching Stories.
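+
+A rough sketch of the intended API wiring (the function name `gateWithDailyBudget` and the env-var plumbing are illustrative, not final; only the `@learnpro/llm` pieces shipped by STORY-012 exist today, and the sketch assumes the package also exports its `LLMProvider` type — see the scope list below for the real deliverables):
+
+```ts
+import {
+  BudgetGatedLLMProvider,
+  DailyTokenBudget,
+  type LLMProvider,
+  type UsageStore,
+} from "@learnpro/llm";
+
+// `store` will be this Story's DrizzleUsageStore; any UsageStore implementation works.
+export function gateWithDailyBudget(inner: LLMProvider, store: UsageStore): LLMProvider {
+  const limit = Number(process.env["LEARNPRO_DAILY_TOKEN_LIMIT"] ?? "0");
+  if (limit <= 0) return inner; // self-hosted default: 0 = unlimited, no wrapper at all
+  const budget = new DailyTokenBudget({ store, daily_limit_tokens: limit });
+  return new BudgetGatedLLMProvider({ inner, budget });
+}
+```
+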
+ +## MVP scope (this Story) + +- Drizzle migration for `agent_calls` (cols match `LLMTelemetryEvent` + `org_id`, `id`, `created_at`). +- `DrizzleLLMTelemetrySink` — implements `LLMTelemetrySink`, INSERTs one row per event, never throws (errors logged + dropped so a telemetry outage can't kill an LLM call). +- `DrizzleUsageStore` — implements `UsageStore`. `today()` runs `SELECT sum(input_tokens + output_tokens) FROM agent_calls WHERE user_id=$1 AND created_at >= $2 (start-of-UTC-day)`. `record()` is a no-op (rows are written by the telemetry sink — single source of truth). +- API wiring: when `LEARNPRO_DAILY_TOKEN_LIMIT > 0` is set, `buildLLMProvider` wraps the AnthropicProvider with `BudgetGatedLLMProvider` using `DrizzleUsageStore`. Self-hosted default is 0 (unlimited). +- Friendly 429 mapping: API serializes `TokenBudgetExceededError` as `{ error: "daily_budget_exceeded", message: "..." }` (status 429) so the playground can render the friendly message AC from STORY-012. + +## Out of scope (file separately if needed) + +- Per-org budgets (only per-user for MVP). +- Aggregate dashboards / cost analytics UI — depends on a stats route + admin shell that don't exist yet. +- Cached-prompt / prompt-cache aware accounting (`cached_tokens` column is wired but not yet populated by the Anthropic transport). + +## Acceptance criteria + +- [ ] `agent_calls` Drizzle migration lands in `packages/db` with all `LLMTelemetryEvent` fields + `id`, `org_id`, `created_at`. +- [ ] `DrizzleLLMTelemetrySink` writes one row per event; failures are logged but never thrown. +- [ ] `DrizzleUsageStore.today()` aggregates today's tokens per user against UTC midnight; covered by an integration test against a real Postgres (Docker Compose). +- [ ] API exposes `GET /llm/usage/today` returning `{ used_tokens, limit_tokens, ratio }` for the authenticated user (used by the UI nag at >80%, friendly block at 100%). +- [ ] When the budget is exceeded, the API responds 429 with `{ error: "daily_budget_exceeded", message: "..." }` rather than letting `TokenBudgetExceededError` leak as a 500. +- [ ] Manual smoke: with `LEARNPRO_DAILY_TOKEN_LIMIT=100` and a real Anthropic key, hitting the playground twice triggers the friendly message on call #2. + +## Dependencies + +- Blocked by: STORY-005 (Auth.js — needs a `user_id` to attribute usage to) **or** a stub auth middleware that pins a fixed `user_id` for dev. Acceptable to land the table + sink without auth, with the API wiring deferred until STORY-005. +- Blocks: nothing structural, but deferring it past 100 daily users would be expensive. + +## Notes + +Filed during STORY-012 close-out (2026-04-26). The interfaces (`UsageStore`, `LLMTelemetrySink`) are already stable from STORY-012; this Story just adds the Drizzle implementations behind them. + +## Activity log + +- 2026-04-26 — created (split from STORY-012).
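+
+For the 429 mapping, something like the following Fastify error hook should do (a sketch only: the handler name and its placement are illustrative, and it assumes `TokenBudgetExceededError` is exported from `@learnpro/llm` alongside the existing error types):
+
+```ts
+import type { FastifyInstance } from "fastify";
+import { TokenBudgetExceededError } from "@learnpro/llm";
+
+export function mapBudgetErrors(app: FastifyInstance): void {
+  app.setErrorHandler((err, _req, reply) => {
+    if (err instanceof TokenBudgetExceededError) {
+      // Friendly block instead of a leaked 500 + stack trace (STORY-012 AC).
+      return reply.status(429).send({ error: "daily_budget_exceeded", message: err.message });
+    }
+    return reply.send(err); // fall back to Fastify's default error serialization
+  });
+}
+```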