khoks · khoks · Apr 26, 2026 · Apr 26, 2026
@@ -0,0 +1,166 @@
+import { describe, expect, it } from "vitest";
+import {
+  DEFAULT_DIFFICULTY_HEURISTIC,
+  difficultySignal,
+  episodeSuccessScore,
+  nextDifficulty,
+  updateSkillScore,
+  type EpisodeSignalInput,
+} from "./difficulty.js";
+import type { ConceptSkill } from "./policies/types.js";
+
+// Test fixture: a clean first-try pass finished in half the expected time.
+// Any field present in `overrides` replaces the matching baseline value.
+function ep(overrides: Partial<EpisodeSignalInput> = {}): EpisodeSignalInput {
+  const baseline: EpisodeSignalInput = {
+    passed: true,
+    reveal_clicked: false,
+    hints_used: 0,
+    submit_count: 1,
+    time_to_solve_ms: 60_000,
+    expected_time_ms: 120_000,
+  };
+  return { ...baseline, ...overrides };
+}
+
+// Test fixture: a mid-range skill record for the "list-comp" concept; `overrides` wins per field.
+function skill(overrides: Partial<ConceptSkill> = {}): ConceptSkill {
+  const baseline: ConceptSkill = {
+    concept_id: "list-comp",
+    skill: 0.5,
+    confidence: 0.3,
+    attempts: 5,
+  };
+  return { ...baseline, ...overrides };
+}
+
+describe("difficultySignal", () => {
+  // Expected values are read from the default config instead of being hard-coded, and compared
+  // with toBeCloseTo, so every case in this suite keeps tracking the defaults if they are retuned.
+  const defaults = DEFAULT_DIFFICULTY_HEURISTIC;
+
+  it("returns +correctness_bonus for a perfect under-time solve", () => {
+    const s = difficultySignal(ep({ time_to_solve_ms: 60_000 }));
+    expect(s).toBeCloseTo(defaults.correctness_bonus, 6);
+  });
+
+  it("clamps overtime contribution at the configured cap (no runaway negative signal)", () => {
+    const huge = difficultySignal(ep({ time_to_solve_ms: 1_000_000_000, passed: false }));
+    // overtime clamped to 1 → one full weight_overtime; no hints, no retries, no correctness bonus
+    expect(huge).toBeCloseTo(defaults.weight_overtime, 6);
+  });
+
+  it("clamps hints contribution at the configured cap", () => {
+    const s = difficultySignal(ep({ hints_used: 999, time_to_solve_ms: 60_000, passed: false }));
+    // hints clamped to 1 → one full weight_hint_usage; no overtime, no retries, no correctness bonus
+    expect(s).toBeCloseTo(defaults.weight_hint_usage, 6);
+  });
+});
+
+// Scenario tests for the tier step decision. The per-case arithmetic below uses the default
+// weights (overtime -0.5, hints -0.3, retries -0.2, correctness +0.3) and thresholds (±0.3).
+describe("nextDifficulty", () => {
+  it("perfect solve: easy → medium (step up)", () => {
+    // s = +0.3, exactly the default step_up_threshold — this passes only because the
+    // comparison in nextDifficulty is inclusive (>=).
+    const next = nextDifficulty("easy", ep({ time_to_solve_ms: 30_000 }));
+    expect(next).toBe("medium");
+  });
+
+  it("hint-heavy solve: same difficulty (no step in either direction)", () => {
+    // hints 2/3 → -0.2, one retry 1/4 → -0.05, correctness +0.3 → s ≈ +0.05 (between thresholds)
+    const next = nextDifficulty(
+      "medium",
+      ep({ hints_used: 2, submit_count: 2, time_to_solve_ms: 100_000 }),
+    );
+    expect(next).toBe("medium");
+  });
+
+  it("repeated failures (heavy struggle, didn't pass): hard → medium (step down)", () => {
+    // overtime ≈ 0.667 → ≈ -0.333, hints 2/3 → -0.2, retries 3/4 → -0.15, no bonus → s ≈ -0.683
+    const next = nextDifficulty(
+      "hard",
+      ep({
+        passed: false,
+        submit_count: 4,
+        hints_used: 2,
+        time_to_solve_ms: 200_000,
+      }),
+    );
+    expect(next).toBe("medium");
+  });
+
+  it("massive overtime (~3× expected) on a passed solve still steps down", () => {
+    // overtime clamped to 1 → -0.5, hints 1/3 → -0.1, one retry → -0.05, correctness +0.3 → s ≈ -0.35
+    const next = nextDifficulty(
+      "medium",
+      ep({ time_to_solve_ms: 360_000, hints_used: 1, submit_count: 2 }),
+    );
+    expect(next).toBe("easy");
+  });
+
+  it("under-time clean solve at expert stays expert (cap at top of ladder)", () => {
+    // s = +0.3 asks for a step up, but there is no tier above "expert"
+    const next = nextDifficulty("expert", ep({ time_to_solve_ms: 30_000 }));
+    expect(next).toBe("expert");
+  });
+
+  it("no-progress (failed, max hints, max retries, way overtime) at easy stays easy (cap at bottom)", () => {
+    // every struggle axis saturates (-0.5 - 0.3 - 0.2) and there is no bonus → s = -1.0,
+    // but there is no tier below "easy"
+    const next = nextDifficulty(
+      "easy",
+      ep({
+        passed: false,
+        hints_used: 3,
+        submit_count: 5,
+        time_to_solve_ms: 240_000,
+        reveal_clicked: true,
+      }),
+    );
+    expect(next).toBe("easy");
+  });
+
+  it("respects an operator-injected stricter step_up_threshold (no step up on a perfect solve)", () => {
+    // s = +0.3 no longer reaches the raised threshold of 0.5
+    const next = nextDifficulty("medium", ep({ time_to_solve_ms: 30_000 }), {
+      ...DEFAULT_DIFFICULTY_HEURISTIC,
+      step_up_threshold: 0.5,
+    });
+    expect(next).toBe("medium");
+  });
+});
+
+// Covers the per-episode success score: full credit, the two zero-credit cases, the linear
+// penalties (defaults: 0.15 per hint, 0.10 per retry), and the floor at 0.
+describe("episodeSuccessScore", () => {
+  it("clean solve = 1", () => {
+    expect(episodeSuccessScore(ep())).toBe(1);
+  });
+
+  it("revealed solution = 0 even if 'passed' is true", () => {
+    expect(episodeSuccessScore(ep({ reveal_clicked: true }))).toBe(0);
+  });
+
+  it("failed = 0", () => {
+    expect(episodeSuccessScore(ep({ passed: false }))).toBe(0);
+  });
+
+  it("hints + retries shave the score down (still positive)", () => {
+    const s = episodeSuccessScore(ep({ hints_used: 1, submit_count: 2 }));
+    // 1 - 1*0.15 (hint) - 1*0.10 (1 retry) = 0.75
+    expect(s).toBeCloseTo(0.75, 6);
+  });
+
+  it("excessive hints/retries floor at 0 (never negative)", () => {
+    // 1 - 99*0.15 - 98*0.10 is far below zero before the floor is applied
+    const s = episodeSuccessScore(ep({ hints_used: 99, submit_count: 99 }));
+    expect(s).toBe(0);
+  });
+});
+
+// Covers the skill-record update: the EWMA blend (default alpha 0.4), confidence growth toward
+// the default cap of 0.95, the attempts counter, and concept_id pass-through.
+describe("updateSkillScore", () => {
+  it("EWMA pulls skill toward 1 on a clean solve", () => {
+    const next = updateSkillScore(skill({ skill: 0.5 }), ep());
+    // 0.4 * 1 + 0.6 * 0.5 = 0.7
+    expect(next.skill).toBeCloseTo(0.7, 6);
+    // the fixture starts at 5 attempts
+    expect(next.attempts).toBe(6);
+  });
+
+  it("EWMA pulls skill toward 0 on a failed solve", () => {
+    const next = updateSkillScore(skill({ skill: 0.5 }), ep({ passed: false }));
+    // 0.4 * 0 + 0.6 * 0.5 = 0.3
+    expect(next.skill).toBeCloseTo(0.3, 6);
+  });
+
+  it("confidence grows asymptotically toward confidence_max", () => {
+    // each update closes 10% of the remaining gap to 0.95, so 100 updates end just under the cap
+    let s = skill({ confidence: 0 });
+    for (let i = 0; i < 100; i++) s = updateSkillScore(s, ep());
+    expect(s.confidence).toBeGreaterThan(0.94);
+    expect(s.confidence).toBeLessThanOrEqual(0.95);
+  });
+
+  it("clamps skill into [0, 1] (defensive — formula already keeps it bounded)", () => {
+    // NOTE(review): skill 1 blended with a clean solve stays at 1 (up to float rounding), so this
+    // checks that the bounds hold rather than forcing the clamp to trigger.
+    const next = updateSkillScore(skill({ skill: 1 }), ep());
+    expect(next.skill).toBeLessThanOrEqual(1);
+    expect(next.skill).toBeGreaterThanOrEqual(0);
+  });
+
+  it("preserves the concept_id of the previous record", () => {
+    const next = updateSkillScore(skill({ concept_id: "dict-comp" }), ep());
+    expect(next.concept_id).toBe("dict-comp");
+  });
+});
@@ -0,0 +1,133 @@
+import { z } from "zod";
+import { type ConceptSkill, type DifficultyTier } from "./policies/types.js";
+
+// Heuristic per-episode difficulty signal: a single number `s` derived from how the learner just
+// struggled (or didn't). Each struggle axis is normalized to [0, 1] before weighting, so with the
+// default weights `s` lies in [-1.0, +0.3]. The policy layer (see policies/difficulty-policy.ts)
+// keeps looking at multi-episode EWMAs to pick a tier from a problem catalog; these helpers are
+// the finer-grained per-episode step decision that callers can use directly between catalog lookups.
+//
+// Why heuristic, not learned: explicitly v1 territory. A learned model only earns its keep once we
+// have enough episodes to fit one. Heuristics are interpretable, debuggable, and good enough until
+// proven otherwise. See STORY-018 for the rationale.
+
+// Difficulty ladder ordered from easiest to hardest; `stepTier` moves along it by array index.
+export const TIER_ORDER: readonly DifficultyTier[] = ["easy", "medium", "hard", "expert"];
+
+// Tunable coefficients for the difficulty heuristic. Every field has a default, so parsing `{}`
+// yields the stock configuration.
+export const DifficultyHeuristicConfigSchema = z.object({
+  // Signal weights — overtime/hints/failures pull `s` negative (struggle); correctness pushes positive.
+  weight_overtime: z.number().default(-0.5),
+  weight_hint_usage: z.number().default(-0.3),
+  weight_failed_attempts: z.number().default(-0.2),
+  correctness_bonus: z.number().default(0.3),
+  // Step thresholds, both inclusive: `s >= step_up_threshold` → harder; `s <= step_down_threshold`
+  // → easier; otherwise same. With the defaults a clean solve scores exactly `correctness_bonus`
+  // (0.3), so it steps up only because the comparison is inclusive.
+  step_up_threshold: z.number().default(0.3),
+  step_down_threshold: z.number().default(-0.3),
+  // Normalization caps — ratios above these clamp to 1 (max struggle on that axis).
+  // NOTE(review): `overtime_cap_ratio` may be exactly 1, which leaves a zero-width ramp between
+  // "on time" and "capped" (the normalization span is `overtime_cap_ratio - 1`) — confirm whether
+  // the minimum should be strictly above 1.
+  overtime_cap_ratio: z.number().min(1).default(2.0),
+  hints_cap: z.number().int().min(1).default(3),
+  failed_attempts_cap: z.number().int().min(1).default(4),
+  // EWMA + Bayesian-flavored confidence growth for the per-concept skill score.
+  ewma_alpha: z.number().min(0).max(1).default(0.4),
+  confidence_growth: z.number().min(0).max(1).default(0.1),
+  confidence_max: z.number().min(0).max(1).default(0.95),
+  // Success-score deductions: one `hint_skill_penalty` per hint used, one `fail_skill_penalty`
+  // per submission after the first.
+  hint_skill_penalty: z.number().min(0).max(1).default(0.15),
+  fail_skill_penalty: z.number().min(0).max(1).default(0.1),
+});
+export type DifficultyHeuristicConfig = z.infer<typeof DifficultyHeuristicConfigSchema>;
+
+// Stock configuration: parsing an empty object makes every field take its schema default.
+// Used as the default `config` argument of the helpers in this module.
+export const DEFAULT_DIFFICULTY_HEURISTIC: DifficultyHeuristicConfig =
+  DifficultyHeuristicConfigSchema.parse({});
+
+// Observed facts about one completed episode; this is the input shape the helpers below consume.
+export const EpisodeSignalInputSchema = z.object({
+  passed: z.boolean(),
+  // A revealed solution cancels the correctness bonus and the success score even if `passed` is true.
+  reveal_clicked: z.boolean(),
+  hints_used: z.number().int().min(0),
+  // At least 1; the helpers treat `submit_count - 1` as the number of retries.
+  submit_count: z.number().int().min(1),
+  time_to_solve_ms: z.number().int().min(0),
+  // Strictly positive, so the overtime ratio is well-defined for schema-validated input.
+  expected_time_ms: z.number().int().positive(),
+});
+export type EpisodeSignalInput = z.infer<typeof EpisodeSignalInputSchema>;
+
+// Returns the per-episode difficulty signal `s`.
+// Negative = the learner struggled (slow, lots of hints, retries); positive = breezed through.
+//
+// Three struggle axes are each normalized to [0, 1] and multiplied by their weight:
+//   - overtime: 0 at or under the expected time, ramping to 1 at `overtime_cap_ratio` × expected;
+//   - hint usage: `hints_used / hints_cap`;
+//   - failed attempts: `(submit_count - 1) / failed_attempts_cap`.
+// `correctness_bonus` is added only for a pass without a revealed solution. Because every axis is
+// clamped, `s` is bounded by the weights: [-1.0, +0.3] with the default configuration.
+export function difficultySignal(
+  episode: EpisodeSignalInput,
+  config: DifficultyHeuristicConfig = DEFAULT_DIFFICULTY_HEURISTIC,
+): number {
+  const overtimeRatio = episode.time_to_solve_ms / episode.expected_time_ms;
+  // The schema allows `overtime_cap_ratio === 1`, i.e. a zero-width ramp. Dividing by that span
+  // would give NaN for an exactly on-time solve (0 / 0), and a NaN signal fails both threshold
+  // comparisons in `nextDifficulty`, silently disabling stepping. Treat a non-positive span as a
+  // hard switch instead: any overtime at all counts as maximum struggle on this axis.
+  const overtimeSpan = config.overtime_cap_ratio - 1;
+  let overtime: number;
+  if (overtimeSpan > 0) {
+    overtime = clamp01((overtimeRatio - 1) / overtimeSpan);
+  } else {
+    overtime = overtimeRatio > 1 ? 1 : 0;
+  }
+  const hintUsage = clamp01(episode.hints_used / config.hints_cap);
+  // The first submission is not a failure, hence `submit_count - 1`.
+  const failedAttempts = clamp01((episode.submit_count - 1) / config.failed_attempts_cap);
+  const correctness = episode.passed && !episode.reveal_clicked ? config.correctness_bonus : 0;
+  return (
+    config.weight_overtime * overtime +
+    config.weight_hint_usage * hintUsage +
+    config.weight_failed_attempts * failedAttempts +
+    correctness
+  );
+}
+
+// Returns the next difficulty tier given the current tier and the episode just completed.
+// Step direction (both comparisons are inclusive): signal >= step_up_threshold → harder;
+// signal <= step_down_threshold → easier; anything strictly in between → same tier.
+// The step-up check runs first, so it wins if a config makes the two thresholds overlap.
+// Caps at the ladder ends — "expert" stays "expert" if you keep crushing it.
+export function nextDifficulty(
+  current: DifficultyTier,
+  episode: EpisodeSignalInput,
+  config: DifficultyHeuristicConfig = DEFAULT_DIFFICULTY_HEURISTIC,
+): DifficultyTier {
+  const s = difficultySignal(episode, config);
+  // Inclusive on purpose: with the default config a clean solve scores exactly the step-up
+  // threshold and is expected to move the learner up a tier.
+  if (s >= config.step_up_threshold) return stepTier(current, +1);
+  if (s <= config.step_down_threshold) return stepTier(current, -1);
+  // Strictly between the thresholds — or NaN, which fails both comparisons — keeps the tier.
+  return current;
+}
+
+// Scores a single episode for the per-concept skill estimate.
+// Only an unassisted pass earns credit: a failed attempt or a revealed solution scores 0.
+// Otherwise the score starts at 1 and loses `hint_skill_penalty` per hint used and
+// `fail_skill_penalty` per retry (each submission after the first), floored at 0.
+export function episodeSuccessScore(
+  episode: EpisodeSignalInput,
+  config: DifficultyHeuristicConfig = DEFAULT_DIFFICULTY_HEURISTIC,
+): number {
+  const { passed, reveal_clicked, hints_used, submit_count } = episode;
+  const earnedCredit = passed && !reveal_clicked;
+  if (!earnedCredit) return 0;
+
+  const retries = submit_count - 1;
+  // Deduct hints first, then retries — the same order of subtraction as before.
+  const afterHints = 1 - hints_used * config.hint_skill_penalty;
+  const afterRetries = afterHints - retries * config.fail_skill_penalty;
+  return Math.max(0, afterRetries);
+}
+
+// Folds one episode into a concept's skill record and returns a new record:
+// - skill: exponentially weighted average — `ewma_alpha` on this episode's success score, the
+//   remainder on the previous skill.
+// - confidence: closes a `confidence_growth` fraction of the gap to `confidence_max`, capped there.
+// - attempts: previous count plus one.
+// Skill and confidence are clamped to [0, 1] and rounded to six decimals; concept_id is carried over.
+export function updateSkillScore(
+  prev: ConceptSkill,
+  episode: EpisodeSignalInput,
+  config: DifficultyHeuristicConfig = DEFAULT_DIFFICULTY_HEURISTIC,
+): ConceptSkill {
+  const { ewma_alpha: alpha, confidence_growth: growth, confidence_max: ceiling } = config;
+
+  const episodeScore = episodeSuccessScore(episode, config);
+  const blendedSkill = alpha * episodeScore + (1 - alpha) * prev.skill;
+
+  const grownConfidence = prev.confidence + growth * (ceiling - prev.confidence);
+  const cappedConfidence = Math.min(ceiling, grownConfidence);
+
+  return {
+    concept_id: prev.concept_id,
+    skill: round6(clamp01(blendedSkill)),
+    confidence: round6(clamp01(cappedConfidence)),
+    attempts: prev.attempts + 1,
+  };
+}
+
+// Moves `delta` rungs along TIER_ORDER from `tier`, pinning the result to the first or last rung
+// when the move would run off either end of the ladder.
+function stepTier(tier: DifficultyTier, delta: number): DifficultyTier {
+  const lastIdx = TIER_ORDER.length - 1;
+  const target = TIER_ORDER.indexOf(tier) + delta;
+  const bounded = Math.max(0, Math.min(lastIdx, target));
+  return TIER_ORDER[bounded] as DifficultyTier;
+}
+
+// Restricts `n` to the closed interval [0, 1]; values already inside it are returned untouched.
+function clamp01(n: number): number {
+  return n < 0 ? 0 : n > 1 ? 1 : n;
+}
+
+// Rounds `n` to six decimal places.
+function round6(n: number): number {
+  const scale = 1_000_000;
+  const scaled = Math.round(n * scale);
+  return scaled / scale;
+}
@@ -1,3 +1,15 @@
 export const PACKAGE_NAME = "@learnpro/scoring";
 
 export * from "./policies/index.js";
+export {
+  DEFAULT_DIFFICULTY_HEURISTIC,
+  DifficultyHeuristicConfigSchema,
+  EpisodeSignalInputSchema,
+  TIER_ORDER,
+  difficultySignal,
+  episodeSuccessScore,
+  nextDifficulty,
+  updateSkillScore,
+  type DifficultyHeuristicConfig,
+  type EpisodeSignalInput,
+} from "./difficulty.js";
@@ -1,6 +1,6 @@
 # LearnPro Board
 
-> **Last updated:** 2026-04-26 (STORY-012 done — versioned `MODEL_PRICING` table + `costFor()`, per-user `DailyTokenBudget` with Opus → Sonnet → Haiku tier ladder, `BudgetGatedLLMProvider` decorator wrapping any `LLMProvider`, `LLMTelemetryEvent` extended with `cost_usd`/`pricing_version`/`session_id`/`tool_used`/`cached_tokens`. DB-backed sink + `agent_calls` migration split into [STORY-060](./stories/STORY-060-agent-calls-db-sink.md) so STORY-012 stays at S.)
+> **Last updated:** 2026-04-26 (STORY-018 done — heuristic per-episode `difficultySignal()` / `nextDifficulty()` / `episodeSuccessScore()` / `updateSkillScore()` in `packages/scoring/src/difficulty.ts`, all coefficients tunable via Zod-schema'd config, 20 unit tests covering 6+ representative scenarios, EWMA + asymptotic confidence growth for per-concept skill score. Complement to the catalog-level multi-episode `EloEwmaPolicy` already in `policies/difficulty-policy.ts`.)
 > **How to read this:** This is the live status of every Epic, Story, and Task in the project. Hand-maintained for now (a regenerator script lives in the v1 backlog). When you change an item's `status:` frontmatter, also update the row here in the same commit.
 
 ---
@@ -34,7 +34,6 @@ Path A locked 2026-04-25. EPIC-019 (foundation) must land first since every othe
 | [STORY-015](stories/STORY-015-session-plan.md) | Session plan agent (3–5 micro-objectives per session) | EPIC-006 | mvp | P0 | M |
 | [STORY-016](stories/STORY-016-seed-bank.md) | Curated seed problem bank (~30 Python + ~30 TS) with hidden tests | EPIC-007 | mvp | P0 | L |
 | [STORY-017](stories/STORY-017-hint-ladder.md) | 3-rung hint ladder | EPIC-007 | mvp | P0 | S |
-| [STORY-018](stories/STORY-018-heuristic-difficulty.md) | Heuristic difficulty tuner (time + hints + errors → next difficulty) | EPIC-007 | mvp | P0 | S |
 | [STORY-019](stories/STORY-019-python-track.md) | Python fundamentals track | EPIC-009 | mvp | P0 | M |
 | [STORY-020](stories/STORY-020-typescript-track.md) | TypeScript fundamentals track | EPIC-009 | mvp | P0 | M |
 | [STORY-021](stories/STORY-021-onboarding-interview.md) | Career-aware onboarding interview (target role, time budget, level) | EPIC-010 | mvp | P0 | S |
@@ -90,10 +89,11 @@ These stories were filed during EPIC-017 Phase C from the expanded idea catalog
 
 ## Recently Done
 
-STORY-012 (per-call LLM cost telemetry + per-user daily token budget) landed 2026-04-26 — versioned `MODEL_PRICING` + `costFor()` calculator, `DailyTokenBudget` with Opus → Sonnet → Haiku tier ladder + downgrade at 80%, `BudgetGatedLLMProvider` decorator. DB-backed sink + `agent_calls` migration split into [STORY-060](./stories/STORY-060-agent-calls-db-sink.md). STORY-006 (Monaco editor + Run button + result panel) landed 2026-04-26 — first user-facing feature in `apps/web`. STORY-008 (TypeScript sandbox runner via Piston) landed 2026-04-26. STORY-007 (Python sandbox runner via Piston) landed 2026-04-26 (PR #14) — first feature Story under EPIC-003. STORY-013 (learner profile schema) landed 2026-04-26 (PR #11) — first feature Story under EPIC-005. STORY-009 (LLM gateway) landed 2026-04-26 (PR #9) — first feature Story under EPIC-004. EPIC-019 (foundation) closed 2026-04-26 with STORY-052 (monorepo skeleton, PR #5) and STORY-057 (policy adapters, PR #7). GitHub repo + PR workflow landed 2026-04-25 (PR #1, STORY-058). EPIC-017 product grooming closed in full on 2026-04-25 (Phases A + B + C). EPIC-001 closed on 2026-04-25 (initial scaffolding commit `c1e17a1`). Phase A commit: `bbf7300`.
+STORY-018 (heuristic difficulty tuner) landed 2026-04-26 — per-episode `difficultySignal` + `nextDifficulty` + `episodeSuccessScore` + Bayesian-flavored `updateSkillScore` in `packages/scoring/src/difficulty.ts`, all tunable via Zod-schema'd config, 20 unit tests covering perfect/hint-heavy/repeated-failure/overtime/under-time/no-progress + capped-at-extremes + operator-stricter-threshold scenarios. STORY-012 (per-call LLM cost telemetry + per-user daily token budget) landed 2026-04-26 — versioned `MODEL_PRICING` + `costFor()` calculator, `DailyTokenBudget` with Opus → Sonnet → Haiku tier ladder + downgrade at 80%, `BudgetGatedLLMProvider` decorator. DB-backed sink + `agent_calls` migration split into [STORY-060](./stories/STORY-060-agent-calls-db-sink.md). STORY-006 (Monaco editor + Run button + result panel) landed 2026-04-26 — first user-facing feature in `apps/web`. STORY-008 (TypeScript sandbox runner via Piston) landed 2026-04-26. STORY-007 (Python sandbox runner via Piston) landed 2026-04-26 (PR #14) — first feature Story under EPIC-003. STORY-013 (learner profile schema) landed 2026-04-26 (PR #11) — first feature Story under EPIC-005. STORY-009 (LLM gateway) landed 2026-04-26 (PR #9) — first feature Story under EPIC-004. EPIC-019 (foundation) closed 2026-04-26 with STORY-052 (monorepo skeleton, PR #5) and STORY-057 (policy adapters, PR #7). GitHub repo + PR workflow landed 2026-04-25 (PR #1, STORY-058). EPIC-017 product grooming closed in full on 2026-04-25 (Phases A + B + C). EPIC-001 closed on 2026-04-25 (initial scaffolding commit `c1e17a1`). Phase A commit: `bbf7300`.
 
 | ID | Title | Done |
 |----|-------|------|
+| [STORY-018](stories/STORY-018-heuristic-difficulty.md) | Heuristic difficulty tuner (per-episode signal + next-difficulty step + EWMA skill score) | 2026-04-26 |
 | [STORY-012](stories/STORY-012-cost-telemetry.md) | Per-call LLM cost & latency telemetry + per-user daily token budget (DB sink → STORY-060) | 2026-04-26 |
 | [STORY-006](stories/STORY-006-monaco-editor.md) | Monaco editor + Run button + result panel (`/playground` → Next.js proxy → Fastify `/sandbox/run`) | 2026-04-26 |
 | [STORY-008](stories/STORY-008-typescript-runner.md) | TypeScript sandbox runner via Piston (TS-specific unit/integration/API tests on top of STORY-007 infra) | 2026-04-26 |