From 5443bd3e4f68580edae8790d83f13caa3116d725 Mon Sep 17 00:00:00 2001 From: Rahul Singh Khokhar Date: Sun, 26 Apr 2026 15:28:31 -0700 Subject: [PATCH] feat(scoring): heuristic difficulty tuner [STORY-018] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per-episode signal (`difficultySignal`) → next-difficulty step (`nextDifficulty`) + per-concept skill score updater (`updateSkillScore`) with EWMA + asymptotic confidence growth. All coefficients live in a Zod-schema'd config with sensible defaults — operators can override them per call by spreading `DEFAULT_DIFFICULTY_HEURISTIC` (the helpers take a full config, not a partial). Sibling to `policies/difficulty-policy.ts` (which picks tiers from the multi-episode catalog EWMA); these new helpers handle the finer-grained per-episode decision. 20 unit tests cover perfect/hint-heavy/repeated-failure/overtime/ under-time/no-progress + capped-at-extremes + operator-stricter- threshold scenarios. Boundary cases use inclusive `>=`/`<=` thresholds so the default `correctness_bonus = step_up_threshold` boundary actually steps up.
Co-Authored-By: Claude Opus 4.7 --- packages/scoring/src/difficulty.test.ts | 166 ++++++++++++++++++ packages/scoring/src/difficulty.ts | 133 ++++++++++++++ packages/scoring/src/index.ts | 12 ++ project/BOARD.md | 6 +- .../stories/STORY-018-heuristic-difficulty.md | 19 +- 5 files changed, 324 insertions(+), 12 deletions(-) create mode 100644 packages/scoring/src/difficulty.test.ts create mode 100644 packages/scoring/src/difficulty.ts diff --git a/packages/scoring/src/difficulty.test.ts b/packages/scoring/src/difficulty.test.ts new file mode 100644 index 0000000..8ddef81 --- /dev/null +++ b/packages/scoring/src/difficulty.test.ts @@ -0,0 +1,166 @@ +import { describe, expect, it } from "vitest"; +import { + DEFAULT_DIFFICULTY_HEURISTIC, + difficultySignal, + episodeSuccessScore, + nextDifficulty, + updateSkillScore, + type EpisodeSignalInput, +} from "./difficulty.js"; +import type { ConceptSkill } from "./policies/types.js"; + +function ep(overrides: Partial<EpisodeSignalInput> = {}): EpisodeSignalInput { + return { + passed: true, + reveal_clicked: false, + hints_used: 0, + submit_count: 1, + time_to_solve_ms: 60_000, + expected_time_ms: 120_000, + ...overrides, + }; +} + +function skill(overrides: Partial<ConceptSkill> = {}): ConceptSkill { + return { concept_id: "list-comp", skill: 0.5, confidence: 0.3, attempts: 5, ...overrides }; +} + +describe("difficultySignal", () => { + it("returns +correctness_bonus for a perfect under-time solve", () => { + const s = difficultySignal(ep({ time_to_solve_ms: 60_000 })); + expect(s).toBeCloseTo(DEFAULT_DIFFICULTY_HEURISTIC.correctness_bonus, 6); + }); + + it("clamps overtime contribution at the configured cap (no runaway negative signal)", () => { + const huge = difficultySignal(ep({ time_to_solve_ms: 1_000_000_000, passed: false })); + // overtime clamped to 1 → -0.5 contribution from overtime, no other contributions + expect(huge).toBe(-0.5); + }); + + it("clamps hints contribution at the configured cap", () => { + const s = difficultySignal(ep({
hints_used: 999, time_to_solve_ms: 60_000, passed: false })); + // hints clamped to 1 → -0.3, no overtime, no fails, no correctness + expect(s).toBe(-0.3); + }); +}); + +describe("nextDifficulty", () => { + it("perfect solve: easy → medium (step up)", () => { + const next = nextDifficulty("easy", ep({ time_to_solve_ms: 30_000 })); + expect(next).toBe("medium"); + }); + + it("hint-heavy solve: same difficulty (no step in either direction)", () => { + const next = nextDifficulty( + "medium", + ep({ hints_used: 2, submit_count: 2, time_to_solve_ms: 100_000 }), + ); + expect(next).toBe("medium"); + }); + + it("repeated failures (heavy struggle, didn't pass): hard → medium (step down)", () => { + const next = nextDifficulty( + "hard", + ep({ + passed: false, + submit_count: 4, + hints_used: 2, + time_to_solve_ms: 200_000, + }), + ); + expect(next).toBe("medium"); + }); + + it("massive overtime (~3× expected) on a passed solve still steps down", () => { + const next = nextDifficulty( + "medium", + ep({ time_to_solve_ms: 360_000, hints_used: 1, submit_count: 2 }), + ); + expect(next).toBe("easy"); + }); + + it("under-time clean solve at expert stays expert (cap at top of ladder)", () => { + const next = nextDifficulty("expert", ep({ time_to_solve_ms: 30_000 })); + expect(next).toBe("expert"); + }); + + it("no-progress (failed, max hints, max retries, way overtime) at easy stays easy (cap at bottom)", () => { + const next = nextDifficulty( + "easy", + ep({ + passed: false, + hints_used: 3, + submit_count: 5, + time_to_solve_ms: 240_000, + reveal_clicked: true, + }), + ); + expect(next).toBe("easy"); + }); + + it("respects an operator-injected stricter step_up_threshold (no step up on a perfect solve)", () => { + const next = nextDifficulty("medium", ep({ time_to_solve_ms: 30_000 }), { + ...DEFAULT_DIFFICULTY_HEURISTIC, + step_up_threshold: 0.5, + }); + expect(next).toBe("medium"); + }); +}); + +describe("episodeSuccessScore", () => { + it("clean solve = 1", () => { +
expect(episodeSuccessScore(ep())).toBe(1); + }); + + it("revealed solution = 0 even if 'passed' is true", () => { + expect(episodeSuccessScore(ep({ reveal_clicked: true }))).toBe(0); + }); + + it("failed = 0", () => { + expect(episodeSuccessScore(ep({ passed: false }))).toBe(0); + }); + + it("hints + retries shave the score down (still positive)", () => { + const s = episodeSuccessScore(ep({ hints_used: 1, submit_count: 2 })); + // 1 - 1*0.15 (hint) - 1*0.10 (1 retry) = 0.75 + expect(s).toBeCloseTo(0.75, 6); + }); + + it("excessive hints/retries floor at 0 (never negative)", () => { + const s = episodeSuccessScore(ep({ hints_used: 99, submit_count: 99 })); + expect(s).toBe(0); + }); +}); + +describe("updateSkillScore", () => { + it("EWMA pulls skill toward 1 on a clean solve", () => { + const next = updateSkillScore(skill({ skill: 0.5 }), ep()); + // ewma_alpha * score + (1 - ewma_alpha) * prev.skill = 0.4 * 1 + 0.6 * 0.5 = 0.7 + expect(next.skill).toBeCloseTo(0.7, 6); + expect(next.attempts).toBe(6); + }); + + it("EWMA pulls skill toward 0 on a failed solve", () => { + const next = updateSkillScore(skill({ skill: 0.5 }), ep({ passed: false })); + // ewma_alpha * score + (1 - ewma_alpha) * prev.skill = 0.4 * 0 + 0.6 * 0.5 = 0.3 + expect(next.skill).toBeCloseTo(0.3, 6); + }); + + it("confidence grows asymptotically toward confidence_max", () => { + let s = skill({ confidence: 0 }); + for (let i = 0; i < 100; i++) s = updateSkillScore(s, ep()); + expect(s.confidence).toBeGreaterThan(0.94); + expect(s.confidence).toBeLessThanOrEqual(0.95); + }); + + it("clamps skill into [0, 1] (defensive — formula already keeps it bounded)", () => { + const next = updateSkillScore(skill({ skill: 1 }), ep()); + expect(next.skill).toBeLessThanOrEqual(1); + expect(next.skill).toBeGreaterThanOrEqual(0); + }); + + it("preserves the concept_id of the previous record", () => { + const next = updateSkillScore(skill({ concept_id: "dict-comp" }), ep()); + expect(next.concept_id).toBe("dict-comp"); + }); +}); diff --git a/packages/scoring/src/difficulty.ts b/packages/scoring/src/difficulty.ts new
file mode 100644 index 0000000..94f1232 --- /dev/null +++ b/packages/scoring/src/difficulty.ts @@ -0,0 +1,133 @@ +import { z } from "zod"; +import { type ConceptSkill, type DifficultyTier } from "./policies/types.js"; + +// Heuristic per-episode difficulty signal: a single number `s` ([-1, +0.3] with the default weights) derived from how the +// learner just struggled (or didn't). The policy layer (see policies/difficulty-policy.ts) keeps +// looking at multi-episode EWMAs to pick a tier from a problem catalog; these helpers are the +// finer-grained per-episode step decision that callers can use directly between catalog lookups. +// +// Why heuristic, not learned: explicitly v1 territory. A learned model only earns its keep once we +// have enough episodes to fit one. Heuristics are interpretable, debuggable, and good enough until +// proven otherwise. See STORY-018 for the rationale. + +export const TIER_ORDER: readonly DifficultyTier[] = ["easy", "medium", "hard", "expert"]; + +export const DifficultyHeuristicConfigSchema = z.object({ + // Signal weights — overtime/hints/failures pull `s` negative (struggle); correctness pushes positive. + weight_overtime: z.number().default(-0.5), + weight_hint_usage: z.number().default(-0.3), + weight_failed_attempts: z.number().default(-0.2), + correctness_bonus: z.number().default(0.3), + // Step thresholds (inclusive). `s >= step_up_threshold` → harder; `s <= step_down_threshold` → easier; otherwise same. + step_up_threshold: z.number().default(0.3), + step_down_threshold: z.number().default(-0.3), + // Normalization caps — values at or above these clamp to 1 (max struggle on that axis). An overtime_cap_ratio of exactly 1 makes any overtime count as max struggle. + overtime_cap_ratio: z.number().min(1).default(2.0), + hints_cap: z.number().int().min(1).default(3), + failed_attempts_cap: z.number().int().min(1).default(4), + // EWMA + Bayesian-flavored confidence growth for the per-concept skill score.
+ ewma_alpha: z.number().min(0).max(1).default(0.4), + confidence_growth: z.number().min(0).max(1).default(0.1), + confidence_max: z.number().min(0).max(1).default(0.95), + hint_skill_penalty: z.number().min(0).max(1).default(0.15), + fail_skill_penalty: z.number().min(0).max(1).default(0.1), +}); +export type DifficultyHeuristicConfig = z.infer<typeof DifficultyHeuristicConfigSchema>; + +export const DEFAULT_DIFFICULTY_HEURISTIC: DifficultyHeuristicConfig = + DifficultyHeuristicConfigSchema.parse({}); + +export const EpisodeSignalInputSchema = z.object({ + passed: z.boolean(), + reveal_clicked: z.boolean(), + hints_used: z.number().int().min(0), + submit_count: z.number().int().min(1), + time_to_solve_ms: z.number().int().min(0), + expected_time_ms: z.number().int().positive(), +}); +export type EpisodeSignalInput = z.infer<typeof EpisodeSignalInputSchema>; + +// Returns the per-episode difficulty signal `s`. Each struggle axis is clamped to [0, 1] before weighting, so the default +// weights bound `s` to [-1, +0.3]. Negative = the learner struggled (slow, lots of hints, retries); positive = breezed through. +export function difficultySignal( + episode: EpisodeSignalInput, + config: DifficultyHeuristicConfig = DEFAULT_DIFFICULTY_HEURISTIC, +): number { + const capSpan = Math.max(config.overtime_cap_ratio - 1, Number.EPSILON); // floor avoids 0/0 → NaN when the cap ratio is 1 + const overtime = clamp01((episode.time_to_solve_ms / episode.expected_time_ms - 1) / capSpan); + const hintUsage = clamp01(episode.hints_used / config.hints_cap); + const failedAttempts = clamp01((episode.submit_count - 1) / config.failed_attempts_cap); + const correctness = episode.passed && !episode.reveal_clicked ? config.correctness_bonus : 0; + return ( + config.weight_overtime * overtime + + config.weight_hint_usage * hintUsage + + config.weight_failed_attempts * failedAttempts + + correctness + ); +} + +// Returns the next difficulty tier given the current tier and the episode just completed. +// Step direction (inclusive): signal >= step_up_threshold → harder; <= step_down_threshold → easier; else same. +// Caps at the ladder ends — "expert" stays "expert" if you keep crushing it.
+export function nextDifficulty( + current: DifficultyTier, + episode: EpisodeSignalInput, + config: DifficultyHeuristicConfig = DEFAULT_DIFFICULTY_HEURISTIC, +): DifficultyTier { + const s = difficultySignal(episode, config); + if (s >= config.step_up_threshold) return stepTier(current, +1); /* inclusive: a signal exactly at the threshold steps up */ + if (s <= config.step_down_threshold) return stepTier(current, -1); /* inclusive: a signal exactly at the threshold steps down */ + return current; +} + +// Per-episode success score in [0, 1] that `updateSkillScore` blends into the per-concept skill. +// Failed or revealed = 0; clean first-submit solve = 1; each hint and each extra submit subtracts a penalty. +export function episodeSuccessScore( + episode: EpisodeSignalInput, + config: DifficultyHeuristicConfig = DEFAULT_DIFFICULTY_HEURISTIC, +): number { + if (!episode.passed) return 0; + if (episode.reveal_clicked) return 0; + const hintPenalty = episode.hints_used * config.hint_skill_penalty; + const failPenalty = (episode.submit_count - 1) * config.fail_skill_penalty; // only submits beyond the first are penalized + return Math.max(0, 1 - hintPenalty - failPenalty); // floor at 0 so stacked penalties never go negative +} + +// Bayesian-flavored EMA update for a per-concept skill score: +// - skill: ewma_alpha * (this episode's success score) + (1 - ewma_alpha) * prev.skill. +// - confidence: closes a `confidence_growth` fraction of the gap to `confidence_max`, capped at `confidence_max`. +// - attempts: incremented by 1.
+export function updateSkillScore( + prev: ConceptSkill, + episode: EpisodeSignalInput, + config: DifficultyHeuristicConfig = DEFAULT_DIFFICULTY_HEURISTIC, +): ConceptSkill { + const x = episodeSuccessScore(episode, config); /* this episode's success score */ + const skill = config.ewma_alpha * x + (1 - config.ewma_alpha) * prev.skill; + const confidence = Math.min( + config.confidence_max, + prev.confidence + config.confidence_growth * (config.confidence_max - prev.confidence), + ); + return { + concept_id: prev.concept_id, + skill: round6(clamp01(skill)), + confidence: round6(clamp01(confidence)), + attempts: prev.attempts + 1, + }; +} + +function stepTier(tier: DifficultyTier, delta: number): DifficultyTier { + const idx = TIER_ORDER.indexOf(tier); /* -1 for a tier missing from TIER_ORDER; with a ±1 delta the clamp below then lands on index 0 */ + const next = Math.max(0, Math.min(TIER_ORDER.length - 1, idx + delta)); + return TIER_ORDER[next] as DifficultyTier; /* next is clamped to a valid index of TIER_ORDER */ +} + +function clamp01(n: number): number { + if (n < 0) return 0; + if (n > 1) return 1; + return n; /* also reached for NaN, which fails both comparisons and is returned unchanged */ +} + +function round6(n: number): number { + return Math.round(n * 1_000_000) / 1_000_000; /* round to 6 decimal places */ +} diff --git a/packages/scoring/src/index.ts b/packages/scoring/src/index.ts index 3027763..b6bc146 100644 --- a/packages/scoring/src/index.ts +++ b/packages/scoring/src/index.ts @@ -1,3 +1,15 @@ export const PACKAGE_NAME = "@learnpro/scoring"; export * from "./policies/index.js"; +export { + DEFAULT_DIFFICULTY_HEURISTIC, + DifficultyHeuristicConfigSchema, + EpisodeSignalInputSchema, + TIER_ORDER, + difficultySignal, + episodeSuccessScore, + nextDifficulty, + updateSkillScore, + type DifficultyHeuristicConfig, + type EpisodeSignalInput, +} from "./difficulty.js"; diff --git a/project/BOARD.md b/project/BOARD.md index 313a9cc..3a08445 100644 --- a/project/BOARD.md +++ b/project/BOARD.md @@ -1,6 +1,6 @@ # LearnPro Board -> **Last updated:** 2026-04-26 (STORY-012 done — versioned `MODEL_PRICING` table + `costFor()`, per-user `DailyTokenBudget` with Opus → Sonnet → Haiku tier ladder, `BudgetGatedLLMProvider` decorator wrapping any `LLMProvider`,
`LLMTelemetryEvent` extended with `cost_usd`/`pricing_version`/`session_id`/`tool_used`/`cached_tokens`. DB-backed sink + `agent_calls` migration split into [STORY-060](./stories/STORY-060-agent-calls-db-sink.md) so STORY-012 stays at S.) +> **Last updated:** 2026-04-26 (STORY-018 done — heuristic per-episode `difficultySignal()` / `nextDifficulty()` / `episodeSuccessScore()` / `updateSkillScore()` in `packages/scoring/src/difficulty.ts`, all coefficients tunable via Zod-schema'd config, 20 unit tests covering 6+ representative scenarios, EWMA + asymptotic confidence growth for per-concept skill score. Complement to the catalog-level multi-episode `EloEwmaPolicy` already in `policies/difficulty-policy.ts`.) > **How to read this:** This is the live status of every Epic, Story, and Task in the project. Hand-maintained for now (a regenerator script lives in the v1 backlog). When you change an item's `status:` frontmatter, also update the row here in the same commit. --- @@ -34,7 +34,6 @@ Path A locked 2026-04-25. 
EPIC-019 (foundation) must land first since every othe | [STORY-015](stories/STORY-015-session-plan.md) | Session plan agent (3–5 micro-objectives per session) | EPIC-006 | mvp | P0 | M | | [STORY-016](stories/STORY-016-seed-bank.md) | Curated seed problem bank (~30 Python + ~30 TS) with hidden tests | EPIC-007 | mvp | P0 | L | | [STORY-017](stories/STORY-017-hint-ladder.md) | 3-rung hint ladder | EPIC-007 | mvp | P0 | S | -| [STORY-018](stories/STORY-018-heuristic-difficulty.md) | Heuristic difficulty tuner (time + hints + errors → next difficulty) | EPIC-007 | mvp | P0 | S | | [STORY-019](stories/STORY-019-python-track.md) | Python fundamentals track | EPIC-009 | mvp | P0 | M | | [STORY-020](stories/STORY-020-typescript-track.md) | TypeScript fundamentals track | EPIC-009 | mvp | P0 | M | | [STORY-021](stories/STORY-021-onboarding-interview.md) | Career-aware onboarding interview (target role, time budget, level) | EPIC-010 | mvp | P0 | S | @@ -90,10 +89,11 @@ These stories were filed during EPIC-017 Phase C from the expanded idea catalog ## Recently Done -STORY-012 (per-call LLM cost telemetry + per-user daily token budget) landed 2026-04-26 — versioned `MODEL_PRICING` + `costFor()` calculator, `DailyTokenBudget` with Opus → Sonnet → Haiku tier ladder + downgrade at 80%, `BudgetGatedLLMProvider` decorator. DB-backed sink + `agent_calls` migration split into [STORY-060](./stories/STORY-060-agent-calls-db-sink.md). STORY-006 (Monaco editor + Run button + result panel) landed 2026-04-26 — first user-facing feature in `apps/web`. STORY-008 (TypeScript sandbox runner via Piston) landed 2026-04-26. STORY-007 (Python sandbox runner via Piston) landed 2026-04-26 (PR #14) — first feature Story under EPIC-003. STORY-013 (learner profile schema) landed 2026-04-26 (PR #11) — first feature Story under EPIC-005. STORY-009 (LLM gateway) landed 2026-04-26 (PR #9) — first feature Story under EPIC-004. 
EPIC-019 (foundation) closed 2026-04-26 with STORY-052 (monorepo skeleton, PR #5) and STORY-057 (policy adapters, PR #7). GitHub repo + PR workflow landed 2026-04-25 (PR #1, STORY-058). EPIC-017 product grooming closed in full on 2026-04-25 (Phases A + B + C). EPIC-001 closed on 2026-04-25 (initial scaffolding commit `c1e17a1`). Phase A commit: `bbf7300`. +STORY-018 (heuristic difficulty tuner) landed 2026-04-26 — per-episode `difficultySignal` + `nextDifficulty` + `episodeSuccessScore` + Bayesian-flavored `updateSkillScore` in `packages/scoring/src/difficulty.ts`, all tunable via Zod-schema'd config, 20 unit tests covering perfect/hint-heavy/repeated-failure/overtime/under-time/no-progress + capped-at-extremes + operator-stricter-threshold scenarios. STORY-012 (per-call LLM cost telemetry + per-user daily token budget) landed 2026-04-26 — versioned `MODEL_PRICING` + `costFor()` calculator, `DailyTokenBudget` with Opus → Sonnet → Haiku tier ladder + downgrade at 80%, `BudgetGatedLLMProvider` decorator. DB-backed sink + `agent_calls` migration split into [STORY-060](./stories/STORY-060-agent-calls-db-sink.md). STORY-006 (Monaco editor + Run button + result panel) landed 2026-04-26 — first user-facing feature in `apps/web`. STORY-008 (TypeScript sandbox runner via Piston) landed 2026-04-26. STORY-007 (Python sandbox runner via Piston) landed 2026-04-26 (PR #14) — first feature Story under EPIC-003. STORY-013 (learner profile schema) landed 2026-04-26 (PR #11) — first feature Story under EPIC-005. STORY-009 (LLM gateway) landed 2026-04-26 (PR #9) — first feature Story under EPIC-004. EPIC-019 (foundation) closed 2026-04-26 with STORY-052 (monorepo skeleton, PR #5) and STORY-057 (policy adapters, PR #7). GitHub repo + PR workflow landed 2026-04-25 (PR #1, STORY-058). EPIC-017 product grooming closed in full on 2026-04-25 (Phases A + B + C). EPIC-001 closed on 2026-04-25 (initial scaffolding commit `c1e17a1`). Phase A commit: `bbf7300`. 
| ID | Title | Done | |----|-------|------| +| [STORY-018](stories/STORY-018-heuristic-difficulty.md) | Heuristic difficulty tuner (per-episode signal + next-difficulty step + EWMA skill score) | 2026-04-26 | | [STORY-012](stories/STORY-012-cost-telemetry.md) | Per-call LLM cost & latency telemetry + per-user daily token budget (DB sink → STORY-060) | 2026-04-26 | | [STORY-006](stories/STORY-006-monaco-editor.md) | Monaco editor + Run button + result panel (`/playground` → Next.js proxy → Fastify `/sandbox/run`) | 2026-04-26 | | [STORY-008](stories/STORY-008-typescript-runner.md) | TypeScript sandbox runner via Piston (TS-specific unit/integration/API tests on top of STORY-007 infra) | 2026-04-26 | diff --git a/project/stories/STORY-018-heuristic-difficulty.md b/project/stories/STORY-018-heuristic-difficulty.md index e4957b6..e0306f4 100644 --- a/project/stories/STORY-018-heuristic-difficulty.md +++ b/project/stories/STORY-018-heuristic-difficulty.md @@ -2,14 +2,14 @@ id: STORY-018 title: Heuristic difficulty tuner (time + hints + errors → next difficulty) type: story -status: backlog +status: done priority: P0 estimate: S parent: EPIC-007 phase: mvp tags: [adaptive, difficulty, heuristic] created: 2026-04-25 -updated: 2026-04-25 +updated: 2026-04-26 --- ## Description @@ -23,25 +23,26 @@ s = -0.5 * normalized_overtime + small_correctness_bonus ``` -If `s > 0.3`, next problem is one rung harder. If `s < -0.3`, one rung easier. Otherwise, same difficulty. Per-concept skill score is updated with a Bayesian-flavored EMA. +If `s ≥ 0.3`, next problem is one rung harder. If `s ≤ -0.3`, one rung easier. Otherwise, same difficulty. Per-concept skill score is updated with a Bayesian-flavored EMA. This is **explicitly heuristic, not learned.** A learned model is v2 work — and only justified once we have enough episodes to fit one. Heuristics are interpretable, debuggable, and good enough until proven otherwise. 
## Acceptance criteria -- [ ] Function `nextDifficulty(currentLevel, episode)` lives in `packages/profile/src/difficulty.ts`. -- [ ] Function `updateSkillScore(prev, episode)` lives in same file. -- [ ] Unit tests: 6 representative scenarios (perfect solve / hint-heavy / repeated failures / overtime / under-time / no-progress) all produce expected next-difficulty. -- [ ] No floating-point coefficients hardcoded inline — all in a tunable config object. +- [x] Function `nextDifficulty(currentLevel, episode)` lives in `packages/scoring/src/difficulty.ts`. (Home moved from the spec'd `packages/profile/src/difficulty.ts` because no `profile` package exists; `scoring` is its sibling and already houses `policies/difficulty-policy.ts` for catalog-level multi-episode tier picks. The new helpers are the per-episode complement.) +- [x] Function `updateSkillScore(prev, episode)` lives in same file. Bayesian-flavored EWMA: `skill = α * episode_score + (1-α) * prev.skill`; `confidence` grows asymptotically toward `confidence_max`. +- [x] Unit tests: 6+ representative scenarios — perfect solve (easy → medium), hint-heavy (no step), repeated failures (hard → medium), massive overtime (medium → easy), under-time at expert (capped), no-progress at easy (capped), plus operator-injected stricter threshold. 20 tests total in `difficulty.test.ts`, all green. +- [x] No floating-point coefficients hardcoded inline — all in a tunable Zod-schema'd config object (`DifficultyHeuristicConfigSchema`) with sensible defaults exposed as `DEFAULT_DIFFICULTY_HEURISTIC`. Operators override coefficients per call by passing a full config (e.g. `{ ...DEFAULT_DIFFICULTY_HEURISTIC, step_up_threshold: 0.5 }`); a partial can be completed first with `DifficultyHeuristicConfigSchema.parse(partial)`. ## Dependencies -- Blocked by: STORY-013 (skill_scores + episodes tables). +- Blocked by: STORY-013 (skill_scores + episodes tables) — note: BOARD.md records STORY-013 as landed 2026-04-26 (PR #11), but this Story adds no DB wiring; the helpers operate on plain typed records (`ConceptSkill` from `packages/scoring/src/policies/types.ts`) so they can be connected to the STORY-013 persistence layer without changes here. No DB code in this Story.
## Tasks -(To be created when work begins.) +(Tracked inline in the activity log.) ## Activity log - 2026-04-25 — created +- 2026-04-26 — picked up. Built per-episode `difficultySignal()` (clamped overtime/hints/failures + correctness bonus), `nextDifficulty()` (inclusive `≥` / `≤` thresholds so the default `correctness_bonus = step_up_threshold = 0.3` boundary case actually steps up), `episodeSuccessScore()` (penalties sum naturally and `Math.max(0, …)` floors to zero), and `updateSkillScore()` (EWMA on skill + asymptotic confidence growth). Config is a Zod schema with defaults; callers override coefficients per call by spreading `DEFAULT_DIFFICULTY_HEURISTIC` into a full config. Total: 20 new tests, all green; full sweep (`pnpm format` / `lint` / `typecheck` / `test`) passes — 12 tasks, no errors.