From 9628686e834746cc0740e86304c0bd8f62957fb8 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Thu, 14 May 2026 09:48:13 -0500 Subject: [PATCH 1/8] fix: Modernize AI Configs agent skills to current 0.20 SDK - Replace removed Node *Provider class namespaces with bare module exports + *RunnerFactory - Replace removed LDAIClient method aliases and managed-chat API with current names - Replace removed tracker convenience helpers with trackMetricsOf + bare extractors - Drop unnecessary createTracker!() non-null assertions - Switch LDAIMetrics literal usage: -> tokens: (0.20 rename) - Rename LDAIDefaults -> LDAICompletionConfigDefault and LDAIAgentDefault -> LDAIAgentConfigDefault - Bump compatibility statement to >=0.20.0 - Generalize provider-tracking prose to use trackMetricsOf universally Refs AIC-2383 Co-Authored-By: Claude Opus 4.7 (1M context) --- skills.json | 4 +- .../ai-configs/aiconfig-ai-metrics/SKILL.md | 17 ++-- .../references/anthropic-tracking.md | 16 ++-- .../references/bedrock-tracking.md | 23 ++--- .../references/gemini-tracking.md | 8 +- .../references/langchain-tracking.md | 49 +++++----- .../references/openai-tracking.md | 26 +++--- .../references/strands-tracking.md | 2 +- .../references/streaming-tracking.md | 6 +- skills/ai-configs/aiconfig-migrate/SKILL.md | 32 +++---- .../references/agent-mode-frameworks.md | 21 ++--- .../references/phase-1-analysis-checklist.md | 6 +- .../references/sdk-ai-tracker-patterns.md | 91 +++++++++---------- .../ai-configs/aiconfig-online-evals/SKILL.md | 8 +- skills/ai-configs/aiconfig-tools/SKILL.md | 2 +- 15 files changed, 147 insertions(+), 164 deletions(-) diff --git a/skills.json b/skills.json index 9688478..254beeb 100644 --- a/skills.json +++ b/skills.json @@ -14,7 +14,7 @@ "path": "skills/ai-configs/aiconfig-ai-metrics", "version": "1.0.0-experimental", "license": "Apache-2.0", - "compatibility": "Requires the LaunchDarkly server-side AI SDK (`launchdarkly-server-sdk-ai>=0.18.0` for Python or 
`@launchdarkly/server-sdk-ai>=0.17.0` for Node) and an existing AI Config." + "compatibility": "Requires the LaunchDarkly server-side AI SDK (`launchdarkly-server-sdk-ai>=0.18.0` for Python or `@launchdarkly/server-sdk-ai>=0.20.0` for Node) and an existing AI Config." }, { "name": "aiconfig-create", @@ -45,7 +45,7 @@ "description": "Attach judges to AI Config variations for automatic LLM-as-a-judge evaluation. Create custom judges, configure sampling rates, and monitor quality scores.", "path": "skills/ai-configs/aiconfig-online-evals", "version": "0.1.0", - "compatibility": "Requires LaunchDarkly API access token with ai-configs:write permission. SDK versions Python v0.18.0+ or Node.js v0.17.0+ for automatic metric recording and the consolidated `track_judge_result` / `trackJudgeResult` API." + "compatibility": "Requires LaunchDarkly API access token with ai-configs:write permission. SDK versions Python v0.18.0+ or Node.js v0.20.0+ for automatic metric recording and the consolidated `track_judge_result` / `trackJudgeResult` API." }, { "name": "aiconfig-projects", diff --git a/skills/ai-configs/aiconfig-ai-metrics/SKILL.md b/skills/ai-configs/aiconfig-ai-metrics/SKILL.md index 2d13059..5ebb447 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/SKILL.md +++ b/skills/ai-configs/aiconfig-ai-metrics/SKILL.md @@ -2,7 +2,7 @@ name: aiconfig-ai-metrics description: "Instrument an existing codebase with LaunchDarkly AI Config tracking. Walks the four-tier ladder (managed runner → provider package → custom extractor + trackMetricsOf → raw manual) and picks the lowest-ceremony option that still captures duration, tokens, and success/error." license: Apache-2.0 -compatibility: Requires the LaunchDarkly server-side AI SDK (`launchdarkly-server-sdk-ai>=0.18.0` for Python or `@launchdarkly/server-sdk-ai>=0.17.0` for Node) and an existing AI Config. 
+compatibility: Requires the LaunchDarkly server-side AI SDK (`launchdarkly-server-sdk-ai>=0.18.0` for Python or `@launchdarkly/server-sdk-ai>=0.20.0` for Node) and an existing AI Config. metadata: author: launchdarkly version: "1.0.0-experimental" @@ -20,12 +20,12 @@ This is the order the official SDK READMEs (Python core, Node core, and every pr | Tier | Pattern | Use when | Tracks automatically | |------|---------|----------|----------------------| -| **1 — Managed runner** | Python: `ai_client.create_model(...)` returning a `ManagedModel`, then `await model.invoke(...)`.
Node: `aiClient.initChat(...)` / `aiClient.createChat(...)` returning a `TrackedChat`, then `await chat.invoke(...)`. | The call is conversational (chat history, turn-based). This is what the provider READMEs lead with. | Duration, tokens, success/error — **all of it, zero tracker calls**. | +| **1 — Managed runner** | Python: `ai_client.create_model(...)` returning a `ManagedModel`, then `await model.invoke(...)`.
Node: `aiClient.createModel(...)` returning a `ManagedModel`, then `await model.run(...)`. | The call is conversational (chat history, turn-based). This is what the provider READMEs lead with. | Duration, tokens, success/error — **all of it, zero tracker calls**. | | **2 — Provider package + `trackMetricsOf`** | `tracker.trackMetricsOf(Provider.getAIMetricsFromResponse, () => providerCall())`. Provider packages today: `@launchdarkly/server-sdk-ai-openai`, `-langchain`, `-vercel` (Node) and `launchdarkly-server-sdk-ai-openai`, `-langchain` (Python). | The shape isn't a chat loop (one-shot completion, structured output, agent step) but the framework or provider has a package. | Duration + success/error from the wrapper; tokens from the package's built-in `getAIMetricsFromResponse` extractor. | | **3 — Custom extractor + `trackMetricsOf`** | Same `trackMetricsOf` wrapper, but you write a small function that maps the provider response to `LDAIMetrics` (tokens + success). | No provider package exists (Anthropic direct, Gemini, Cohere, custom HTTP). | Duration + success/error from the wrapper; tokens from your extractor. | | **4 — Raw manual** | Separate calls to `trackDuration`, `trackTokens`, `trackSuccess` / `trackError`, plus `trackTimeToFirstToken` for streams. | Streaming with TTFT, unusual response shapes, partial tracking, anything Tier 2–3 can't cleanly wrap. | Only what you explicitly call — it's on you to not miss one. | -A call to `track_openai_metrics` / `trackOpenAIMetrics` / `track_bedrock_converse_metrics` / `trackBedrockConverseMetrics` / `trackVercelAISDKGenerateTextMetrics` is **Tier-2 legacy shorthand**. These helpers still exist in the SDK source but none of the current provider READMEs use them — they've been superseded by `trackMetricsOf` + `Provider.getAIMetricsFromResponse`. Do not recommend them for new code; if you see them in an existing codebase, leave them alone unless the user is already on a cleanup pass. 
+Every provider — OpenAI, LangChain, Vercel, Bedrock, Anthropic, Gemini, custom HTTP — uses the same generic shape: `tracker.trackMetricsOf(getAIMetricsFromResponse, () => providerCall())` in Node, `tracker.track_metrics_of(provider_call, get_ai_metrics_from_response)` in Python. The extractor is the only thing that changes per provider: import `getAIMetricsFromResponse` from the matching `@launchdarkly/server-sdk-ai-<provider>` (or `ldai_<provider>`) package, or write a small custom function that returns `LDAIMetrics`. There are no provider-specific tracker methods. ## Workflow @@ -38,7 +38,7 @@ Before picking a tier, find the provider call and answer these questions: - [ ] **Provider?** OpenAI, Anthropic, Bedrock, Gemini, Azure, custom HTTP? → cross-reference with the package availability matrix below. - [ ] **Streaming?** If yes, you'll need TTFT tracking, which means Tier 4 for the TTFT part even if the rest is Tier 2. - [ ] **Language?** Python or Node? Provider-package coverage differs between them. -- [ ] **Already using an AI Config?** If not, route to `aiconfig-create` first — tracking requires a tracker, which is obtained by calling `create_tracker()` / `createTracker()` on the config object returned by `completion_config()` / `completionConfig()` / `initChat()`. +- [ ] **Already using an AI Config?** If not, route to `aiconfig-create` first — tracking requires a tracker, which is obtained by calling `create_tracker()` / `createTracker()` on the config object returned by `completion_config()` / `completionConfig()` / `createModel()`. ### 2. Look up your Tier-2 option @@ -78,7 +78,7 @@ Confirm the Monitoring tab fills in: ## Quick reference: tracker methods -Obtain a tracker via the factory on the config object: `tracker = config.create_tracker()` (Python v0.18.0+) or `const tracker = aiConfig.createTracker!()` (Node v0.17.0+). 
Call the factory once per execution and reuse the returned `tracker` for every call — each factory invocation mints a new `runId` that tags every tracking event emitted by that tracker so events from a single execution can be correlated together (via exported events / downstream systems). The Monitoring tab aggregates events rather than grouping them by run today — the `runId` is useful when events are exported or queried outside the UI, and is the identifier the SDK's at-most-once guards are keyed on. The methods below are the raw API surface — most of the time you should not call them individually; use `trackMetricsOf` or a Tier-1 managed runner. The list is here so you can recognize the methods in existing code and reach for the right one when you genuinely need Tier 4. +Obtain a tracker via the factory on the config object: `tracker = config.create_tracker()` (Python) or `const tracker = aiConfig.createTracker()` (Node). Call the factory once per execution and reuse the returned `tracker` for every call — each factory invocation mints a new `runId` that tags every tracking event emitted by that tracker so events from a single execution can be correlated together (via exported events / downstream systems). The Monitoring tab aggregates events rather than grouping them by run today — the `runId` is useful when events are exported or queried outside the UI, and is the identifier the SDK's at-most-once guards are keyed on. The methods below are the raw API surface — most of the time you should not call them individually; use `trackMetricsOf` or a Tier-1 managed runner. The list is here so you can recognize the methods in existing code and reach for the right one when you genuinely need Tier 4. | Method (Python ↔ Node) | Tier | What it does | |---|---|---| @@ -92,12 +92,9 @@ Obtain a tracker via the factory on the config object: `tracker = config.create_ | `track_success()` / `trackSuccess()` | 4 | Mark the generation as successful. 
Required for the Monitoring tab to count it. | | `track_error()` / `trackError()` | 4 | Mark the generation as failed. Do not also call `trackSuccess()` in the same request. | | `track_feedback({kind})` / `trackFeedback({kind})` | any | Record thumbs-up / thumbs-down from a feedback UI. Independent of the success/error path. | -| `track_tool_call(name)` / `trackToolCall(name)` | any | Record a single tool invocation by name. Available on both SDKs as of Python v0.18.0 / Node v0.17.0. | +| `track_tool_call(name)` / `trackToolCall(name)` | any | Record a single tool invocation by name. Available on both SDKs. | | `track_tool_calls([names])` / `trackToolCalls([names])` | any | Batch variant — record a list of tool invocations in one call. | -| `track_judge_result(result)` / `trackJudgeResult(result)` | any | Record a programmatic judge evaluation (consolidates the earlier `track_eval_scores` + `track_judge_response` pair). `result.sampled` indicates whether evaluation ran. | -| `track_openai_metrics(fn)` / `trackOpenAIMetrics(fn)` | **legacy** | Predates provider packages. Still works; do not use in new code. Replace with `trackMetricsOf(OpenAIProvider.getAIMetricsFromResponse, fn)`. | -| `track_bedrock_converse_metrics(res)` / `trackBedrockConverseMetrics(res)` | **legacy** | Same story. Do not use in new code. | -| `trackVercelAISDKGenerateTextMetrics(fn)` (Node) | **legacy** | Same story. Use `trackMetricsOf` with the Vercel provider package's extractor. | +| `track_judge_result(result)` / `trackJudgeResult(result)` | any | Record a programmatic judge evaluation. `result.sampled` indicates whether evaluation ran. 
| ## Related skills diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/anthropic-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/anthropic-tracking.md index ea1e1ab..11611da 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/anthropic-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/anthropic-tracking.md @@ -4,13 +4,13 @@ Three viable paths, in order of preference: -1. **Route Anthropic through LangChain.** If the app already uses LangChain (or can adopt it cheaply), install the LangChain provider package and use it as Tier 2. LangChain's `ChatAnthropic` wrapper exposes the standardized `usage_metadata` that `LangChainProvider.getAIMetricsFromResponse` reads. +1. **Route Anthropic through LangChain.** If the app already uses LangChain (or can adopt it cheaply), install the LangChain provider package and use it as Tier 2. LangChain's `ChatAnthropic` wrapper exposes the standardized `usage_metadata` that `getAIMetricsFromResponse` reads. 2. **Route Anthropic through Bedrock Converse.** If the app can switch to Bedrock Converse (Claude is available on Bedrock), you inherit Bedrock's Converse response shape and a custom-extractor pattern that's slightly cleaner. See [bedrock-tracking.md](bedrock-tracking.md). 3. **Custom extractor on the direct SDK** (this file's primary pattern). ## Tier 1 is not available -`ManagedModel` / `TrackedChat` do not currently ship an Anthropic provider. If you need Tier 1 for a chat app, use option 1 or 2 above — the LangChain provider package lets `ManagedModel` wrap a `ChatAnthropic` under the hood, which restores the zero-tracker-call experience. +`ManagedModel` does not currently ship an Anthropic provider. If you need Tier 1 for a chat app, use option 1 or 2 above — the LangChain provider package lets `ManagedModel` wrap a `ChatAnthropic` under the hood, which restores the zero-tracker-call experience. 
## Tier 3 — Custom extractor + `trackMetricsOf` (primary) @@ -66,7 +66,7 @@ const client = new Anthropic(); const anthropicExtractor = (response: Anthropic.Message): LDAIMetrics => ({ success: true, - usage: { + tokens: { total: response.usage.input_tokens + response.usage.output_tokens, input: response.usage.input_tokens, output: response.usage.output_tokens, @@ -81,7 +81,7 @@ async function callWithTracking( const systemContent = aiConfig.messages?.[0]?.content ?? ''; - const tracker = aiConfig.createTracker!(); + const tracker = aiConfig.createTracker(); // Exceptions are tracked automatically: trackMetricsOf catches exceptions, // records tracker.trackError(), and re-throws. Do NOT add // catch (err) { tracker.trackError(); throw err } on top — it's a noop @@ -108,18 +108,18 @@ Notes on the extractor shape: ## Tier 2 option — route via LangChain -If the app can adopt LangChain, the LangChain provider package handles Anthropic (via `@langchain/anthropic`) through the same `trackMetricsOf(LangChainProvider.getAIMetricsFromResponse, ...)` pattern used for any other LangChain model. This is often the cleanest answer if the app already uses or is open to LangChain, because the extractor is built in and shared with every other LangChain-wrapped model. +If the app can adopt LangChain, the LangChain provider package handles Anthropic (via `@langchain/anthropic`) through the same `trackMetricsOf(getAIMetricsFromResponse, ...)` pattern used for any other LangChain model. This is often the cleanest answer if the app already uses or is open to LangChain, because the extractor is built in and shared with every other LangChain-wrapped model. 
```python -from ldai_langchain import LangChainProvider +from ldai_langchain import create_langchain_model, get_ai_metrics_from_response ai_config = ai_client.completion_config("my-config-key", context, default_config) -llm = await LangChainProvider.create_langchain_model(ai_config) # ChatAnthropic under the hood +llm = create_langchain_model(ai_config) # ChatAnthropic under the hood tracker = ai_config.create_tracker() response = tracker.track_metrics_of( lambda: llm.invoke(messages), - LangChainProvider.get_ai_metrics_from_response, + get_ai_metrics_from_response, ) ``` diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/bedrock-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/bedrock-tracking.md index 193acb0..288f73a 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/bedrock-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/bedrock-tracking.md @@ -7,7 +7,7 @@ ## Tier 1 is not available -Neither `ManagedModel` (Python) nor `TrackedChat` / `initChat` (Node) ship a Bedrock provider today. If you want Tier 1 for a Bedrock chat app, route via LangChain — `ManagedModel` can wrap a `ChatBedrockConverse` through the LangChain provider package. +`ManagedModel` does not ship a Bedrock provider today (Python or Node). If you want Tier 1 for a Bedrock chat app, route via LangChain — `ManagedModel` can wrap a `ChatBedrockConverse` through the LangChain provider package. ## Tier 3 — Custom extractor + `trackMetricsOf` (primary) @@ -64,7 +64,7 @@ const bedrock = new BedrockRuntimeClient({}); const bedrockConverseExtractor = (response: ConverseCommandOutput): LDAIMetrics => ({ success: true, - usage: { + tokens: { total: response.usage?.totalTokens ?? 0, input: response.usage?.inputTokens ?? 0, output: response.usage?.outputTokens ?? 
0, @@ -79,7 +79,7 @@ async function callWithTracking( const systemContent = aiConfig.messages?.[0]?.content; - const tracker = aiConfig.createTracker!(); + const tracker = aiConfig.createTracker(); // Exceptions are tracked automatically — trackMetricsOf catches // exceptions, records tracker.trackError(), and re-throws. const response = await tracker.trackMetricsOf( @@ -123,29 +123,20 @@ This is a good reason to migrate to Converse if you can. If the app uses LangChain, the LangChain provider package's `ChatBedrockConverse` support gives you the Tier-2 experience: ```python -from ldai_langchain import LangChainProvider +from ldai_langchain import create_langchain_model, get_ai_metrics_from_response ai_config = ai_client.completion_config("my-config-key", context, default_config) -llm = await LangChainProvider.create_langchain_model(ai_config) # ChatBedrockConverse when provider=bedrock +llm = create_langchain_model(ai_config) # ChatBedrockConverse when provider=bedrock tracker = ai_config.create_tracker() response = tracker.track_metrics_of( lambda: llm.invoke(messages), - LangChainProvider.get_ai_metrics_from_response, + get_ai_metrics_from_response, ) ``` -LangChain normalizes the Converse response shape into `AIMessage.usage_metadata`, which `LangChainProvider.get_ai_metrics_from_response` reads — so you don't need a Bedrock-specific extractor. +LangChain normalizes the Converse response shape into `AIMessage.usage_metadata`, which `get_ai_metrics_from_response` reads — so you don't need a Bedrock-specific extractor. ## Tier 4 — Manual (streaming only) Bedrock Converse streaming (`ConverseStream`) needs manual TTFT tracking. The pattern is identical to OpenAI streaming. See [streaming-tracking.md](streaming-tracking.md). - -## Legacy: `track_bedrock_converse_metrics` / `trackBedrockConverseMetrics` - -Existing code may call `tracker.track_bedrock_converse_metrics(response)` directly. 
This helper still works and reads the same fields the custom extractor above reads. The current recommendation is to prefer `trackMetricsOf` with a custom extractor because: - -- It keeps the tracking call in one place (the wrapper) rather than requiring a separate post-call step, which is easy to forget. -- It captures duration automatically; the legacy helper does not, so existing code typically pairs it with `track_duration_of`, which drifts. - -**Do not introduce `track_bedrock_converse_metrics` in new code.** Leave it alone in existing code unless the user asks for a cleanup. diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/gemini-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/gemini-tracking.md index f9d8d94..5ea1703 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/gemini-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/gemini-tracking.md @@ -4,7 +4,7 @@ ## Tier 1 is not available -`ManagedModel` / `TrackedChat` do not currently ship a Gemini provider. If you need Tier 1 for a chat app, route via the LangChain provider package (`ChatGoogleGenerativeAI` under the hood), which restores the zero-tracker-call experience. See [langchain-tracking.md](langchain-tracking.md). +`ManagedModel` does not currently ship a Gemini provider. If you need Tier 1 for a chat app, route via the LangChain provider package (`ChatGoogleGenerativeAI` under the hood), which restores the zero-tracker-call experience. See [langchain-tracking.md](langchain-tracking.md). ## Tier 3 — Custom extractor + `trackMetricsOf` (primary) @@ -97,7 +97,7 @@ const geminiMetrics = (response: any): LDAIMetrics => { const usage = response.usageMetadata; return { success: true, - usage: usage + tokens: usage ? { total: usage.totalTokenCount ?? 0, input: usage.promptTokenCount ?? 0, @@ -147,7 +147,7 @@ async function callWithTracking( const params = (aiConfig.model?.parameters ?? 
{}) as Record; - const tracker = aiConfig.createTracker!(); + const tracker = aiConfig.createTracker(); // Exceptions are tracked automatically — trackMetricsOf catches // exceptions, records tracker.trackError(), and re-throws. const response = await tracker.trackMetricsOf( @@ -195,7 +195,7 @@ Tool handlers stay in your application code — LaunchDarkly stores the schema, ## Tier 2 option — route via LangChain -If the app can adopt LangChain, the LangChain provider package handles Gemini (via `@langchain/google-genai` / `langchain-google-genai`) through the standard `trackMetricsOf(LangChainProvider.getAIMetricsFromResponse, ...)` pattern. The provider package handles LaunchDarkly→LangChain provider-name mapping (for example, `"gemini"` → `"google_genai"`) and forwards all variation parameters automatically, so you do not need your own mapping helper. See [langchain-tracking.md](langchain-tracking.md). +If the app can adopt LangChain, the LangChain provider package handles Gemini (via `@langchain/google-genai` / `langchain-google-genai`) through the standard `trackMetricsOf(getAIMetricsFromResponse, ...)` pattern. The provider package handles LaunchDarkly→LangChain provider-name mapping (for example, `"gemini"` → `"google_genai"`) and forwards all variation parameters automatically, so you do not need your own mapping helper. See [langchain-tracking.md](langchain-tracking.md). ## Tier 4 — Manual (streaming only) diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/langchain-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/langchain-tracking.md index 913836f..668d74c 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/langchain-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/langchain-tracking.md @@ -3,15 +3,16 @@ LangChain is covered by a first-class LaunchDarkly provider package in both Python and Node. The same package is what LangGraph rides on — there is no separate LangGraph helper. 
- Python: `launchdarkly-server-sdk-ai-langchain` (imported as `ldai_langchain`) -- Node: `@launchdarkly/server-sdk-ai-langchain` (exports `LangChainProvider`) +- Node: `@launchdarkly/server-sdk-ai-langchain` Three helpers do the heavy lifting. Use them — skipping any silently drops value that the provider package would otherwise give you. | Helper | Purpose | |---|---| -| `create_langchain_model(config)` (Python) / `LangChainProvider.createLangChainModel(config)` (Node) | Build a LangChain chat model from the AI Config. Forwards **all** variation parameters (temperature, max_tokens, top_p, and so on), picks the correct LangChain chat class based on `config.provider.name`, and handles provider-name mapping internally (for example, LaunchDarkly's `"gemini"` → LangChain's `"google_genai"`). | +| `create_langchain_model(config)` (Python) / `createLangChainModel(config)` (Node, bare export) | Build a LangChain chat model from the AI Config. Forwards **all** variation parameters (temperature, max_tokens, top_p, and so on), picks the correct LangChain chat class based on `config.provider.name`, and handles provider-name mapping internally (for example, LaunchDarkly's `"gemini"` → LangChain's `"google_genai"`). | | `build_structured_tools(config, registry)` (Python, `ldai_langchain.langchain_helper`) | Read `config.model.parameters.tools` and wrap the matching entries in your `{name: callable}` registry as LangChain `StructuredTool` instances ready for `bind_tools`. This is the first-class replacement for hand-rolled `resolve_tools` / `TOOL_REGISTRY` / `ALL_TOOLS` patterns — it handles async callables via `coroutine=` and uses the LD tool key as the `StructuredTool.name`, so `ToolNode` lookup works without extra mapping. | -| `get_ai_metrics_from_response` (top-level import) / `LangChainProvider.getAIMetricsFromResponse` (Node class method) | Extract token usage from a LangChain response. Pass as the extractor argument to `track_metrics_of` / `trackMetricsOf`. 
Both import forms are supported in Node; the top-level import is how Python exposes it. | +| `get_ai_metrics_from_response` (Python top-level import) / `getAIMetricsFromResponse` (Node, bare export) | Extract token usage from a LangChain response. Pass as the extractor argument to `track_metrics_of` / `trackMetricsOf`. | +| `LangChainRunnerFactory` (Node) | Managed-runner factory: `new LangChainRunnerFactory().createModel(aiConfig)` wires the chat model into a `ManagedModel` that handles tracking end-to-end (Tier 1). | ## `model.parameters` vs `model.custom` — the biggest gotcha @@ -100,7 +101,11 @@ return completion.content **Node:** ```typescript -import { LangChainProvider } from '@launchdarkly/server-sdk-ai-langchain'; +import { + createLangChainModel, + convertMessagesToLangChain, + getAIMetricsFromResponse, +} from '@launchdarkly/server-sdk-ai-langchain'; import { HumanMessage } from '@langchain/core/messages'; const aiConfig = await aiClient.completionConfig('my-config-key', context); @@ -108,22 +113,22 @@ if (!aiConfig.enabled) return null; // createLangChainModel picks the right chat class (ChatOpenAI, ChatAnthropic, …) // and forwards all variation parameters. -const llm = await LangChainProvider.createLangChainModel(aiConfig); +const llm = await createLangChainModel(aiConfig); -const messages = LangChainProvider.convertMessagesToLangChain(aiConfig.messages ?? []); +const messages = convertMessagesToLangChain(aiConfig.messages ?? []); messages.push(new HumanMessage(userPrompt)); -const tracker = aiConfig.createTracker!(); +const tracker = aiConfig.createTracker(); // Exceptions are tracked automatically — trackMetricsOf catches // exceptions, records tracker.trackError(), and re-throws. 
const completion = await tracker.trackMetricsOf( - LangChainProvider.getAIMetricsFromResponse, + getAIMetricsFromResponse, () => llm.invoke(messages), ); return completion.content; ``` -Both `create_langchain_model` and `LangChainProvider.createLangChainModel` raise at model-creation time if the matching LangChain provider integration is not installed. For example, if the variation's `provider.name` is `anthropic`, your environment needs `langchain-anthropic` (Python) or `@langchain/anthropic` (Node). The error surface is LangChain's, not LaunchDarkly's — install the missing integration and re-run. +Both `create_langchain_model` (Python) and `createLangChainModel` (Node) raise at model-creation time if the matching LangChain provider integration is not installed. For example, if the variation's `provider.name` is `anthropic`, your environment needs `langchain-anthropic` (Python) or `@langchain/anthropic` (Node). The error surface is LangChain's, not LaunchDarkly's — install the missing integration and re-run. ### Why not `init_chat_model` + a custom provider-name mapping helper? @@ -131,7 +136,7 @@ You will see examples in the wild that build the model by hand with `init_chat_m ## Tier 2 — LangGraph (agent workflows) -LangGraph's prebuilt agent takes a model, tools, and a system prompt. Build the model with `create_langchain_model` (Python) or `LangChainProvider.createLangChainModel` (Node) and pass it in. The tracker wraps the whole agent invocation; the extractor aggregates token usage across every message the agent produced, and tool-call telemetry is read off the result after the wrapped call returns. +LangGraph's prebuilt agent takes a model, tools, and a system prompt. Build the model with `create_langchain_model` (Python) or `createLangChainModel` (Node) and pass it in. 
The tracker wraps the whole agent invocation; the extractor aggregates token usage across every message the agent produced, and tool-call telemetry is read off the result after the wrapped call returns. > **API note (Python).** Use `from langchain.agents import create_agent`. The earlier `from langgraph.prebuilt import create_react_agent` is deprecated in LangGraph 1.0 and removed in 2.0 — same return shape; the only call-site rename is `prompt=` → `system_prompt=`. Node still uses `createReactAgent` from `@langchain/langgraph/prebuilt`. @@ -188,7 +193,10 @@ except Exception as e: **Node** — same pattern with `trackMetricsOf` + a custom aggregator: ```typescript -import { LangChainProvider } from '@launchdarkly/server-sdk-ai-langchain'; +import { + createLangChainModel, + getAIMetricsFromResponse, +} from '@launchdarkly/server-sdk-ai-langchain'; import type { LDAIMetrics } from '@launchdarkly/server-sdk-ai'; import { createReactAgent } from '@langchain/langgraph/prebuilt'; import { MemorySaver } from '@langchain/langgraph'; @@ -196,7 +204,7 @@ import { MemorySaver } from '@langchain/langgraph'; const agentConfig = await aiClient.agentConfig('my-agent-key', context); if (!agentConfig.enabled) return null; -const llm = await LangChainProvider.createLangChainModel(agentConfig); +const llm = await createLangChainModel(agentConfig); const checkpointer = new MemorySaver(); const agent = createReactAgent({ llm, @@ -209,19 +217,19 @@ const agent = createReactAgent({ const langgraphMetrics = (result: any): LDAIMetrics => { let input = 0, output = 0, total = 0; for (const message of result.messages ?? []) { - const m = LangChainProvider.getAIMetricsFromResponse(message); - if (m.usage) { - input += m.usage.input ?? 0; - output += m.usage.output ?? 0; - total += m.usage.total ?? 0; + const m = getAIMetricsFromResponse(message); + if (m.tokens) { + input += m.tokens.input ?? 0; + output += m.tokens.output ?? 0; + total += m.tokens.total ?? 
0; } } - return { success: true, usage: total > 0 ? { input, output, total } : undefined }; + return { success: true, tokens: total > 0 ? { input, output, total } : undefined }; }; // trackMetricsOf records duration + success/error itself; do not call // trackError after this — it would be a redundant second event. -const agentTracker = agentConfig.createTracker!(); +const agentTracker = agentConfig.createTracker(); const result = await agentTracker.trackMetricsOf( langgraphMetrics, () => agent.invoke( @@ -230,8 +238,7 @@ const result = await agentTracker.trackMetricsOf( ), ); -// Tool-call telemetry: walk the result messages. Once the JS SDK ships -// `LangChainProvider.getToolCallsFromResponse`, this collapses to one helper call. +// Tool-call telemetry: walk the result messages. for (const msg of result.messages ?? []) { for (const tc of (msg as any).tool_calls ?? []) { agentTracker.trackToolCall(tc.name); diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/openai-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/openai-tracking.md index b330fc9..017239f 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/openai-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/openai-tracking.md @@ -31,7 +31,7 @@ async def handle_turn(ai_client: LDAIClient, context: Context, user_input: str) return response.message.content ``` -**Node** — `TrackedChat` via `aiClient.initChat()`: +**Node** — `ManagedModel` via `aiClient.createModel()`: ```typescript import { init } from '@launchdarkly/node-server-sdk'; @@ -41,7 +41,7 @@ const ldClient = init(process.env.LD_SDK_KEY!); const aiClient = initAi(ldClient); async function handleTurn(context: LDContext, userInput: string): Promise { - const chat = await aiClient.initChat( + const model = await aiClient.createModel( 'customer-support-chat', context, { @@ -51,13 +51,13 @@ async function handleTurn(context: LDContext, userInput: string): Promise client.chat.completions.create({ model: 
aiConfig.model!.name, messages: [ @@ -135,9 +135,9 @@ return response.choices[0].message.content; **Error handling.** `trackMetricsOf` catches exceptions internally, records `trackError()` on the tracker, and re-throws — so you do **not** need a try/catch block that calls `trackError()` yourself. Call the wrapper directly; if the caller wants to log or handle the exception, do that in addition to (not instead of) letting it propagate: ```typescript -const tracker = aiConfig.createTracker!(); +const tracker = aiConfig.createTracker(); const response = await tracker.trackMetricsOf( - OpenAIProvider.getAIMetricsFromResponse, + getAIMetricsFromResponse, () => client.chat.completions.create({ /* ... */ }), ); return response.choices[0].message.content; @@ -169,7 +169,3 @@ response = tracker.track_metrics_of(call_openai, my_openai_extractor) ## Tier 4 — Manual (streaming only) For OpenAI streaming calls you need manual tracking because the current provider packages don't capture TTFT. See [streaming-tracking.md](streaming-tracking.md) for the full pattern. The short version: the helper that looks like it should work (`trackStreamMetricsOf` in Node) captures tokens from stream chunks but does not record TTFT, so you still need a manual `trackTimeToFirstToken` call on the first content chunk. - -## Legacy: `track_openai_metrics` / `trackOpenAIMetrics` - -You may see existing code that calls `config.tracker.track_openai_metrics(lambda: openai.chat.completions.create(...))` or the Node equivalent. (Note: the `config.tracker` property itself was removed in Python v0.18.0 / Node v0.17.0 in favor of the `create_tracker()` / `createTracker()` factory — that code is pre-0.17/0.18 and will need updating regardless.) These helpers still work but are no longer the recommended pattern — they predate the provider packages and the generic `trackMetricsOf` + `getAIMetricsFromResponse` composition. 
**Do not introduce them in new code.** If you're migrating an existing codebase, leave them in place unless the user has specifically asked for a cleanup pass — the migration from the legacy helper to the new pattern is mechanical but not free. diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/strands-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/strands-tracking.md index 0d99a11..4a99634 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/strands-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/strands-tracking.md @@ -8,7 +8,7 @@ The key names inside `accumulated_usage` are camelCase even in Python: `inputTok ## Tier 1 is not available -`ManagedModel` / `TrackedChat` do not currently ship a Strands runner. Strands owns its own agent loop and short-term memory (`SlidingWindowConversationManager`), so wrapping it in a LaunchDarkly managed runner would fight against the framework. Stay on Tier 3. +`ManagedModel` does not currently ship a Strands runner. Strands owns its own agent loop and short-term memory (`SlidingWindowConversationManager`), so wrapping it in a LaunchDarkly managed runner would fight against the framework. Stay on Tier 3. 
## Tier 3 — Explicit `track_duration_of` + manual `track_tokens` (primary) diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/streaming-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/streaming-tracking.md index 38e2c87..b026c8c 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/streaming-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/streaming-tracking.md @@ -106,7 +106,7 @@ async function callStreamingWithTracking( ): Promise { if (!aiConfig.enabled) return null; - const tracker = aiConfig.createTracker!(); + const tracker = aiConfig.createTracker(); const startTime = Date.now(); let firstTokenTime: number | null = null; @@ -162,14 +162,14 @@ async function callStreamingWithTracking( If the app doesn't need TTFT, the Node SDK has a built-in streaming wrapper that handles tokens + success/error + duration: ```typescript -const tracker = aiConfig.createTracker!(); +const tracker = aiConfig.createTracker(); const response = await tracker.trackStreamMetricsOf( (chunks) => { // Extract usage from the final chunk const final = chunks[chunks.length - 1]; return { success: true, - usage: { + tokens: { total: final.usage?.total_tokens ?? 0, input: final.usage?.prompt_tokens ?? 0, output: final.usage?.completion_tokens ?? 
0, diff --git a/skills/ai-configs/aiconfig-migrate/SKILL.md b/skills/ai-configs/aiconfig-migrate/SKILL.md index 2577957..f714148 100644 --- a/skills/ai-configs/aiconfig-migrate/SKILL.md +++ b/skills/ai-configs/aiconfig-migrate/SKILL.md @@ -31,7 +31,7 @@ The skill is optimized for Python and Node.js / TypeScript; other languages are | Shape | Python | Node.js | Reference | |-------|--------|---------|-----------| | One-shot completion (direct OpenAI / Anthropic / Bedrock / Gemini call) | ✅ Worked example | ✅ Worked example | [before-after-examples.md](references/before-after-examples.md), per-provider docs in `aiconfig-ai-metrics/references/` | -| Chat loop via managed runner (`ManagedModel` / `TrackedChat`) | ✅ Tier 1 pattern | ✅ Tier 1 pattern | [aiconfig-ai-metrics SKILL.md](../aiconfig-ai-metrics/SKILL.md) | +| Chat loop via managed runner (`ManagedModel`) | ✅ Tier 1 pattern | ✅ Tier 1 pattern | [aiconfig-ai-metrics SKILL.md](../aiconfig-ai-metrics/SKILL.md) | | LangChain single-call | ✅ Worked example | ✅ Worked example | [langchain-tracking.md](../aiconfig-ai-metrics/references/langchain-tracking.md) | | LangGraph prebuilt agent (Python `langchain.agents.create_agent`, Node `createReactAgent`) | ✅ Worked example | ✅ Worked example | [agent-mode-frameworks.md § LangGraph](references/agent-mode-frameworks.md) | | LangGraph custom `StateGraph` with run-scoped tracker (setup_run + call_model + finalize) | ✅ Deep worked example | ⚠️ Mentioned — translate from Python | [agent-mode-frameworks.md § Custom `StateGraph`](references/agent-mode-frameworks.md) | @@ -146,7 +146,7 @@ This is the first stage that writes code. It has nine sub-steps. 2. 
**Install the AI SDK.** Detect the package manager from Step 1, then install: - Python: `launchdarkly-server-sdk` + `launchdarkly-server-sdk-ai>=0.18.0` - - Node.js/TypeScript: `@launchdarkly/node-server-sdk` + `@launchdarkly/server-sdk-ai@^0.17.0` + - Node.js/TypeScript: `@launchdarkly/node-server-sdk` + `@launchdarkly/server-sdk-ai@^0.20.0` - Go: `github.com/launchdarkly/go-server-sdk/v7` + `github.com/launchdarkly/go-server-sdk/ldai` Tier-2 provider packages (install in Stage 4, only if you're using the matching provider): @@ -347,15 +347,15 @@ Delegate: **`aiconfig-ai-metrics`** wires the per-request `tracker.track_*` call Hand off: print the AI Config key, variation key, provider, and whether the call is streaming, then tell the user: *"Run `/aiconfig-ai-metrics` with these inputs, then come back here."* Do not auto-invoke. Return here for sub-step 5 (verify) once they're done. -1. **Create the tracker.** Obtain a per-execution tracker via the factory on the config returned in Stage 2: `tracker = config.create_tracker()` (Python v0.18.0+) or `const tracker = aiConfig.createTracker!();` (Node v0.17.0+). Call the factory **once per user turn** and reuse the returned `tracker` for every tracking call in that turn — each call mints a fresh `runId` that tags every event emitted from the turn so they can be correlated via exported events or downstream queries. (The Monitoring tab aggregates today; run-level grouping is a downstream concern — but the `runId` is also what the SDK's at-most-once guards are keyed on, so minting a new one mid-turn breaks the guard semantics regardless of where the events end up.) +1. **Create the tracker.** Obtain a per-execution tracker via the factory on the config returned in Stage 2: `tracker = config.create_tracker()` (Python) or `const tracker = aiConfig.createTracker();` (Node). 
Call the factory **once per user turn** and reuse the returned `tracker` for every tracking call in that turn — each call mints a fresh `runId` that tags every event emitted from the turn so they can be correlated via exported events or downstream queries. (The Monitoring tab aggregates today; run-level grouping is a downstream concern — but the `runId` is also what the SDK's at-most-once guards are keyed on, so minting a new one mid-turn breaks the guard semantics regardless of where the events end up.) **Where to call the factory depends on the call shape:** - **Completion mode / one-shot provider call:** mint the tracker right after `completion_config(...)` returns, in the same function that handles the request. - - **Agent mode with a ReAct loop (LangGraph, LangChain, custom):** mint the tracker in a dedicated `setup_run` entry node that executes **once** before the loop, stash it on graph state, and read it from state in `call_model` / tool handlers / a terminal `finalize` node. Emitting `track_duration` / `track_tokens` / `track_success` inside the loop body will trip the at-most-once guards in v0.18.0+. See [agent-mode-frameworks.md § Custom `StateGraph` (run-scoped architecture)](references/agent-mode-frameworks.md) for the full `setup_run` + `call_model` + `finalize` pattern. - - **Managed runner (Tier 1):** skip this step entirely. `ManagedModel` / `TrackedChat` mint the tracker internally per `invoke()`. Move to sub-step 4 if that's what the app uses. + - **Agent mode with a ReAct loop (LangGraph, LangChain, custom):** mint the tracker in a dedicated `setup_run` entry node that executes **once** before the loop, stash it on graph state, and read it from state in `call_model` / tool handlers / a terminal `finalize` node. Emitting `track_duration` / `track_tokens` / `track_success` inside the loop body will trip the at-most-once guards. 
See [agent-mode-frameworks.md § Custom `StateGraph` (run-scoped architecture)](references/agent-mode-frameworks.md) for the full `setup_run` + `call_model` + `finalize` pattern. + - **Managed runner (Tier 1):** skip this step entirely. `ManagedModel` mints the tracker internally per `run()` / `invoke()`. Move to sub-step 4 if that's what the app uses. -2. **Pick a tier from the four-tier ladder.** See [sdk-ai-tracker-patterns.md § Tier decision table](references/sdk-ai-tracker-patterns.md) for the full table (chat loop → Tier 1; provider-package call → Tier 2; custom extractor → Tier 3; streaming/manual → Tier 4). **Do not introduce the legacy helpers** (`track_openai_metrics`, `track_bedrock_converse_metrics`, `trackVercelAISDKGenerateTextMetrics`) in new code — use `trackMetricsOf` + a provider-package extractor instead. +2. **Pick a tier from the four-tier ladder.** See [sdk-ai-tracker-patterns.md § Tier decision table](references/sdk-ai-tracker-patterns.md) for the full table (chat loop → Tier 1; provider-package call → Tier 2; custom extractor → Tier 3; streaming/manual → Tier 4). 3. **Wire the chosen tier.** The delegate skill has full Python + Node examples for each tier plus per-provider files. A condensed Tier 2/3 example for reference — OpenAI via the provider package: @@ -386,13 +386,13 @@ Hand off: print the AI Config key, variation key, provider, and whether the call **Node:** ```typescript - import { OpenAIProvider } from '@launchdarkly/server-sdk-ai-openai'; + import { getAIMetricsFromResponse } from '@launchdarkly/server-sdk-ai-openai'; - const tracker = aiConfig.createTracker!(); + const tracker = aiConfig.createTracker(); // Exceptions are tracked automatically — trackMetricsOf catches // exceptions, records tracker.trackError(), and re-throws. 
const response = await tracker.trackMetricsOf( - OpenAIProvider.getAIMetricsFromResponse, + getAIMetricsFromResponse, () => openaiClient.chat.completions.create({ model: aiConfig.model!.name, messages: [...aiConfig.messages, { role: 'user', content: userPrompt }], @@ -400,7 +400,7 @@ Hand off: print the AI Config key, variation key, provider, and whether the call ); ``` - For Anthropic direct, Bedrock (no provider package), Gemini, and custom HTTP, write a small extractor returning `LDAIMetrics` — see the delegate skill's [anthropic-tracking.md](../aiconfig-ai-metrics/references/anthropic-tracking.md), [bedrock-tracking.md](../aiconfig-ai-metrics/references/bedrock-tracking.md), and [gemini-tracking.md](../aiconfig-ai-metrics/references/gemini-tracking.md). LangChain single-node and LangGraph go through the `launchdarkly-server-sdk-ai-langchain` / `@launchdarkly/server-sdk-ai-langchain` provider package. Build the model with `create_langchain_model(config)` / `LangChainProvider.createLangChainModel(config)` (forwards all variation parameters) and track with `get_ai_metrics_from_response` / `LangChainProvider.getAIMetricsFromResponse`. See [langchain-tracking.md](../aiconfig-ai-metrics/references/langchain-tracking.md). + For Anthropic direct, Bedrock (no provider package), Gemini, and custom HTTP, write a small extractor returning `LDAIMetrics` — see the delegate skill's [anthropic-tracking.md](../aiconfig-ai-metrics/references/anthropic-tracking.md), [bedrock-tracking.md](../aiconfig-ai-metrics/references/bedrock-tracking.md), and [gemini-tracking.md](../aiconfig-ai-metrics/references/gemini-tracking.md). LangChain single-node and LangGraph go through the `launchdarkly-server-sdk-ai-langchain` / `@launchdarkly/server-sdk-ai-langchain` provider package. 
Build the model with `create_langchain_model(config)` (Python) / `createLangChainModel(config)` (Node) — both forward all variation parameters — and track with `get_ai_metrics_from_response` / `getAIMetricsFromResponse`. See [langchain-tracking.md](../aiconfig-ai-metrics/references/langchain-tracking.md). 4. **Wire feedback tracking if the app has thumbs-up/down UI.** Both SDKs expose `trackFeedback` with a `{kind}` argument. @@ -449,7 +449,7 @@ Hand off: print the AI Config key, variation key, provider, and whether the call The delegate handles creating custom judge AI Configs, attaching them via the variation PATCH endpoint, and setting fallthrough on each judge config. Offline eval does **not** go through this delegate — it's a Playground workflow, not an API write. -4. **For programmatic direct-judge: wire `create_judge` + `evaluate` + `track_judge_result`.** This is the only path at Stage 5 that writes code. The correct shape (Python v0.18.0+): +4. **For programmatic direct-judge: wire `create_judge` + `evaluate` + `track_judge_result`.** This is the only path at Stage 5 that writes code. The Python shape: ```python from ldai.client import AIJudgeConfigDefault @@ -474,7 +474,7 @@ Hand off: print the AI Config key, variation key, provider, and whether the call - **`create_judge` returns `Optional[Judge]`.** Always guard with `if judge and judge.enabled:` — it returns `None` if the judge AI Config is disabled for the context or the provider is missing. A direct `.evaluate()` on a `None` return will raise `AttributeError`. - **Pass `AIJudgeConfigDefault`**, not `AICompletionConfigDefault`. The `create_judge` `default` parameter is typed `Optional[AIJudgeConfigDefault]`; passing the completion type will not type-check and is a doc-level bug in some older examples. - **`sampling_rate` is a parameter on `evaluate()`**, not on `create_judge`. It defaults to `1.0` (evaluate every call). For live paths, pass something lower (0.1–0.25) to control cost. 
- - **`evaluate()` always returns a `JudgeResult` in v0.18.0+** (never `None`). Check `result.sampled` to know whether the evaluation actually ran, and call `track_judge_result(result)` — the consolidated method replaces the earlier `track_eval_scores` / `track_judge_response` pair. Node uses `trackJudgeResult(result)` and `LDJudgeResult` with the same `sampled` field. + - **`evaluate()` returns a `JudgeResult`** (never `None`). Check `result.sampled` to know whether the evaluation actually ran, and call `track_judge_result(result)`. Node uses `trackJudgeResult(result)` and `LDJudgeResult` with the same `sampled` field. **Ask the user which judge AI Config key to use.** LaunchDarkly ships three built-in judges — Accuracy, Relevance, Toxicity — but the actual AI Config **keys** for the built-ins are not canonical SDK constants and aren't documented. Have the user open **AI Configs > Library** in the LD UI and copy the key of the judge they want to reference, or create a custom judge AI Config via `aiconfig-create` first. @@ -490,7 +490,7 @@ Delegate: **`aiconfig-online-evals`** (sub-step 3, optional — only for UI-atta | Situation | Action | |-----------|--------| | App already initializes `LDClient` for feature flags | Reuse it — pass the existing client to `LDAIClient()` / `initAi()`, do not create a second client | -| App uses LangChain `ChatOpenAI(model=...)` | Replace the hand-rolled model construction with `create_langchain_model(config)` (Python) or `LangChainProvider.createLangChainModel(config)` (Node). Do not read `config.model.name` and pass it to `ChatOpenAI(model=...)` by hand — that pattern drops every variation parameter except the ones you explicitly name | +| App uses LangChain `ChatOpenAI(model=...)` | Replace the hand-rolled model construction with `create_langchain_model(config)` (Python) or `createLangChainModel(config)` (Node). 
Do not read `config.model.name` and pass it to `ChatOpenAI(model=...)` by hand — that pattern drops every variation parameter except the ones you explicitly name | | Retry wrapper around the provider call | The tracker is minted once at the top of the user turn; the retry loop is inside that scope. Every retry attempt shares the same `runId`. Tracker calls (`track_duration` / `track_tokens` / `track_success` / `track_error`) live *outside* the retry body — one call at the end of the turn, on the success path or the final-failure path | | App has no tools — Stage 3 skipped | Move directly from Stage 2 verification to Stage 4 (tracking) | | Mode mismatch: user said agent, audit shows one-shot chat | Choose completion mode unless the app uses a LangGraph prebuilt agent (`langchain.agents.create_agent` in Python or `createReactAgent` in Node), CrewAI `Agent`, Strands `Agent`, or a similar goal-driven framework | @@ -498,7 +498,7 @@ Delegate: **`aiconfig-online-evals`** (sub-step 3, optional — only for UI-atta | Strands app on TypeScript | TS SDK ships `BedrockModel` and `OpenAIModel` only — cannot serve Anthropic-backed variations. Use the Python SDK if multi-provider variations are required | | TypeScript app using Anthropic SDK | No `trackAnthropicMetrics` helper exists. Use Tier 3: `trackMetricsOf` with a small custom extractor that reads `response.usage.input_tokens` / `response.usage.output_tokens` and returns `LDAIMetrics`. See [anthropic-tracking.md](../aiconfig-ai-metrics/references/anthropic-tracking.md) in the `aiconfig-ai-metrics` skill for the exact extractor | | Fallback would silently crash because `LD_SDK_KEY` is missing | Log a startup warning; proceed with the fallback. Never raise at import time | -| Multi-agent graph (supervisor + workers) | Stop after migrating a single agent. 
Agent Graph Definitions landed in **both** SDKs — Python via `launchdarkly-server-sdk-ai.agent_graph` and Node via the graph API added in `@launchdarkly/server-sdk-ai` v0.17.0. Read [agent-graph-reference.md](references/agent-graph-reference.md) for the graph-level migration path — it is deliberately out of this skill's main scope | +| Multi-agent graph (supervisor + workers) | Stop after migrating a single agent. Agent Graph Definitions are available in **both** SDKs — Python via `launchdarkly-server-sdk-ai.agent_graph` and Node via the graph API in `@launchdarkly/server-sdk-ai`. Read [agent-graph-reference.md](references/agent-graph-reference.md) for the graph-level migration path — it is deliberately out of this skill's main scope | | Single-agent (ReAct, tool loop) + agent mode | Default to offline eval via the LD Playground + Datasets for Stage 5. UI-attached judges are completion-only today, and programmatic direct-judge adds per-call cost that is usually not worth it until after the migration is live and stable. Point at the [Offline Evals guide](https://docs.launchdarkly.com/guides/ai-configs/offline-evaluations) | | Tool with a Pydantic `args_schema` (LangChain `@tool`) | Extract the schema via `tool.args_schema.model_json_schema()`; do not hand-write the JSON schema for the delegate | | Custom `StateGraph` with module-level `TOOLS` list bound via `.bind_tools(TOOLS)` and run through `ToolNode(TOOLS)` (e.g. the `langchain-ai/react-agent` template) | Find the `TOOLS` list (usually in a separate `tools.py` module). Extract schemas the same way. Swap **both** call sites — `.bind_tools(...)` and `ToolNode(...)` — to read from the same `config.tools`-derived list | @@ -515,7 +515,7 @@ These are ordered by how likely they are to show up as a first-run failure. The - **Don't call `agent_config()` / `completion_config()` more than once per user turn.** Each call is a flag evaluation and emits a `$ld:ai:agent:config` event. 
Re-fetching inside a loop step or a tool body inflates agent-config counts on the Monitoring tab and lets a mid-turn targeting change swap the variation between LLM calls in a single turn. Resolve once at the top, stash on state, and have every subsequent consumer read from state. Tools that need variation-scoped knobs should use the tool-factory pattern (`make_search(ai_config)` that closes over the knob at setup time) — see [agent-mode-frameworks.md § Getting knobs into tools](references/agent-mode-frameworks.md). - Don't cache the config object *across* requests — resolve once per turn, yes, but still resolve once per turn. Caching at module scope defeats the targeting-change mechanism entirely. - Don't delete the fallback once LaunchDarkly is wired up. It is required for the `enabled=False` and SDK-unreachable paths. -- Don't tuple-unpack the return of `completion_config` / `agent_config` / `completionConfig` / `agentConfig`. They return a **single** config object (e.g. `AIAgentConfig`, `AICompletionConfig`), not `(config, tracker)`. Obtain the tracker by calling `config.create_tracker()` / `aiConfig.createTracker!()`. LLMs hallucinate both the tuple shape and the earlier `config.tracker` property — the current API (Python v0.18.0+, Node v0.17.0+) is a factory. +- Don't tuple-unpack the return of `completion_config` / `agent_config` / `completionConfig` / `agentConfig`. They return a **single** config object (e.g. `AIAgentConfig`, `AICompletionConfig`), not `(config, tracker)`. Obtain the tracker by calling `config.create_tracker()` / `aiConfig.createTracker()`. LLMs hallucinate both the tuple shape and a `config.tracker` property — the actual API is a factory. ### LangChain / LangGraph patterns (second most common failure mode) @@ -546,7 +546,7 @@ These are ordered by how likely they are to show up as a first-run failure. The - Don't use `launchdarkly-metric-instrument` for Stage 4 (tracking). 
That skill is for `ldClient.track()` feature metrics, not AI `tracker.track_*` calls — they are different APIs. - Don't use `track_request()` in Python — it does not exist in `launchdarkly-server-sdk-ai`. Use `track_metrics_of` with a provider-package or custom extractor, or drop to explicit `track_duration` + `track_tokens` + `track_success` / `track_error` if you're on the streaming path. -- Don't pass `graph_key=...` to `tracker.track_*()` methods in Python — that keyword argument was removed in v0.18.0. Trackers obtained inside a graph traversal are automatically configured with the correct graph key. +- Don't pass `graph_key=...` to `tracker.track_*()` methods in Python — it is not an accepted argument. Trackers obtained inside a graph traversal are automatically configured with the correct graph key. ## Related Skills diff --git a/skills/ai-configs/aiconfig-migrate/references/agent-mode-frameworks.md b/skills/ai-configs/aiconfig-migrate/references/agent-mode-frameworks.md index 6843975..b986441 100644 --- a/skills/ai-configs/aiconfig-migrate/references/agent-mode-frameworks.md +++ b/skills/ai-configs/aiconfig-migrate/references/agent-mode-frameworks.md @@ -18,14 +18,14 @@ Agent mode returns an `instructions` string. Completion mode returns a `messages **Caveat:** judges cannot be attached to agent-mode variations via the LaunchDarkly UI. Agent mode evaluations must go through the programmatic judge API (`create_judge(...).evaluate(input, output)`). See `aiconfig-online-evals` for the programmatic path. -**Model construction for LangChain / LangGraph.** When the framework runs on top of LangChain (which includes LangGraph's prebuilt agent and most custom graphs), build the chat model with `create_langchain_model(ai_config)` (Python) or `LangChainProvider.createLangChainModel(aiConfig)` (Node). These helpers forward every variation parameter (`temperature`, `max_tokens`, `top_p`, …) and handle LaunchDarkly→LangChain provider-name mapping internally. 
Do not hand-roll `init_chat_model(model=..., model_provider=...)` — it silently drops every variation parameter. See [langchain-tracking.md](../../aiconfig-ai-metrics/references/langchain-tracking.md) for the canonical single-model and LangGraph patterns, including the SDK helpers `sum_token_usage_from_messages` / `get_tool_calls_from_response` (Python, `ldai_langchain`) used inside the `track_metrics_of_async` / `trackMetricsOf` extractor. +**Model construction for LangChain / LangGraph.** When the framework runs on top of LangChain (which includes LangGraph's prebuilt agent and most custom graphs), build the chat model with `create_langchain_model(ai_config)` (Python) or `createLangChainModel(aiConfig)` (Node). These helpers forward every variation parameter (`temperature`, `max_tokens`, `top_p`, …) and handle LaunchDarkly→LangChain provider-name mapping internally. Do not hand-roll `init_chat_model(model=..., model_provider=...)` — it silently drops every variation parameter. See [langchain-tracking.md](../../aiconfig-ai-metrics/references/langchain-tracking.md) for the canonical single-model and LangGraph patterns, including the SDK helpers `sum_token_usage_from_messages` / `get_tool_calls_from_response` (Python, `ldai_langchain`) used inside the `track_metrics_of_async` / `trackMetricsOf` extractor. ## Framework-agnostic invariants for the run-scoped pattern The concrete examples below use specific frameworks (LangGraph, CrewAI, Strands) and specific node names (`setup_run`, `call_model`, `finalize`). Treat those as incidentals. The three invariants below apply to **any** agent framework — DSPy, AutoGen, Pydantic AI, Haystack, LlamaIndex agents, or a hand-rolled tool loop in pure Python/TypeScript. If the framework has its own idioms, translate these three rules onto them: 1. **Resolve `agent_config()` / `agentConfig()` once per user turn.** Every call is a flag evaluation and emits a `$ld:ai:agent:config` event. 
Re-fetching inside a loop step or a tool body amplifies the event count per turn and lets a mid-turn targeting change swap the variation between LLM calls. Do the resolve at the highest scope that corresponds to "one user-input-to-final-response cycle" — a handler function, a LangGraph entry node, a CrewAI `kickoff`, whatever the framework exposes. -2. **Mint one tracker via `create_tracker()` / `createTracker!()` per user turn.** Same scope as the `agent_config` call. The `runId` ties every event from one turn together; per-step factory calls fragment the correlation and reset the SDK's at-most-once guards. If the framework has a multi-turn session (chat thread), each turn inside the session still gets its own fresh tracker — sessions share a `thread_id`, not a `runId`. +2. **Mint one tracker via `create_tracker()` / `createTracker()` per user turn.** Same scope as the `agent_config` call. The `runId` ties every event from one turn together; per-step factory calls fragment the correlation and reset the SDK's at-most-once guards. If the framework has a multi-turn session (chat thread), each turn inside the session still gets its own fresh tracker — sessions share a `thread_id`, not a `runId`. 3. **Emit the five at-most-once methods once at the end of the turn.** `track_duration` / `track_tokens` / `track_success` / `track_error` / `track_time_to_first_token` each fire at most once per tracker. Accumulate inside the loop body (sum token usage across steps, stash a `perf_counter_ns` timer up top), emit once after the loop exits or in a dedicated finalize node. `track_tool_calls` / `track_feedback` / `track_judge_result` are per-event — call them as many times as the agent does those things. 
What "one user turn" means differs by app shape: @@ -51,7 +51,7 @@ If the Stage 1 audit identified configuration that isn't a native model paramete - `enable_reranking`, `use_cache`, any boolean feature toggle the agent consumes - any value that governs **tool behavior** or **app behavior** rather than **model behavior** -`create_langchain_model` / `LangChainProvider.createLangChainModel` forwards every key in `parameters` wholesale to the provider SDK. Anthropic, OpenAI, and Gemini all raise on unknown kwargs — a `max_search_results` entry in `parameters` crashes the request with `AsyncMessages.create() got an unexpected keyword argument 'max_search_results'`. Put the same field in `custom` and the helper leaves it alone; the app reads it where it's needed. +`create_langchain_model` (Python) / `createLangChainModel` (Node) forwards every key in `parameters` wholesale to the provider SDK. Anthropic, OpenAI, and Gemini all raise on unknown kwargs — a `max_search_results` entry in `parameters` crashes the request with `AsyncMessages.create() got an unexpected keyword argument 'max_search_results'`. Put the same field in `custom` and the helper leaves it alone; the app reads it where it's needed. ```python # Fallback: mirror the hardcoded knob shape using custom @@ -337,13 +337,13 @@ builder.add_edge("tools", "call_model") graph = builder.compile() ``` -After — **run-scoped** architecture. The critical shape for the v0.18.0+ tracker factory is that **one user turn = one `runId` = one tracker**, not one LLM call = one tracker. A ReAct loop that calls `call_model` three times in a single turn must not mint three trackers, or billing and the Monitoring tab will treat the turn as three separate executions. The fix is to resolve the AI Config and mint the tracker once, in a dedicated entry node, and thread both through graph state for every subsequent node. +After — **run-scoped** architecture. 
The critical shape for the tracker factory is that **one user turn = one `runId` = one tracker**, not one LLM call = one tracker. A ReAct loop that calls `call_model` three times in a single turn must not mint three trackers, or billing and the Monitoring tab will treat the turn as three separate executions. The fix is to resolve the AI Config and mint the tracker once, in a dedicated entry node, and thread both through graph state for every subsequent node. Three nodes, in order: 1. **`setup_run`** (entry) — resolves `agent_config`, mints the tracker with `create_tracker()`, builds `model` with `create_langchain_model(ai_config)`, builds tools via the factory pattern below, starts a `perf_counter_ns()` timer, and stashes all of it on `State`. Runs exactly once per turn. 2. **`call_model`** — reads model / tools / tracker / accumulator from `State`, runs `model.ainvoke(...)`, accumulates token usage, calls `tracker.track_tool_calls([...])` per step. **Does not** call `track_metrics_of_async` here — that wrapper records duration + success on every invocation and would fire once per iteration. On exception: call `tracker.track_duration` + `tracker.track_error` and re-raise (the finalize node will not run). -3. **`finalize`** (terminal) — runs exactly once at the end of the turn on the success path. Calls `tracker.track_duration(elapsed_ms)` + `tracker.track_tokens(accumulated)` + `tracker.track_success()`. Each of these now fires exactly once per run, which is what the at-most-once guards in 0.18.0 enforce. +3. **`finalize`** (terminal) — runs exactly once at the end of the turn on the success path. Calls `tracker.track_duration(elapsed_ms)` + `tracker.track_tokens(accumulated)` + `tracker.track_success()`. Each of these fires exactly once per run, which is what the at-most-once guards enforce. 
```python # tools.py — tool factories close over per-run config, so tools never re-fetch @@ -572,7 +572,7 @@ For apps built on `@langchain/langgraph`'s prebuilt `createReactAgent`, the loop ```typescript import { init } from '@launchdarkly/node-server-sdk'; import { initAi, type LDAIAgentConfig, type LDAIMetrics } from '@launchdarkly/server-sdk-ai'; -import { LangChainProvider } from '@launchdarkly/server-sdk-ai-langchain'; +import { createLangChainModel } from '@launchdarkly/server-sdk-ai-langchain'; import { createReactAgent } from '@langchain/langgraph/prebuilt'; import { MemorySaver } from '@langchain/langgraph'; @@ -593,7 +593,7 @@ function langgraphMetrics(result: any): LDAIMetrics { total += usage.total_tokens ?? usage.totalTokens ?? 0; } if (total === 0) total = input + output; - return { success: true, usage: total > 0 ? { input, output, total } : undefined }; + return { success: true, tokens: total > 0 ? { input, output, total } : undefined }; } async function runTurn(userInput: string, threadId: string): Promise { @@ -607,7 +607,7 @@ async function runTurn(userInput: string, threadId: string): Promise(extractor, func)` | `await tracker.trackMetricsOf((result) => extractor(result), async () => ...)` | **2 / 3** | **Canonical generic wrapper.** `extractor` maps provider response → `LDAIMetrics`. Use a provider package's `Provider.getAIMetricsFromResponse` for Tier 2 (`@launchdarkly/server-sdk-ai-openai`, `-langchain`, `-vercel`) or a small custom function for Tier 3. | | `trackStreamMetricsOf(extractor, streamCreator)` | `tracker.trackStreamMetricsOf(async (chunks) => extractor(chunks), () => createStream())` | 2 / 3 | Stream variant. Does **not** capture TTFT automatically — if you need TTFT, use the manual pattern in [streaming-tracking.md](../../aiconfig-ai-metrics/references/streaming-tracking.md). | | `trackDurationOf(func)` | `await tracker.trackDurationOf(async () => ...)` | 4 | Wraps an async callable; captures duration only. 
Pair with explicit `trackTokens` + `trackSuccess`. | -| `trackOpenAIMetrics(func)` | `await tracker.trackOpenAIMetrics(async () => openai.chat.completions.create(...))` | **[legacy]** | Predates `@launchdarkly/server-sdk-ai-openai`. Replace with `trackMetricsOf(OpenAIProvider.getAIMetricsFromResponse, () => ...)`. | -| `trackBedrockConverseMetrics(res)` | `tracker.trackBedrockConverseMetrics(bedrockResponse)` | **[legacy]** | Replace with a Converse extractor passed to `trackMetricsOf`. | -| `trackVercelAISDKGenerateTextMetrics(func)` | `await tracker.trackVercelAISDKGenerateTextMetrics(async () => generateText({...}))` | **[legacy]** | Replace with `trackMetricsOf` + `VercelAISDKProvider.getAIMetricsFromResponse` from `@launchdarkly/server-sdk-ai-vercel`. | - -Example — OpenAI via `trackMetricsOf` + the provider package (current pattern): +Example — OpenAI via `trackMetricsOf` + the provider package: ```typescript -import { OpenAIProvider } from '@launchdarkly/server-sdk-ai-openai'; +import { getAIMetricsFromResponse } from '@launchdarkly/server-sdk-ai-openai'; -const tracker = aiConfig.createTracker!(); +const tracker = aiConfig.createTracker(); const response = await tracker.trackMetricsOf( - OpenAIProvider.getAIMetricsFromResponse, + getAIMetricsFromResponse, () => openai.chat.completions.create({ model: aiConfig.model?.name ?? 'gpt-4o', messages: [...(aiConfig.messages ?? 
[]), { role: 'user', content: userPrompt }], @@ -325,12 +315,15 @@ const response = await tracker.trackMetricsOf( Example — LangChain via `trackMetricsOf` (works for any model LangChain wraps, including Anthropic and Bedrock): ```typescript -import { LangChainProvider } from '@launchdarkly/server-sdk-ai-langchain'; +import { + createLangChainModel, + getAIMetricsFromResponse, +} from '@launchdarkly/server-sdk-ai-langchain'; -const llm = await LangChainProvider.createLangChainModel(aiConfig); -const tracker = aiConfig.createTracker!(); +const llm = await createLangChainModel(aiConfig); +const tracker = aiConfig.createTracker(); const response = await tracker.trackMetricsOf( - LangChainProvider.getAIMetricsFromResponse, + getAIMetricsFromResponse, () => llm.invoke(messages), ); ``` @@ -340,7 +333,7 @@ const response = await tracker.trackMetricsOf( For chat-loop applications, both SDKs expose a higher-level API that handles tracking end-to-end with no tracker calls at all: - Python: `ai_client.create_model(...)` → `ManagedModel`, then `await model.invoke(user_input)` -- Node: `aiClient.initChat(...)` / `aiClient.createChat(...)` → `TrackedChat`, then `await chat.invoke(userInput)` +- Node: `aiClient.createModel(...)` → `ManagedModel`, then `await model.run(userInput)` The managed runner handles message history, provider dispatch (via the installed provider package — OpenAI, LangChain, Vercel), and tracker wiring. The runner creates its own tracker internally via the factory — you do **not** pass a tracker in. If the migration target is conversational, this is the right tier and you don't need anything from the tables above. 
@@ -352,9 +345,9 @@ Neither `@launchdarkly/server-sdk-ai-anthropic` nor `launchdarkly-server-sdk-ai- | Situation | Tier | Pattern | |-----------|------|---------| -| Chat loop (history, turn-based), any provider with a package | **1** | `ManagedModel` / `TrackedChat` / `initChat` — no tracker calls | -| OpenAI direct SDK, non-chat shape | **2** | `trackMetricsOf(OpenAIProvider.getAIMetricsFromResponse, fn)` | -| LangChain / LangGraph (any underlying model), non-chat shape | **2** | `trackMetricsOf(LangChainProvider.getAIMetricsFromResponse, fn)` | +| Chat loop (history, turn-based), any provider with a package | **1** | `ManagedModel` / `createModel` — no tracker calls | +| OpenAI direct SDK, non-chat shape | **2** | `trackMetricsOf(getAIMetricsFromResponse, fn)` (extractor from `@launchdarkly/server-sdk-ai-openai`) | +| LangChain / LangGraph (any underlying model), non-chat shape | **2** | `trackMetricsOf(getAIMetricsFromResponse, fn)` (extractor from `@launchdarkly/server-sdk-ai-langchain`) | | Vercel AI SDK, non-chat shape (Node only) | **2** | `trackMetricsOf` with the Vercel provider package's extractor | | Anthropic direct SDK | **3** | Custom extractor reading `response.usage.input_tokens` / `output_tokens` | | Bedrock Converse (no provider package) | **3** | Custom extractor reading `response.usage.inputTokens` / `outputTokens` (or route via LangChain for Tier 2) | @@ -391,7 +384,7 @@ tracker.track_success() **Node — use `trackStreamMetricsOf`:** ```typescript -const tracker = aiConfig.createTracker!(); +const tracker = aiConfig.createTracker(); const stream = await tracker.trackStreamMetricsOf( () => openai.chat.completions.create({ stream: true, /* ... 
*/ }), async (s) => { @@ -422,7 +415,7 @@ if result.success: **Node:** ```typescript // Producer process: persist the token (accessor on the tracker) -const tracker = aiConfig.createTracker!(); +const tracker = aiConfig.createTracker(); const response = await callProvider(...); await saveMessage(messageId, response.content, { resumptionToken: tracker.resumptionToken }); @@ -451,13 +444,13 @@ Run the checklist in order. Each step rules out one cause. 5. **Mode match** — if the code calls `completion_config` but the AI Config in LaunchDarkly is in agent mode (or vice versa), the SDK call will error out. Check the mode in the UI. 6. **Flush on shutdown** — on short-lived processes (tests, scripts), call `ld_client.flush()` before exit. Long-running servers flush automatically on an interval. 7. **Data delay** — the Monitoring tab updates within 1–2 minutes. If you just deployed, wait and retry before debugging further. -8. **SDK version** — current releases are Python `launchdarkly-server-sdk-ai` v0.18.0 and Node `@launchdarkly/server-sdk-ai` v0.17.0. The `create_tracker` / `createTracker` factory, `runId`-grouped metrics, `track_judge_result`, and `trackToolCall` / `trackToolCalls` (Node) all require these versions. +8. **SDK version** — current releases are Python `launchdarkly-server-sdk-ai` v0.18.0 and Node `@launchdarkly/server-sdk-ai` v0.20.0. The `create_tracker` / `createTracker` factory, `runId`-grouped metrics, `track_judge_result`, and `trackToolCall` / `trackToolCalls` (Node) all require these versions. 9. **Debug logging** — enable SDK debug logging (`LD_LOG_LEVEL=debug` / `setLevel('debug')`) to see evaluation results and tracker calls in stdout. 10. **Error path silent** — are you catching exceptions that swallow tracker errors? The tracker should never raise, but if a custom wrapper catches everything, confirm the call fires by logging before and after. 
## Common gotchas -- **`model.parameters` vs `model.custom`.** `create_langchain_model` / `LangChainProvider.createLangChainModel` forwards every key in `model.parameters` to the provider SDK. App-scoped knobs (search result limits, retry budgets, feature toggles) **must** live in `model.custom` or the provider will crash at runtime with an unexpected-keyword-argument error. Read them with `ai_config.model.get_custom("key")`. Full walk-through with the MCP/REST-API caveat in [langchain-tracking.md § `model.parameters` vs `model.custom`](../../aiconfig-ai-metrics/references/langchain-tracking.md). +- **`model.parameters` vs `model.custom`.** `create_langchain_model` (Python) / `createLangChainModel` (Node) forwards every key in `model.parameters` to the provider SDK. App-scoped knobs (search result limits, retry budgets, feature toggles) **must** live in `model.custom` or the provider will crash at runtime with an unexpected-keyword-argument error. Read them with `ai_config.model.get_custom("key")`. Full walk-through with the MCP/REST-API caveat in [langchain-tracking.md § `model.parameters` vs `model.custom`](../../aiconfig-ai-metrics/references/langchain-tracking.md). - **`track_tokens` token shape.** The Python `TokenUsage` dataclass requires `total` to be set — it is not derived. Compute `total = input + output` if the provider doesn't return one. - **`track_feedback` lifecycle.** The feedback call must be made on a tracker bound to the same `runId` that produced the response. If the thumbs-up comes in a later process, use the cross-process resumption pattern above — do **not** call `create_tracker()` again in the consumer, because that mints a *new* `runId`. - **OpenAI streaming tokens.** OpenAI only emits `usage` in the final chunk when `stream_options={"include_usage": True}` is passed. Without that flag, you have to tokenize manually — `tiktoken` for OpenAI models. 
diff --git a/skills/ai-configs/aiconfig-online-evals/SKILL.md b/skills/ai-configs/aiconfig-online-evals/SKILL.md index e47c95b..763827f 100644 --- a/skills/ai-configs/aiconfig-online-evals/SKILL.md +++ b/skills/ai-configs/aiconfig-online-evals/SKILL.md @@ -1,7 +1,7 @@ --- name: aiconfig-online-evals description: Attach judges to AI Config variations for automatic LLM-as-a-judge evaluation. Create custom judges, configure sampling rates, and monitor quality scores. -compatibility: Requires LaunchDarkly API access token with ai-configs:write permission. SDK versions Python v0.18.0+ or Node.js v0.17.0+ for automatic metric recording and the consolidated `track_judge_result` / `trackJudgeResult` API. +compatibility: Requires LaunchDarkly API access token with ai-configs:write permission. SDK versions Python v0.18.0+ or Node.js v0.20.0+ for automatic metric recording and the consolidated `track_judge_result` / `trackJudgeResult` API. metadata: author: launchdarkly version: "0.1.0" @@ -16,7 +16,7 @@ Attach judges to AI Config variations for automatic quality scoring using LLM-as - LaunchDarkly account with AI Configs enabled - API access token with write permissions - Existing AI Config with variations (use `aiconfig-create` skill) -- For automatic metric recording and the consolidated judge-result API: Python AI SDK v0.18.0+ or Node.js AI SDK v0.17.0+ +- For automatic metric recording and the consolidated judge-result API: Python AI SDK v0.18.0+ or Node.js AI SDK v0.20.0+ ## API Key Detection @@ -375,7 +375,7 @@ async def async_main(): input_text = 'You are a helpful assistant. How can you help me?' output_text = 'I can answer any question you have.' - # Evaluate the input/output pair — always returns a JudgeResult in v0.18.0+ + # Evaluate the input/output pair — returns a JudgeResult. 
judge_result = await judge.evaluate(input_text, output_text) if not judge_result.sampled: @@ -395,7 +395,7 @@ async def async_main(): ldclient.get().close() ``` -> **Note:** Direct evaluation does not automatically record metrics. Obtain a tracker via `ai_config.create_tracker()` / `aiConfig.createTracker!()` and call `tracker.track_judge_result(result)` / `tracker.trackJudgeResult(result)` to record scores for the AI Config you're evaluating. (This consolidates the earlier `track_eval_scores` + `track_judge_response` pair that was removed in Python v0.18.0 / Node v0.17.0.) +> **Note:** Direct evaluation does not automatically record metrics. Obtain a tracker via `ai_config.create_tracker()` / `aiConfig.createTracker()` and call `tracker.track_judge_result(result)` / `tracker.trackJudgeResult(result)` to record scores for the AI Config you're evaluating. ## Sampling Rates diff --git a/skills/ai-configs/aiconfig-tools/SKILL.md b/skills/ai-configs/aiconfig-tools/SKILL.md index f0545d0..5ec5942 100644 --- a/skills/ai-configs/aiconfig-tools/SKILL.md +++ b/skills/ai-configs/aiconfig-tools/SKILL.md @@ -106,7 +106,7 @@ LaunchDarkly stores the tool schema once — the flat `{type, name, description, | Bedrock Converse | `{toolSpec: {name, description, inputSchema: {json: parameters}}}` | inside `toolConfig.tools=[...]` | | Gemini (`google-genai`) | `{function_declarations: [{name, description, parameters}]}` (Python) / `{functionDeclarations: [...]}` (Node) | `GenerateContentConfig.tools=[...]` | | OpenAI Responses API | LaunchDarkly's flat shape passes through unchanged | top-level `tools=[...]` | -| LangChain / LangGraph | `LangChainProvider.createLangChainModel(config)` and pass `ai_config.tools` (or your own `StructuredTool` list) into `bind_tools(...)` / `create_react_agent(tools=[...])` | framework-native; no per-call conversion | +| LangChain / LangGraph | `createLangChainModel(config)` (Node) / `create_langchain_model(config)` (Python) and pass `ai_config.tools` (or 
your own `StructuredTool` list) into `bind_tools(...)` / `create_react_agent(tools=[...])` | framework-native; no per-call conversion | | Strands Agents | LaunchDarkly's flat shape; drop `parameters.tools` before passing params to the Strands model class (`AnthropicModel`, `OpenAIModel`) — Python `@tool`-decorated callables stay in code | `Agent(tools=[...])` constructor; no per-call conversion | Minimal conversion snippets (Python): From e8c736cbf172acad733c8c508d3db094630d860f Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Thu, 14 May 2026 09:55:13 -0500 Subject: [PATCH 2/8] fix: Replace Python OpenAIProvider with managed-model API and bare helpers - openai-tracking.md, aiconfig-migrate/SKILL.md, sdk-ai-tracker-patterns.md - Python OpenAIProvider class does not exist in ldai_openai; replace OpenAIProvider.create + provider.invoke_model with managed aiClient.create_model() + model.run() (Tier 1) and bare get_ai_metrics_from_response import (Tier 2) - Swap track_metrics_of arg order to (extractor, func) per Python 0.19 across all Python examples (anthropic, bedrock, gemini, langchain, openai, strands, aiconfig-tools, aiconfig-migrate, sdk-ai-tracker-patterns) Refs AIC-2383 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../ai-configs/aiconfig-ai-metrics/SKILL.md | 2 +- .../references/anthropic-tracking.md | 4 ++-- .../references/bedrock-tracking.md | 4 ++-- .../references/gemini-tracking.md | 2 +- .../references/langchain-tracking.md | 12 +++++----- .../references/openai-tracking.md | 24 +++++++------------ .../references/strands-tracking.md | 2 +- skills/ai-configs/aiconfig-migrate/SKILL.md | 9 +++---- .../references/phase-1-analysis-checklist.md | 2 +- .../references/sdk-ai-tracker-patterns.md | 15 +++++------- skills/ai-configs/aiconfig-tools/SKILL.md | 2 +- 11 files changed, 32 insertions(+), 46 deletions(-) diff --git a/skills/ai-configs/aiconfig-ai-metrics/SKILL.md b/skills/ai-configs/aiconfig-ai-metrics/SKILL.md index 5ebb447..361887e 100644 --- 
a/skills/ai-configs/aiconfig-ai-metrics/SKILL.md +++ b/skills/ai-configs/aiconfig-ai-metrics/SKILL.md @@ -25,7 +25,7 @@ This is the order the official SDK READMEs (Python core, Node core, and every pr | **3 — Custom extractor + `trackMetricsOf`** | Same `trackMetricsOf` wrapper, but you write a small function that maps the provider response to `LDAIMetrics` (tokens + success). | No provider package exists (Anthropic direct, Gemini, Cohere, custom HTTP). | Duration + success/error from the wrapper; tokens from your extractor. | | **4 — Raw manual** | Separate calls to `trackDuration`, `trackTokens`, `trackSuccess` / `trackError`, plus `trackTimeToFirstToken` for streams. | Streaming with TTFT, unusual response shapes, partial tracking, anything Tier 2–3 can't cleanly wrap. | Only what you explicitly call — it's on you to not miss one. | -Every provider — OpenAI, LangChain, Vercel, Bedrock, Anthropic, Gemini, custom HTTP — uses the same generic shape: `tracker.trackMetricsOf(getAIMetricsFromResponse, () => providerCall())` in Node, `tracker.track_metrics_of(provider_call, get_ai_metrics_from_response)` in Python. The extractor is the only thing that changes per provider: import `getAIMetricsFromResponse` from the matching `@launchdarkly/server-sdk-ai-<provider>` (or `ldai_<provider>`) package, or write a small custom function that returns `LDAIMetrics`. There are no provider-specific tracker methods. +Every provider — OpenAI, LangChain, Vercel, Bedrock, Anthropic, Gemini, custom HTTP — uses the same generic shape: `tracker.trackMetricsOf(getAIMetricsFromResponse, () => providerCall())` in Node, `tracker.track_metrics_of(get_ai_metrics_from_response, provider_call)` in Python. The extractor is the only thing that changes per provider: import `getAIMetricsFromResponse` from the matching `@launchdarkly/server-sdk-ai-<provider>` (or `ldai_<provider>`) package, or write a small custom function that returns `LDAIMetrics`. There are no provider-specific tracker methods.
## Workflow diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/anthropic-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/anthropic-tracking.md index 11611da..9373cad 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/anthropic-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/anthropic-tracking.md @@ -52,7 +52,7 @@ def call_with_tracking(ai_config, user_prompt: str) -> str | None: # except: tracker.track_error() on top — it's a noop that trips the # at-most-once guard. Wrap in your own try/except only if you need # local handling (logging, fallback, alert); the error is already tracked. - response = tracker.track_metrics_of(call_anthropic, anthropic_extractor) + response = tracker.track_metrics_of(anthropic_extractor, call_anthropic) return response.content[0].text ``` @@ -118,8 +118,8 @@ llm = create_langchain_model(ai_config) # ChatAnthropic under the hood tracker = ai_config.create_tracker() response = tracker.track_metrics_of( - lambda: llm.invoke(messages), get_ai_metrics_from_response, + lambda: llm.invoke(messages), ) ``` diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/bedrock-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/bedrock-tracking.md index 288f73a..5b2ed4e 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/bedrock-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/bedrock-tracking.md @@ -50,7 +50,7 @@ def call_with_tracking(ai_config, user_prompt: str) -> str | None: tracker = ai_config.create_tracker() # Exceptions are tracked automatically — track_metrics_of catches # exceptions, records tracker.track_error(), and re-raises. 
- response = tracker.track_metrics_of(call_bedrock, bedrock_converse_extractor) + response = tracker.track_metrics_of(bedrock_converse_extractor, call_bedrock) return response["output"]["message"]["content"][0]["text"] ``` @@ -130,8 +130,8 @@ llm = create_langchain_model(ai_config) # ChatBedrockConverse when provider=bed tracker = ai_config.create_tracker() response = tracker.track_metrics_of( - lambda: llm.invoke(messages), get_ai_metrics_from_response, + lambda: llm.invoke(messages), ) ``` diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/gemini-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/gemini-tracking.md index 5ea1703..78b20fb 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/gemini-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/gemini-tracking.md @@ -81,7 +81,7 @@ def call_with_tracking(ai_config, user_prompt: str) -> str | None: tracker = ai_config.create_tracker() # Exceptions are tracked automatically — track_metrics_of catches # exceptions, records tracker.track_error(), and re-raises. - response = tracker.track_metrics_of(call_gemini, gemini_metrics) + response = tracker.track_metrics_of(gemini_metrics, call_gemini) return response.text ``` diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/langchain-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/langchain-tracking.md index 668d74c..094f5dd 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/langchain-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/langchain-tracking.md @@ -92,8 +92,8 @@ tracker = config.create_tracker() # Exceptions are tracked automatically — track_metrics_of_async catches # exceptions, records tracker.track_error(), and re-raises. 
completion = await tracker.track_metrics_of_async( - lambda: llm.ainvoke(messages), get_ai_metrics_from_response, + lambda: llm.ainvoke(messages), ) return completion.content ``` @@ -173,14 +173,14 @@ agent = create_agent( tracker = agent_config.create_tracker() try: result = await tracker.track_metrics_of_async( - lambda: agent.ainvoke( - {"messages": [{"role": "user", "content": user_prompt}]}, - config={"configurable": {"thread_id": thread_id}}, - ), lambda res: LDAIMetrics( success=True, usage=sum_token_usage_from_messages(res.get("messages", [])), ), + lambda: agent.ainvoke( + {"messages": [{"role": "user", "content": user_prompt}]}, + config={"configurable": {"thread_id": thread_id}}, + ), ) for msg in result.get("messages", []): for name in get_tool_calls_from_response(msg): @@ -273,8 +273,8 @@ from ldai_langchain.langchain_helper import build_structured_tools model = create_langchain_model(ai_config) tools = build_structured_tools(ai_config, TOOL_REGISTRY) response = await tracker.track_metrics_of_async( - lambda: model.bind_tools(tools).ainvoke(messages), get_ai_metrics_from_response, + lambda: model.bind_tools(tools).ainvoke(messages), ) ``` diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/openai-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/openai-tracking.md index 017239f..d9e7422 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/openai-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/openai-tracking.md @@ -66,22 +66,17 @@ Use this when the call isn't a chat loop (one-shot completion, structured output **Python** — `launchdarkly-server-sdk-ai-openai`: ```python -from ldai_openai import OpenAIProvider - -ai_config = ai_client.completion_config("my-config-key", context, default_config) -if not ai_config.enabled: - return None - -provider = await OpenAIProvider.create(ai_config) -response = await provider.invoke_model(ai_config.messages) -return response.message.content +managed = await 
ai_client.create_model("my-config-key", context, default_config) +if managed: + result = await managed.run(user_prompt) + return result.content ``` -`OpenAIProvider.invoke_model()` also tracks automatically. If you need finer-grained control (e.g., you want to supply your own OpenAI client with custom retries), use the raw SDK + `track_metrics_of`: +`managed.run()` tracks automatically — the managed runner handles duration, tokens, and success/error end-to-end. If you need finer-grained control (e.g., you want to supply your own OpenAI client with custom retries), use the raw SDK + `track_metrics_of` with the bare extractor: ```python import openai -from ldai_openai import OpenAIProvider +from ldai_openai import get_ai_metrics_from_response client = openai.OpenAI() @@ -100,10 +95,7 @@ def call_openai(): ], ) -response = tracker.track_metrics_of( - call_openai, - OpenAIProvider.get_ai_metrics_from_response, -) +response = tracker.track_metrics_of(get_ai_metrics_from_response, call_openai) return response.choices[0].message.content ``` @@ -163,7 +155,7 @@ def my_openai_extractor(response) -> LDAIMetrics: ) tracker = ai_config.create_tracker() -response = tracker.track_metrics_of(call_openai, my_openai_extractor) +response = tracker.track_metrics_of(my_openai_extractor, call_openai) ``` ## Tier 4 — Manual (streaming only) diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/strands-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/strands-tracking.md index 4a99634..0c64f58 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/strands-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/strands-tracking.md @@ -69,8 +69,8 @@ async def run_turn(agent, tracker, user_input): # Exceptions are tracked automatically — track_metrics_of_async catches # exceptions, records tracker.track_error(), and re-raises. 
result = await tracker.track_metrics_of_async( - lambda: agent.invoke_async(user_input), strands_extractor, + lambda: agent.invoke_async(user_input), ) return result.message["content"][0]["text"] ``` diff --git a/skills/ai-configs/aiconfig-migrate/SKILL.md b/skills/ai-configs/aiconfig-migrate/SKILL.md index f714148..131b5f0 100644 --- a/skills/ai-configs/aiconfig-migrate/SKILL.md +++ b/skills/ai-configs/aiconfig-migrate/SKILL.md @@ -361,7 +361,7 @@ Hand off: print the AI Config key, variation key, provider, and whether the call **Python:** ```python - from ldai_openai import OpenAIProvider + from ldai_openai import get_ai_metrics_from_response import openai client = openai.OpenAI() @@ -378,10 +378,7 @@ Hand off: print the AI Config key, variation key, provider, and whether the call # Exceptions are tracked automatically — track_metrics_of catches # exceptions, records tracker.track_error(), and re-raises. Wrap your # own try/except only for local handling (logging, fallback). - response = tracker.track_metrics_of( - call_openai, - OpenAIProvider.get_ai_metrics_from_response, - ) + response = tracker.track_metrics_of(get_ai_metrics_from_response, call_openai) ``` **Node:** @@ -523,7 +520,7 @@ These are ordered by how likely they are to show up as a first-run failure. The - **Same rule applies to hand-rolled `resolve_tools` / `TOOL_REGISTRY` / `ALL_TOOLS` helpers.** If the template already has a `resolve_tools(tool_keys)` or an `ALL_TOOLS` module-level list, import `build_structured_tools` from `ldai_langchain.langchain_helper` and delete the hand-rolled version. `build_structured_tools(ai_config, TOOL_REGISTRY_DICT)` reads `ai_config.model.parameters.tools` and wraps the matching callables as LangChain `StructuredTool`s with the LD tool key as the `StructuredTool.name` — so `ToolNode` lookup works without a second mapping. Don't leave both in the repo. - Don't put app-scoped knobs directly in `model.parameters`. 
`create_langchain_model` forwards every key in `parameters` to the provider SDK via `init_chat_model`, so a `max_search_results` / `retry_budget` / `feature_toggle` entry will crash the provider with an unexpected-keyword-argument error. The correct home is `model.custom`, which the provider helpers ignore and the app reads via `ai_config.model.get_custom("key")`. The MCP `update-ai-config-variation` tool does not currently expose top-level `custom`, so pick one of two paths: (a) PATCH the variation via the REST API to set `model.custom` directly, or (b) set it via MCP inside `parameters.custom` (as a nested dict) and use a defensive accessor that reads both locations. Full walk-through with code samples in [langchain-tracking.md § MCP caveat](../aiconfig-ai-metrics/references/langchain-tracking.md). - Don't re-encode tool schemas inside the fallback. When LaunchDarkly is unreachable the fallback should run without tools (or with whatever minimal provider-bound parameters the app needs to keep operating). Building a `_FALLBACK_TOOLS` array that duplicates the AI Config's tool schema re-introduces the hardcoded config the migration was supposed to move out of code. -- Don't import `LaunchDarklyCallbackHandler` from `ldai.langchain` — neither the class nor the dotted module path exists. The Python LangChain helper package is `ldai_langchain` (top-level module, underscore). Use `create_langchain_model(config)` + `track_metrics_of_async(lambda: llm.ainvoke(messages), get_ai_metrics_from_response)` as the canonical pattern. +- Don't import `LaunchDarklyCallbackHandler` from `ldai.langchain` — neither the class nor the dotted module path exists. The Python LangChain helper package is `ldai_langchain` (top-level module, underscore). Use `create_langchain_model(config)` + `track_metrics_of_async(get_ai_metrics_from_response, lambda: llm.ainvoke(messages))` as the canonical pattern. 
### Stage / handoff discipline diff --git a/skills/ai-configs/aiconfig-migrate/references/phase-1-analysis-checklist.md b/skills/ai-configs/aiconfig-migrate/references/phase-1-analysis-checklist.md index 0cbcfe0..aadb437 100644 --- a/skills/ai-configs/aiconfig-migrate/references/phase-1-analysis-checklist.md +++ b/skills/ai-configs/aiconfig-migrate/references/phase-1-analysis-checklist.md @@ -151,7 +151,7 @@ Feeds into Stage 2 (install + wrap). Quoted from the `ai-configs-relaunch-guides | LangChain / LangGraph | `@launchdarkly/server-sdk-ai-langchain` | `createLangChainModel(config)` (forwards all variation parameters and handles provider-name mapping) + `getAIMetricsFromResponse` with `trackMetricsOf` | | Vercel AI SDK | `@launchdarkly/server-sdk-ai-vercel` | `getAIMetricsFromResponse` + `trackMetricsOf`, or `VercelRunnerFactory.createVercelModel(aiConfig)` for a managed runner | -Python currently ships helper packages for OpenAI (`ldai_openai`) and LangChain (`ldai_langchain`). The LangChain Python package exposes `create_langchain_model(config)` (builds a LangChain chat model from the AI Config, forwarding every variation parameter and mapping LD provider names to LangChain equivalents), `convert_messages_to_langchain`, and `get_ai_metrics_from_response` — the same package covers LangGraph. Use `create_langchain_model(config)` + `track_metrics_of_async(lambda: llm.ainvoke(messages), get_ai_metrics_from_response)` as the canonical single-call pattern. See [langchain-tracking.md](../../aiconfig-ai-metrics/references/langchain-tracking.md) for both LangChain and LangGraph patterns and [sdk-ai-tracker-patterns.md](sdk-ai-tracker-patterns.md) for the full tracker-method matrix. +Python currently ships helper packages for OpenAI (`ldai_openai`) and LangChain (`ldai_langchain`). 
The LangChain Python package exposes `create_langchain_model(config)` (builds a LangChain chat model from the AI Config, forwarding every variation parameter and mapping LD provider names to LangChain equivalents), `convert_messages_to_langchain`, and `get_ai_metrics_from_response` — the same package covers LangGraph. Use `create_langchain_model(config)` + `track_metrics_of_async(get_ai_metrics_from_response, lambda: llm.ainvoke(messages))` as the canonical single-call pattern. See [langchain-tracking.md](../../aiconfig-ai-metrics/references/langchain-tracking.md) for both LangChain and LangGraph patterns and [sdk-ai-tracker-patterns.md](sdk-ai-tracker-patterns.md) for the full tracker-method matrix. ## Phase 1 output format diff --git a/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md b/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md index 793fcfe..b2577d8 100644 --- a/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md +++ b/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md @@ -249,13 +249,13 @@ The canonical tracking surface is **`trackMetricsOf` composed with a provider-pa | Helper | Signature | Tier | Notes | |--------|-----------|------|-------| -| `track_metrics_of(func, extractor)` | `tracker.track_metrics_of(func, extractor)` | **2 / 3** | **Canonical generic wrapper.** Sync. Calls `extractor(result)` to get an `LDAIMetrics` object; records tokens + duration + success. Use a provider package's `get_ai_metrics_from_response` as the extractor for Tier 2, or write a small custom function for Tier 3. | -| `track_metrics_of_async(func, extractor)` | `await tracker.track_metrics_of_async(async_func, extractor)` | 2 / 3 | Async variant. | +| `track_metrics_of(extractor, func)` | `tracker.track_metrics_of(extractor, func)` | **2 / 3** | **Canonical generic wrapper.** Sync. Calls `extractor(result)` to get an `LDAIMetrics` object; records tokens + duration + success. 
Use a provider package's `get_ai_metrics_from_response` as the extractor for Tier 2, or write a small custom function for Tier 3. | +| `track_metrics_of_async(extractor, func)` | `await tracker.track_metrics_of_async(extractor, async_func)` | 2 / 3 | Async variant. | | `track_duration_of(func)` | `tracker.track_duration_of(lambda: provider_call())` | 4 | Wraps a sync callable; captures duration only. Pair with explicit `track_tokens` + `track_success`. Useful when the response shape makes `track_metrics_of` awkward. | Example — OpenAI via `track_metrics_of` + the provider package extractor: ```python -from ldai_openai import OpenAIProvider +from ldai_openai import get_ai_metrics_from_response tracker = ai_config.create_tracker() @@ -265,10 +265,7 @@ def call_openai(): messages=[m.to_dict() for m in ai_config.messages or []], ) -completion = tracker.track_metrics_of( - call_openai, - OpenAIProvider.get_ai_metrics_from_response, -) +completion = tracker.track_metrics_of(get_ai_metrics_from_response, call_openai) ``` Example — custom extractor for Anthropic direct (Tier 3): @@ -287,8 +284,8 @@ def anthropic_extractor(response) -> LDAIMetrics: tracker = ai_config.create_tracker() response = tracker.track_metrics_of( - lambda: anthropic_client.messages.create(...), anthropic_extractor, + lambda: anthropic_client.messages.create(...), ) ``` @@ -296,7 +293,7 @@ response = tracker.track_metrics_of( | Helper | Signature | Tier | Notes | |--------|-----------|------|-------| -| `trackMetricsOf(extractor, func)` | `await tracker.trackMetricsOf((result) => extractor(result), async () => ...)` | **2 / 3** | **Canonical generic wrapper.** `extractor` maps provider response → `LDAIMetrics`. Use a provider package's `Provider.getAIMetricsFromResponse` for Tier 2 (`@launchdarkly/server-sdk-ai-openai`, `-langchain`, `-vercel`) or a small custom function for Tier 3. 
| +| `trackMetricsOf(extractor, func)` | `await tracker.trackMetricsOf((result) => extractor(result), async () => ...)` | **2 / 3** | **Canonical generic wrapper.** `extractor` maps provider response → `LDAIMetrics`. Use a provider package's bare `getAIMetricsFromResponse` for Tier 2 (`@launchdarkly/server-sdk-ai-openai`, `-langchain`, `-vercel`) or a small custom function for Tier 3. | | `trackStreamMetricsOf(extractor, streamCreator)` | `tracker.trackStreamMetricsOf(async (chunks) => extractor(chunks), () => createStream())` | 2 / 3 | Stream variant. Does **not** capture TTFT automatically — if you need TTFT, use the manual pattern in [streaming-tracking.md](../../aiconfig-ai-metrics/references/streaming-tracking.md). | | `trackDurationOf(func)` | `await tracker.trackDurationOf(async () => ...)` | 4 | Wraps an async callable; captures duration only. Pair with explicit `trackTokens` + `trackSuccess`. | Example — OpenAI via `trackMetricsOf` + the provider package: diff --git a/skills/ai-configs/aiconfig-tools/SKILL.md b/skills/ai-configs/aiconfig-tools/SKILL.md index 5ec5942..3bce3fa 100644 --- a/skills/ai-configs/aiconfig-tools/SKILL.md +++ b/skills/ai-configs/aiconfig-tools/SKILL.md @@ -181,6 +181,7 @@ messages = [{"role": "user", "content": initial_input}] MAX_STEPS = 5 for _ in range(MAX_STEPS): response = tracker.track_metrics_of( + anthropic_metrics, lambda: anthropic_client.messages.create( model=agent.model.name, system=agent.instructions, @@ -188,7 +189,6 @@ for _ in range(MAX_STEPS): tools=anthropic_tools, **params, ), - anthropic_metrics, ) if response.stop_reason != "tool_use": break From 275de9858bf3b0b6ff5321efb178c0cd6fbf061b Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Thu, 14 May 2026 11:18:08 -0500 Subject: [PATCH 3/8] fix: Bump Python compatibility statement to launchdarkly-server-sdk-ai>=0.20.0 The skill code now uses 0.20 APIs (tokens= field, removed providers, etc.). 
Match the Python compat declaration to the Node side and the actual SDK floor the snippets require. Refs AIC-2383 Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 1 + skills.json | 2 +- skills/ai-configs/aiconfig-ai-metrics/SKILL.md | 2 +- skills/ai-configs/aiconfig-migrate/SKILL.md | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index e38d870..402583b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ node_modules/ dist/ __pycache__/ *.pyc +.claude/ diff --git a/skills.json b/skills.json index 254beeb..b44eae4 100644 --- a/skills.json +++ b/skills.json @@ -14,7 +14,7 @@ "path": "skills/ai-configs/aiconfig-ai-metrics", "version": "1.0.0-experimental", "license": "Apache-2.0", - "compatibility": "Requires the LaunchDarkly server-side AI SDK (`launchdarkly-server-sdk-ai>=0.18.0` for Python or `@launchdarkly/server-sdk-ai>=0.20.0` for Node) and an existing AI Config." + "compatibility": "Requires the LaunchDarkly server-side AI SDK (`launchdarkly-server-sdk-ai>=0.20.0` for Python or `@launchdarkly/server-sdk-ai>=0.20.0` for Node) and an existing AI Config." }, { "name": "aiconfig-create", diff --git a/skills/ai-configs/aiconfig-ai-metrics/SKILL.md b/skills/ai-configs/aiconfig-ai-metrics/SKILL.md index 361887e..d5fc5db 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/SKILL.md +++ b/skills/ai-configs/aiconfig-ai-metrics/SKILL.md @@ -2,7 +2,7 @@ name: aiconfig-ai-metrics description: "Instrument an existing codebase with LaunchDarkly AI Config tracking. Walks the four-tier ladder (managed runner → provider package → custom extractor + trackMetricsOf → raw manual) and picks the lowest-ceremony option that still captures duration, tokens, and success/error." license: Apache-2.0 -compatibility: Requires the LaunchDarkly server-side AI SDK (`launchdarkly-server-sdk-ai>=0.18.0` for Python or `@launchdarkly/server-sdk-ai>=0.20.0` for Node) and an existing AI Config. 
+compatibility: Requires the LaunchDarkly server-side AI SDK (`launchdarkly-server-sdk-ai>=0.20.0` for Python or `@launchdarkly/server-sdk-ai>=0.20.0` for Node) and an existing AI Config. metadata: author: launchdarkly version: "1.0.0-experimental" diff --git a/skills/ai-configs/aiconfig-migrate/SKILL.md b/skills/ai-configs/aiconfig-migrate/SKILL.md index 131b5f0..73bad40 100644 --- a/skills/ai-configs/aiconfig-migrate/SKILL.md +++ b/skills/ai-configs/aiconfig-migrate/SKILL.md @@ -145,7 +145,7 @@ This is the first stage that writes code. It has nine sub-steps. Commit the deletion separately from the SDK install if the repo's review process benefits from it — otherwise bundle with sub-step 2. 2. **Install the AI SDK.** Detect the package manager from Step 1, then install: - - Python: `launchdarkly-server-sdk` + `launchdarkly-server-sdk-ai>=0.18.0` + - Python: `launchdarkly-server-sdk` + `launchdarkly-server-sdk-ai>=0.20.0` - Node.js/TypeScript: `@launchdarkly/node-server-sdk` + `@launchdarkly/server-sdk-ai@^0.20.0` - Go: `github.com/launchdarkly/go-server-sdk/v7` + `github.com/launchdarkly/go-server-sdk/ldai` From 2da249671cc09782a35e1778bf8a7638fe0c3cc8 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Thu, 14 May 2026 11:50:33 -0500 Subject: [PATCH 4/8] fix: Correct stale Python SDK version references to v0.20.0 The skill code targets Python AI SDK 0.20+ (tokens= field, removed provider classes, track_judge_result, etc.) but five "compatibility" / "verified against" / "current releases" statements still said v0.18.0 for Python. Update to v0.20.0 to match what the snippets actually require. 
Refs AIC-2383 Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 1 - skills.json | 2 +- .../aiconfig-migrate/references/sdk-ai-tracker-patterns.md | 4 ++-- skills/ai-configs/aiconfig-online-evals/SKILL.md | 4 ++-- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 402583b..e38d870 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,3 @@ node_modules/ dist/ __pycache__/ *.pyc -.claude/ diff --git a/skills.json b/skills.json index b44eae4..1812d1c 100644 --- a/skills.json +++ b/skills.json @@ -45,7 +45,7 @@ "description": "Attach judges to AI Config variations for automatic LLM-as-a-judge evaluation. Create custom judges, configure sampling rates, and monitor quality scores.", "path": "skills/ai-configs/aiconfig-online-evals", "version": "0.1.0", - "compatibility": "Requires LaunchDarkly API access token with ai-configs:write permission. SDK versions Python v0.18.0+ or Node.js v0.20.0+ for automatic metric recording and the consolidated `track_judge_result` / `trackJudgeResult` API." + "compatibility": "Requires LaunchDarkly API access token with ai-configs:write permission. SDK versions Python v0.20.0+ or Node.js v0.20.0+ for automatic metric recording and the consolidated `track_judge_result` / `trackJudgeResult` API." }, { "name": "aiconfig-projects", diff --git a/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md b/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md index b2577d8..c82f9df 100644 --- a/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md +++ b/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md @@ -2,7 +2,7 @@ The main novel content of this skill — a per-method reference for the LaunchDarkly AI Config tracker in Python and Node side by side. **No existing skill covers this.** The `launchdarkly-metric-instrument` skill is for `ldClient.track()` feature metrics, which is a different API. 
-All method names and signatures below are verified against `launchdarkly-server-sdk-ai` v0.18.0 (Python) and `@launchdarkly/server-sdk-ai` v0.20.0 (`js-core/packages/sdk/server-ai`). If a method is not listed, it does not exist — do not invent it. +All method names and signatures below are verified against `launchdarkly-server-sdk-ai` v0.20.0 (Python) and `@launchdarkly/server-sdk-ai` v0.20.0 (`js-core/packages/sdk/server-ai`). If a method is not listed, it does not exist — do not invent it. ## Tracker lifetime @@ -441,7 +441,7 @@ Run the checklist in order. Each step rules out one cause. 5. **Mode match** — if the code calls `completion_config` but the AI Config in LaunchDarkly is in agent mode (or vice versa), the SDK call will error out. Check the mode in the UI. 6. **Flush on shutdown** — on short-lived processes (tests, scripts), call `ld_client.flush()` before exit. Long-running servers flush automatically on an interval. 7. **Data delay** — the Monitoring tab updates within 1–2 minutes. If you just deployed, wait and retry before debugging further. -8. **SDK version** — current releases are Python `launchdarkly-server-sdk-ai` v0.18.0 and Node `@launchdarkly/server-sdk-ai` v0.20.0. The `create_tracker` / `createTracker` factory, `runId`-grouped metrics, `track_judge_result`, and `trackToolCall` / `trackToolCalls` (Node) all require these versions. +8. **SDK version** — current releases are Python `launchdarkly-server-sdk-ai` v0.20.0 and Node `@launchdarkly/server-sdk-ai` v0.20.0. The `create_tracker` / `createTracker` factory, `runId`-grouped metrics, `track_judge_result`, and `trackToolCall` / `trackToolCalls` (Node) all require these versions. 9. **Debug logging** — enable SDK debug logging (`LD_LOG_LEVEL=debug` / `setLevel('debug')`) to see evaluation results and tracker calls in stdout. 10. **Error path silent** — are you catching exceptions that swallow tracker errors? 
The tracker should never raise, but if a custom wrapper catches everything, confirm the call fires by logging before and after. diff --git a/skills/ai-configs/aiconfig-online-evals/SKILL.md b/skills/ai-configs/aiconfig-online-evals/SKILL.md index 763827f..631c76f 100644 --- a/skills/ai-configs/aiconfig-online-evals/SKILL.md +++ b/skills/ai-configs/aiconfig-online-evals/SKILL.md @@ -1,7 +1,7 @@ --- name: aiconfig-online-evals description: Attach judges to AI Config variations for automatic LLM-as-a-judge evaluation. Create custom judges, configure sampling rates, and monitor quality scores. -compatibility: Requires LaunchDarkly API access token with ai-configs:write permission. SDK versions Python v0.18.0+ or Node.js v0.20.0+ for automatic metric recording and the consolidated `track_judge_result` / `trackJudgeResult` API. +compatibility: Requires LaunchDarkly API access token with ai-configs:write permission. SDK versions Python v0.20.0+ or Node.js v0.20.0+ for automatic metric recording and the consolidated `track_judge_result` / `trackJudgeResult` API. 
metadata: author: launchdarkly version: "0.1.0" @@ -16,7 +16,7 @@ Attach judges to AI Config variations for automatic quality scoring using LLM-as - LaunchDarkly account with AI Configs enabled - API access token with write permissions - Existing AI Config with variations (use `aiconfig-create` skill) -- For automatic metric recording and the consolidated judge-result API: Python AI SDK v0.18.0+ or Node.js AI SDK v0.20.0+ +- For automatic metric recording and the consolidated judge-result API: Python AI SDK v0.20.0+ or Node.js AI SDK v0.20.0+ ## API Key Detection From 4172449a41887e1e88f17315a8e44038f797839a Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Thu, 14 May 2026 11:57:15 -0500 Subject: [PATCH 5/8] fix: Point agent skills at CHANGELOG for SDK version history Skills that bake in specific SDK versions ("verified against v0.20.0", "renamed from track_latency in v0.18.0", "added in v0.17.0") go stale the moment the SDK ships a new release. Point at the CHANGELOG instead and describe what the API IS now. - aiconfig-migrate/SKILL.md: Prerequisites now instructs the agent to fetch the latest CHANGELOG before applying any pattern below. - sdk-ai-tracker-patterns.md: drop "verified against v0.20.0" pin and add CHANGELOG links at the top; reword the L8 SDK-version gotcha to point at CHANGELOGs for the per-method version-landed-in info. - agent-graph-reference.md: strip "renamed in v0.18.0" / "added in v0.17.0" historical annotations; describe current behavior only. Add CHANGELOG links to the Resources section. 
Refs AIC-2383 Co-Authored-By: Claude Opus 4.7 (1M context) --- skills/ai-configs/aiconfig-migrate/SKILL.md | 7 +++++++ .../references/agent-graph-reference.md | 17 ++++++++++------- .../references/sdk-ai-tracker-patterns.md | 9 ++++++--- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/skills/ai-configs/aiconfig-migrate/SKILL.md b/skills/ai-configs/aiconfig-migrate/SKILL.md index 73bad40..7b4b5bc 100644 --- a/skills/ai-configs/aiconfig-migrate/SKILL.md +++ b/skills/ai-configs/aiconfig-migrate/SKILL.md @@ -57,6 +57,13 @@ This skill requires the remotely hosted LaunchDarkly MCP server to be configured **MCP tools used directly by this skill:** none — every LaunchDarkly write happens in a focused sibling skill. +**Check the SDK CHANGELOG before applying any pattern.** The API surface described throughout this skill targets the SDK behavior at the time of the skill's last update; SDK releases can rename, remove, or split methods after that. Before you start, fetch the latest CHANGELOG for the SDK(s) you'll target and skim for anything that contradicts the pattern you're about to apply: + +- Python: https://github.com/launchdarkly/python-server-sdk-ai/blob/main/packages/sdk/server-ai/CHANGELOG.md (and per-provider CHANGELOGs under `packages/ai-providers/server-ai-{openai,langchain}/CHANGELOG.md`) +- Node: https://github.com/launchdarkly/js-core/blob/main/packages/sdk/server-ai/CHANGELOG.md (and per-provider CHANGELOGs under `packages/ai-providers/server-ai-{openai,langchain,vercel}/CHANGELOG.md`) + +If a CHANGELOG entry post-dates this skill and changes an API you're about to use, the CHANGELOG wins — and the skill should be updated. + **Hand-off model.** This skill does **not** auto-invoke other skills. At each stage that needs a LaunchDarkly write, this skill prepares the inputs (config key, mode, model, prompt, tool schemas, judge keys) and then **tells the user to run the next slash-command themselves**. 
After the user finishes that sibling skill, return to the next step here. Treat the "Delegate" lines below as next-step instructions, not auto-handoffs. **Sibling skills the user runs at each stage:** diff --git a/skills/ai-configs/aiconfig-migrate/references/agent-graph-reference.md b/skills/ai-configs/aiconfig-migrate/references/agent-graph-reference.md index 3b5f273..7c8d778 100644 --- a/skills/ai-configs/aiconfig-migrate/references/agent-graph-reference.md +++ b/skills/ai-configs/aiconfig-migrate/references/agent-graph-reference.md @@ -2,7 +2,7 @@ > **Out of scope for the main migration workflow.** Read this only after a single-agent migration works end-to-end. The main `SKILL.md` workflow stops at single-agent because multi-agent orchestration is a meaningful jump in complexity and is still evolving in the SDK. -> **Python is still the richer surface; Node v0.17.0 added foundational Agent Graph Definitions.** `launchdarkly-server-sdk-ai` (Python) has the fully-documented graph API used in the traversal pattern below. `@launchdarkly/server-sdk-ai` v0.17.0 (Node) introduced Agent Graph Definitions and graph metric tracking — consult the js-core source for the current Node API shape before wiring Node graph code; the Python pattern in this doc is canonical. +> **Python is still the richer surface.** `launchdarkly-server-sdk-ai` (Python) has the fully-documented graph API used in the traversal pattern below. `@launchdarkly/server-sdk-ai` (Node) exposes Agent Graph Definitions and graph metric tracking — consult the js-core source for the current Node API shape before wiring Node graph code; the Python pattern in this doc is canonical. 
## What an agent graph is @@ -76,7 +76,7 @@ class Edge: ```python tracker.track_invocation_success() -> None tracker.track_invocation_failure() -> None -tracker.track_duration(duration: int) -> None # milliseconds, graph-level total (renamed from track_latency in v0.18.0) +tracker.track_duration(duration: int) -> None # milliseconds, graph-level total tracker.track_total_tokens(tokens: TokenUsage) -> None tracker.track_path(path: List[str]) -> None # e.g. ["supervisor", "security", "support"] tracker.track_redirect(source_key: str, redirected_target: str) -> None @@ -87,9 +87,9 @@ tracker.track_handoff_failure(source_key: str, target_key: str) -> None **Things that are NOT on the graph tracker:** - `track_node_invocation` — not a public method. Use `track_path(execution_path)` at the end of traversal instead. -- `track_tool_call(node_key, tool_name)` — graph-level tool-call tracking does not exist. Track per-node tool calls via `node_tracker.track_tool_call(tool_name)` on each node's tracker (obtained via `node.get_config().create_tracker()`). The `graph_key` keyword argument was removed in v0.18.0 — trackers returned via a graph traversal are automatically bound to the right graph key. -- `track_judge_response` — removed in v0.18.0 on `AIGraphTracker`. Record judge results at the config level via `LDAIConfigTracker.track_judge_result(result)` instead. -- No `track_request()`, no `track_duration()` per call — use `track_duration(total_ms)` once per traversal (renamed from `track_latency` in v0.18.0). +- `track_tool_call(node_key, tool_name)` — graph-level tool-call tracking does not exist. Track per-node tool calls via `node_tracker.track_tool_call(tool_name)` on each node's tracker (obtained via `node.get_config().create_tracker()`). Trackers returned via a graph traversal are automatically bound to the right graph key — do not pass `graph_key` as a keyword. +- `track_judge_response` — does not exist on `AIGraphTracker`. 
Record judge results at the config level via `LDAIConfigTracker.track_judge_result(result)` instead. +- No `track_request()` and no per-call duration tracking on the graph tracker — call `track_duration(total_ms)` once per traversal with the graph-level total. If you see older devrel-agents-tutorial code that calls `track_node_invocation`, `track_tool_call`, or pokes `graph_tracker._ld_client.track(...)` directly, that code targets an earlier API shape and needs updating. A PR is in flight against `launchdarkly-labs/devrel-agents-tutorial` to align the tutorial with the current SDK. @@ -228,7 +228,7 @@ Each phase is reversible. If something breaks at phase 5, the supervisor can fal ## Limitations to know about -- **Python has the canonical surface; Node added foundational Agent Graph Definitions in v0.17.0** but the Python traversal pattern above is what this doc covers in full. For Node graphs, consult the `@launchdarkly/server-sdk-ai` source for the current API. +- **Python has the canonical surface.** The Python traversal pattern above is what this doc covers in full. For Node graphs, consult the `@launchdarkly/server-sdk-ai` source for the current API. - **`create_agent_graph` is experimental.** Do not build production features on `ManagedAgentGraph.run`. Use the traversal pattern above. - **Graph tracker is less granular than the config tracker.** If you want per-node duration or per-node token breakdowns, obtain a per-node tracker via `node.get_config().create_tracker()` — the graph tracker handles totals only. - **Cycles must be caught in your code.** The SDK does not stop cycle traversal automatically; track `visited` and `hop_count` yourself. @@ -240,5 +240,8 @@ Each phase is reversible.
If something breaks at phase 5, the supervisor can fal - `packages/sdk/server-ai/src/ldai/agent_graph/__init__.py` — `AgentGraphDefinition` and `AgentGraphNode` - `packages/sdk/server-ai/src/ldai/tracker.py` — `AIGraphTracker` (near the bottom of the file) - `packages/sdk/server-ai/src/ldai/client.py` — `LDAIClient.agent_graph` and `create_agent_graph` -- Node SDK source (Agent Graph Definitions added in v0.17.0): https://github.com/launchdarkly/js-core/tree/main/packages/sdk/server-ai +- Node SDK source: https://github.com/launchdarkly/js-core/tree/main/packages/sdk/server-ai +- SDK CHANGELOGs (for per-release breaking changes and the version each method landed in): + - Python: https://github.com/launchdarkly/python-server-sdk-ai/blob/main/packages/sdk/server-ai/CHANGELOG.md + - Node: https://github.com/launchdarkly/js-core/blob/main/packages/sdk/server-ai/CHANGELOG.md - Devrel reference implementation (Python, after PR alignment): https://github.com/launchdarkly-labs/devrel-agents-tutorial on the `tutorial/agent-graphs` branch diff --git a/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md b/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md index c82f9df..83856cd 100644 --- a/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md +++ b/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md @@ -2,7 +2,10 @@ The main novel content of this skill — a per-method reference for the LaunchDarkly AI Config tracker in Python and Node side by side. **No existing skill covers this.** The `launchdarkly-metric-instrument` skill is for `ldClient.track()` feature metrics, which is a different API. -All method names and signatures below are verified against `launchdarkly-server-sdk-ai` v0.20.0 (Python) and `@launchdarkly/server-sdk-ai` v0.20.0 (`js-core/packages/sdk/server-ai`). If a method is not listed, it does not exist — do not invent it. 
+All method names and signatures below describe the current public surface of `launchdarkly-server-sdk-ai` (Python) and `@launchdarkly/server-sdk-ai` (Node). If a method is not listed, it does not exist — do not invent it. For per-release breaking changes and renames, consult the SDK CHANGELOGs: + +- Python: https://github.com/launchdarkly/python-server-sdk-ai/blob/main/packages/sdk/server-ai/CHANGELOG.md +- Node: https://github.com/launchdarkly/js-core/blob/main/packages/sdk/server-ai/CHANGELOG.md ## Tracker lifetime @@ -50,7 +53,7 @@ tracker.trackSuccess(); tracker.trackTokens(tokens); ``` -Other API notes you will see referenced below: +Other API notes worth knowing: - **Python:** `AIGraphTracker.track_latency` is `track_duration`. The `LDAIConfigTracker.track_*()` methods do not take a `graph_key` keyword — trackers obtained inside a graph traversal are already bound to the right graph key. - **Python:** `Judge.evaluate()` / `evaluate_messages()` return a `JudgeResult`; check `result.sampled` to know whether the evaluation ran. Record it with `tracker.track_judge_result(result)`. @@ -441,7 +444,7 @@ Run the checklist in order. Each step rules out one cause. 5. **Mode match** — if the code calls `completion_config` but the AI Config in LaunchDarkly is in agent mode (or vice versa), the SDK call will error out. Check the mode in the UI. 6. **Flush on shutdown** — on short-lived processes (tests, scripts), call `ld_client.flush()` before exit. Long-running servers flush automatically on an interval. 7. **Data delay** — the Monitoring tab updates within 1–2 minutes. If you just deployed, wait and retry before debugging further. -8. **SDK version** — current releases are Python `launchdarkly-server-sdk-ai` v0.20.0 and Node `@launchdarkly/server-sdk-ai` v0.20.0. The `create_tracker` / `createTracker` factory, `runId`-grouped metrics, `track_judge_result`, and `trackToolCall` / `trackToolCalls` (Node) all require these versions. +8. 
**SDK version** — confirm the installed `launchdarkly-server-sdk-ai` (Python) / `@launchdarkly/server-sdk-ai` (Node) version supports the API the code is calling. Methods like `create_tracker` / `createTracker`, `runId`-grouped metrics, `track_judge_result`, and `trackToolCall` / `trackToolCalls` (Node) were added in recent releases — see the SDK CHANGELOGs linked at the top of this file for the version they landed in. 9. **Debug logging** — enable SDK debug logging (`LD_LOG_LEVEL=debug` / `setLevel('debug')`) to see evaluation results and tracker calls in stdout. 10. **Error path silent** — are you catching exceptions that swallow tracker errors? The tracker should never raise, but if a custom wrapper catches everything, confirm the call fires by logging before and after. From 35f3f82f830f7cbcdf0c932abeae63bc76df3ce6 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 15 May 2026 13:36:04 -0500 Subject: [PATCH 6/8] fix: Modernize ManagedModel API + LDAIMetrics tokens field across skills MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two patterns Lindsey caught on the open PR review (and which the multi-line / pattern-list gap in our validation greps let through): - `LDAIMetrics(success=True, usage=TokenUsage(...))` Python multi-line constructor → `tokens=TokenUsage(...)`. The dataclass field is `tokens`; constructor with `usage=` raises TypeError on 0.20+. Fixed in 8 spots across anthropic-, bedrock-, gemini-, langchain-, openai-, strands-tracking.md and sdk-ai-tracker-patterns.md. - `ManagedModel.invoke()` → `model.run()` and `response.message.content` → `response.content` (ManagedResult is flat). Renamed in SDK 0.19. Fixed in SKILL.md Tier 1 table row (Python side), openai-tracking.md Tier 1 example, and sdk-ai-tracker-patterns.md Tier 1 mention. - `ai_client.create_chat(...)` + `chat.invoke(...)` removed at 0.20. Replaced with `create_model(...)` + `model.run(...)` in aiconfig-online-evals/SKILL.md.
Refs AIC-2383 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../ai-configs/aiconfig-ai-metrics/SKILL.md | 2 +- .../references/anthropic-tracking.md | 2 +- .../references/bedrock-tracking.md | 6 ++--- .../references/gemini-tracking.md | 2 +- .../references/langchain-tracking.md | 2 +- .../references/openai-tracking.md | 8 +++--- .../references/strands-tracking.md | 2 +- .../references/sdk-ai-tracker-patterns.md | 4 +-- .../ai-configs/aiconfig-online-evals/SKILL.md | 26 +++++++++---------- 9 files changed, 27 insertions(+), 27 deletions(-) diff --git a/skills/ai-configs/aiconfig-ai-metrics/SKILL.md b/skills/ai-configs/aiconfig-ai-metrics/SKILL.md index d5fc5db..b8b3448 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/SKILL.md +++ b/skills/ai-configs/aiconfig-ai-metrics/SKILL.md @@ -20,7 +20,7 @@ This is the order the official SDK READMEs (Python core, Node core, and every pr | Tier | Pattern | Use when | Tracks automatically | |------|---------|----------|----------------------| -| **1 — Managed runner** | Python: `ai_client.create_model(...)` returning a `ManagedModel`, then `await model.invoke(...)`.
Node: `aiClient.createModel(...)` returning a `ManagedModel`, then `await model.run(...)`. | The call is conversational (chat history, turn-based). This is what the provider READMEs lead with. | Duration, tokens, success/error — **all of it, zero tracker calls**. | +| **1 — Managed runner** | Python: `ai_client.create_model(...)` returning a `ManagedModel`, then `await model.run(...)`.
Node: `aiClient.createModel(...)` returning a `ManagedModel`, then `await model.run(...)`. | The call is conversational (chat history, turn-based). This is what the provider READMEs lead with. | Duration, tokens, success/error — **all of it, zero tracker calls**. | | **2 — Provider package + `trackMetricsOf`** | `tracker.trackMetricsOf(Provider.getAIMetricsFromResponse, () => providerCall())`. Provider packages today: `@launchdarkly/server-sdk-ai-openai`, `-langchain`, `-vercel` (Node) and `launchdarkly-server-sdk-ai-openai`, `-langchain` (Python). | The shape isn't a chat loop (one-shot completion, structured output, agent step) but the framework or provider has a package. | Duration + success/error from the wrapper; tokens from the package's built-in `getAIMetricsFromResponse` extractor. | | **3 — Custom extractor + `trackMetricsOf`** | Same `trackMetricsOf` wrapper, but you write a small function that maps the provider response to `LDAIMetrics` (tokens + success). | No provider package exists (Anthropic direct, Gemini, Cohere, custom HTTP). | Duration + success/error from the wrapper; tokens from your extractor. | | **4 — Raw manual** | Separate calls to `trackDuration`, `trackTokens`, `trackSuccess` / `trackError`, plus `trackTimeToFirstToken` for streams. | Streaming with TTFT, unusual response shapes, partial tracking, anything Tier 2–3 can't cleanly wrap. | Only what you explicitly call — it's on you to not miss one. 
| diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/anthropic-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/anthropic-tracking.md index 9373cad..d1a7b34 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/anthropic-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/anthropic-tracking.md @@ -25,7 +25,7 @@ client = anthropic.Anthropic() def anthropic_extractor(response) -> LDAIMetrics: return LDAIMetrics( success=True, - usage=TokenUsage( + tokens=TokenUsage( total=response.usage.input_tokens + response.usage.output_tokens, input=response.usage.input_tokens, output=response.usage.output_tokens, diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/bedrock-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/bedrock-tracking.md index 5b2ed4e..249b77b 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/bedrock-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/bedrock-tracking.md @@ -25,7 +25,7 @@ def bedrock_converse_extractor(response) -> LDAIMetrics: usage = response.get("usage", {}) return LDAIMetrics( success=True, - usage=TokenUsage( + tokens=TokenUsage( total=usage.get("totalTokens", 0), input=usage.get("inputTokens", 0), output=usage.get("outputTokens", 0), @@ -105,7 +105,7 @@ def invoke_model_extractor(response) -> LDAIMetrics: if "usage" in body: return LDAIMetrics( success=True, - usage=TokenUsage( + tokens=TokenUsage( total=body["usage"]["input_tokens"] + body["usage"]["output_tokens"], input=body["usage"]["input_tokens"], output=body["usage"]["output_tokens"], @@ -113,7 +113,7 @@ def invoke_model_extractor(response) -> LDAIMetrics: ) # Llama / Titan — use the fields on the specific body shape # ... - return LDAIMetrics(success=True, usage=TokenUsage(total=0, input=0, output=0)) + return LDAIMetrics(success=True, tokens=TokenUsage(total=0, input=0, output=0)) ``` This is a good reason to migrate to Converse if you can. 
diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/gemini-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/gemini-tracking.md index 78b20fb..c2ede4b 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/gemini-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/gemini-tracking.md @@ -29,7 +29,7 @@ def gemini_metrics(response) -> LDAIMetrics: usage = response.usage_metadata return LDAIMetrics( success=True, - usage=TokenUsage( + tokens=TokenUsage( total=usage.total_token_count or 0, input=usage.prompt_token_count or 0, output=usage.candidates_token_count or 0, diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/langchain-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/langchain-tracking.md index 094f5dd..c67fc74 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/langchain-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/langchain-tracking.md @@ -175,7 +175,7 @@ try: result = await tracker.track_metrics_of_async( lambda res: LDAIMetrics( success=True, - usage=sum_token_usage_from_messages(res.get("messages", [])), + tokens=sum_token_usage_from_messages(res.get("messages", [])), ), lambda: agent.ainvoke( {"messages": [{"role": "user", "content": user_prompt}]}, diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/openai-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/openai-tracking.md index d9e7422..9e8b1ef 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/openai-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/openai-tracking.md @@ -4,7 +4,7 @@ OpenAI is covered by a first-class LaunchDarkly provider package in both Python ## Tier 1 — Managed runner (chat apps) -The simplest path for conversational OpenAI calls. Zero tracker calls — duration, tokens, and success/error are all captured by `invoke()`. +The simplest path for conversational OpenAI calls. 
Zero tracker calls — duration, tokens, and success/error are all captured by `run()`. **Python** — `ManagedModel` via `ai_client.create_model()`: @@ -27,8 +27,8 @@ async def handle_turn(ai_client: LDAIClient, context: Context, user_input: str) ) if not model: return "Feature is currently unavailable." - response = await model.invoke(user_input) - return response.message.content + response = await model.run(user_input) + return response.content ``` **Node** — `ManagedModel` via `aiClient.createModel()`: @@ -147,7 +147,7 @@ from ldai.providers.types import LDAIMetrics, TokenUsage def my_openai_extractor(response) -> LDAIMetrics: return LDAIMetrics( success=True, - usage=TokenUsage( + tokens=TokenUsage( total=response.usage.total_tokens, input=response.usage.prompt_tokens, output=response.usage.completion_tokens, diff --git a/skills/ai-configs/aiconfig-ai-metrics/references/strands-tracking.md b/skills/ai-configs/aiconfig-ai-metrics/references/strands-tracking.md index 0c64f58..95135e0 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/references/strands-tracking.md +++ b/skills/ai-configs/aiconfig-ai-metrics/references/strands-tracking.md @@ -61,7 +61,7 @@ def strands_extractor(result) -> LDAIMetrics: total = usage.get("totalTokens", 0) or (input_tokens + output_tokens) return LDAIMetrics( success=True, - usage=TokenUsage(input=input_tokens, output=output_tokens, total=total), + tokens=TokenUsage(input=input_tokens, output=output_tokens, total=total), ) diff --git a/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md b/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md index 83856cd..c9e401f 100644 --- a/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md +++ b/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md @@ -278,7 +278,7 @@ from ldai.providers.types import LDAIMetrics, TokenUsage def anthropic_extractor(response) -> LDAIMetrics: return LDAIMetrics( success=True, - 
usage=TokenUsage( + tokens=TokenUsage( total=response.usage.input_tokens + response.usage.output_tokens, input=response.usage.input_tokens, output=response.usage.output_tokens, @@ -332,7 +332,7 @@ const response = await tracker.trackMetricsOf( For chat-loop applications, both SDKs expose a higher-level API that handles tracking end-to-end with no tracker calls at all: -- Python: `ai_client.create_model(...)` → `ManagedModel`, then `await model.invoke(user_input)` +- Python: `ai_client.create_model(...)` → `ManagedModel`, then `await model.run(user_input)` - Node: `aiClient.createModel(...)` → `ManagedModel`, then `await model.run(userInput)` The managed runner handles message history, provider dispatch (via the installed provider package — OpenAI, LangChain, Vercel), and tracker wiring. The runner creates its own tracker internally via the factory — you do **not** pass a tracker in. If the migration target is conversational, this is the right tier and you don't need anything from the tables above. 
diff --git a/skills/ai-configs/aiconfig-online-evals/SKILL.md b/skills/ai-configs/aiconfig-online-evals/SKILL.md index 631c76f..da05f0d 100644 --- a/skills/ai-configs/aiconfig-online-evals/SKILL.md +++ b/skills/ai-configs/aiconfig-online-evals/SKILL.md @@ -280,7 +280,7 @@ class AIConfigJudges: ## SDK: Automatic Evaluation -When using `create_chat()` + `invoke()`, attached judges evaluate automatically: +When using `create_model()` + `run()`, attached judges evaluate automatically: ```python import os @@ -307,25 +307,25 @@ async def async_main(): default_value = AICompletionConfigDefault(enabled=False) - # create_chat() initializes with judges from AI Config - chat = await aiclient.create_chat(ai_config_key, context, default_value, {}) + # create_model() initializes with judges from AI Config + model = await aiclient.create_model(ai_config_key, context, default_value, {}) - if not chat: - print(f"AI chat configuration not enabled for: {ai_config_key}") + if not model: + print(f"AI configuration not enabled for: {ai_config_key}") return user_input = 'How can LaunchDarkly help me?' 
- # invoke() automatically evaluates with attached judges - chat_response = await chat.invoke(user_input) - print("Response:", chat_response.message.content) + # run() automatically evaluates with attached judges + result = await model.run(user_input) + print("Response:", result.content) # Await evaluation results - if chat_response.evaluations and len(chat_response.evaluations) > 0: - eval_results = await asyncio.gather(*chat_response.evaluations) + if result.evaluations and len(result.evaluations) > 0: + eval_results = await asyncio.gather(*result.evaluations) results_to_display = [ - result.to_dict() if result is not None else "not evaluated" - for result in eval_results + r.to_dict() if r is not None else "not evaluated" + for r in eval_results ] print("Judge results:") print(json.dumps(results_to_display, indent=2, default=str)) @@ -448,7 +448,7 @@ After attaching judges: **Python SDK examples:** - [direct_judge_example.py](https://github.com/launchdarkly/hello-python-ai/blob/main/examples/direct_judge_example.py) - Evaluate input/output pairs directly -- [chat_judge_example.py](https://github.com/launchdarkly/hello-python-ai/blob/main/examples/chat_judge_example.py) - Automatic evaluation with create_chat/invoke +- [chat_judge_example.py](https://github.com/launchdarkly/hello-python-ai/blob/main/examples/chat_judge_example.py) - Automatic evaluation with create_model/run **Node.js SDK examples:** - [judge-evaluation](https://github.com/launchdarkly/js-core/blob/main/packages/sdk/server-ai/examples/judge-evaluation/src/index.ts) - Both direct evaluation and automatic chat-based evaluation From 9a327caff97332d2b75928271bac4b16bc70a4bb Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 15 May 2026 14:10:30 -0500 Subject: [PATCH 7/8] fix: Update stale example links to post-AIC-1934 / post-#1379 layout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Python `hello-python-ai/examples/` directory was emptied during the 
AIC-1934 reorg (examples moved under `features/` and `getting_started/`); the Node `js-core/packages/sdk/server-ai/examples/judge-evaluation/` dir was removed during the EXAM-SDK-example restructure in js-core #1379 (examples split into `features/` and `getting-started/`). Update the broken links in aiconfig-online-evals/SKILL.md: - direct_judge_example.py → features/create_judge/create_judge_example.py - chat_judge_example.py → features/create_model/create_model_example.py (chat-judge demo is now the managed-model example; attached judges fire automatically during `run`) - js-core judge-evaluation → split into features/create-judge (direct) and features/create-model (auto) All four target URLs verified via gh api. Refs AIC-2383 Co-Authored-By: Claude Opus 4.7 (1M context) --- skills/ai-configs/aiconfig-online-evals/SKILL.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/skills/ai-configs/aiconfig-online-evals/SKILL.md b/skills/ai-configs/aiconfig-online-evals/SKILL.md index da05f0d..8153d89 100644 --- a/skills/ai-configs/aiconfig-online-evals/SKILL.md +++ b/skills/ai-configs/aiconfig-online-evals/SKILL.md @@ -447,8 +447,9 @@ After attaching judges: - [Custom Judges](https://docs.launchdarkly.com/home/ai-configs/custom-judges) **Python SDK examples:** -- [direct_judge_example.py](https://github.com/launchdarkly/hello-python-ai/blob/main/examples/direct_judge_example.py) - Evaluate input/output pairs directly -- [chat_judge_example.py](https://github.com/launchdarkly/hello-python-ai/blob/main/examples/chat_judge_example.py) - Automatic evaluation with create_model/run +- [create_judge_example.py](https://github.com/launchdarkly/hello-python-ai/blob/main/features/create_judge/create_judge_example.py) - Evaluate input/output pairs directly via `create_judge` + `evaluate` +- [create_model_example.py](https://github.com/launchdarkly/hello-python-ai/blob/main/features/create_model/create_model_example.py) - Automatic evaluation with 
`create_model` + `run` (attached judges fire during the run) **Node.js SDK examples:** -- [judge-evaluation](https://github.com/launchdarkly/js-core/blob/main/packages/sdk/server-ai/examples/judge-evaluation/src/index.ts) - Both direct evaluation and automatic chat-based evaluation +- [features/create-judge](https://github.com/launchdarkly/js-core/blob/main/packages/sdk/server-ai/examples/features/create-judge/src/index.ts) - Evaluate input/output pairs directly via `createJudge` + `evaluate` +- [features/create-model](https://github.com/launchdarkly/js-core/blob/main/packages/sdk/server-ai/examples/features/create-model/src/index.ts) - Automatic evaluation with `createModel` + `run` (attached judges fire during the run) From ae5c0ce662a75ad584bfa31756050a5a2100d694 Mon Sep 17 00:00:00 2001 From: jsonbailey Date: Fri, 15 May 2026 17:08:05 -0500 Subject: [PATCH 8/8] fix: Correct create_judge usage and add pre-0.20 migration step MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three correctness fixes to the AI Configs skills, surfaced by sub-agent test runs against the modernized skill content: - Drop `await` from `ai_client.create_judge(...)` in three places (aiconfig-online-evals/SKILL.md, aiconfig-migrate/SKILL.md, and aiconfig-migrate/references/sdk-ai-tracker-patterns.md). The SDK's `create_judge` is synchronous and returns `Optional[Judge]`; awaiting it raises TypeError. `Judge.evaluate(...)` is still correctly awaited. - In the aiconfig-online-evals Direct Judge example, swap the default passed to `create_judge` from `AICompletionConfigDefault(enabled=False)` to `AIJudgeConfigDefault(enabled=False)`. The `default` parameter is typed `Optional[AIJudgeConfigDefault]` and the rule is already documented in sdk-ai-tracker-patterns.md — the example just wasn't following it. - Add an "On the current SDK API?" 
checklist item to step 1 of aiconfig-ai-metrics so that when the call site uses pre-0.20 `aiclient.config(...)` / `AIConfig(...)`, the agent migrates to `completion_config()`/`agent_config()` and `AICompletionConfigDefault`/`AIAgentConfigDefault` before adding tracking, instead of stacking modern tracking on top of stale call shapes. Co-Authored-By: Claude Opus 4.7 --- skills/ai-configs/aiconfig-ai-metrics/SKILL.md | 5 +++++ skills/ai-configs/aiconfig-migrate/SKILL.md | 2 +- .../aiconfig-migrate/references/sdk-ai-tracker-patterns.md | 2 +- skills/ai-configs/aiconfig-online-evals/SKILL.md | 6 +++--- 4 files changed, 10 insertions(+), 5 deletions(-) diff --git a/skills/ai-configs/aiconfig-ai-metrics/SKILL.md b/skills/ai-configs/aiconfig-ai-metrics/SKILL.md index b8b3448..5d4aadc 100644 --- a/skills/ai-configs/aiconfig-ai-metrics/SKILL.md +++ b/skills/ai-configs/aiconfig-ai-metrics/SKILL.md @@ -39,6 +39,11 @@ Before picking a tier, find the provider call and answer these questions: - [ ] **Streaming?** If yes, you'll need TTFT tracking, which means Tier 4 for the TTFT part even if the rest is Tier 2. - [ ] **Language?** Python or Node? Provider-package coverage differs between them. - [ ] **Already using an AI Config?** If not, route to `aiconfig-create` first — tracking requires a tracker, which is obtained by calling `create_tracker()` / `createTracker()` on the config object returned by `completion_config()` / `completionConfig()` / `createModel()`. +- [ ] **On the current SDK API?** If the call site uses `aiclient.config(...)` / `aiClient.config(...)` or constructs an `AIConfig(...)` / `LDAIConfig` default, it's on the pre-0.20 surface. Migrate it as part of this work before adding tracking: + - `aiclient.config(...)` → `aiclient.completion_config(...)` for one-shot/chat or `aiclient.agent_config(...)` for agent mode (mirror the call signature). Node is the same with camelCase. 
+ - `AIConfig(...)` default → `AICompletionConfigDefault(...)` or `AIAgentConfigDefault(...)` (Node: `LDAICompletionConfigDefault` / `LDAIAgentConfigDefault`). `AIConfig` is the base class the SDK returns; it isn't a valid default-value constructor — the typed `*Default` variants are. + - If the result was being tuple-unpacked (`config, tracker = aiclient.config(...)`), drop the unpack — the new methods return a single config object. Obtain the tracker via `config.create_tracker()` / `aiConfig.createTracker()`. + - For deeper rewrites (call sites with hardcoded model/prompt as well), hand off to `aiconfig-migrate` instead of doing the full migration here. ### 2. Look up your Tier-2 option diff --git a/skills/ai-configs/aiconfig-migrate/SKILL.md b/skills/ai-configs/aiconfig-migrate/SKILL.md index 7b4b5bc..296f466 100644 --- a/skills/ai-configs/aiconfig-migrate/SKILL.md +++ b/skills/ai-configs/aiconfig-migrate/SKILL.md @@ -458,7 +458,7 @@ Hand off: print the AI Config key, variation key, provider, and whether the call ```python from ldai.client import AIJudgeConfigDefault - judge = await ai_client.create_judge( + judge = ai_client.create_judge( judge_key, # judge AI Config key in LD ld_context, AIJudgeConfigDefault(enabled=False), # fallback: skip eval on SDK miss diff --git a/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md b/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md index c9e401f..f39d813 100644 --- a/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md +++ b/skills/ai-configs/aiconfig-migrate/references/sdk-ai-tracker-patterns.md @@ -210,7 +210,7 @@ The full programmatic direct-judge pattern (Python): ```python from ldai.client import AIJudgeConfigDefault -judge = await ai_client.create_judge( +judge = ai_client.create_judge( judge_key, # judge AI Config key in LD ld_context, AIJudgeConfigDefault(enabled=False), # fallback: skip eval on SDK miss diff --git 
a/skills/ai-configs/aiconfig-online-evals/SKILL.md b/skills/ai-configs/aiconfig-online-evals/SKILL.md index 8153d89..051b37a 100644 --- a/skills/ai-configs/aiconfig-online-evals/SKILL.md +++ b/skills/ai-configs/aiconfig-online-evals/SKILL.md @@ -347,7 +347,7 @@ import asyncio import ldclient from ldclient import Context from ldclient.config import Config -from ldai import LDAIClient, AICompletionConfigDefault +from ldai import LDAIClient, AIJudgeConfigDefault sdk_key = os.getenv('LAUNCHDARKLY_SDK_KEY') judge_key = os.getenv('LAUNCHDARKLY_AI_JUDGE_KEY', 'sample-ai-judge-accuracy') @@ -363,10 +363,10 @@ async def async_main(): .build() ) - judge_default_value = AICompletionConfigDefault(enabled=False) + judge_default_value = AIJudgeConfigDefault(enabled=False) # Get judge configuration from LaunchDarkly - judge = await aiclient.create_judge(judge_key, context, judge_default_value) + judge = aiclient.create_judge(judge_key, context, judge_default_value) if not judge: print(f"AI judge configuration not enabled for key: {judge_key}")