diff --git a/lib/braintrust/api/internal/experiments.rb b/lib/braintrust/api/internal/experiments.rb index 9c09f92..b2e2ea5 100644 --- a/lib/braintrust/api/internal/experiments.rb +++ b/lib/braintrust/api/internal/experiments.rb @@ -22,7 +22,8 @@ def initialize(state) # @param tags [Array, nil] Optional tags # @param metadata [Hash, nil] Optional metadata # @return [Hash] Experiment data with "id", "name", "project_id", etc. - def create(name:, project_id:, ensure_new: true, tags: nil, metadata: nil) + def create(name:, project_id:, ensure_new: true, tags: nil, metadata: nil, + dataset_id: nil, dataset_version: nil) uri = URI("#{@state.api_url}/v1/experiment") payload = { @@ -32,6 +33,8 @@ def create(name:, project_id:, ensure_new: true, tags: nil, metadata: nil) } payload[:tags] = tags if tags payload[:metadata] = metadata if metadata + payload[:dataset_id] = dataset_id if dataset_id + payload[:dataset_version] = dataset_version if dataset_version request = Net::HTTP::Post.new(uri) request["Content-Type"] = "application/json" diff --git a/lib/braintrust/eval.rb b/lib/braintrust/eval.rb index 0f59326..d5c154a 100644 --- a/lib/braintrust/eval.rb +++ b/lib/braintrust/eval.rb @@ -220,8 +220,14 @@ def run(project:, experiment:, task:, scorers:, api.login # Resolve dataset to cases if dataset parameter provided + dataset_id = nil + dataset_version = nil + if dataset - cases = resolve_dataset(dataset, project, api) + resolved = resolve_dataset(dataset, project, api) + cases = resolved[:cases] + dataset_id = resolved[:dataset_id] + dataset_version = resolved[:dataset_version] end # Register project and experiment via internal API @@ -234,7 +240,9 @@ def run(project:, experiment:, task:, scorers:, project_id: project_result["id"], ensure_new: !update, tags: tags, - metadata: metadata + metadata: metadata, + dataset_id: dataset_id, + dataset_version: dataset_version ) experiment_id = experiment_result["id"] @@ -292,11 +300,11 @@ def validate_params!(project:, experiment:, cases:, dataset:, task:, scorers:) end end - # Resolve dataset parameter to an array of case records + # Resolve dataset parameter to cases with metadata for experiment linking # @param dataset [String, Hash, Dataset] Dataset specifier or instance # @param project [String] Project name (used as default if not specified) # @param api [API] Braintrust API client - # @return [Array] Array of case records + # @return [Hash] Hash with :cases, :dataset_id, and :dataset_version def resolve_dataset(dataset, project, api) limit = nil @@ -315,7 +323,15 @@ def resolve_dataset(dataset, project, api) raise ArgumentError, "dataset must be String, Hash, or Dataset, got #{dataset.class}" end - dataset_obj.fetch_all(limit: limit) + cases = dataset_obj.fetch_all(limit: limit) + + # Use pinned version if available, otherwise compute from max(_xact_id) + version = dataset_obj.version + version ||= cases + .filter_map { |c| c[:origin] && JSON.parse(c[:origin])["_xact_id"] } + .max + + {cases: cases, dataset_id: dataset_obj.id, dataset_version: version} end end end diff --git a/test/braintrust/eval_test.rb b/test/braintrust/eval_test.rb index 17511f8..a9d6530 100644 --- a/test/braintrust/eval_test.rb +++ b/test/braintrust/eval_test.rb @@ -837,6 +837,44 @@ def test_eval_with_remote_dataset_sets_origin_from_api_response assert origin["id"], "origin.id (record id) should be present" assert origin["_xact_id"], "origin._xact_id should be present" end + + # Verify experiment was linked to dataset via the actual HTTP request + assert_requested :post, %r{v1/experiment} do |req| + body = JSON.parse(req.body) + assert_equal dataset_id, body["dataset_id"], + "Expected dataset_id in experiment creation payload" + assert body["dataset_version"], + "Expected dataset_version in experiment creation payload" + end + end + end + + def test_eval_run_without_dataset_does_not_send_dataset_fields + # When no dataset is provided, dataset_id and dataset_version should be nil + VCR.use_cassette("eval/run_basic") do + api = get_integration_test_api + + task = ->(input) { input.upcase } + scorer = Braintrust::Eval.scorer("exact") { |i, e, o| (o == e) ? 1.0 : 0.0 } + + Braintrust::Eval.run( + project: "ruby-sdk-test", + experiment: "test-ruby-sdk-basic", + cases: [{input: "hello", expected: "HELLO"}], + task: task, + scorers: [scorer], + api: api, + quiet: true + ) + + # Verify experiment creation did not include dataset fields + assert_requested :post, /v1\/experiment/ do |req| + body = JSON.parse(req.body) + assert_nil body["dataset_id"], + "Expected no dataset_id when no dataset provided" + assert_nil body["dataset_version"], + "Expected no dataset_version when no dataset provided" + end end end end