Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion lib/braintrust/api/internal/experiments.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ def initialize(state)
# @param tags [Array<String>, nil] Optional tags
# @param metadata [Hash, nil] Optional metadata
# @return [Hash] Experiment data with "id", "name", "project_id", etc.
def create(name:, project_id:, ensure_new: true, tags: nil, metadata: nil)
def create(name:, project_id:, ensure_new: true, tags: nil, metadata: nil,
dataset_id: nil, dataset_version: nil)
uri = URI("#{@state.api_url}/v1/experiment")

payload = {
Expand All @@ -32,6 +33,8 @@ def create(name:, project_id:, ensure_new: true, tags: nil, metadata: nil)
}
payload[:tags] = tags if tags
payload[:metadata] = metadata if metadata
payload[:dataset_id] = dataset_id if dataset_id
payload[:dataset_version] = dataset_version if dataset_version

request = Net::HTTP::Post.new(uri)
request["Content-Type"] = "application/json"
Expand Down
26 changes: 21 additions & 5 deletions lib/braintrust/eval.rb
Original file line number Diff line number Diff line change
Expand Up @@ -220,8 +220,14 @@ def run(project:, experiment:, task:, scorers:,
api.login

# Resolve dataset to cases if dataset parameter provided
dataset_id = nil
dataset_version = nil

if dataset
cases = resolve_dataset(dataset, project, api)
resolved = resolve_dataset(dataset, project, api)
cases = resolved[:cases]
dataset_id = resolved[:dataset_id]
dataset_version = resolved[:dataset_version]
end

# Register project and experiment via internal API
Expand All @@ -234,7 +240,9 @@ def run(project:, experiment:, task:, scorers:,
project_id: project_result["id"],
ensure_new: !update,
tags: tags,
metadata: metadata
metadata: metadata,
dataset_id: dataset_id,
dataset_version: dataset_version
)

experiment_id = experiment_result["id"]
Expand Down Expand Up @@ -292,11 +300,11 @@ def validate_params!(project:, experiment:, cases:, dataset:, task:, scorers:)
end
end

# Resolve dataset parameter to an array of case records
# Resolve dataset parameter to cases with metadata for experiment linking
# @param dataset [String, Hash, Dataset] Dataset specifier or instance
# @param project [String] Project name (used as default if not specified)
# @param api [API] Braintrust API client
# @return [Array<Hash>] Array of case records
# @return [Hash] Hash with :cases, :dataset_id, and :dataset_version
def resolve_dataset(dataset, project, api)
limit = nil

Expand All @@ -315,7 +323,15 @@ def resolve_dataset(dataset, project, api)
raise ArgumentError, "dataset must be String, Hash, or Dataset, got #{dataset.class}"
end

dataset_obj.fetch_all(limit: limit)
cases = dataset_obj.fetch_all(limit: limit)

# Use pinned version if available, otherwise compute from max(_xact_id)
version = dataset_obj.version
version ||= cases
.filter_map { |c| c[:origin] && JSON.parse(c[:origin])["_xact_id"] }
.max

{cases: cases, dataset_id: dataset_obj.id, dataset_version: version}
end
end
end
Expand Down
38 changes: 38 additions & 0 deletions test/braintrust/eval_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -837,6 +837,44 @@ def test_eval_with_remote_dataset_sets_origin_from_api_response
assert origin["id"], "origin.id (record id) should be present"
assert origin["_xact_id"], "origin._xact_id should be present"
end

# Verify experiment was linked to dataset via the actual HTTP request
assert_requested :post, %r{v1/experiment} do |req|
body = JSON.parse(req.body)
assert_equal dataset_id, body["dataset_id"],
"Expected dataset_id in experiment creation payload"
assert body["dataset_version"],
"Expected dataset_version in experiment creation payload"
end
end
end

# Verifies that an eval run driven by inline cases (no dataset) creates its
# experiment without any dataset linkage fields in the request payload.
def test_eval_run_without_dataset_does_not_send_dataset_fields
  VCR.use_cassette("eval/run_basic") do
    api = get_integration_test_api

    task = ->(input) { input.upcase }
    scorer = Braintrust::Eval.scorer("exact") { |i, e, o| (o == e) ? 1.0 : 0.0 }

    Braintrust::Eval.run(
      project: "ruby-sdk-test",
      experiment: "test-ruby-sdk-basic",
      cases: [{input: "hello", expected: "HELLO"}],
      task: task,
      scorers: [scorer],
      api: api,
      quiet: true
    )

    # Check that the keys are absent rather than merely nil: a payload that
    # sent an explicit JSON null would still count as "sending" the field.
    assert_requested :post, %r{v1/experiment} do |req|
      body = JSON.parse(req.body)
      refute body.key?("dataset_id"),
        "Expected no dataset_id when no dataset provided"
      refute body.key?("dataset_version"),
        "Expected no dataset_version when no dataset provided"
      # WebMock uses the block's return value to decide whether the request
      # matches, so return true explicitly instead of relying on what
      # refute happens to return.
      true
    end
  end
end
end