Evaluation / Experiments
Experiment Runs
Start experiment runs, poll for completion, and view results across TypeScript, Python, and Java SDKs.
Experiment Runs
Create a Run
const run = await experimentRuns.create(
'experiment-id-abc', // experimentId
'output', // llmColumnName
5, // concurrency (1-100)
{ temperature: 0.0 } // optional runConfig
);
console.log(run.id, run.status); // 'PENDING'

run = client.experiment_runs.create(
"cmnr8ovww005mif07uuxoafrq", # experiment_id
)
print(run["id"], run["status"])

import com.browserstack.aisdk.eval.model.CreateExperimentRunRequest;
import com.browserstack.aisdk.eval.model.ExperimentRunResponse;
ExperimentRunResponse run = experimentRuns.create(
CreateExperimentRunRequest.builder()
.experimentId(experiment.getId())
.build()
);
System.out.println("Run ID: " + run.getId());
System.out.println("Status: " + run.getStatus());

Poll for Completion
const result = await experimentRuns.subscribe(
run.id,
120_000, // timeout in ms
5_000 // poll interval in ms
);
if (result.finalStatus === 'COMPLETED') {
console.log('Run completed:', result.experimentRunData);
} else {
console.error('Run failed or timed out:', result.finalStatus);
}

// Default: polls every 5s, times out after 5 minutes
ExperimentRunResponse finalRun = experimentRuns.subscribe(
run.getId(),
update -> System.out.println("Status: " + update.getStatus())
);
System.out.println("Final status: " + finalRun.getStatus());

With a custom timeout:
long tenMinutes = 10 * 60 * 1_000L;
ExperimentRunResponse finalRun = experimentRuns.subscribe(
run.getId(),
update -> System.out.println("[" + update.getStatus() + "] " + update.getProgress()),
tenMinutes
);

Without a callback:
ExperimentRunResponse finalRun = experimentRuns.subscribe(run.getId());

Get a Run
const run = await experimentRuns.find('run-id-abc');
console.log(run.status);

run = client.experiment_runs.find("run-id")

ExperimentRunResponse run = experimentRuns.get("run_abc123");

List Runs
const result = await experimentRuns.list(
'experiment-id-abc', // optional filter
1, // page
20 // limit
);
for (const r of result.experimentRuns) {
console.log(r.id, r.status, r.createdAt);
}

runs = client.experiment_runs.list(experiment_id="experiment-id")

// Runs for a specific experiment
ListExperimentRunsResponse runs = experimentRuns.list("exp_abc123");
// With pagination
ListExperimentRunsResponse runs = experimentRuns.list("exp_abc123", 1, 20);
// All runs (no experiment filter)
ListExperimentRunsResponse runs = experimentRuns.list(null, 1, 50);

Update a Run
import { ExperimentRunStatus } from '@browserstack/ai-sdk';
await experimentRuns.update('run-id-abc', {
status: ExperimentRunStatus.COMPLETED,
result: { score: 0.87 },
metadata: { note: 'Manual override' },
});

Delete a Run
await experimentRuns.delete('run-id-abc');

Run Status Values
enum ExperimentRunStatus {
PENDING = 'PENDING',
RUNNING = 'RUNNING',
COMPLETED = 'COMPLETED',
FAILED = 'FAILED',
CANCELLED = 'CANCELLED',
}

Complete End-to-End Example
import { AISDK, ExperimentRunStatus } from '@browserstack/ai-sdk';
const testOps = new AISDK({
publicKey: process.env.AISDK_PUBLIC_KEY,
secretKey: process.env.AISDK_SECRET_KEY,
});
/**
 * End-to-end example: creates an experiment, starts a run for it, and waits
 * for that run to finish.
 *
 * The SDK client is shut down before this function settles on every path:
 * success, a run that did not complete, or an SDK call that throws. A run
 * that does not reach 'COMPLETED' sets a non-zero process exit code.
 *
 * @returns {Promise<void>} Settles after `testOps.shutdown()` has been awaited.
 */
async function runExperiment() {
  const { experiments, experimentRuns } = testOps;

  try {
    // 1. Create the experiment
    const experiment = await experiments.create({
      name: `accuracy-test-${Date.now()}`,
      description: 'Automated accuracy evaluation',
      promptId: process.env.PROMPT_ID!,
      datasetId: process.env.DATASET_ID!,
      evaluatorListId: process.env.EVALUATOR_LIST_ID!,
      concurrency: 5,
    });
    console.log('Created experiment:', experiment.id);

    // 2. Start a run
    const run = await experimentRuns.create(experiment.id, 'output', 5);
    console.log('Started run:', run.id, 'status:', run.status);

    // 3. Wait for completion (timeout in ms, then poll interval in ms)
    const result = await experimentRuns.subscribe(run.id, 300_000, 10_000);
    if (result.finalStatus === 'COMPLETED') {
      console.log('Experiment completed successfully!');
      console.log('Run details:', result.experimentRunData);
    } else {
      console.error('Experiment did not complete:', result.finalStatus);
      // Record the failure via exitCode rather than calling process.exit(1):
      // exiting here would terminate the process before the client is shut down.
      process.exitCode = 1;
    }
  } finally {
    // Always release the client, even when a step above throws.
    await testOps.shutdown();
  }
}
runExperiment();

import os
from browserstack_ai_sdk import AISDK
client = AISDK(
public_key=os.environ["AISDK_PUBLIC_KEY"],
secret_key=os.environ["AISDK_SECRET_KEY"],
)
# 1. Create dataset (see Datasets page)
client.datasets.create(name="support-qa")
client.datasets.create_items(
dataset_name="support-qa",
items=[
{"input": {"q": "How do I reset my password?"}, "expectedOutput": "Go to Settings > Security > Reset."},
{"input": {"q": "What are your business hours?"}, "expectedOutput": "We are open 9am–6pm EST."},
],
)
# 2. Create experiment
experiment = client.experiments.create({
"name": "support-qa-eval",
"evaluatorListId": "<evaluator-list-id>",
"datasetId": "<dataset-id>",
"promptId": "<prompt-id>",
})
# 3. Create a run
run = client.experiment_runs.create({
"experimentId": experiment["id"],
"name": "initial-run",
})
print(f"Experiment run created: {run['id']}")

import com.browserstack.aisdk.TestOps;
import com.browserstack.aisdk.eval.model.*;
import java.util.List;
/**
 * End-to-end example: creates a dataset with one item, creates an experiment
 * that references it, starts a run, and waits for the run to finish.
 */
public class ExperimentExample {
    /**
     * Runs the example against an SDK client built from the environment.
     * The client is shut down in a {@code finally} block so it is released
     * even when one of the SDK calls throws.
     *
     * @param args unused
     * @throws InterruptedException declared by the original example and kept
     *         so the signature is unchanged
     */
    public static void main(String[] args) throws InterruptedException {
        TestOps sdk = TestOps.fromEnv();
        try {
            // 1. Create a dataset
            sdk.datasets().create("aurora-qa", "Q&A about Aurora Borealis");
            sdk.datasets().createItems("aurora-qa", List.of(
                CreateDatasetItemRequest.builder()
                    .input("What causes Northern Lights?")
                    .expectedOutput("Solar wind particles colliding with atmospheric gases.")
                    .build()
            ));

            // 2. Create an experiment
            ExperimentResponse experiment = sdk.experiments().create(
                CreateExperimentRequest.builder()
                    .name("aurora-qa-v1")
                    .evaluatorListId("evl_abc123")
                    .promptId("prm_xyz789")
                    .datasetId("aurora-qa")
                    .build()
            );

            // 3. Start a run
            ExperimentRunResponse run = sdk.experimentRuns().create(
                CreateExperimentRunRequest.builder()
                    .experimentId(experiment.getId())
                    .build()
            );

            // 4. Wait for completion; the callback prints each status update
            ExperimentRunResponse result = sdk.experimentRuns().subscribe(
                run.getId(),
                r -> System.out.printf("[%s] %s%n", r.getStatus(), r.getId())
            );
            System.out.println("Done: " + result.getStatus());
        } finally {
            // Always release the client, even when a step above throws.
            sdk.shutdown();
        }
    }
}