Create an Experiment

You can create experiments from the Dashboard UI, the SDK (TypeScript, Python, Java), or the REST API. The UI supports all three output options (Prompt + Dataset, API, Dataset Run Tag); the SDK supports Prompt + Dataset and Dataset Run Tag.

From the Dashboard

Open the Experiments page

Navigate to Evaluation > Experiments in the left sidebar, then click New Experiment in the top-right corner. (On first use, the empty state shows a Create Experiment button with onboarding copy instead.)

Step 1 — Experiment Details

Fill in the basic details:

Experiment Name (required) — unique name for this experiment (up to 500 characters)
Description (optional) — free-form notes
Concurrency (required) — number of items evaluated in parallel: 1, 2, 3, 5, or 10. Default is 3

Click Next.

Step 2 — Dataset and Configuration

First pick the dataset to run the experiment against:

Dataset (required) — choose from your existing datasets, or click New dataset at the bottom of the dropdown to create one inline.

Then choose how outputs will be generated. Three options appear as cards:

Option	When to use
Prompt	Generate outputs by running a prompt (with a model) against each dataset item.
API	Call an HTTP endpoint for each item and evaluate the response. Use this for RAG, agents, or any custom pipeline.
Dataset Run Tag	Skip generation — evaluate outputs from an existing tagged dataset run. Only appears when the dataset has tags.

Click Next.

Step 3 — Select Evaluator List

Choose the evaluator list that will score the outputs.

A warning appears if the list mixes single-turn and multi-turn evaluators — any evaluator that is invalid for the experiment will fail with an error during the run.
If any evaluator needs an LLM but no API key is configured for its provider, a fallback notice appears: the evaluator will use your project's Default Evaluation Provider instead.
If the dataset has multi-scoring enabled, an extra section lets you map scoring criteria to specific evaluators.

Click Create experiment.

Run the experiment

After creation, the experiment appears in the list. Click into it to start a run or view results.

From the SDK

The SDK supports the Prompt + Dataset and Dataset Run Tag options. The API option is UI-only.

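The SDK provides three equivalent ways to access experiment methods: directly on the client, through the evaluate namespace on a client instance, or through the standalone Evaluate export: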

import { AISDK, Evaluate } from '@browserstack/ai-sdk';

const client = new AISDK();

// Direct access
await client.experiments.create({ ... });
await client.experimentRuns.create('experiment-id');

// Via evaluate namespace on client instance
await client.evaluate.experiment.create({ ... });
await client.evaluate.experimentRun.create('experiment-id');

// Standalone (reads keys from env, no client needed)
await Evaluate.experiment.create({ ... });
await Evaluate.experimentRun.create('experiment-id');

from browserstack_ai_sdk import AISDK, Evaluate

client = AISDK()

# Direct access
client.experiments.create({ ... })
client.experiment_runs.create("experiment-id")

# Via evaluate namespace on client instance
client.evaluate.experiment.create({ ... })
client.evaluate.experiment_run.create("experiment-id")

# Standalone (reads keys from env, no client needed)
Evaluate.experiment.create({ ... })
Evaluate.experiment_run.create("experiment-id")

All of these access patterns call the same underlying API. The examples below use the direct access pattern.

Setup

import { AISDK } from '@browserstack/ai-sdk';

const client = new AISDK({
  publicKey: process.env.AISDK_PUBLIC_KEY,
  secretKey: process.env.AISDK_SECRET_KEY,
});

const experiments = client.experiments;       // Experiments
const experimentRuns = client.experimentRuns; // ExperimentRuns

import os
from browserstack_ai_sdk import AISDK

client = AISDK(
    public_key=os.environ["AISDK_PUBLIC_KEY"],
    secret_key=os.environ["AISDK_SECRET_KEY"],
)

import com.browserstack.aisdk.TestOps;
import com.browserstack.aisdk.eval.ExperimentsClient;
import com.browserstack.aisdk.eval.ExperimentRunsClient;

TestOps sdk = TestOps.fromEnv();
ExperimentsClient experiments = sdk.experiments();
ExperimentRunsClient experimentRuns = sdk.experimentRuns();

With a Prompt and Dataset

An experiment requires a name, an evaluator list (evaluatorListId), and either datasetRunTagId alone or both promptId and datasetId together.

import { AISDK } from '@browserstack/ai-sdk';

const client = new AISDK({
  publicKey: process.env.AISDK_PUBLIC_KEY,
  secretKey: process.env.AISDK_SECRET_KEY,
});

const experiment = await client.experiments.create({
      name: `prompt-comparison-${Date.now()}`,
      promptId: promptId,
      datasetId: dataset.id,
      evaluatorListId: evaluatorListId,
    });

console.log(experiment.id);

experiment = client.experiments.create({
    "name": "rag-eval-v2",
    "evaluatorListId": "eval-list-id",
    "datasetId": "dataset-id",
    "promptId": "prompt-id",
})

import com.browserstack.aisdk.eval.model.CreateExperimentRequest;
import com.browserstack.aisdk.eval.model.ExperimentResponse;

ExperimentResponse experiment = experiments.create(
    CreateExperimentRequest.builder()
        .name("gpt-4o-faithfulness-v2")
        .evaluatorListId("evl_abc123")
        .promptId("prm_xyz789")
        .datasetId("ds_def456")
        .concurrency(5)
        .build()
);

System.out.println("Experiment ID: " + experiment.getId());

You must provide either datasetRunTagId alone, or both promptId and datasetId together. In the Java SDK, mixing them throws IllegalArgumentException.

With a Dataset Run Tag

const experiment = await client.experiments.create({
  name: `tag-experiment-${Date.now()}`,
  datasetRunTagId: tagId,
  evaluatorListId: evaluatorListId,
});
console.log('Experiment created:', experiment.id);

experiment = client.experiments.create({
    "name": "rag-eval-v1",
    "evaluatorListId": "eval-list-id",
    "datasetRunTagId": "tag-id-from-dataset-run",
})
print(f"Experiment created: {experiment['id']}")

Complete Example

This example sets up the prerequisites — it creates a dataset and a prompt, and picks an existing evaluator list — and then creates an experiment end-to-end.

import { AISDK } from '@browserstack/ai-sdk';

const client = new AISDK({
  publicKey: process.env.AISDK_PUBLIC_KEY,
  secretKey: process.env.AISDK_SECRET_KEY,
});

// 1. Create a dataset
const dataset = await client.datasets.create(
  'qa-evaluation-dataset',
  'Dataset for QA model evaluation'
);
console.log(`Dataset created: ${dataset.id}`);

// 2. Create a prompt
const prompt = await client.prompt.create({
  name: 'docs_exp_prompt',
  type: 'text',
  prompt: 'Answer: {{question}}',
});
const promptId = prompt.promptResponse.id;
console.log(`Prompt created: ${promptId}`);

// 3. Get an evaluator list (or create one)
const evalLists = await client.evalsList.list(5);
for (const el of evalLists.evaluators) {
  console.log(el.id, el.name);
}
const evaluatorListId = evalLists.evaluators[0].id;

// 4. Create the experiment with prompt and dataset
const experiment = await client.experiments.create({
  name: `prompt-comparison-${Date.now()}`,
  promptId: promptId,
  datasetId: dataset.id,
  evaluatorListId: evaluatorListId,
});
console.log(`Experiment created: ${experiment.id}`);

import os
from browserstack_ai_sdk import AISDK

client = AISDK(
    public_key=os.environ["AISDK_PUBLIC_KEY"],
    secret_key=os.environ["AISDK_SECRET_KEY"],
)

# 1. Create a dataset
dataset = client.datasets.create(
    "qa-evaluation-dataset",
    "Dataset for QA model evaluation",
)
print(f"Dataset created: {dataset['id']}")

# 2. Create a prompt
prompt = client.prompt.create(
    name="sample_prompt",
    type="text",
    prompt="Answer: {{question}}",
)
# prompt_id = prompt["promptResponse"]["id"]
print(f"Prompt created: ", prompt["id"])

# 3. Get an evaluator list (or create one)
eval_lists = client.evaluator_lists.list(5)
for el in eval_lists["evaluators"]:
    print(el["id"], el["name"])
evaluator_list_id = eval_lists["evaluators"][0]["id"]

# 4. Create the experiment
experiment = client.experiments.create({
    "name": "qa-eval-experiment",
    "promptId": prompt["id"],
    "datasetId": dataset["id"],
    "evaluatorListId": evaluator_list_id,
})
print(f"Experiment created: {experiment['id']}")

List Experiments

const result = await experiments.list(
  20, // limit
  1   // page
);

for (const exp of result.experiments) {
  console.log(exp.id, exp.name, exp.createdAt);
}
console.log('Total:', result.totalCount);

result = client.experiments.list(limit=10, page=1)

for exp in result["experiments"]:
    print(exp["id"], exp["name"], exp["createdAt"])

print("Total:", result["totalCount"])

// Default (limit=50, page=1)
ListExperimentsResponse list = experiments.list();

// With pagination
ListExperimentsResponse list = experiments.list(20, 1);

list.getData().forEach(e -> System.out.println(e.getId() + " " + e.getName()));

Get an Experiment

const experiment = await experiments.find('experiment-id-abc');
console.log(experiment.name);

experiment = client.experiments.get("experiment-id")
print(experiment["name"])

ExperimentResponse experiment = experiments.find("exp_abc123");
System.out.println("Status: " + experiment.getStatus());

Create an Experiment

Prompt configuration

API configuration

Dataset Run Tag configuration

On this page