BrowserStack AI Evals
Evaluation > Evaluators

Create Evaluator Lists

Build evaluator lists with metrics and parameter configurations using the TypeScript, Python, and Java SDKs.

Create Evaluator Lists

Setup

import { AISDK } from '@browserstack/ai-sdk';

const testOps = new AISDK({
  publicKey: process.env.AISDK_PUBLIC_KEY,
  secretKey: process.env.AISDK_SECRET_KEY,
});

const evalsList = testOps.evalsList; // EvaluatorLists
const evals = testOps.evals;         // EvaluationExecution
import os
from browserstack_ai_sdk import AISDK

client = AISDK(
    public_key=os.environ["AISDK_PUBLIC_KEY"],
    secret_key=os.environ["AISDK_SECRET_KEY"],
)
import com.browserstack.aisdk.TestOps;
import com.browserstack.aisdk.eval.EvaluatorListsClient;
import com.browserstack.aisdk.eval.model.*;

TestOps sdk = TestOps.fromEnv();
EvaluatorListsClient evaluatorLists = sdk.evaluatorLists();

Create an Evaluator List

import { EvaluatorParamDataType } from '@browserstack/ai-sdk';

const list = await evalsList.create({
  name: 'rag-quality-metrics',
  description: 'Evaluators for RAG pipeline quality',
  evaluators: [
    {
      evaluatorId: 'faithfulness-evaluator-id',
      params: [
        { key: 'threshold', value: '0.8', dataType: EvaluatorParamDataType.FLOAT },
      ],
    },
    {
      evaluatorId: 'relevance-evaluator-id',
      params: [
        { key: 'model', value: 'gpt-4o', dataType: EvaluatorParamDataType.STRING },
      ],
    },
  ],
});

console.log(list.id, list.name);

Parameter data types:

enum EvaluatorParamDataType {
  STRING        = 'string',
  INTEGER       = 'integer',
  FLOAT         = 'float',
  BOOLEAN       = 'boolean',
  STRING_ARRAY  = 'string[]',
  INTEGER_ARRAY = 'integer[]',
  FLOAT_ARRAY   = 'float[]',
  BOOLEAN_ARRAY = 'boolean[]',
  OBJECT        = 'object',
}
result = client.evaluator_lists.create({
    "name": "qa-evaluators",
    "evaluators": [
        {
            "evaluatorId": "correctness-evaluator-id",
            "params": [
                {"key": "threshold", "dataType": "float"},
                {"key": "strict", "dataType": "boolean"},
            ],
        },
        {
            "evaluatorId": "faithfulness-evaluator-id",
            "params": [
                {"key": "context_key", "dataType": "string"},
            ],
        },
    ],
})

print(result)

Supported dataType values: string, integer, float, boolean, string[], integer[], float[], boolean[], list, dict

import com.browserstack.aisdk.eval.model.CreateEvaluatorListRequest;
import com.browserstack.aisdk.eval.model.CreateEvaluatorListRequest.EvaluatorConfig;
import java.util.List;
import java.util.Map;

CreateEvaluatorListRequest request = CreateEvaluatorListRequest.builder()
    .name("rag-quality-suite")
    .evaluators(List.of(
        EvaluatorConfig.builder()
            .evaluatorId("faithfulness")
            .build(),

        EvaluatorConfig.builder()
            .evaluatorId("answer-relevancy")
            .build(),

        EvaluatorConfig.builder()
            .evaluatorId("llm-judge")
            .params(List.of(
                Map.of(
                    "key", "rubric",
                    "value", "Score 1 if the answer is factually correct, 0 otherwise.",
                    "dataType", "STRING"
                )
            ))
            .build()
    ))
    .build();

EvaluatorListResponse result = evaluatorLists.create(request);
System.out.println("Created evaluator list: " + result.getId());

Each parameter map must contain key (a String) and dataType (one of STRING, NUMBER, BOOLEAN, or JSON); value is optional.

List Evaluator Lists

const result = await evalsList.list(
  20,                                    // limit (1-100)
  1,                                     // page
  { column: 'createdAt', order: 'DESC' } // optional sort
);

for (const list of result.evaluators) {
  console.log(list.id, list.name, list.evaluatorConfigs.length, 'evaluators');
}
console.log('Total:', result.totalCount);
result = client.evaluator_lists.list(page=1, limit=50)

print(f"Total: {result.get('total')}")
for ev_list in result.get("data", []):
    print(ev_list["name"], ev_list["id"])

With ordering:

result = client.evaluator_lists.list(
    page=1,
    limit=20,
    order_by={"column": "createdAt", "order": "desc"},
)
// Default (limit=50, page=1)
ListEvaluatorListsResponse list = evaluatorLists.list();

// Custom pagination
ListEvaluatorListsResponse list = evaluatorLists.list(20, 1);

// With ordering
ListEvaluatorListsResponse list = evaluatorLists.list(
    20, 1,
    EvaluatorListsClient.OrderBy.of("createdAt", "DESC")
);

list.getData().forEach(el ->
    System.out.println(el.getId() + " — " + el.getName())
);

Get an Evaluator List

const list = await evalsList.get('evaluator-list-id-abc');

console.log(list.name);
for (const config of list.evaluatorConfigs) {
  console.log(config.evaluator?.name, config.params);
}
ev_list = client.evaluator_lists.get("evaluator-list-id")
print(ev_list)
EvaluatorListResponse el = evaluatorLists.find("evl_abc123");
System.out.println("Name: " + el.getName());
System.out.println("Evaluators: " + el.getEvaluators().size());

Delete an Evaluator List

await evalsList.delete('evaluator-list-id-abc');
deleted = client.evaluator_lists.delete("evaluator-list-id")
print(deleted)
EvaluatorListResponse deleted = evaluatorLists.deleteEvaluatorList("evl_abc123");

Standalone Usage (Python)

EvaluatorListsClient can also be used on its own, without creating an AISDK instance:

import os
from browserstack_ai_sdk import EvaluatorListsClient
from browserstack_ai_sdk.config import TestOpsConfig

config = TestOpsConfig(
    public_key=os.environ["AISDK_PUBLIC_KEY"],
    secret_key=os.environ["AISDK_SECRET_KEY"],
)

client = EvaluatorListsClient(config=config)

result = client.create({
    "name": "minimal-list",
    "evaluators": [
        {
            "evaluatorId": "exact-match-id",
            "params": [{"key": "case_sensitive", "dataType": "boolean"}],
        }
    ],
})