Evaluation > Evaluators
Create Evaluator Lists
Build evaluator lists with metrics and parameter configurations across TypeScript, Python, and Java SDKs.
Create Evaluator Lists
Setup
import { AISDK } from '@browserstack/ai-sdk';
const testOps = new AISDK({
publicKey: process.env.AISDK_PUBLIC_KEY,
secretKey: process.env.AISDK_SECRET_KEY,
});
const evalsList = testOps.evalsList; // EvaluatorLists
const evals = testOps.evals; // EvaluationExecution
import os
from browserstack_ai_sdk import AISDK
client = AISDK(
public_key=os.environ["AISDK_PUBLIC_KEY"],
secret_key=os.environ["AISDK_SECRET_KEY"],
)
import com.browserstack.aisdk.TestOps;
import com.browserstack.aisdk.eval.EvaluatorListsClient;
import com.browserstack.aisdk.eval.model.*;
TestOps sdk = TestOps.fromEnv();
EvaluatorListsClient evaluatorLists = sdk.evaluatorLists();
Create an Evaluator List
import { EvaluatorParamDataType } from '@browserstack/ai-sdk';
const list = await evalsList.create({
name: 'rag-quality-metrics',
description: 'Evaluators for RAG pipeline quality',
evaluators: [
{
evaluatorId: 'faithfulness-evaluator-id',
params: [
{ key: 'threshold', value: '0.8', dataType: EvaluatorParamDataType.FLOAT },
],
},
{
evaluatorId: 'relevance-evaluator-id',
params: [
{ key: 'model', value: 'gpt-4o', dataType: EvaluatorParamDataType.STRING },
],
},
],
});
console.log(list.id, list.name);
Parameter data types:
enum EvaluatorParamDataType {
STRING = 'string',
INTEGER = 'integer',
FLOAT = 'float',
BOOLEAN = 'boolean',
STRING_ARRAY = 'string[]',
INTEGER_ARRAY = 'integer[]',
FLOAT_ARRAY = 'float[]',
BOOLEAN_ARRAY = 'boolean[]',
OBJECT = 'object',
}
result = client.evaluator_lists.create({
"name": "qa-evaluators",
"evaluators": [
{
"evaluatorId": "correctness-evaluator-id",
"params": [
{"key": "threshold", "dataType": "float"},
{"key": "strict", "dataType": "boolean"},
],
},
{
"evaluatorId": "faithfulness-evaluator-id",
"params": [
{"key": "context_key", "dataType": "string"},
],
},
],
})
print(result)
Supported dataType values: string, integer, float, boolean, string[], integer[], float[], boolean[], list, dict
import com.browserstack.aisdk.eval.model.CreateEvaluatorListRequest;
import com.browserstack.aisdk.eval.model.CreateEvaluatorListRequest.EvaluatorConfig;
import java.util.List;
import java.util.Map;
CreateEvaluatorListRequest request = CreateEvaluatorListRequest.builder()
.name("rag-quality-suite")
.evaluators(List.of(
EvaluatorConfig.builder()
.evaluatorId("faithfulness")
.build(),
EvaluatorConfig.builder()
.evaluatorId("answer-relevancy")
.build(),
EvaluatorConfig.builder()
.evaluatorId("llm-judge")
.params(List.of(
Map.of(
"key", "rubric",
"value", "Score 1 if the answer is factually correct, 0 otherwise.",
"dataType", "STRING"
)
))
.build()
))
.build();
EvaluatorListResponse result = evaluatorLists.create(request);
System.out.println("Created evaluator list: " + result.getId());
Each parameter map must have a key (String), a dataType (one of STRING, NUMBER, BOOLEAN, JSON), and optionally a value.
List Evaluator Lists
const result = await evalsList.list(
20, // limit (1-100)
1, // page
{ column: 'createdAt', order: 'DESC' } // optional sort
);
for (const list of result.evaluators) {
console.log(list.id, list.name, list.evaluatorConfigs.length, 'evaluators');
}
console.log('Total:', result.totalCount);
result = client.evaluator_lists.list(page=1, limit=50)
print(f"Total: {result.get('total')}")
for ev_list in result.get("data", []):
print(ev_list["name"], ev_list["id"])
With ordering:
result = client.evaluator_lists.list(
page=1,
limit=20,
order_by={"column": "createdAt", "order": "desc"},
)
// Default (limit=50, page=1)
ListEvaluatorListsResponse list = evaluatorLists.list();
// Custom pagination
ListEvaluatorListsResponse list = evaluatorLists.list(20, 1);
// With ordering
ListEvaluatorListsResponse list = evaluatorLists.list(
20, 1,
EvaluatorListsClient.OrderBy.of("createdAt", "DESC")
);
list.getData().forEach(el ->
System.out.println(el.getId() + " — " + el.getName())
);
Get an Evaluator List
const list = await evalsList.get('evaluator-list-id-abc');
console.log(list.name);
for (const config of list.evaluatorConfigs) {
console.log(config.evaluator?.name, config.params);
}
ev_list = client.evaluator_lists.get("evaluator-list-id")
print(ev_list)
EvaluatorListResponse el = evaluatorLists.find("evl_abc123");
System.out.println("Name: " + el.getName());
System.out.println("Evaluators: " + el.getEvaluators().size());
Delete an Evaluator List
await evalsList.delete('evaluator-list-id-abc');
deleted = client.evaluator_lists.delete("evaluator-list-id")
print(deleted)
EvaluatorListResponse deleted = evaluatorLists.deleteEvaluatorList("evl_abc123");
Standalone Usage (Python)
EvaluatorListsClient can also be used directly without an AISDK instance:
import os
from browserstack_ai_sdk import EvaluatorListsClient
from browserstack_ai_sdk.config import TestOpsConfig
config = TestOpsConfig(
public_key=os.environ["AISDK_PUBLIC_KEY"],
secret_key=os.environ["AISDK_SECRET_KEY"],
)
client = EvaluatorListsClient(config=config)
result = client.create({
"name": "minimal-list",
"evaluators": [
{
"evaluatorId": "exact-match-id",
"params": [{"key": "case_sensitive", "dataType": "boolean"}],
}
],
})