import {
BaseTestEvaluator,
type Evaluation,
} from '@autoblocks/client/testing';
/**
* An evaluator is a class that subclasses BaseTestEvaluator.
*
* It must specify an ID, which is a unique identifier for the evaluator.
*
* It has two required type parameters:
* - TestCaseType: The type of your test cases.
* - OutputType: The type of the output returned by the function you are testing.
*/
class HasAllSubstrings extends BaseTestEvaluator<MyTestCase, string> {
  id = 'has-all-substrings';

  /**
   * Checks that every expected substring appears in the output.
   *
   * Required to be implemented by subclasses of BaseTestEvaluator.
   * This method can be synchronous or asynchronous.
   */
  evaluateTestCase(args: { testCase: MyTestCase; output: string }): Evaluation {
    const { testCase, output } = args;

    // Collect every expected substring that does not appear in the output.
    const missingSubstrings: string[] = [];
    for (const substring of testCase.expectedSubstrings) {
      if (!output.includes(substring)) {
        missingSubstrings.push(substring);
      }
    }

    return {
      // Full credit only when nothing is missing; otherwise zero.
      score: missingSubstrings.length === 0 ? 1 : 0,
      threshold: {
        // If the score is not greater than or equal to 1,
        // this evaluation will be marked as a failure.
        gte: 1,
      },
      metadata: {
        // Include the missing substrings as metadata
        // so that we can easily see which strings were
        // missing when viewing a failed evaluation
        // in the Autoblocks UI.
        missingSubstrings,
      },
    };
  }
}
class IsFriendly extends BaseTestEvaluator<MyTestCase, string> {
  id = 'is-friendly';

  // The maximum number of concurrent calls to `evaluateTestCase`
  // allowed for this evaluator. Useful to avoid rate limiting from
  // external services, such as an LLM provider.
  maxConcurrency = 5;

  /**
   * Produces a simulated friendliness score for the given output,
   * standing in for a real scoring call (e.g. to an LLM).
   */
  async getScore(output: string): Promise<number> {
    // Simulate doing work by sleeping for a random sub-second delay.
    const delayMs = Math.random() * 1000;
    await new Promise((resolve) => setTimeout(resolve, delayMs));
    // Simulate a friendliness score, e.g. as determined by an LLM.
    return Math.random();
  }

  /**
   * This can also be an async function. This is useful if you are interacting
   * with an external service that requires async calls, such as OpenAI, or if
   * the evaluation you are performing could benefit from concurrency.
   */
  async evaluateTestCase(args: {
    testCase: MyTestCase;
    output: string;
  }): Promise<Evaluation> {
    return {
      score: await this.getScore(args.output),
      // Evaluations don't need thresholds attached to them.
      // In this case, the evaluation will just consist of the score.
    };
  }
}