Integrated Offline and Online Evaluations

Autoblocks lets developers reuse the same evaluators across testing and production.

Offline and online evaluation logic can be defined on a single evaluator class, allowing teams to reuse the same code in both testing and production contexts. Because all of the evaluation logic is encapsulated in one place, the interface for specifying evaluators stays consistent across both scenarios and is easy to maintain and update. In the example below, both evaluation methods delegate to one shared scoring helper:

import {
  BaseEvaluator,
  type Evaluation,
  type TracerEvent,
} from '@autoblocks/client/testing';

/** Shape of a single test case: an object carrying one string `input`. */
type TestCaseType = {
  input: string;
};

/** Type of the output being evaluated: a plain string. */
type OutputType = string;

/**
 * Evaluator that scores whether a piece of text has a professional tone.
 *
 * Both public entry points hand their text to the same private scorer, so
 * test-case evaluation and event evaluation share a single implementation.
 */
class IsProfessionalTone extends BaseEvaluator<TestCaseType, OutputType> {
  id = 'is-professional-tone';

  /**
   * Evaluates the output produced for a test case.
   *
   * @param args - The test case and the output generated for it; only
   *   `output` is scored.
   * @returns The evaluation produced by the shared tone scorer.
   */
  async evaluateTestCase(args: {
    testCase: TestCaseType;
    output: OutputType;
  }): Promise<Evaluation> {
    const { output } = args;
    return this.scoreProfessionalTone(output);
  }

  /**
   * Evaluates a tracer event by scoring its `response` property.
   *
   * @param args - Wrapper holding the event to evaluate.
   * @returns The evaluation produced by the shared tone scorer.
   */
  async evaluateEvent(args: { event: TracerEvent }): Promise<Evaluation> {
    // NOTE(review): assumes the event always carries a string under
    // properties["response"] — confirm against the code that emits events.
    const { properties } = args.event;
    return this.scoreProfessionalTone(properties['response']);
  }

  /**
   * Shared scoring routine used by both evaluation entry points.
   *
   * This is a placeholder: it ignores `text` and returns a fixed score of
   * 0.5 with a `gte: 0.5` threshold until a real LLM call is plugged in.
   *
   * @param text - The content whose tone should be judged.
   */
  private async scoreProfessionalTone(text: string): Promise<Evaluation> {
    // Your call to an LLM. Omitted for brevity
    const evaluation: Evaluation = { score: 0.5, threshold: { gte: 0.5 } };
    return evaluation;
  }
}