import {
  BaseTestEvaluator,
  type Evaluation,
} from '@autoblocks/client/testing';
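
// The evaluators below are generic over a `MyTestCase` type that is defined
// elsewhere in your test suite. A minimal sketch of its shape, inferred from
// how it is used below (only `expectedSubstrings` is required by these
// evaluators; the `input` field is illustrative):
interface MyTestCase {
  input: string;
  expectedSubstrings: string[];
}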
/**
 * An evaluator is a class that subclasses BaseTestEvaluator.
 *
 * It must specify an ID, which is a unique identifier for the evaluator.
 *
 * It has two required type parameters:
 * - TestCaseType: The type of your test cases.
 * - OutputType: The type of the output returned by the function you are testing.
 */
class HasAllSubstrings extends BaseTestEvaluator<MyTestCase, string> {
  id = 'has-all-substrings';
  /**
   * Evaluates the output of a test case.
   *
   * Required to be implemented by subclasses of BaseTestEvaluator.
   * This method can be synchronous or asynchronous.
   */
  evaluateTestCase(args: { testCase: MyTestCase; output: string }): Evaluation {
    const missingSubstrings = args.testCase.expectedSubstrings.filter(
      (s) => !args.output.includes(s),
    );
    const score = missingSubstrings.length ? 0 : 1;
    return {
      score,
      threshold: {
        // If the score is less than 1, this evaluation
        // will be marked as a failure.
        gte: 1,
      },
      metadata: {
        // Include the missing substrings as metadata
        // so that we can easily see which strings were
        // missing when viewing a failed evaluation
        // in the Autoblocks UI.
        missingSubstrings,
      },
    };
  }
}

class IsFriendly extends BaseTestEvaluator<MyTestCase, string> {
  id = 'is-friendly';
  // The maximum number of concurrent calls to `evaluateTestCase` allowed for this evaluator.
  // Useful to avoid rate limiting from external services, such as an LLM provider.
  maxConcurrency = 5;
  private async getScore(output: string): Promise<number> {
    // Simulate doing work
    await new Promise((resolve) => setTimeout(resolve, Math.random() * 1000));
    // Simulate a friendliness score, e.g. as determined by an LLM.
    return Math.random();
  }
  /**
   * This can also be an async function. This is useful if you are interacting
   * with an external service that requires async calls, such as OpenAI, or if
   * the evaluation you are performing could benefit from concurrency.
   */
  async evaluateTestCase(args: {
    testCase: MyTestCase;
    output: string;
  }): Promise<Evaluation> {
    const score = await this.getScore(args.output);
    return {
      score,
      // Evaluations don't need thresholds attached to them.
      // In this case, the evaluation will just consist of the score.
    };
  }
}
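
// A sketch of how these evaluators might be wired into a test suite run,
// assuming the `runTestSuite` entry point from the same package. The test
// case and the function under test below are illustrative placeholders.
// (The import would normally sit alongside the others at the top of the file.)
import { runTestSuite } from '@autoblocks/client/testing';

runTestSuite<MyTestCase, string>({
  id: 'my-test-suite',
  testCases: [
    { input: 'hello world', expectedSubstrings: ['hello', 'world'] },
  ],
  // Property name(s) on the test case used to compute a stable hash
  // that identifies each test case across runs.
  testCaseHash: ['input'],
  evaluators: [new HasAllSubstrings(), new IsFriendly()],
  // The function under test; replace with your real generation logic.
  fn: ({ testCase }: { testCase: MyTestCase }): string =>
    `You said: ${testCase.input}`,
});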