import random
import asyncio
from autoblocks.testing.models import BaseTestEvaluator
from autoblocks.testing.models import Evaluation
from autoblocks.testing.models import Threshold
class HasAllSubstrings(BaseTestEvaluator):
    """
    Evaluator that passes only when the output contains every expected substring.

    Evaluators are subclasses of BaseTestEvaluator and must declare an ``id``
    that uniquely identifies them.
    """

    id = "has-all-substrings"

    def evaluate_test_case(self, test_case: MyTestCase, output: str) -> Evaluation:
        """
        Score ``output`` against the substrings expected by ``test_case``.

        Every BaseTestEvaluator subclass has to implement this method; it may
        be written as either a regular or an ``async`` function.

        The score is 1 when none of ``test_case.expected_substrings`` is
        missing from ``output`` and 0 otherwise.
        """
        # Gather each expected substring that does not occur in the output.
        absent = []
        for expected in test_case.expected_substrings:
            if expected not in output:
                absent.append(expected)

        return Evaluation(
            score=1 if not absent else 0,
            # The evaluation is marked as a failure unless the score
            # is greater than or equal to 1.
            threshold=Threshold(gte=1),
            # Attach the missing substrings as metadata so a failed
            # evaluation shows which strings were absent when it is
            # viewed in the Autoblocks UI.
            metadata={"missing_substrings": absent},
        )
class IsFriendly(BaseTestEvaluator):
    """
    Evaluator that attaches a simulated friendliness score to an output.

    The score is produced asynchronously and reported without a threshold.
    """

    id = "is-friendly"

    # Upper bound on how many `evaluate_test_case` calls may run concurrently
    # for this evaluator. Helps avoid being rate limited by external services
    # such as an LLM provider.
    max_concurrency = 5

    async def get_score(self, output: str) -> float:
        """
        Return a simulated friendliness score for ``output``.

        This stands in for a real scorer (e.g. one backed by an LLM): it
        sleeps for a random fraction of a second to mimic work and then
        returns a random float. ``output`` itself is not inspected.
        """
        # Pretend to do some work.
        delay = random.random()
        await asyncio.sleep(delay)
        # Pretend this is the friendliness score the work produced.
        return random.random()

    async def evaluate_test_case(self, test_case: BaseTestCase, output: str) -> Evaluation:
        """
        Evaluate ``output`` asynchronously.

        Evaluators may implement this method as a coroutine, which is handy
        when talking to an external service that requires async calls (such
        as OpenAI) or when the evaluation benefits from concurrency.
        """
        friendliness = await self.get_score(output)
        # A threshold is optional; without one the evaluation
        # consists of the score alone.
        return Evaluation(score=friendliness)