The schemas of your test case and output as they exist in your codebase often contain implementation details that are not relevant to a human reviewer.
Each SDK provides methods that allow you to transform your test cases and outputs into human-readable formats.
from dataclasses import dataclass
from uuid import UUID

from autoblocks.testing.models import BaseTestCase
from autoblocks.testing.models import HumanReviewField
from autoblocks.testing.models import HumanReviewFieldContentType
from autoblocks.testing.util import md5


@dataclass
class Document:
    """A document referenced by a test case."""

    uuid: UUID  # Not relevant for human review, so we don't include it below
    title: str
    content: str


@dataclass
class MyCustomTestCase(BaseTestCase):
    """Test case holding a user question and a list of documents."""

    user_question: str
    documents: list[Document]

    def hash(self) -> str:
        """Return the md5 of the user question as this test case's hash."""
        return md5(self.user_question)

    def serialize_for_human_review(self) -> list[HumanReviewField]:
        """Return the fields shown to a human reviewer for this test case.

        The result is one "Question" field followed by one field per
        document, numbered from 1 and labelled with the document title.
        Document UUIDs are deliberately left out.
        """
        return [
            HumanReviewField(
                name="Question",
                value=self.user_question,
                content_type=HumanReviewFieldContentType.TEXT,
            ),
        ] + [
            HumanReviewField(
                name=f"Document {i + 1}: {doc.title}",
                value=doc.content,
                content_type=HumanReviewFieldContentType.TEXT,
            )
            for i, doc in enumerate(self.documents)
        ]


@dataclass
class MyCustomOutput:
    """Output of the function under test."""

    answer: str
    reason: str

    # These fields are implementation details not needed
    # for human review, so they will be omitted below
    x: int
    y: int
    z: int

    def serialize_for_human_review(self) -> list[HumanReviewField]:
        """Return the fields shown to a human reviewer for this output.

        Only ``answer`` and ``reason`` are included; ``x``, ``y`` and ``z``
        are omitted.
        """
        return [
            HumanReviewField(
                name="Answer",
                value=self.answer,
                content_type=HumanReviewFieldContentType.TEXT,
            ),
            HumanReviewField(
                name="Reason",
                value=self.reason,
                content_type=HumanReviewFieldContentType.TEXT,
            ),
        ]
There are four different content types you can use to control the rendering in the Autoblocks UI:
TEXT
HTML
MARKDOWN
LINK
Creating a human review job as part of a test run is often a good starting point when setting up a test suite for the first time.
Developers can run the test without any code-based evaluators and review the results manually
to understand the responses being generated by the LLM.
from dataclasses import dataclass

from autoblocks.testing.evaluators import BaseHasAllSubstrings
from autoblocks.testing.models import BaseTestCase
from autoblocks.testing.models import CreateHumanReviewJob
from autoblocks.testing.run import run_test_suite
from autoblocks.testing.util import md5


@dataclass
class TestCase(BaseTestCase):
    """Test case pairing an input string with the substrings expected in the output."""

    input: str
    expected_substrings: list[str]

    def hash(self) -> str:
        """Return the md5 of the input."""
        return md5(self.input)  # Unique identifier for a test case


class HasAllSubstrings(BaseHasAllSubstrings[TestCase, str]):
    """Evaluator that supplies the two mappers ``BaseHasAllSubstrings`` asks for."""

    id = "has-all-substrings"

    def test_case_mapper(self, test_case: TestCase) -> list[str]:
        """Return the substrings expected for ``test_case``."""
        return test_case.expected_substrings

    def output_mapper(self, output: str) -> str:
        """Return the output unchanged; it is already a string."""
        return output


# Run the suite and create a human review job for its results.
run_test_suite(
    id="my-test-suite",
    test_cases=[
        TestCase(
            input="hello world",
            expected_substrings=["hello", "world"],
        )
    ],  # Replace with your test cases
    fn=lambda test_case: test_case.input,  # Replace with your LLM call
    evaluators=[HasAllSubstrings()],  # Replace with your evaluators
    human_review_job=CreateHumanReviewJob(
        assignee_email_address="example@example.com",
        name="Review for accuracy",
    ),
)
from dataclasses import dataclass

from autoblocks.testing.models import BaseTestCase
from autoblocks.testing.models import HumanReviewField
from autoblocks.testing.models import HumanReviewFieldContentType
from autoblocks.testing.run import RunManager
from autoblocks.testing.util import md5


# Update with your test case type
@dataclass
class TestCase(BaseTestCase):
    """Test case wrapping a single input string."""

    input: str

    def serialize_for_human_review(self) -> list[HumanReviewField]:
        """Return a single "Input" text field for the human reviewer."""
        return [
            HumanReviewField(
                name="Input",
                value=self.input,
                content_type=HumanReviewFieldContentType.TEXT,
            ),
        ]

    def hash(self) -> str:
        """Return the md5 of the input."""
        return md5(self.input)


# Update with your output type
@dataclass
class Output:
    """Output wrapping a single output string."""

    output: str

    def serialize_for_human_review(self) -> list[HumanReviewField]:
        """Return a single "Output" text field for the human reviewer."""
        return [
            HumanReviewField(
                name="Output",
                value=self.output,
                content_type=HumanReviewFieldContentType.TEXT,
            ),
        ]


run = RunManager[TestCase, Output](
    test_id="test-id",
)

run.start()

# Add results from your test suite here
run.add_result(
    test_case=TestCase(input="Hello, world!"),
    output=Output(output="Hi, world!"),
)

run.end()

# Create the human review job once the run has ended.
run.create_human_review_job(
    # Replace with the reviewer's email address
    assignee_email_address="example@example.com",
    name="Review for accuracy",
)