Source code for rhesis.sdk.synthesizers.multi_turn.base

from pathlib import Path
from typing import Any, Dict, List, Optional, Union

from jinja2 import Environment, FileSystemLoader, Template
from pydantic import BaseModel

from rhesis.sdk.entities.test import TestConfiguration
from rhesis.sdk.entities.test_set import TestSet
from rhesis.sdk.enums import TestType
from rhesis.sdk.models import get_model
from rhesis.sdk.models.base import BaseLLM
from rhesis.sdk.synthesizers.utils import create_test_set


class GenerationConfig(BaseModel):
    """Settings that drive multi-turn test generation.

    Only ``generation_prompt`` is required; every other field defaults to
    ``None``.
    """

    # Free-text description of what the generated tests should cover.
    generation_prompt: str

    # Optional label lists; presumably used to steer the generated tests'
    # behavior / category / topic -- confirm against the prompt template.
    behaviors: Optional[list[str]] = None
    categories: Optional[list[str]] = None
    topics: Optional[list[str]] = None

    # Optional extra free-text context.
    additional_context: Optional[str] = None
class Test(BaseModel):
    """A single generated test in nested form.

    Holds a structured ``TestConfiguration`` together with the three string
    labels (behavior, category, topic) attached to the test.
    """

    test_configuration: TestConfiguration

    behavior: str
    category: str
    topic: str
# Note: test_type is NOT included in the schema sent to the LLM.
# It is added programmatically after generation.
class Tests(BaseModel):
    """Container schema wrapping a list of nested ``Test`` items."""

    tests: List[Test]
# Flat schema for LLM batch generation (easier for the model to produce).
# Each flat item is repacked into the nested Test structure after generation.
class FlatTest(BaseModel):
    """Single-level form of a test.

    The four test-configuration values are plain top-level strings carrying a
    ``test_configuration_`` prefix instead of living in a nested object.
    """

    # Flattened test-configuration fields.
    test_configuration_goal: str
    test_configuration_instructions: str
    test_configuration_restrictions: str
    test_configuration_scenario: str

    # Labels attached to the test.
    behavior: str
    category: str
    topic: str
class FlatTests(BaseModel):
    """Container schema wrapping a list of ``FlatTest`` items."""

    tests: List[FlatTest]
class MultiTurnSynthesizer:
    """Generate multi-turn tests with an LLM and collect them into a ``TestSet``.

    Tests are requested from the model in batches of at most ``batch_size``.
    The model is asked for the flat ``FlatTests`` schema; every returned item
    is repacked into the nested test structure and tagged with the multi-turn
    test type.
    """

    # Template file name, looked up in the ``templates`` directory that sits
    # next to this module.
    prompt_template_file: str = "base.jinja"

    def __init__(
        self,
        config: "GenerationConfig",
        model: Optional[Union[str, "BaseLLM"]] = None,
        batch_size: int = 10,
    ):
        """Store the configuration and resolve the model to use.

        Args:
            config: Generation settings; its dumped fields are rendered into
                the prompt template.
            model: A model instance, used as-is, or a string / ``None`` that
                is passed to ``get_model`` to obtain one.
            batch_size: Maximum number of tests requested per model call.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError(f"batch_size must be at least 1, got {batch_size}")
        self.config = config
        self.batch_size = batch_size
        if isinstance(model, str) or model is None:
            self.model = get_model(model)
        else:
            self.model = model

    def load_prompt_template(self, prompt_template_file: str) -> "Template":
        """Load a Jinja template from the ``templates`` directory beside this module.

        Args:
            prompt_template_file: File name of the template inside that directory.

        Returns:
            The loaded Jinja ``Template``.
        """
        templates_path = Path(__file__).parent / "templates"
        environment = Environment(loader=FileSystemLoader(templates_path))
        return environment.get_template(prompt_template_file)

    def _flat_test_to_nested(self, flat: Dict[str, Any]) -> Dict[str, Any]:
        """Repack a flat test dict (LLM output) into the nested Test structure."""
        return {
            "test_configuration": {
                "goal": flat["test_configuration_goal"],
                "instructions": flat["test_configuration_instructions"],
                "restrictions": flat["test_configuration_restrictions"],
                "scenario": flat["test_configuration_scenario"],
            },
            "behavior": flat["behavior"],
            "category": flat["category"],
            "topic": flat["topic"],
        }

    @staticmethod
    def _batch_sizes(num_tests: int, batch_size: int) -> List[int]:
        """Split ``num_tests`` into per-call sizes of at most ``batch_size``.

        Full batches come first; a trailing smaller batch covers the remainder
        so the sizes always sum to ``num_tests``.

        Raises:
            ValueError: If either argument is smaller than 1.
        """
        if num_tests < 1:
            raise ValueError(f"num_tests must be at least 1, got {num_tests}")
        if batch_size < 1:
            raise ValueError(f"batch_size must be at least 1, got {batch_size}")
        full_batches, remainder = divmod(num_tests, batch_size)
        sizes = [batch_size] * full_batches
        if remainder:
            sizes.append(remainder)
        return sizes

    def _generate_batch(self, num_tests: Optional[int] = None) -> List[dict]:
        """Generate a single batch of tests.

        Args:
            num_tests: Number of tests to ask the model for in this call.
                Defaults to ``self.batch_size`` when omitted.

        Returns:
            One nested test dict per item the model returned, each with
            ``test_type`` set to the multi-turn value.
        """
        requested = self.batch_size if num_tests is None else num_tests
        prompt_template = self.load_prompt_template(self.prompt_template_file)
        template_context = {
            "num_tests": requested,
            **self.config.model_dump(),
        }
        prompt = prompt_template.render(template_context)

        # Use the flat schema for the LLM (easier to generate), then repack to nested.
        response = self.model.generate(prompt, schema=FlatTests)
        return [
            {
                **self._flat_test_to_nested(flat),
                "test_type": TestType.MULTI_TURN.value,
            }
            for flat in response["tests"]
        ]

    def generate(self, num_tests: int = 5) -> "TestSet":
        """Generate ``num_tests`` multi-turn tests and wrap them in a ``TestSet``.

        The request is split into batches of at most ``self.batch_size``; the
        last batch is sized to the remainder, so the model is asked for
        exactly ``num_tests`` tests in total. ``self.batch_size`` is never
        modified, so repeated calls on the same instance behave the same.

        Args:
            num_tests: Total number of tests to request.

        Returns:
            The created test set, typed as multi-turn, with " (Multi-Turn)"
            appended to its name when it has one.

        Raises:
            ValueError: If ``num_tests`` or ``self.batch_size`` is smaller than 1.
        """
        all_tests: List[dict] = []
        for size in self._batch_sizes(num_tests, self.batch_size):
            all_tests.extend(self._generate_batch(size))

        test_set = create_test_set(
            tests=all_tests,
            model=self.model,
            synthesizer_name="MultiTurnSynthesizer",
            # Record the largest batch actually requested from the model.
            batch_size=min(self.batch_size, num_tests),
            num_tests=len(all_tests),
            requested_tests=num_tests,
            generation_prompt=self.config.generation_prompt,
        )
        test_set.test_set_type = TestType.MULTI_TURN
        if test_set.name:
            test_set.name = f"{test_set.name} (Multi-Turn)"
        return test_set