# AI Model Evaluation Configuration
# Copy this file to .env and fill in your values

# =============================================================================
# MODEL UNDER TEST (MUT) - The model being evaluated
# =============================================================================

# OpenAI-compatible API endpoint for the model under test
MUT_ENDPOINT=http://localhost:11434

# API key for the model under test (optional for local endpoints like Ollama)
MUT_API_KEY=

# Model name/identifier to test
# Supports multiple models separated by commas for batch testing:
# MUT_MODEL=qwen3:4b-q4_K_M,qwen3:4b-q8_0,qwen3:4b-fp16,qwen3:8b-q4_K_M
# Or specify a single model:
MUT_MODEL=qwen3:4b-q4_K_M

# =============================================================================
# EVALUATOR API - Used for non-interactive mode to automatically score responses
# =============================================================================

# OpenAI-compatible API endpoint for the evaluator model
EVALUATOR_ENDPOINT=http://localhost:11434

# API key for the evaluator API
EVALUATOR_API_KEY=

# Evaluator model name (should be a capable model for evaluation tasks)
EVALUATOR_MODEL=qwen3:14b

# Temperature for evaluator (lower = more consistent scoring)
EVALUATOR_TEMPERATURE=0.3

# =============================================================================
# TEST CONFIGURATION
# =============================================================================

# Path to test suite YAML file
TEST_SUITE=test_suite.yaml

# Output directory for results
OUTPUT_DIR=results

# Filter tests by category (optional, leave empty for all categories)
FILTER_CATEGORY=

# =============================================================================
# EXECUTION MODE
# =============================================================================

# Run in non-interactive mode (true/false)
# When true, uses EVALUATOR_* settings for automated scoring
# When false, prompts user for manual evaluation
NON_INTERACTIVE=false