improvements
This commit is contained in:
52
.env.example
Normal file
52
.env.example
Normal file
@@ -0,0 +1,52 @@
|
||||
# AI Model Evaluation Configuration
|
||||
# Copy this file to .env and fill in your values
|
||||
|
||||
# =============================================================================
|
||||
# MODEL UNDER TEST (MUT) - The model being evaluated
|
||||
# =============================================================================
|
||||
# OpenAI-compatible API endpoint for the model under test
|
||||
MUT_ENDPOINT=http://localhost:11434
|
||||
|
||||
# API key for the model under test (optional for local endpoints like Ollama)
|
||||
MUT_API_KEY=
|
||||
|
||||
# Model name/identifier to test
|
||||
# Multiple models may be given as a comma-separated list for batch testing, e.g.:
|
||||
# MUT_MODEL=qwen3:4b-q4_K_M,qwen3:4b-q8_0,qwen3:4b-fp16,qwen3:8b-q4_K_M
|
||||
# Or specify a single model:
|
||||
MUT_MODEL=qwen3:4b-q4_K_M
|
||||
|
||||
# =============================================================================
|
||||
# EVALUATOR API - Used in non-interactive mode to score responses automatically
|
||||
# =============================================================================
|
||||
# OpenAI-compatible API endpoint for the evaluator model
|
||||
EVALUATOR_ENDPOINT=http://localhost:11434
|
||||
|
||||
# API key for the evaluator model (may be left empty for local endpoints like Ollama)
|
||||
EVALUATOR_API_KEY=
|
||||
|
||||
# Evaluator model name (should be a capable model for evaluation tasks)
|
||||
EVALUATOR_MODEL=qwen3:14b
|
||||
|
||||
# Temperature for evaluator (lower = more consistent scoring)
|
||||
EVALUATOR_TEMPERATURE=0.3
|
||||
|
||||
# =============================================================================
|
||||
# TEST CONFIGURATION
|
||||
# =============================================================================
|
||||
# Path to test suite YAML file
|
||||
TEST_SUITE=test_suite.yaml
|
||||
|
||||
# Output directory for results
|
||||
OUTPUT_DIR=results
|
||||
|
||||
# Filter tests by category (optional; leave empty to run all categories)
|
||||
FILTER_CATEGORY=
|
||||
|
||||
# =============================================================================
|
||||
# EXECUTION MODE
|
||||
# =============================================================================
|
||||
# Run in non-interactive mode (true/false)
|
||||
# When true, uses EVALUATOR_* settings for automated scoring
|
||||
# When false, prompts user for manual evaluation
|
||||
NON_INTERACTIVE=false
|
||||
Reference in New Issue
Block a user