#!/bin/bash
# Batch Test Script for AI Model Evaluation
# Tests multiple models and generates comparison report
#
# Usage: run without arguments from the directory that contains ai_eval.py and
# analyze_results.py (both are invoked by relative path).
#
# Environment:
#   ENDPOINT  value passed to ai_eval.py as --endpoint
#             (default: http://localhost:11434)
#   API_KEY   optional; when non-empty it is passed to ai_eval.py as --api-key
#   PYTHON    interpreter used for both helper scripts (default: python)
#
# A failing model run does not stop the batch: a warning is printed and the
# next model is tested. The script itself still finishes with status 0.

# Configuration
ENDPOINT="${ENDPOINT:-http://localhost:11434}"
API_KEY="${API_KEY:-}"
PYTHON="${PYTHON:-python}"

# Color output (escape sequences are expanded by printf's %b)
readonly GREEN='\033[0;32m'
readonly BLUE='\033[0;34m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color

readonly RULE='========================================'

#######################################
# Print one line of text wrapped in a color and the reset sequence.
# Arguments: $1 - color escape sequence, $2 - text (printed literally)
# Outputs:   the colored line on stdout
#######################################
say() {
  printf '%b%s%b\n' "$1" "$2" "$NC"
}

#######################################
# Print a three-line banner: rule, title, rule.
# Arguments: $1 - color escape sequence, $2 - title
# Outputs:   the banner on stdout
#######################################
banner() {
  local color=$1
  local title=$2
  say "$color" "$RULE"
  say "$color" "$title"
  say "$color" "$RULE"
}

#######################################
# Produce a display-safe form of an API key.
# Arguments: $1 - the key (may be empty)
# Outputs:   nothing for an empty key; '***' for keys of 8 characters or
#            fewer (so a short key is never shown in full); otherwise the
#            first 4 characters followed by '...'
#######################################
mask_api_key() {
  local key=$1
  if [[ -z "$key" ]]; then
    return 0
  fi
  if ((${#key} <= 8)); then
    printf '%s' '***'
  else
    printf '%s...' "${key:0:4}"
  fi
}

#######################################
# Run ai_eval.py for a single model and report the outcome.
# Globals:   PYTHON, ENDPOINT, API_KEY (read)
# Arguments: $1 - model name passed as --model
# Outputs:   progress and result lines on stdout
# Returns:   the exit status of the ai_eval.py invocation
#######################################
run_test() {
  local model=$1
  local status=0

  # Build the command once; --api-key is only added when a key is configured.
  # NOTE(review): the key is passed on the command line, so it is visible in
  # process listings. Whether ai_eval.py can read it from the environment
  # instead cannot be determined from this script.
  local -a cmd=("$PYTHON" ai_eval.py --endpoint "$ENDPOINT")
  if [[ -n "$API_KEY" ]]; then
    cmd+=(--api-key "$API_KEY")
  fi
  cmd+=(--model "$model")

  say "$GREEN" "Testing: $model"

  # Capture the status directly instead of reading $? after a compound command.
  "${cmd[@]}" || status=$?

  if ((status == 0)); then
    say "$GREEN" "✓ Completed: $model"
  else
    say "$YELLOW" "⚠ Failed or interrupted: $model"
  fi
  printf '\n'
  return "$status"
}

#######################################
# Entry point: test every configured model, then build the comparison report.
# Globals:   PYTHON, ENDPOINT, API_KEY (read)
# Outputs:   progress on stdout, additional failure warnings on stderr
# Returns:   0 (failures are reported, not propagated)
#######################################
main() {
  local model
  local failures=0
  local export_ok=1

  banner "$BLUE" "AI Model Batch Testing"
  printf '\n'
  printf 'Endpoint: %s\n' "$ENDPOINT"
  printf 'API Key: %s\n' "$(mask_api_key "$API_KEY")"
  printf '\n'

  # Test qwen3:4b models with different quantizations
  say "$BLUE" "=== Testing qwen3:4b with different quantizations ==="
  printf '\n'

  local -a models_4b=(
    "qwen3:4b-q4_K_M"
    "qwen3:4b-q8_0"
    "qwen3:4b-fp16"
  )

  for model in "${models_4b[@]}"; do
    run_test "$model" || failures=$((failures + 1))
  done

  # Test different model sizes with q4_K_M quantization
  say "$BLUE" "=== Testing different model sizes (q4_K_M) ==="
  printf '\n'

  # NOTE(review): qwen3:4b-q4_K_M is also in the list above, so it is
  # evaluated twice per batch — confirm whether that is intended.
  local -a models_sizes=(
    "qwen3:4b-q4_K_M"
    "qwen3:8b-q4_K_M"
    "qwen3:14b-q4_K_M"
  )

  for model in "${models_sizes[@]}"; do
    run_test "$model" || failures=$((failures + 1))
  done

  # Generate comparison report
  banner "$BLUE" "Generating Comparison Report"
  printf '\n'

  if ! "$PYTHON" analyze_results.py --compare; then
    say "$YELLOW" "⚠ analyze_results.py --compare failed" >&2
  fi
  if ! "$PYTHON" analyze_results.py --export batch_comparison.csv; then
    export_ok=0
    say "$YELLOW" "⚠ analyze_results.py --export failed; no comparison CSV was written" >&2
  fi

  printf '\n'
  banner "$GREEN" "Batch Testing Complete!"
  printf '\n'

  if ((failures > 0)); then
    say "$YELLOW" "⚠ ${failures} model run(s) failed or were interrupted" >&2
  fi

  printf '%s\n' "Results saved in ./results/"
  # Only announce the CSV when the export step actually succeeded.
  if ((export_ok)); then
    printf '%s\n' "Comparison CSV: ./results/batch_comparison.csv"
  fi
  return 0
}

main "$@"