improvements

This commit is contained in:
overcuriousity
2025-08-01 13:59:06 +02:00
parent 8c9bdf0710
commit 1b9d9b437b
2 changed files with 176 additions and 121 deletions

View File

@@ -13,68 +13,60 @@ OIDC_ENDPOINT=https://your-oidc-provider.com
OIDC_CLIENT_ID=your-client-id
OIDC_CLIENT_SECRET=your-client-secret
# === AI Configuration ===
# ===================================================================
# AI CONFIGURATION - Complete Reference for Improved Pipeline
# ===================================================================
# Selector AI (for selection stage, choose a good model)
AI_SELECTOR_ENDPOINT=https://llm.mikoshi.de
AI_SELECTOR_API_KEY=sk-DzREDACTEDHA
AI_SELECTOR_MODEL=mistral/mistral-medium-latest
# === CORE AI ENDPOINTS & MODELS ===
AI_API_ENDPOINT=https://llm.mikoshi.de
AI_API_KEY=sREDACTED3w
AI_MODEL='mistral/mistral-small-latest'
# Analyzer AI (for analysis stage, choose a smaller model)
# === IMPROVED PIPELINE: Use separate analyzer model (mistral-small is fine) ===
AI_ANALYZER_ENDPOINT=https://llm.mikoshi.de
AI_ANALYZER_API_KEY=sk-DzREDACTEDnHA
AI_ANALYZER_MODEL=mistral/mistral-small-latest
AI_ANALYZER_API_KEY=skREDACTEDw3w
AI_ANALYZER_MODEL='mistral/mistral-small-latest'
# === Embeddings Configuration ===
# Enable/disable semantic embeddings pre-selection
# === EMBEDDINGS CONFIGURATION ===
AI_EMBEDDINGS_ENABLED=true
# Embeddings API (Mistral recommended)
AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
AI_EMBEDDINGS_API_KEY=ZSpREDACTED3wL
AI_EMBEDDINGS_API_KEY=ZREDACTED3wL
AI_EMBEDDINGS_MODEL=mistral-embed
# Embeddings performance settings
AI_EMBEDDINGS_BATCH_SIZE=20
AI_EMBEDDINGS_BATCH_DELAY_MS=1000
AI_EMBEDDING_CANDIDATES=30
AI_SIMILARITY_THRESHOLD=0.3
# Delay between micro-tasks to respect rate limits (milliseconds)
AI_MICRO_TASK_DELAY_MS=500
# === PIPELINE: VectorIndex (HNSW) Configuration ===
AI_MAX_SELECTED_ITEMS=60 # Tools visible to each micro-task
AI_EMBEDDING_CANDIDATES=60 # VectorIndex candidates (HNSW is more efficient)
AI_SIMILARITY_THRESHOLD=0.3 # Not used by VectorIndex (uses cosine distance internally)
# Micro-task specific rate limiting (requests per minute per user)
AI_MICRO_TASK_RATE_LIMIT=30
# === MICRO-TASK CONFIGURATION ===
AI_MICRO_TASK_DELAY_MS=500 # Delay between micro-tasks
AI_MICRO_TASK_TIMEOUT_MS=25000 # Timeout per micro-task (increased for full context)
# Maximum parallel micro-tasks (for future parallel processing)
AI_MAX_PARALLEL_TASKS=3
# === RATE LIMITING ===
AI_RATE_LIMIT_DELAY_MS=3000 # Main rate limit delay
AI_RATE_LIMIT_MAX_REQUESTS=6 # Main requests per minute (reduced - fewer but richer calls)
AI_MICRO_TASK_RATE_LIMIT=15 # Micro-task requests per minute (was 30)
# Micro-task timeout settings (milliseconds)
AI_MICRO_TASK_TIMEOUT_MS=15000
# ENHANCED: Rate Limiting Configuration
# Main query rate limiting (reduced due to micro-tasks)
AI_RATE_LIMIT_DELAY_MS=3000
AI_RATE_LIMIT_MAX_REQUESTS=8
# Smart prompting rate limiting
AI_SMART_PROMPTING_RATE_LIMIT=5
AI_SMART_PROMPTING_WINDOW_MS=60000
# Queue management settings
# === QUEUE MANAGEMENT ===
AI_QUEUE_MAX_SIZE=50
AI_QUEUE_CLEANUP_INTERVAL_MS=300000
# === Performance & Monitoring ===
# Enable detailed micro-task logging
AI_MICRO_TASK_DEBUG=false
# Enable performance metrics collection
# === PERFORMANCE & MONITORING ===
AI_MICRO_TASK_DEBUG=true
AI_PERFORMANCE_METRICS=true
# Cache settings for AI responses
AI_RESPONSE_CACHE_TTL_MS=3600000
# ===================================================================
# LEGACY VARIABLES (still used but less important)
# ===================================================================
# These are still used by other parts of the system:
AI_RESPONSE_CACHE_TTL_MS=3600000 # For caching responses
AI_QUEUE_MAX_SIZE=50 # Queue management
AI_QUEUE_CLEANUP_INTERVAL_MS=300000 # Queue cleanup
# === Application Configuration ===
PUBLIC_BASE_URL=http://localhost:4321
NODE_ENV=development