fix embeddings truncation

This commit is contained in:
overcuriousity
2025-08-04 20:03:49 +02:00
parent 3a5e8e88b2
commit 7c3cc7ec9a
6 changed files with 121 additions and 460 deletions

View File

@@ -42,32 +42,35 @@ AI_EMBEDDINGS_MODEL=mistral-embed
# How many similar tools/concepts embeddings search returns as candidates
# 🔍 This is the FIRST filter - vector similarity matching
# Lower = faster, less comprehensive | Higher = slower, more comprehensive
AI_EMBEDDING_CANDIDATES=40
AI_EMBEDDING_CANDIDATES=50
# Minimum similarity score threshold (0.0-1.0)
# Lower = more results but less relevant | Higher = fewer but more relevant
AI_SIMILARITY_THRESHOLD=0.3
# === AI SELECTION FROM EMBEDDINGS ===
# When embeddings are enabled, how many top tools to send with full context
# 🎯 This is the SECOND filter - take best N from embeddings results
AI_EMBEDDING_SELECTION_LIMIT=30
AI_EMBEDDING_CONCEPTS_LIMIT=15
# === AI SELECTION STAGE ===
# Maximum tools the AI can select from embedding candidates
# 🤖 This is the SECOND filter - AI intelligent selection
# Should be ≤ AI_EMBEDDING_CANDIDATES
AI_MAX_SELECTED_ITEMS=25
# Maximum tools sent to AI for detailed analysis (micro-tasks)
# 📋 This is the FINAL context size sent to AI models
# Lower = less AI context, faster responses | Higher = more context, slower
AI_MAX_TOOLS_TO_ANALYZE=20
# === EMBEDDINGS EFFICIENCY THRESHOLDS ===
# Minimum tools required for embeddings to be considered useful
AI_EMBEDDINGS_MIN_TOOLS=8
# Maximum concepts sent to AI for background knowledge selection
# 📚 Concepts are smaller than tools, so can be higher
AI_MAX_CONCEPTS_TO_ANALYZE=10
# Maximum percentage of total tools that embeddings can return to be considered "filtering"
AI_EMBEDDINGS_MAX_REDUCTION_RATIO=0.75
# === CONTEXT FLOW SUMMARY ===
# 1. Vector Search: 111 total tools → AI_EMBEDDING_CANDIDATES (50) most similar
# 2. AI Selection: 50 candidates → AI_MAX_SELECTED_ITEMS (25) best matches
# 3. AI Analysis: 25 selected → AI_MAX_TOOLS_TO_ANALYZE (20) for micro-tasks
# 4. Final Output: Recommendations based on analyzed subset
# 3. Final Output: Recommendations based on analyzed subset
# ============================================================================
# 4. AI PERFORMANCE & RATE LIMITING
@@ -107,12 +110,6 @@ AI_MAX_CONTEXT_TOKENS=3000
# Larger = more context per call | Smaller = faster responses
AI_MAX_PROMPT_TOKENS=1200
# Timeout for individual micro-tasks (milliseconds)
AI_MICRO_TASK_TIMEOUT_MS=25000
# Maximum size of the processing queue
AI_QUEUE_MAX_SIZE=50
# ============================================================================
# 6. AUTHENTICATION & AUTHORIZATION (OPTIONAL)
# ============================================================================
@@ -183,15 +180,6 @@ FORENSIC_AUDIT_RETENTION_HOURS=24
# Maximum audit entries per request
FORENSIC_AUDIT_MAX_ENTRIES=50
# Enable detailed AI pipeline logging
AI_PIPELINE_DEBUG=false
# Enable performance metrics collection
AI_PERFORMANCE_METRICS=false
# Enable detailed micro-task debugging
AI_MICRO_TASK_DEBUG=false
# ============================================================================
# 10. QUALITY CONTROL & BIAS DETECTION (ADVANCED)
# ============================================================================
@@ -207,37 +195,6 @@ CONFIDENCE_MINIMUM_THRESHOLD=40
CONFIDENCE_MEDIUM_THRESHOLD=60
CONFIDENCE_HIGH_THRESHOLD=80
# Bias detection settings
BIAS_DETECTION_ENABLED=false
BIAS_POPULARITY_THRESHOLD=0.7
BIAS_DIVERSITY_MINIMUM=0.6
BIAS_CELEBRITY_TOOLS=""
# Quality control thresholds
QUALITY_MIN_RESPONSE_LENGTH=50
QUALITY_MIN_SELECTION_COUNT=1
QUALITY_MAX_PROCESSING_TIME_MS=30000
# ============================================================================
# 11. USER INTERFACE DEFAULTS (OPTIONAL)
# ============================================================================
# Default UI behavior (users can override)
UI_SHOW_AUDIT_TRAIL_DEFAULT=false
UI_SHOW_CONFIDENCE_SCORES=true
UI_SHOW_BIAS_WARNINGS=true
UI_AUDIT_TRAIL_COLLAPSIBLE=true
# ============================================================================
# 12. CACHING & PERFORMANCE (OPTIONAL)
# ============================================================================
# Cache AI responses (milliseconds)
AI_RESPONSE_CACHE_TTL_MS=3600000
# Queue cleanup interval (milliseconds)
AI_QUEUE_CLEANUP_INTERVAL_MS=300000
# ============================================================================
# PERFORMANCE TUNING PRESETS
# ============================================================================
@@ -245,21 +202,18 @@ AI_QUEUE_CLEANUP_INTERVAL_MS=300000
# 🚀 FOR FASTER RESPONSES (less comprehensive):
# AI_EMBEDDING_CANDIDATES=20
# AI_MAX_SELECTED_ITEMS=15
# AI_MAX_TOOLS_TO_ANALYZE=10
# AI_MICRO_TASK_DELAY_MS=200
# AI_MAX_CONTEXT_TOKENS=2000
# 🎯 FOR BETTER QUALITY (more comprehensive):
# AI_EMBEDDING_CANDIDATES=60
# AI_MAX_SELECTED_ITEMS=40
# AI_MAX_TOOLS_TO_ANALYZE=30
# AI_MICRO_TASK_DELAY_MS=800
# AI_MAX_CONTEXT_TOKENS=4000
# 🔋 FOR LOW-POWER SYSTEMS (minimal resources):
# AI_EMBEDDING_CANDIDATES=15
# AI_MAX_SELECTED_ITEMS=10
# AI_MAX_TOOLS_TO_ANALYZE=8
# AI_RATE_LIMIT_MAX_REQUESTS=2
# AI_MICRO_TASK_DELAY_MS=1000
@@ -285,7 +239,6 @@ AI_QUEUE_CLEANUP_INTERVAL_MS=300000
# 🔍 WITH FULL MONITORING:
# - Enable FORENSIC_AUDIT_ENABLED=true
# - Enable AI_PIPELINE_DEBUG=true
# - Configure audit retention and detail level
# ============================================================================