fix embeddings truncation

This commit is contained in:
overcuriousity
2025-08-04 20:03:49 +02:00
parent 3a5e8e88b2
commit 7c3cc7ec9a
6 changed files with 121 additions and 460 deletions

View File

@@ -42,32 +42,35 @@ AI_EMBEDDINGS_MODEL=mistral-embed
# How many similar tools/concepts embeddings search returns as candidates
# 🔍 This is the FIRST filter - vector similarity matching
# Lower = faster, less comprehensive | Higher = slower, more comprehensive
AI_EMBEDDING_CANDIDATES=40
AI_EMBEDDING_CANDIDATES=50
# Minimum similarity score threshold (0.0-1.0)
# Lower = more results but less relevant | Higher = fewer but more relevant
AI_SIMILARITY_THRESHOLD=0.3
# === AI SELECTION FROM EMBEDDINGS ===
# When embeddings are enabled, how many top tools to send with full context
# 🎯 This is the SECOND filter - take best N from embeddings results
AI_EMBEDDING_SELECTION_LIMIT=30
AI_EMBEDDING_CONCEPTS_LIMIT=15
# === AI SELECTION STAGE ===
# Maximum tools the AI can select from embedding candidates
# 🤖 This is the SECOND filter - AI intelligent selection
# Should be ≤ AI_EMBEDDING_CANDIDATES
AI_MAX_SELECTED_ITEMS=25
# Maximum tools sent to AI for detailed analysis (micro-tasks)
# 📋 This is the FINAL context size sent to AI models
# Lower = less AI context, faster responses | Higher = more context, slower
AI_MAX_TOOLS_TO_ANALYZE=20
# === EMBEDDINGS EFFICIENCY THRESHOLDS ===
# Minimum tools required for embeddings to be considered useful
AI_EMBEDDINGS_MIN_TOOLS=8
# Maximum concepts sent to AI for background knowledge selection
# 📚 Concepts are smaller than tools, so can be higher
AI_MAX_CONCEPTS_TO_ANALYZE=10
# Maximum percentage of total tools that embeddings can return to be considered "filtering"
AI_EMBEDDINGS_MAX_REDUCTION_RATIO=0.75
# === CONTEXT FLOW SUMMARY ===
# 1. Vector Search: 111 total tools → AI_EMBEDDING_CANDIDATES (50) most similar
# 2. AI Selection: 50 candidates → AI_MAX_SELECTED_ITEMS (25) best matches
# 3. AI Analysis: 25 selected → AI_MAX_TOOLS_TO_ANALYZE (20) for micro-tasks
# 4. Final Output: Recommendations based on analyzed subset
# 3. Final Output: Recommendations based on analyzed subset
# ============================================================================
# 4. AI PERFORMANCE & RATE LIMITING
@@ -107,12 +110,6 @@ AI_MAX_CONTEXT_TOKENS=3000
# Larger = more context per call | Smaller = faster responses
AI_MAX_PROMPT_TOKENS=1200
# Timeout for individual micro-tasks (milliseconds)
AI_MICRO_TASK_TIMEOUT_MS=25000
# Maximum size of the processing queue
AI_QUEUE_MAX_SIZE=50
# ============================================================================
# 6. AUTHENTICATION & AUTHORIZATION (OPTIONAL)
# ============================================================================
@@ -183,15 +180,6 @@ FORENSIC_AUDIT_RETENTION_HOURS=24
# Maximum audit entries per request
FORENSIC_AUDIT_MAX_ENTRIES=50
# Enable detailed AI pipeline logging
AI_PIPELINE_DEBUG=false
# Enable performance metrics collection
AI_PERFORMANCE_METRICS=false
# Enable detailed micro-task debugging
AI_MICRO_TASK_DEBUG=false
# ============================================================================
# 10. QUALITY CONTROL & BIAS DETECTION (ADVANCED)
# ============================================================================
@@ -207,37 +195,6 @@ CONFIDENCE_MINIMUM_THRESHOLD=40
CONFIDENCE_MEDIUM_THRESHOLD=60
CONFIDENCE_HIGH_THRESHOLD=80
# Bias detection settings
BIAS_DETECTION_ENABLED=false
BIAS_POPULARITY_THRESHOLD=0.7
BIAS_DIVERSITY_MINIMUM=0.6
BIAS_CELEBRITY_TOOLS=""
# Quality control thresholds
QUALITY_MIN_RESPONSE_LENGTH=50
QUALITY_MIN_SELECTION_COUNT=1
QUALITY_MAX_PROCESSING_TIME_MS=30000
# ============================================================================
# 11. USER INTERFACE DEFAULTS (OPTIONAL)
# ============================================================================
# Default UI behavior (users can override)
UI_SHOW_AUDIT_TRAIL_DEFAULT=false
UI_SHOW_CONFIDENCE_SCORES=true
UI_SHOW_BIAS_WARNINGS=true
UI_AUDIT_TRAIL_COLLAPSIBLE=true
# ============================================================================
# 12. CACHING & PERFORMANCE (OPTIONAL)
# ============================================================================
# Cache AI responses (milliseconds)
AI_RESPONSE_CACHE_TTL_MS=3600000
# Queue cleanup interval (milliseconds)
AI_QUEUE_CLEANUP_INTERVAL_MS=300000
# ============================================================================
# PERFORMANCE TUNING PRESETS
# ============================================================================
@@ -245,21 +202,18 @@ AI_QUEUE_CLEANUP_INTERVAL_MS=300000
# 🚀 FOR FASTER RESPONSES (less comprehensive):
# AI_EMBEDDING_CANDIDATES=20
# AI_MAX_SELECTED_ITEMS=15
# AI_MAX_TOOLS_TO_ANALYZE=10
# AI_MICRO_TASK_DELAY_MS=200
# AI_MAX_CONTEXT_TOKENS=2000
# 🎯 FOR BETTER QUALITY (more comprehensive):
# AI_EMBEDDING_CANDIDATES=60
# AI_MAX_SELECTED_ITEMS=40
# AI_MAX_TOOLS_TO_ANALYZE=30
# AI_MICRO_TASK_DELAY_MS=800
# AI_MAX_CONTEXT_TOKENS=4000
# 🔋 FOR LOW-POWER SYSTEMS (minimal resources):
# AI_EMBEDDING_CANDIDATES=15
# AI_MAX_SELECTED_ITEMS=10
# AI_MAX_TOOLS_TO_ANALYZE=8
# AI_RATE_LIMIT_MAX_REQUESTS=2
# AI_MICRO_TASK_DELAY_MS=1000
@@ -285,7 +239,6 @@ AI_QUEUE_CLEANUP_INTERVAL_MS=300000
# 🔍 WITH FULL MONITORING:
# - Enable FORENSIC_AUDIT_ENABLED=true
# - Enable AI_PIPELINE_DEBUG=true
# - Configure audit retention and detail level
# ============================================================================