From 7c3cc7ec9a29b6dcda6e763702aacd17f881ef92 Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Mon, 4 Aug 2025 20:03:49 +0200 Subject: [PATCH 1/4] fix embeddings truncation --- .env.example | 73 +++--------- src/config/forensic.config.ts | 126 -------------------- src/utils/aiPipeline.ts | 209 +++++++++++++++++----------------- src/utils/dataService.ts | 138 +--------------------- src/utils/rateLimitedQueue.ts | 9 -- src/utils/toolHelpers.ts | 26 ----- 6 files changed, 121 insertions(+), 460 deletions(-) delete mode 100644 src/config/forensic.config.ts diff --git a/.env.example b/.env.example index b127600..8b45de2 100644 --- a/.env.example +++ b/.env.example @@ -42,32 +42,35 @@ AI_EMBEDDINGS_MODEL=mistral-embed # How many similar tools/concepts embeddings search returns as candidates # 🔍 This is the FIRST filter - vector similarity matching # Lower = faster, less comprehensive | Higher = slower, more comprehensive -AI_EMBEDDING_CANDIDATES=40 +AI_EMBEDDING_CANDIDATES=50 # Minimum similarity score threshold (0.0-1.0) # Lower = more results but less relevant | Higher = fewer but more relevant AI_SIMILARITY_THRESHOLD=0.3 +# === AI SELECTION FROM EMBEDDINGS === +# When embeddings are enabled, how many top tools to send with full context +# 🎯 This is the SECOND filter - take best N from embeddings results +AI_EMBEDDING_SELECTION_LIMIT=30 +AI_EMBEDDING_CONCEPTS_LIMIT=15 + # === AI SELECTION STAGE === # Maximum tools the AI can select from embedding candidates # 🤖 This is the SECOND filter - AI intelligent selection # Should be ≤ AI_EMBEDDING_CANDIDATES AI_MAX_SELECTED_ITEMS=25 -# Maximum tools sent to AI for detailed analysis (micro-tasks) -# 📋 This is the FINAL context size sent to AI models -# Lower = less AI context, faster responses | Higher = more context, slower -AI_MAX_TOOLS_TO_ANALYZE=20 +# === EMBEDDINGS EFFICIENCY THRESHOLDS === +# Minimum tools required for embeddings to be considered useful +AI_EMBEDDINGS_MIN_TOOLS=8 -# Maximum concepts sent to AI for 
background knowledge selection -# 📚 Concepts are smaller than tools, so can be higher -AI_MAX_CONCEPTS_TO_ANALYZE=10 +# Maximum percentage of total tools that embeddings can return to be considered "filtering" +AI_EMBEDDINGS_MAX_REDUCTION_RATIO=0.75 # === CONTEXT FLOW SUMMARY === # 1. Vector Search: 111 total tools → AI_EMBEDDING_CANDIDATES (40) most similar # 2. AI Selection: 40 candidates → AI_MAX_SELECTED_ITEMS (25) best matches -# 3. AI Analysis: 25 selected → AI_MAX_TOOLS_TO_ANALYZE (20) for micro-tasks -# 4. Final Output: Recommendations based on analyzed subset +# 3. Final Output: Recommendations based on analyzed subset # ============================================================================ # 4. AI PERFORMANCE & RATE LIMITING @@ -107,12 +110,6 @@ AI_MAX_CONTEXT_TOKENS=3000 # Larger = more context per call | Smaller = faster responses AI_MAX_PROMPT_TOKENS=1200 -# Timeout for individual micro-tasks (milliseconds) -AI_MICRO_TASK_TIMEOUT_MS=25000 - -# Maximum size of the processing queue -AI_QUEUE_MAX_SIZE=50 - # ============================================================================ # 6. AUTHENTICATION & AUTHORIZATION (OPTIONAL) # ============================================================================ @@ -183,15 +180,6 @@ FORENSIC_AUDIT_RETENTION_HOURS=24 # Maximum audit entries per request FORENSIC_AUDIT_MAX_ENTRIES=50 -# Enable detailed AI pipeline logging -AI_PIPELINE_DEBUG=false - -# Enable performance metrics collection -AI_PERFORMANCE_METRICS=false - -# Enable detailed micro-task debugging -AI_MICRO_TASK_DEBUG=false - # ============================================================================ # 10. 
QUALITY CONTROL & BIAS DETECTION (ADVANCED) # ============================================================================ @@ -207,37 +195,6 @@ CONFIDENCE_MINIMUM_THRESHOLD=40 CONFIDENCE_MEDIUM_THRESHOLD=60 CONFIDENCE_HIGH_THRESHOLD=80 -# Bias detection settings -BIAS_DETECTION_ENABLED=false -BIAS_POPULARITY_THRESHOLD=0.7 -BIAS_DIVERSITY_MINIMUM=0.6 -BIAS_CELEBRITY_TOOLS="" - -# Quality control thresholds -QUALITY_MIN_RESPONSE_LENGTH=50 -QUALITY_MIN_SELECTION_COUNT=1 -QUALITY_MAX_PROCESSING_TIME_MS=30000 - -# ============================================================================ -# 11. USER INTERFACE DEFAULTS (OPTIONAL) -# ============================================================================ - -# Default UI behavior (users can override) -UI_SHOW_AUDIT_TRAIL_DEFAULT=false -UI_SHOW_CONFIDENCE_SCORES=true -UI_SHOW_BIAS_WARNINGS=true -UI_AUDIT_TRAIL_COLLAPSIBLE=true - -# ============================================================================ -# 12. CACHING & PERFORMANCE (OPTIONAL) -# ============================================================================ - -# Cache AI responses (milliseconds) -AI_RESPONSE_CACHE_TTL_MS=3600000 - -# Queue cleanup interval (milliseconds) -AI_QUEUE_CLEANUP_INTERVAL_MS=300000 - # ============================================================================ # PERFORMANCE TUNING PRESETS # ============================================================================ @@ -245,21 +202,18 @@ AI_QUEUE_CLEANUP_INTERVAL_MS=300000 # 🚀 FOR FASTER RESPONSES (less comprehensive): # AI_EMBEDDING_CANDIDATES=20 # AI_MAX_SELECTED_ITEMS=15 -# AI_MAX_TOOLS_TO_ANALYZE=10 # AI_MICRO_TASK_DELAY_MS=200 # AI_MAX_CONTEXT_TOKENS=2000 # 🎯 FOR BETTER QUALITY (more comprehensive): # AI_EMBEDDING_CANDIDATES=60 # AI_MAX_SELECTED_ITEMS=40 -# AI_MAX_TOOLS_TO_ANALYZE=30 # AI_MICRO_TASK_DELAY_MS=800 # AI_MAX_CONTEXT_TOKENS=4000 # 🔋 FOR LOW-POWER SYSTEMS (minimal resources): # AI_EMBEDDING_CANDIDATES=15 # AI_MAX_SELECTED_ITEMS=10 -# 
AI_MAX_TOOLS_TO_ANALYZE=8 # AI_RATE_LIMIT_MAX_REQUESTS=2 # AI_MICRO_TASK_DELAY_MS=1000 @@ -285,7 +239,6 @@ AI_QUEUE_CLEANUP_INTERVAL_MS=300000 # 🔍 WITH FULL MONITORING: # - Enable FORENSIC_AUDIT_ENABLED=true -# - Enable AI_PIPELINE_DEBUG=true # - Configure audit retention and detail level # ============================================================================ diff --git a/src/config/forensic.config.ts b/src/config/forensic.config.ts deleted file mode 100644 index 5723854..0000000 --- a/src/config/forensic.config.ts +++ /dev/null @@ -1,126 +0,0 @@ -// src/config/forensic.config.ts -// Centralized configuration for forensic RAG enhancements - -export const FORENSIC_CONFIG = { - audit: { - enabled: process.env.FORENSIC_AUDIT_ENABLED === 'true', - detailLevel: (process.env.FORENSIC_AUDIT_DETAIL_LEVEL as 'minimal' | 'standard' | 'verbose') || 'standard', - retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10), - maxEntriesPerRequest: parseInt(process.env.FORENSIC_AUDIT_MAX_ENTRIES || '50', 10) - }, - confidence: { - embeddingsWeight: parseFloat(process.env.CONFIDENCE_EMBEDDINGS_WEIGHT || '0.3'), - consensusWeight: parseFloat(process.env.CONFIDENCE_CONSENSUS_WEIGHT || '0.25'), - domainMatchWeight: parseFloat(process.env.CONFIDENCE_DOMAIN_MATCH_WEIGHT || '0.25'), - freshnessWeight: parseFloat(process.env.CONFIDENCE_FRESHNESS_WEIGHT || '0.2'), - minimumThreshold: parseInt(process.env.CONFIDENCE_MINIMUM_THRESHOLD || '40', 10), - highThreshold: parseInt(process.env.CONFIDENCE_HIGH_THRESHOLD || '80', 10), - mediumThreshold: parseInt(process.env.CONFIDENCE_MEDIUM_THRESHOLD || '60', 10) - }, - bias: { - enabled: process.env.BIAS_DETECTION_ENABLED === 'true', - popularityThreshold: parseFloat(process.env.BIAS_POPULARITY_THRESHOLD || '0.7'), - diversityMinimum: parseFloat(process.env.BIAS_DIVERSITY_MINIMUM || '0.6'), - domainMismatchThreshold: parseFloat(process.env.BIAS_DOMAIN_MISMATCH_THRESHOLD || '0.3'), - warningThreshold: 
parseInt(process.env.BIAS_WARNING_THRESHOLD || '3', 10), - celebrityTools: (process.env.BIAS_CELEBRITY_TOOLS || 'Volatility 3,Wireshark,Autopsy,Maltego').split(',').map(t => t.trim()) - }, - // Quality thresholds for various metrics - quality: { - minResponseLength: parseInt(process.env.QUALITY_MIN_RESPONSE_LENGTH || '50', 10), - minSelectionCount: parseInt(process.env.QUALITY_MIN_SELECTION_COUNT || '1', 10), - maxProcessingTime: parseInt(process.env.QUALITY_MAX_PROCESSING_TIME_MS || '30000', 10) - }, - // Display preferences - ui: { - showAuditTrailByDefault: process.env.UI_SHOW_AUDIT_TRAIL_DEFAULT === 'true', - showConfidenceScores: process.env.UI_SHOW_CONFIDENCE_SCORES !== 'false', - showBiasWarnings: process.env.UI_SHOW_BIAS_WARNINGS !== 'false', - auditTrailCollapsible: process.env.UI_AUDIT_TRAIL_COLLAPSIBLE !== 'false' - } -}; - -// Validation function to ensure configuration is valid -export function validateForensicConfig(): { valid: boolean; errors: string[] } { - const errors: string[] = []; - - // Validate audit configuration - if (FORENSIC_CONFIG.audit.retentionHours < 1 || FORENSIC_CONFIG.audit.retentionHours > 168) { - errors.push('FORENSIC_AUDIT_RETENTION_HOURS must be between 1 and 168 (1 week)'); - } - - if (!['minimal', 'standard', 'verbose'].includes(FORENSIC_CONFIG.audit.detailLevel)) { - errors.push('FORENSIC_AUDIT_DETAIL_LEVEL must be one of: minimal, standard, verbose'); - } - - // Validate confidence weights sum to approximately 1.0 - const weightSum = FORENSIC_CONFIG.confidence.embeddingsWeight + - FORENSIC_CONFIG.confidence.consensusWeight + - FORENSIC_CONFIG.confidence.domainMatchWeight + - FORENSIC_CONFIG.confidence.freshnessWeight; - - if (Math.abs(weightSum - 1.0) > 0.05) { - errors.push(`Confidence weights must sum to 1.0 (currently ${weightSum.toFixed(3)})`); - } - - // Validate threshold ranges - if (FORENSIC_CONFIG.confidence.minimumThreshold < 0 || FORENSIC_CONFIG.confidence.minimumThreshold > 100) { - 
errors.push('CONFIDENCE_MINIMUM_THRESHOLD must be between 0 and 100'); - } - - if (FORENSIC_CONFIG.confidence.highThreshold <= FORENSIC_CONFIG.confidence.mediumThreshold) { - errors.push('CONFIDENCE_HIGH_THRESHOLD must be greater than CONFIDENCE_MEDIUM_THRESHOLD'); - } - - // Validate bias thresholds - if (FORENSIC_CONFIG.bias.popularityThreshold < 0 || FORENSIC_CONFIG.bias.popularityThreshold > 1) { - errors.push('BIAS_POPULARITY_THRESHOLD must be between 0 and 1'); - } - - if (FORENSIC_CONFIG.bias.diversityMinimum < 0 || FORENSIC_CONFIG.bias.diversityMinimum > 1) { - errors.push('BIAS_DIVERSITY_MINIMUM must be between 0 and 1'); - } - - return { - valid: errors.length === 0, - errors - }; -} - -// Helper functions for configuration access -export function isAuditEnabled(): boolean { - return FORENSIC_CONFIG.audit.enabled; -} - -export function getAuditDetailLevel(): 'minimal' | 'standard' | 'verbose' { - return FORENSIC_CONFIG.audit.detailLevel; -} - -export function getConfidenceThresholds() { - return { - minimum: FORENSIC_CONFIG.confidence.minimumThreshold, - medium: FORENSIC_CONFIG.confidence.mediumThreshold, - high: FORENSIC_CONFIG.confidence.highThreshold - }; -} - -export function isBiasDetectionEnabled(): boolean { - return FORENSIC_CONFIG.bias.enabled; -} - -// Initialize and validate configuration on module load -const configValidation = validateForensicConfig(); -if (!configValidation.valid) { - console.warn('[FORENSIC CONFIG] Configuration validation failed:', configValidation.errors); - // In development, we might want to throw an error - if (process.env.NODE_ENV === 'development') { - throw new Error(`Forensic configuration invalid: ${configValidation.errors.join(', ')}`); - } -} - -console.log('[FORENSIC CONFIG] Configuration loaded:', { - auditEnabled: FORENSIC_CONFIG.audit.enabled, - confidenceEnabled: true, // Always enabled - biasDetectionEnabled: FORENSIC_CONFIG.bias.enabled, - detailLevel: FORENSIC_CONFIG.audit.detailLevel -}); \ No newline 
at end of file diff --git a/src/utils/aiPipeline.ts b/src/utils/aiPipeline.ts index 003523d..761ea4b 100644 --- a/src/utils/aiPipeline.ts +++ b/src/utils/aiPipeline.ts @@ -78,17 +78,25 @@ class ImprovedMicroTaskAIPipeline { private similarityThreshold: number; private microTaskDelay: number; + // NEW: Embedding selection limits (top N from pre-filtered candidates) + private embeddingSelectionLimit: number; + private embeddingConceptsLimit: number; + + // NEW: Embeddings efficiency thresholds + private embeddingsMinTools: number; + private embeddingsMaxReductionRatio: number; + private maxContextTokens: number; private maxPromptTokens: number; - // NEW: Audit Configuration + // Audit Configuration private auditConfig: { enabled: boolean; detailLevel: 'minimal' | 'standard' | 'verbose'; retentionHours: number; }; - // NEW: Temporary audit storage for pre-context operations + // Temporary audit storage for pre-context operations private tempAuditEntries: AuditEntry[] = []; constructor() { @@ -98,20 +106,38 @@ class ImprovedMicroTaskAIPipeline { model: this.getEnv('AI_ANALYZER_MODEL') }; - this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '60', 10); - this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '60', 10); - this.similarityThreshold = 0.3; + // Core pipeline configuration + this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '25', 10); + this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '50', 10); + this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.3'); this.microTaskDelay = parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10); + // NEW: Embedding selection limits (top N from pre-filtered candidates) + this.embeddingSelectionLimit = parseInt(process.env.AI_EMBEDDING_SELECTION_LIMIT || '30', 10); + this.embeddingConceptsLimit = parseInt(process.env.AI_EMBEDDING_CONCEPTS_LIMIT || '15', 10); + + // NEW: Embeddings efficiency thresholds + 
this.embeddingsMinTools = parseInt(process.env.AI_EMBEDDINGS_MIN_TOOLS || '8', 10); + this.embeddingsMaxReductionRatio = parseFloat(process.env.AI_EMBEDDINGS_MAX_REDUCTION_RATIO || '0.75'); + + // Context management this.maxContextTokens = parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10); this.maxPromptTokens = parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10); - // NEW: Initialize Audit Configuration + // Audit configuration this.auditConfig = { enabled: process.env.FORENSIC_AUDIT_ENABLED === 'true', detailLevel: (process.env.FORENSIC_AUDIT_DETAIL_LEVEL as any) || 'standard', retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10) }; + + // Log configuration for debugging + console.log('[AI PIPELINE] Configuration loaded:', { + embeddingCandidates: this.embeddingCandidates, + embeddingSelection: `${this.embeddingSelectionLimit} tools, ${this.embeddingConceptsLimit} concepts`, + embeddingsThresholds: `min ${this.embeddingsMinTools} tools, max ${this.embeddingsMaxReductionRatio * 100}% of total`, + auditEnabled: this.auditConfig.enabled + }); } private getEnv(key: string): string { @@ -272,50 +298,49 @@ class ImprovedMicroTaskAIPipeline { userQuery, this.embeddingCandidates, this.similarityThreshold - ) as SimilarityResult[]; // Type assertion for similarity property + ) as SimilarityResult[]; - console.log(`[IMPROVED PIPELINE] Embeddings found ${similarItems.length} similar items`); + console.log(`[AI PIPELINE] Embeddings found ${similarItems.length} similar items`); - // FIXED: Create lookup maps for O(1) access while preserving original data + // Create lookup maps for O(1) access while preserving original data const toolsMap = new Map(toolsData.tools.map((tool: any) => [tool.name, tool])); const conceptsMap = new Map(toolsData.concepts.map((concept: any) => [concept.name, concept])); - // FIXED: Process in similarity order, preserving the ranking + // Process in similarity order, preserving the ranking const similarTools 
= similarItems .filter((item): item is SimilarityResult => item.type === 'tool') .map(item => toolsMap.get(item.name)) - .filter((tool): tool is any => tool !== undefined); // Proper type guard + .filter((tool): tool is any => tool !== undefined); const similarConcepts = similarItems .filter((item): item is SimilarityResult => item.type === 'concept') .map(item => conceptsMap.get(item.name)) - .filter((concept): concept is any => concept !== undefined); // Proper type guard + .filter((concept): concept is any => concept !== undefined); - console.log(`[IMPROVED PIPELINE] Similarity-ordered results: ${similarTools.length} tools, ${similarConcepts.length} concepts`); + console.log(`[AI PIPELINE] Similarity-ordered results: ${similarTools.length} tools, ${similarConcepts.length} concepts`); - // Log the first few tools to verify ordering is preserved - if (similarTools.length > 0) { - console.log(`[IMPROVED PIPELINE] Top similar tools (in similarity order):`); - similarTools.slice(0, 5).forEach((tool, idx) => { - const originalSimilarItem = similarItems.find(item => item.name === tool.name); - console.log(` ${idx + 1}. 
${tool.name} (similarity: ${originalSimilarItem?.similarity?.toFixed(4) || 'N/A'})`); - }); - } + // FIXED: Better threshold logic - only use embeddings if we get meaningful filtering + const totalAvailableTools = toolsData.tools.length; + const reductionRatio = similarTools.length / totalAvailableTools; - if (similarTools.length >= 15) { + if (similarTools.length >= this.embeddingsMinTools && reductionRatio <= this.embeddingsMaxReductionRatio) { candidateTools = similarTools; candidateConcepts = similarConcepts; selectionMethod = 'embeddings_candidates'; - console.log(`[IMPROVED PIPELINE] Using embeddings candidates in similarity order: ${candidateTools.length} tools`); + console.log(`[AI PIPELINE] Using embeddings filtering: ${totalAvailableTools} → ${similarTools.length} tools (${(reductionRatio * 100).toFixed(1)}% reduction)`); } else { - console.log(`[IMPROVED PIPELINE] Embeddings insufficient (${similarTools.length} < 15), using full dataset`); + if (similarTools.length < this.embeddingsMinTools) { + console.log(`[AI PIPELINE] Embeddings found too few tools (${similarTools.length} < ${this.embeddingsMinTools}), using full dataset`); + } else { + console.log(`[AI PIPELINE] Embeddings didn't filter enough (${(reductionRatio * 100).toFixed(1)}% > ${(this.embeddingsMaxReductionRatio * 100).toFixed(1)}%), using full dataset`); + } candidateTools = toolsData.tools; candidateConcepts = toolsData.concepts; selectionMethod = 'full_dataset'; } - // NEW: Add Audit Entry for Embeddings Search with ordering verification + // Enhanced audit entry with reduction statistics if (this.auditConfig.enabled) { this.addAuditEntry(null, 'retrieval', 'embeddings-search', { query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates }, @@ -323,21 +348,29 @@ class ImprovedMicroTaskAIPipeline { candidatesFound: similarItems.length, toolsInOrder: similarTools.slice(0, 3).map((t: any) => t.name), conceptsInOrder: similarConcepts.slice(0, 3).map((c: any) => 
c.name), - orderingPreserved: true + reductionRatio: reductionRatio, + usingEmbeddings: selectionMethod === 'embeddings_candidates', + totalAvailable: totalAvailableTools, + filtered: similarTools.length }, - similarTools.length >= 15 ? 85 : 60, + selectionMethod === 'embeddings_candidates' ? 85 : 60, embeddingsStart, - { selectionMethod, embeddingsEnabled: true, orderingFixed: true } + { + selectionMethod, + embeddingsEnabled: true, + reductionAchieved: selectionMethod === 'embeddings_candidates', + tokenSavingsExpected: selectionMethod === 'embeddings_candidates' + } ); } } else { - console.log(`[IMPROVED PIPELINE] Embeddings disabled, using full dataset`); + console.log(`[AI PIPELINE] Embeddings disabled, using full dataset`); candidateTools = toolsData.tools; candidateConcepts = toolsData.concepts; selectionMethod = 'full_dataset'; } - console.log(`[IMPROVED PIPELINE] AI will analyze ${candidateTools.length} candidate tools (ordering preserved: ${selectionMethod === 'embeddings_candidates'})`); + console.log(`[AI PIPELINE] AI will analyze ${candidateTools.length} candidate tools (method: ${selectionMethod})`); const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod); return { @@ -387,15 +420,37 @@ class ImprovedMicroTaskAIPipeline { related_software: concept.related_software || [] })); - // Generate the German prompt with tool data + // CORRECTED LOGIC: + let toolsToSend: any[]; + let conceptsToSend: any[]; + + if (selectionMethod === 'embeddings_candidates') { + // WITH EMBEDDINGS: Take top N from pre-filtered candidates + toolsToSend = toolsWithFullData.slice(0, this.embeddingSelectionLimit); + conceptsToSend = conceptsWithFullData.slice(0, this.embeddingConceptsLimit); + + console.log(`[AI PIPELINE] Embeddings enabled: sending top ${toolsToSend.length} pre-filtered tools`); + } else { + // WITHOUT EMBEDDINGS: Send entire compressed database (original behavior) + toolsToSend = 
toolsWithFullData; // ALL tools from database + conceptsToSend = conceptsWithFullData; // ALL concepts from database + + console.log(`[AI PIPELINE] Embeddings disabled: sending entire database (${toolsToSend.length} tools, ${conceptsToSend.length} concepts)`); + } + + // Generate the German prompt with appropriately selected tool data const basePrompt = getPrompt('toolSelection', mode, userQuery, selectionMethod, this.maxSelectedItems); const prompt = `${basePrompt} VERFÜGBARE TOOLS (mit vollständigen Daten): -${JSON.stringify(toolsWithFullData.slice(0, 30), null, 2)} +${JSON.stringify(toolsToSend, null, 2)} VERFÜGBARE KONZEPTE (mit vollständigen Daten): -${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`; +${JSON.stringify(conceptsToSend, null, 2)}`; + + // Log token usage for monitoring + const estimatedTokens = this.estimateTokens(prompt); + console.log(`[AI PIPELINE] Method: ${selectionMethod}, Tools: ${toolsToSend.length}, Tokens: ~${estimatedTokens}`); try { const response = await this.callAI(prompt, 2500); @@ -403,16 +458,15 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`; const result = this.safeParseJSON(response, null); if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) { - console.error('[IMPROVED PIPELINE] AI selection returned invalid structure:', response.slice(0, 200)); + console.error('[AI PIPELINE] AI selection returned invalid structure:', response.slice(0, 200)); - // NEW: Add Audit Entry for Failed Selection if (this.auditConfig.enabled) { this.addAuditEntry(null, 'selection', 'ai-tool-selection-failed', { candidateCount: candidateTools.length, mode, prompt: prompt.slice(0, 200) }, { error: 'Invalid JSON structure', response: response.slice(0, 200) }, - 10, // Very low confidence + 10, selectionStart, - { aiModel: this.config.model, selectionMethod } + { aiModel: this.config.model, selectionMethod, tokensSent: estimatedTokens, toolsSent: toolsToSend.length } ); } @@ 
-421,19 +475,15 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`; const totalSelected = result.selectedTools.length + result.selectedConcepts.length; if (totalSelected === 0) { - console.error('[IMPROVED PIPELINE] AI selection returned no tools'); + console.error('[AI PIPELINE] AI selection returned no tools'); throw new Error('AI selection returned empty selection'); } - console.log(`[IMPROVED PIPELINE] AI selected: ${result.selectedTools.length} tools, ${result.selectedConcepts.length} concepts`); - console.log(`[IMPROVED PIPELINE] AI reasoning: ${result.reasoning}`); + console.log(`[AI PIPELINE] AI selected: ${result.selectedTools.length} tools, ${result.selectedConcepts.length} concepts from ${toolsToSend.length} candidates`); const selectedTools = candidateTools.filter(tool => result.selectedTools.includes(tool.name)); const selectedConcepts = candidateConcepts.filter(concept => result.selectedConcepts.includes(concept.name)); - console.log(`[IMPROVED PIPELINE] Final selection: ${selectedTools.length} tools with bias prevention applied`); - - // NEW: Add Audit Entry for Successful Selection if (this.auditConfig.enabled) { const confidence = this.calculateSelectionConfidence(result, candidateTools.length); @@ -443,11 +493,12 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`; selectedToolCount: result.selectedTools.length, selectedConceptCount: result.selectedConcepts.length, reasoning: result.reasoning?.slice(0, 200) + '...', - finalToolNames: selectedTools.map(t => t.name) + finalToolNames: selectedTools.map(t => t.name), + selectionEfficiency: `${toolsToSend.length} → ${result.selectedTools.length}` }, confidence, selectionStart, - { aiModel: this.config.model, selectionMethod, promptTokens: this.estimateTokens(prompt) } + { aiModel: this.config.model, selectionMethod, promptTokens: estimatedTokens, toolsSent: toolsToSend.length } ); } @@ -457,69 +508,21 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`; }; } 
catch (error) { - console.error('[IMPROVED PIPELINE] AI selection failed:', error); + console.error('[AI PIPELINE] AI selection failed:', error); - // NEW: Add Audit Entry for Selection Error if (this.auditConfig.enabled) { this.addAuditEntry(null, 'selection', 'ai-tool-selection-error', { candidateCount: candidateTools.length, mode }, { error: error.message }, - 5, // Very low confidence + 5, selectionStart, - { aiModel: this.config.model, selectionMethod } + { aiModel: this.config.model, selectionMethod, tokensSent: estimatedTokens } ); } - - console.log('[IMPROVED PIPELINE] Using emergency keyword-based selection'); - return this.emergencyKeywordSelection(userQuery, candidateTools, candidateConcepts, mode); + throw error; } } - private emergencyKeywordSelection(userQuery: string, candidateTools: any[], candidateConcepts: any[], mode: string) { - const emergencyStart = Date.now(); - - const queryLower = userQuery.toLowerCase(); - const keywords = queryLower.split(/\s+/).filter(word => word.length > 3); - - const scoredTools = candidateTools.map(tool => { - const toolText = ( - tool.name + ' ' + - tool.description + ' ' + - (tool.tags || []).join(' ') + ' ' + - (tool.platforms || []).join(' ') + ' ' + - (tool.domains || []).join(' ') - ).toLowerCase(); - - const score = keywords.reduce((acc, keyword) => { - return acc + (toolText.includes(keyword) ? 1 : 0); - }, 0); - - return { tool, score }; - }).filter(item => item.score > 0) - .sort((a, b) => b.score - a.score); - - const maxTools = mode === 'workflow' ? 
20 : 8; - const selectedTools = scoredTools.slice(0, maxTools).map(item => item.tool); - - console.log(`[IMPROVED PIPELINE] Emergency selection: ${selectedTools.length} tools, keywords: ${keywords.slice(0, 5).join(', ')}`); - - // NEW: Add Audit Entry for Emergency Selection - if (this.auditConfig.enabled) { - this.addAuditEntry(null, 'selection', 'emergency-keyword-selection', - { keywords: keywords.slice(0, 10), candidateCount: candidateTools.length }, - { selectedCount: selectedTools.length, topScores: scoredTools.slice(0, 5).map(s => ({ name: s.tool.name, score: s.score })) }, - 40, // Moderate confidence for emergency selection - emergencyStart, - { selectionMethod: 'emergency_keyword' } - ); - } - - return { - selectedTools, - selectedConcepts: candidateConcepts.slice(0, 3) - }; - } - private async delay(ms: number): Promise { return new Promise(resolve => setTimeout(resolve, ms)); } @@ -826,7 +829,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`; // NEW: Clear any previous temporary audit entries this.tempAuditEntries = []; - console.log(`[IMPROVED PIPELINE] Starting ${mode} query processing with context continuity and audit trail`); + console.log(`[AI PIPELINE] Starting ${mode} query processing with context continuity and audit trail`); try { // Stage 1: Get intelligent candidates (embeddings + AI selection) @@ -848,7 +851,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`; // NEW: Merge any temporary audit entries from pre-context operations this.mergeTemporaryAuditEntries(context); - console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`); + console.log(`[AI PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`); // NEW: Add initial audit entry this.addAuditEntry(context, 'initialization', 'pipeline-start', @@ -925,9 +928,9 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`; contextContinuityUsed: true }; - console.log(`[IMPROVED 
PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`); - console.log(`[IMPROVED PIPELINE] Unique tools selected: ${context.seenToolNames.size}`); - console.log(`[IMPROVED PIPELINE] Audit trail entries: ${context.auditTrail.length}`); + console.log(`[AI PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`); + console.log(`[AI PIPELINE] Unique tools selected: ${context.seenToolNames.size}`); + console.log(`[AI PIPELINE] Audit trail entries: ${context.auditTrail.length}`); return { recommendation: { @@ -939,7 +942,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`; }; } catch (error) { - console.error('[IMPROVED PIPELINE] Processing failed:', error); + console.error('[AI PIPELINE] Processing failed:', error); // NEW: Ensure temp audit entries are cleared even on error this.tempAuditEntries = []; diff --git a/src/utils/dataService.ts b/src/utils/dataService.ts index 7f6b6a0..8b80f33 100644 --- a/src/utils/dataService.ts +++ b/src/utils/dataService.ts @@ -77,33 +77,8 @@ interface EnhancedCompressedToolsData { domains: any[]; phases: any[]; 'domain-agnostic-software': any[]; - scenarios?: any[]; // Optional for AI processing + scenarios?: any[]; skill_levels: any; - // Enhanced context for micro-tasks - domain_relationships: DomainRelationship[]; - phase_dependencies: PhaseDependency[]; - tool_compatibility_matrix: CompatibilityMatrix[]; -} - -interface DomainRelationship { - domain_id: string; - tool_count: number; - common_tags: string[]; - skill_distribution: Record; -} - -interface PhaseDependency { - phase_id: string; - order: number; - depends_on: string | null; - enables: string | null; - is_parallel_capable: boolean; - typical_duration: string; -} - -interface CompatibilityMatrix { - type: string; - groups: Record; } let cachedData: ToolsData | null = null; @@ -146,104 +121,6 @@ function generateDataVersion(data: any): string { return Math.abs(hash).toString(36); } -// Enhanced: Generate domain 
relationships for better AI understanding -function generateDomainRelationships(domains: any[], tools: any[]): DomainRelationship[] { - const relationships: DomainRelationship[] = []; - - for (const domain of domains) { - const domainTools = tools.filter(tool => - tool.domains && tool.domains.includes(domain.id) - ); - - const commonTags = domainTools - .flatMap(tool => tool.tags || []) - .reduce((acc: any, tag: string) => { - acc[tag] = (acc[tag] || 0) + 1; - return acc; - }, {}); - - const topTags = Object.entries(commonTags) - .sort(([,a], [,b]) => (b as number) - (a as number)) - .slice(0, 5) - .map(([tag]) => tag); - - relationships.push({ - domain_id: domain.id, - tool_count: domainTools.length, - common_tags: topTags, - skill_distribution: domainTools.reduce((acc: any, tool: any) => { - acc[tool.skillLevel] = (acc[tool.skillLevel] || 0) + 1; - return acc; - }, {}) - }); - } - - return relationships; -} - -// Enhanced: Generate phase dependencies -function generatePhaseDependencies(phases: any[]): PhaseDependency[] { - const dependencies: PhaseDependency[] = []; - - for (let i = 0; i < phases.length; i++) { - const phase = phases[i]; - const nextPhase = phases[i + 1]; - const prevPhase = phases[i - 1]; - - dependencies.push({ - phase_id: phase.id, - order: i + 1, - depends_on: prevPhase?.id || null, - enables: nextPhase?.id || null, - is_parallel_capable: ['examination', 'analysis'].includes(phase.id), // Some phases can run in parallel - typical_duration: phase.id === 'data-collection' ? 'hours-days' : - phase.id === 'examination' ? 'hours-weeks' : - phase.id === 'analysis' ? 
'days-weeks' : - 'hours-days' - }); - } - - return dependencies; -} - -// Enhanced: Generate tool compatibility matrix -function generateToolCompatibilityMatrix(tools: any[]): CompatibilityMatrix[] { - const matrix: CompatibilityMatrix[] = []; - - // Group tools by common characteristics - const platformGroups = tools.reduce((acc: any, tool: any) => { - if (tool.platforms) { - tool.platforms.forEach((platform: string) => { - if (!acc[platform]) acc[platform] = []; - acc[platform].push(tool.name); - }); - } - return acc; - }, {}); - - const phaseGroups = tools.reduce((acc: any, tool: any) => { - if (tool.phases) { - tool.phases.forEach((phase: string) => { - if (!acc[phase]) acc[phase] = []; - acc[phase].push(tool.name); - }); - } - return acc; - }, {}); - - matrix.push({ - type: 'platform_compatibility', - groups: platformGroups - }); - - matrix.push({ - type: 'phase_synergy', - groups: phaseGroups - }); - - return matrix; -} - async function loadRawData(): Promise { if (!cachedData) { const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml'); @@ -337,27 +214,16 @@ export async function getCompressedToolsDataForAI(): Promise { if (this.isProcessing) { return; diff --git a/src/utils/toolHelpers.ts b/src/utils/toolHelpers.ts index 54431b3..47f572f 100644 --- a/src/utils/toolHelpers.ts +++ b/src/utils/toolHelpers.ts @@ -1,8 +1,3 @@ -/** - * CONSOLIDATED Tool utility functions for consistent tool operations across the app - * Works in both server (Node.js) and client (browser) environments - */ - export interface Tool { name: string; type?: 'software' | 'method' | 'concept'; @@ -18,10 +13,6 @@ export interface Tool { related_concepts?: string[]; } -/** - * Creates a URL-safe slug from a tool name - * Used for URLs, IDs, and file names consistently across the app - */ export function createToolSlug(toolName: string): string { if (!toolName || typeof toolName !== 'string') { console.warn('[toolHelpers] Invalid toolName provided to createToolSlug:', toolName); @@ 
-35,9 +26,6 @@ export function createToolSlug(toolName: string): string { .replace(/^-|-$/g, ''); // Remove leading/trailing hyphens } -/** - * Finds a tool by name or slug from tools array - */ export function findToolByIdentifier(tools: Tool[], identifier: string): Tool | undefined { if (!identifier || !Array.isArray(tools)) return undefined; @@ -47,23 +35,9 @@ export function findToolByIdentifier(tools: Tool[], identifier: string): Tool | ); } -/** - * Checks if tool has a valid project URL (hosted on CC24 server) - */ export function isToolHosted(tool: Tool): boolean { return tool.projectUrl !== undefined && tool.projectUrl !== null && tool.projectUrl !== "" && tool.projectUrl.trim() !== ""; -} - -/** - * Determines tool category for styling/logic - */ -export function getToolCategory(tool: Tool): 'concept' | 'method' | 'hosted' | 'oss' | 'proprietary' { - if (tool.type === 'concept') return 'concept'; - if (tool.type === 'method') return 'method'; - if (isToolHosted(tool)) return 'hosted'; - if (tool.license && tool.license !== 'Proprietary') return 'oss'; - return 'proprietary'; } \ No newline at end of file From 4b0d208ef5d6265d7dbc1254d3e4b383d7021b9a Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Mon, 4 Aug 2025 20:35:21 +0200 Subject: [PATCH 2/4] Context too long, using prompt only --- .env.example | 38 +++--- src/data/tools.yaml | 60 --------- src/pages/api/ai/enhance-input.ts | 8 +- src/utils/aiPipeline.ts | 204 ++++++++++++++++++------------ src/utils/dataService.ts | 5 - 5 files changed, 147 insertions(+), 168 deletions(-) diff --git a/.env.example b/.env.example index 8b45de2..00b00bc 100644 --- a/.env.example +++ b/.env.example @@ -54,6 +54,11 @@ AI_SIMILARITY_THRESHOLD=0.3 AI_EMBEDDING_SELECTION_LIMIT=30 AI_EMBEDDING_CONCEPTS_LIMIT=15 +# Maximum tools/concepts sent to AI when embeddings are DISABLED +# Set to 0 for no limit (WARNING: may cause token overflow with large datasets) +AI_NO_EMBEDDINGS_TOOL_LIMIT=0 +AI_NO_EMBEDDINGS_CONCEPT_LIMIT=0 
+ # === AI SELECTION STAGE === # Maximum tools the AI can select from embedding candidates # 🤖 This is the SECOND filter - AI intelligent selection @@ -98,17 +103,21 @@ AI_EMBEDDINGS_BATCH_SIZE=10 # Delay between embedding batches (milliseconds) AI_EMBEDDINGS_BATCH_DELAY_MS=1000 +# Maximum tools sent to AI for detailed analysis (micro-tasks) +AI_MAX_TOOLS_TO_ANALYZE=20 +AI_MAX_CONCEPTS_TO_ANALYZE=10 + # ============================================================================ # 5. AI CONTEXT & TOKEN MANAGEMENT # ============================================================================ # Maximum context tokens to maintain across micro-tasks # Controls how much conversation history is preserved between AI calls -AI_MAX_CONTEXT_TOKENS=3000 +AI_MAX_CONTEXT_TOKENS=4000 # Maximum tokens per individual AI prompt # Larger = more context per call | Smaller = faster responses -AI_MAX_PROMPT_TOKENS=1200 +AI_MAX_PROMPT_TOKENS=1500 # ============================================================================ # 6. 
AUTHENTICATION & AUTHORIZATION (OPTIONAL) @@ -169,7 +178,7 @@ GIT_API_TOKEN=your-git-api-token # ============================================================================ # Enable detailed audit trail of AI decision-making -FORENSIC_AUDIT_ENABLED=false +FORENSIC_AUDIT_ENABLED=true # Audit detail level: minimal, standard, verbose FORENSIC_AUDIT_DETAIL_LEVEL=standard @@ -199,23 +208,16 @@ CONFIDENCE_HIGH_THRESHOLD=80 # PERFORMANCE TUNING PRESETS # ============================================================================ -# 🚀 FOR FASTER RESPONSES (less comprehensive): -# AI_EMBEDDING_CANDIDATES=20 -# AI_MAX_SELECTED_ITEMS=15 -# AI_MICRO_TASK_DELAY_MS=200 -# AI_MAX_CONTEXT_TOKENS=2000 +# 🚀 FOR FASTER RESPONSES (prevent token overflow): +# AI_NO_EMBEDDINGS_TOOL_LIMIT=25 +# AI_NO_EMBEDDINGS_CONCEPT_LIMIT=10 -# 🎯 FOR BETTER QUALITY (more comprehensive): -# AI_EMBEDDING_CANDIDATES=60 -# AI_MAX_SELECTED_ITEMS=40 -# AI_MICRO_TASK_DELAY_MS=800 -# AI_MAX_CONTEXT_TOKENS=4000 +# 🎯 FOR FULL DATABASE ACCESS (risk of truncation): +# AI_NO_EMBEDDINGS_TOOL_LIMIT=0 +# AI_NO_EMBEDDINGS_CONCEPT_LIMIT=0 -# 🔋 FOR LOW-POWER SYSTEMS (minimal resources): -# AI_EMBEDDING_CANDIDATES=15 -# AI_MAX_SELECTED_ITEMS=10 -# AI_RATE_LIMIT_MAX_REQUESTS=2 -# AI_MICRO_TASK_DELAY_MS=1000 +# 🔋 FOR LOW-POWER SYSTEMS: +# AI_NO_EMBEDDINGS_TOOL_LIMIT=15 # ============================================================================ # FEATURE COMBINATIONS GUIDE diff --git a/src/data/tools.yaml b/src/data/tools.yaml index fae358a..f3862bd 100644 --- a/src/data/tools.yaml +++ b/src/data/tools.yaml @@ -113,64 +113,6 @@ tools: accessType: download license: VSL knowledgebase: false - - name: TheHive 5 - icon: 🐝 - type: software - description: >- - Die zentrale Incident-Response-Plattform orchestriert komplexe - Sicherheitsvorfälle vom ersten Alert bis zum Abschlussbericht. Jeder Case - wird strukturiert durch Observables (IOCs), Tasks und Zeitleisten - abgebildet. 
Die Cortex-Integration automatisiert Analysen durch Dutzende - Analyzer - von VirusTotal-Checks bis Sandbox-Detonation. - MISP-Synchronisation reichert Cases mit Threat-Intelligence an. Das - ausgeklügelte Rollen- und Rechtesystem ermöglicht sichere Zusammenarbeit - zwischen SOC-Analysten, Forensikern und Management. Templates - standardisieren Response-Prozesse nach Incident-Typ. Die RESTful API - integriert nahtlos mit SIEM, SOAR und Ticketing-Systemen. Metrics und - KPIs messen die Team-Performance. Die Community Edition bleibt kostenlos - für kleinere Teams, während Gold/Platinum-Lizenzen Enterprise-Features - bieten. - domains: - - incident-response - - static-investigations - - malware-analysis - - network-forensics - - fraud-investigation - phases: - - data-collection - - examination - - analysis - - reporting - platforms: - - Web - related_software: - - MISP - - Cortex - - Elasticsearch - domain-agnostic-software: - - collaboration-general - skillLevel: intermediate - accessType: server-based - url: https://strangebee.com/thehive/ - projectUrl: '' - license: Community Edition (Discontinued) / Commercial - knowledgebase: false - statusUrl: https://uptime.example.lab/api/badge/1/status - tags: - - web-interface - - case-management - - collaboration - - api - - workflow - - multi-user-support - - cortex-analyzer - - misp-integration - - playbooks - - metrics - - rbac - - template-driven - related_concepts: - - Digital Evidence Chain of Custody - name: MISP icon: 🌐 type: software @@ -223,7 +165,6 @@ tools: related_concepts: - Hash Functions & Digital Signatures related_software: - - TheHive 5 - Cortex - OpenCTI - name: DFIR-IRIS @@ -260,7 +201,6 @@ tools: platforms: - Web related_software: - - TheHive 5 - MISP - OpenCTI domain-agnostic-software: diff --git a/src/pages/api/ai/enhance-input.ts b/src/pages/api/ai/enhance-input.ts index 86086e2..1ebb2a8 100644 --- a/src/pages/api/ai/enhance-input.ts +++ b/src/pages/api/ai/enhance-input.ts @@ -94,18 +94,15 @@ 
${input} `.trim(); } -// Enhanced AI service call function async function callAIService(prompt: string): Promise { const endpoint = AI_ENDPOINT; const apiKey = AI_ANALYZER_API_KEY; const model = AI_ANALYZER_MODEL; - // Simple headers - add auth only if API key exists let headers: Record = { 'Content-Type': 'application/json' }; - // Add authentication if API key is provided if (apiKey) { headers['Authorization'] = `Bearer ${apiKey}`; console.log('[ENHANCE API] Using API key authentication'); @@ -113,7 +110,6 @@ async function callAIService(prompt: string): Promise { console.log('[ENHANCE API] No API key - making request without authentication'); } - // Simple request body const requestBody = { model, messages: [{ role: 'user', content: prompt }], @@ -124,8 +120,6 @@ async function callAIService(prompt: string): Promise { presence_penalty: 0.1 }; - // FIXED: This function is already being called through enqueueApiCall in the main handler - // So we can use direct fetch here since the queuing happens at the caller level return fetch(`${endpoint}/v1/chat/completions`, { method: 'POST', headers, @@ -214,7 +208,7 @@ export const POST: APIRoute = async ({ request }) => { success: true, questions, taskId, - inputComplete: questions.length === 0 // Flag to indicate if input seems complete + inputComplete: questions.length === 0 }), { status: 200, headers: { 'Content-Type': 'application/json' } diff --git a/src/utils/aiPipeline.ts b/src/utils/aiPipeline.ts index 761ea4b..34dd1ee 100644 --- a/src/utils/aiPipeline.ts +++ b/src/utils/aiPipeline.ts @@ -31,7 +31,6 @@ interface AnalysisResult { }; } -// NEW: Audit Trail Types interface AuditEntry { timestamp: number; phase: string; // 'retrieval', 'selection', 'micro-task-N' @@ -40,10 +39,9 @@ interface AuditEntry { output: any; // What came out of this step confidence: number; // 0-100: How confident we are in this step processingTimeMs: number; - metadata: Record; // Additional context + metadata: Record; } -// Enhanced 
AnalysisContext with Audit Trail interface AnalysisContext { userQuery: string; mode: string; @@ -62,7 +60,6 @@ interface AnalysisContext { seenToolNames: Set; - // NEW: Audit Trail auditTrail: AuditEntry[]; } @@ -78,25 +75,24 @@ class ImprovedMicroTaskAIPipeline { private similarityThreshold: number; private microTaskDelay: number; - // NEW: Embedding selection limits (top N from pre-filtered candidates) private embeddingSelectionLimit: number; private embeddingConceptsLimit: number; + + private noEmbeddingsToolLimit: number; + private noEmbeddingsConceptLimit: number; - // NEW: Embeddings efficiency thresholds private embeddingsMinTools: number; private embeddingsMaxReductionRatio: number; private maxContextTokens: number; private maxPromptTokens: number; - // Audit Configuration private auditConfig: { enabled: boolean; detailLevel: 'minimal' | 'standard' | 'verbose'; retentionHours: number; }; - // Temporary audit storage for pre-context operations private tempAuditEntries: AuditEntry[] = []; constructor() { @@ -106,36 +102,33 @@ class ImprovedMicroTaskAIPipeline { model: this.getEnv('AI_ANALYZER_MODEL') }; - // Core pipeline configuration this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '25', 10); this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '50', 10); this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.3'); this.microTaskDelay = parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10); - // NEW: Embedding selection limits (top N from pre-filtered candidates) this.embeddingSelectionLimit = parseInt(process.env.AI_EMBEDDING_SELECTION_LIMIT || '30', 10); this.embeddingConceptsLimit = parseInt(process.env.AI_EMBEDDING_CONCEPTS_LIMIT || '15', 10); + + this.noEmbeddingsToolLimit = parseInt(process.env.AI_NO_EMBEDDINGS_TOOL_LIMIT || '0', 10); + this.noEmbeddingsConceptLimit = parseInt(process.env.AI_NO_EMBEDDINGS_CONCEPT_LIMIT || '0', 10); - // NEW: Embeddings efficiency thresholds 
this.embeddingsMinTools = parseInt(process.env.AI_EMBEDDINGS_MIN_TOOLS || '8', 10); this.embeddingsMaxReductionRatio = parseFloat(process.env.AI_EMBEDDINGS_MAX_REDUCTION_RATIO || '0.75'); - // Context management this.maxContextTokens = parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10); this.maxPromptTokens = parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10); - // Audit configuration this.auditConfig = { enabled: process.env.FORENSIC_AUDIT_ENABLED === 'true', detailLevel: (process.env.FORENSIC_AUDIT_DETAIL_LEVEL as any) || 'standard', retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10) }; - // Log configuration for debugging console.log('[AI PIPELINE] Configuration loaded:', { embeddingCandidates: this.embeddingCandidates, embeddingSelection: `${this.embeddingSelectionLimit} tools, ${this.embeddingConceptsLimit} concepts`, - embeddingsThresholds: `min ${this.embeddingsMinTools} tools, max ${this.embeddingsMaxReductionRatio * 100}% of total`, + noEmbeddingsLimits: `${this.noEmbeddingsToolLimit || 'unlimited'} tools, ${this.noEmbeddingsConceptLimit || 'unlimited'} concepts`, auditEnabled: this.auditConfig.enabled }); } @@ -148,7 +141,6 @@ class ImprovedMicroTaskAIPipeline { return value; } - // NEW: Audit Trail Utility Functions private addAuditEntry( context: AnalysisContext | null, phase: string, @@ -175,22 +167,18 @@ class ImprovedMicroTaskAIPipeline { if (context) { context.auditTrail.push(auditEntry); } else { - // Store in temporary array for later merging this.tempAuditEntries.push(auditEntry); } - // Log for debugging when audit is enabled console.log(`[AUDIT] ${phase}/${action}: ${confidence}% confidence, ${Date.now() - startTime}ms`); } - // NEW: Merge temporary audit entries into context private mergeTemporaryAuditEntries(context: AnalysisContext): void { if (!this.auditConfig.enabled || this.tempAuditEntries.length === 0) return; const entryCount = this.tempAuditEntries.length; - // Add temp entries to the 
beginning of the context audit trail context.auditTrail.unshift(...this.tempAuditEntries); - this.tempAuditEntries = []; // Clear temp storage + this.tempAuditEntries = []; console.log(`[AUDIT] Merged ${entryCount} temporary audit entries into context`); } @@ -222,15 +210,12 @@ class ImprovedMicroTaskAIPipeline { let confidence = 60; // Base confidence - // Good selection ratio (not too many, not too few) if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20; else if (selectionRatio <= 0.05) confidence -= 10; // Too few else confidence -= 15; // Too many - // Has detailed reasoning if (hasReasoning) confidence += 15; - // Selected tools have good distribution if (result.selectedConcepts && result.selectedConcepts.length > 0) confidence += 5; return Math.min(95, Math.max(25, confidence)); @@ -254,26 +239,106 @@ class ImprovedMicroTaskAIPipeline { private safeParseJSON(jsonString: string, fallback: any = null): any { try { - const cleaned = jsonString + let cleaned = jsonString .replace(/^```json\s*/i, '') .replace(/\s*```\s*$/g, '') .trim(); + if (!cleaned.endsWith('}') && !cleaned.endsWith(']')) { + console.warn('[AI PIPELINE] JSON appears truncated, attempting recovery...'); + + let lastCompleteStructure = ''; + let braceCount = 0; + let bracketCount = 0; + let inString = false; + let escaped = false; + + for (let i = 0; i < cleaned.length; i++) { + const char = cleaned[i]; + + if (escaped) { + escaped = false; + continue; + } + + if (char === '\\') { + escaped = true; + continue; + } + + if (char === '"' && !escaped) { + inString = !inString; + continue; + } + + if (!inString) { + if (char === '{') braceCount++; + if (char === '}') braceCount--; + if (char === '[') bracketCount++; + if (char === ']') bracketCount--; + + if (braceCount === 0 && bracketCount === 0 && (char === '}' || char === ']')) { + lastCompleteStructure = cleaned.substring(0, i + 1); + } + } + } + + if (lastCompleteStructure) { + console.log('[AI PIPELINE] Attempting to parse 
recovered JSON structure...'); + cleaned = lastCompleteStructure; + } else { + if (braceCount > 0) { + cleaned += '}'; + console.log('[AI PIPELINE] Added closing brace to truncated JSON'); + } + if (bracketCount > 0) { + cleaned += ']'; + console.log('[AI PIPELINE] Added closing bracket to truncated JSON'); + } + } + } + const parsed = JSON.parse(cleaned); + + if (parsed && typeof parsed === 'object') { + if (parsed.selectedTools === undefined) parsed.selectedTools = []; + if (parsed.selectedConcepts === undefined) parsed.selectedConcepts = []; + + if (!Array.isArray(parsed.selectedTools)) parsed.selectedTools = []; + if (!Array.isArray(parsed.selectedConcepts)) parsed.selectedConcepts = []; + } + return parsed; } catch (error) { console.warn('[AI PIPELINE] JSON parsing failed:', error.message); - console.warn('[AI PIPELINE] Raw content:', jsonString.slice(0, 200)); + console.warn('[AI PIPELINE] Raw content (first 300 chars):', jsonString.slice(0, 300)); + console.warn('[AI PIPELINE] Raw content (last 300 chars):', jsonString.slice(-300)); + + if (jsonString.includes('selectedTools')) { + const toolMatches = jsonString.match(/"([^"]+)"/g); + if (toolMatches && toolMatches.length > 0) { + console.log('[AI PIPELINE] Attempting partial recovery from broken JSON...'); + const possibleTools = toolMatches + .map(match => match.replace(/"/g, '')) + .filter(name => name.length > 2 && !['selectedTools', 'selectedConcepts', 'reasoning'].includes(name)) + .slice(0, 15); // Reasonable limit + + if (possibleTools.length > 0) { + console.log(`[AI PIPELINE] Recovered ${possibleTools.length} possible tool names from broken JSON`); + return { + selectedTools: possibleTools, + selectedConcepts: [], + reasoning: 'Recovered from truncated response' + }; + } + } + } + return fallback; } } - private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string): boolean { - if (context.seenToolNames.has(tool.name)) { - console.log(`[AI 
PIPELINE] Skipping duplicate tool: ${tool.name}`); - return false; - } - + private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string): boolean { context.seenToolNames.add(tool.name); if (!context.selectedTools) context.selectedTools = []; @@ -302,11 +367,9 @@ class ImprovedMicroTaskAIPipeline { console.log(`[AI PIPELINE] Embeddings found ${similarItems.length} similar items`); - // Create lookup maps for O(1) access while preserving original data const toolsMap = new Map(toolsData.tools.map((tool: any) => [tool.name, tool])); const conceptsMap = new Map(toolsData.concepts.map((concept: any) => [concept.name, concept])); - // Process in similarity order, preserving the ranking const similarTools = similarItems .filter((item): item is SimilarityResult => item.type === 'tool') .map(item => toolsMap.get(item.name)) @@ -319,7 +382,6 @@ class ImprovedMicroTaskAIPipeline { console.log(`[AI PIPELINE] Similarity-ordered results: ${similarTools.length} tools, ${similarConcepts.length} concepts`); - // FIXED: Better threshold logic - only use embeddings if we get meaningful filtering const totalAvailableTools = toolsData.tools.length; const reductionRatio = similarTools.length / totalAvailableTools; @@ -340,7 +402,6 @@ class ImprovedMicroTaskAIPipeline { selectionMethod = 'full_dataset'; } - // Enhanced audit entry with reduction statistics if (this.auditConfig.enabled) { this.addAuditEntry(null, 'retrieval', 'embeddings-search', { query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates }, @@ -420,25 +481,29 @@ class ImprovedMicroTaskAIPipeline { related_software: concept.related_software || [] })); - // CORRECTED LOGIC: let toolsToSend: any[]; let conceptsToSend: any[]; if (selectionMethod === 'embeddings_candidates') { - // WITH EMBEDDINGS: Take top N from pre-filtered candidates toolsToSend = toolsWithFullData.slice(0, this.embeddingSelectionLimit); conceptsToSend = 
conceptsWithFullData.slice(0, this.embeddingConceptsLimit); - console.log(`[AI PIPELINE] Embeddings enabled: sending top ${toolsToSend.length} pre-filtered tools`); + console.log(`[AI PIPELINE] Embeddings enabled: sending top ${toolsToSend.length} similarity-ordered tools`); } else { - // WITHOUT EMBEDDINGS: Send entire compressed database (original behavior) - toolsToSend = toolsWithFullData; // ALL tools from database - conceptsToSend = conceptsWithFullData; // ALL concepts from database + const maxTools = this.noEmbeddingsToolLimit > 0 ? + Math.min(this.noEmbeddingsToolLimit, candidateTools.length) : + candidateTools.length; - console.log(`[AI PIPELINE] Embeddings disabled: sending entire database (${toolsToSend.length} tools, ${conceptsToSend.length} concepts)`); + const maxConcepts = this.noEmbeddingsConceptLimit > 0 ? + Math.min(this.noEmbeddingsConceptLimit, candidateConcepts.length) : + candidateConcepts.length; + + toolsToSend = toolsWithFullData.slice(0, maxTools); + conceptsToSend = conceptsWithFullData.slice(0, maxConcepts); + + console.log(`[AI PIPELINE] Embeddings disabled: sending ${toolsToSend.length}/${candidateTools.length} tools (limit: ${this.noEmbeddingsToolLimit || 'none'})`); } - // Generate the German prompt with appropriately selected tool data const basePrompt = getPrompt('toolSelection', mode, userQuery, selectionMethod, this.maxSelectedItems); const prompt = `${basePrompt} @@ -448,9 +513,12 @@ ${JSON.stringify(toolsToSend, null, 2)} VERFÜGBARE KONZEPTE (mit vollständigen Daten): ${JSON.stringify(conceptsToSend, null, 2)}`; - // Log token usage for monitoring const estimatedTokens = this.estimateTokens(prompt); - console.log(`[AI PIPELINE] Method: ${selectionMethod}, Tools: ${toolsToSend.length}, Tokens: ~${estimatedTokens}`); + console.log(`[AI PIPELINE] Method: ${selectionMethod}, Tools: ${toolsToSend.length}, Estimated tokens: ~${estimatedTokens}`); + + if (estimatedTokens > 35000) { + console.warn(`[AI PIPELINE] WARNING: Prompt tokens 
(${estimatedTokens}) may exceed model limits`); + } try { const response = await this.callAI(prompt, 2500); @@ -527,7 +595,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; return new Promise(resolve => setTimeout(resolve, ms)); } - private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens: number = 300): Promise { + private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens: number = 500): Promise { const startTime = Date.now(); let contextPrompt = prompt; @@ -552,11 +620,10 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; success: true }; - // NEW: Add Audit Entry for Successful Micro-Task this.addAuditEntry(context, 'micro-task', 'ai-analysis', { promptLength: contextPrompt.length, maxTokens }, { responseLength: response.length, contentPreview: response.slice(0, 100) }, - response.length > 50 ? 80 : 60, // Confidence based on response quality + response.length > 50 ? 80 : 60, startTime, { aiModel: this.config.model, contextUsed: context.contextHistory.length > 0 } ); @@ -572,11 +639,10 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; error: error.message }; - // NEW: Add Audit Entry for Failed Micro-Task this.addAuditEntry(context, 'micro-task', 'ai-analysis-failed', { promptLength: contextPrompt.length, maxTokens }, { error: error.message }, - 5, // Very low confidence + 5, startTime, { aiModel: this.config.model, contextUsed: context.contextHistory.length > 0 } ); @@ -589,7 +655,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; const isWorkflow = context.mode === 'workflow'; const prompt = getPrompt('scenarioAnalysis', isWorkflow, context.userQuery); - const result = await this.callMicroTaskAI(prompt, context, 220); + const result = await this.callMicroTaskAI(prompt, context, 400); if (result.success) { if (isWorkflow) { @@ -608,7 +674,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; const isWorkflow = context.mode === 'workflow'; const prompt = getPrompt('investigationApproach', isWorkflow, context.userQuery); - 
const result = await this.callMicroTaskAI(prompt, context, 220); + const result = await this.callMicroTaskAI(prompt, context, 400); if (result.success) { context.investigationApproach = result.content; @@ -622,7 +688,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; const isWorkflow = context.mode === 'workflow'; const prompt = getPrompt('criticalConsiderations', isWorkflow, context.userQuery); - const result = await this.callMicroTaskAI(prompt, context, 180); + const result = await this.callMicroTaskAI(prompt, context, 350); if (result.success) { context.criticalConsiderations = result.content; @@ -648,7 +714,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; const prompt = getPrompt('phaseToolSelection', context.userQuery, phase, phaseTools); - const result = await this.callMicroTaskAI(prompt, context, 450); + const result = await this.callMicroTaskAI(prompt, context, 800); if (result.success) { const selections = this.safeParseJSON(result.content, []); @@ -665,7 +731,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; } }); - // NEW: Add audit entry for tool selection this.addAuditEntry(context, 'micro-task', 'phase-tool-selection', { phase: phase.id, availableTools: phaseTools.length }, { validSelections: validSelections.length, selectedTools: validSelections.map(s => s.toolName) }, @@ -682,7 +747,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise { const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank); - const result = await this.callMicroTaskAI(prompt, context, 650); + const result = await this.callMicroTaskAI(prompt, context, 1200); if (result.success) { const evaluation = this.safeParseJSON(result.content, { @@ -702,7 +767,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; } }, 'evaluation', evaluation.suitability_score); - // NEW: Add audit entry for tool evaluation this.addAuditEntry(context, 'micro-task', 'tool-evaluation', { toolName: tool.name, 
rank }, { suitabilityScore: evaluation.suitability_score, hasExplanation: !!evaluation.detailed_explanation }, @@ -730,7 +794,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; const selectedToolNames = context.selectedTools?.map(st => st.tool.name) || []; const prompt = getPrompt('backgroundKnowledgeSelection', context.userQuery, context.mode, selectedToolNames, availableConcepts); - const result = await this.callMicroTaskAI(prompt, context, 400); + const result = await this.callMicroTaskAI(prompt, context, 700); if (result.success) { const selections = this.safeParseJSON(result.content, []); @@ -743,7 +807,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; relevance: sel.relevance })); - // NEW: Add audit entry for background knowledge selection this.addAuditEntry(context, 'micro-task', 'background-knowledge-selection', { availableConcepts: availableConcepts.length }, { selectedConcepts: context.backgroundKnowledge?.length || 0 }, @@ -761,21 +824,19 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; const selectedToolNames = context.selectedTools?.map(st => st.tool.name) || []; const prompt = getPrompt('finalRecommendations', context.mode === 'workflow', context.userQuery, selectedToolNames); - const result = await this.callMicroTaskAI(prompt, context, 180); + const result = await this.callMicroTaskAI(prompt, context, 350); return result; } - private async callAI(prompt: string, maxTokens: number = 1000): Promise { + private async callAI(prompt: string, maxTokens: number = 1500): Promise { const endpoint = this.config.endpoint; const apiKey = this.config.apiKey; const model = this.config.model; - // Simple headers - add auth only if API key exists let headers: Record = { 'Content-Type': 'application/json' }; - // Add authentication if API key is provided if (apiKey) { headers['Authorization'] = `Bearer ${apiKey}`; console.log('[AI PIPELINE] Using API key authentication'); @@ -783,7 +844,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; console.log('[AI PIPELINE] No 
API key - making request without authentication'); } - // Simple request body const requestBody = { model, messages: [{ role: 'user', content: prompt }], @@ -792,7 +852,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; }; try { - // FIXED: Use direct fetch since entire pipeline is already queued at query.ts level const response = await fetch(`${endpoint}/v1/chat/completions`, { method: 'POST', headers, @@ -826,13 +885,11 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; let completedTasks = 0; let failedTasks = 0; - // NEW: Clear any previous temporary audit entries this.tempAuditEntries = []; console.log(`[AI PIPELINE] Starting ${mode} query processing with context continuity and audit trail`); try { - // Stage 1: Get intelligent candidates (embeddings + AI selection) const toolsData = await getCompressedToolsDataForAI(); const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode); @@ -844,20 +901,17 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; maxContextLength: this.maxContextTokens, currentContextLength: 0, seenToolNames: new Set(), - // NEW: Initialize audit trail auditTrail: [] }; - // NEW: Merge any temporary audit entries from pre-context operations this.mergeTemporaryAuditEntries(context); console.log(`[AI PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`); - // NEW: Add initial audit entry this.addAuditEntry(context, 'initialization', 'pipeline-start', { userQuery, mode, toolsDataLoaded: !!toolsData }, { candidateTools: filteredData.tools.length, candidateConcepts: filteredData.concepts.length }, - 90, // High confidence for initialization + 90, startTime, { auditEnabled: this.auditConfig.enabled } ); @@ -896,19 +950,15 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; } } - // Task 5: Background Knowledge Selection const knowledgeResult = await this.selectBackgroundKnowledge(context); if (knowledgeResult.success) completedTasks++; else failedTasks++; await this.delay(this.microTaskDelay); - // Task 6: 
Final Recommendations const finalResult = await this.generateFinalRecommendations(context); if (finalResult.success) completedTasks++; else failedTasks++; - // Build final recommendation const recommendation = this.buildRecommendation(context, mode, finalResult.content); - // NEW: Add final audit entry this.addAuditEntry(context, 'completion', 'pipeline-end', { completedTasks, failedTasks }, { finalRecommendation: !!recommendation, auditEntriesGenerated: context.auditTrail.length }, @@ -935,7 +985,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; return { recommendation: { ...recommendation, - // NEW: Include audit trail in response auditTrail: this.auditConfig.enabled ? context.auditTrail : undefined }, processingStats @@ -944,7 +993,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`; } catch (error) { console.error('[AI PIPELINE] Processing failed:', error); - // NEW: Ensure temp audit entries are cleared even on error this.tempAuditEntries = []; throw error; diff --git a/src/utils/dataService.ts b/src/utils/dataService.ts index 8b80f33..c0f8b1d 100644 --- a/src/utils/dataService.ts +++ b/src/utils/dataService.ts @@ -130,7 +130,6 @@ async function loadRawData(): Promise { try { cachedData = ToolsDataSchema.parse(rawData); - // Enhanced: Add default skill level descriptions if not provided if (!cachedData.skill_levels || Object.keys(cachedData.skill_levels).length === 0) { cachedData.skill_levels = { novice: "Minimal technical background required, guided interfaces", @@ -178,21 +177,18 @@ export async function getCompressedToolsDataForAI(): Promise tool.type !== 'concept') .map(tool => { const { projectUrl, statusUrl, ...compressedTool } = tool; return { ...compressedTool, - // Enhanced: Add computed fields for AI is_hosted: projectUrl !== undefined && projectUrl !== null && projectUrl !== "" && projectUrl.trim() !== "", is_open_source: tool.license && tool.license !== 'Proprietary', complexity_score: tool.skillLevel === 'expert' ? 
5 : tool.skillLevel === 'advanced' ? 4 : tool.skillLevel === 'intermediate' ? 3 : tool.skillLevel === 'beginner' ? 2 : 1, - // Enhanced: Phase-specific suitability hints phase_suitability: tool.phases?.map(phase => ({ phase, primary_use: tool.tags?.find(tag => tag.includes(phase)) ? 'primary' : 'secondary' @@ -206,7 +202,6 @@ export async function getCompressedToolsDataForAI(): Promise Date: Mon, 4 Aug 2025 21:05:15 +0200 Subject: [PATCH 3/4] Progress --- src/components/AIQueryInterface.astro | 24 +++++------ src/styles/global.css | 1 + src/utils/aiPipeline.ts | 13 +++++- src/utils/embeddings.ts | 57 ++++++++++++++++++++------- 4 files changed, 67 insertions(+), 28 deletions(-) diff --git a/src/components/AIQueryInterface.astro b/src/components/AIQueryInterface.astro index 0c8d86e..9b80036 100644 --- a/src/components/AIQueryInterface.astro +++ b/src/components/AIQueryInterface.astro @@ -15,7 +15,7 @@ const domainAgnosticSoftware = data['domain-agnostic-software'] || []; - KI-gestützte Workflow-Empfehlungen + Forensic AI

Beschreiben Sie Ihr forensisches Szenario und erhalten Sie maßgeschneiderte Workflow-Empfehlungen @@ -169,16 +169,16 @@ const domainAgnosticSoftware = data['domain-agnostic-software'] || [];

@@ -292,13 +292,13 @@ class AIQueryInterface { return { workflow: { placeholder: "Beschreiben Sie Ihr forensisches Szenario... z.B. 'Verdacht auf Ransomware-Angriff auf Windows-Domänencontroller'", - description: "Beschreiben Sie Ihr forensisches Szenario und erhalten Sie maßgeschneiderte Workflow-Empfehlungen.", + description: "Beschreiben Sie Ihre Untersuchungssituation und erhalten Sie Empfehlungen für alle Phasen der Untersuchung.", submitText: "Empfehlungen generieren", loadingText: "Analysiere Szenario und generiere Empfehlungen..." }, tool: { placeholder: "Beschreiben Sie Ihr Problem... z.B. 'Analyse von Android-Backups mit WhatsApp-Nachrichten'", - description: "Beschreiben Sie Ihr Problem und erhalten Sie 1-3 gezielt passende Empfehlungen.", + description: "Beschreiben Sie Ihre Untersuchungssituation und erhalten Sie Empfehlungen für eine spezifische Aufgabenstellung.", submitText: "Empfehlungen finden", loadingText: "Analysiere Anforderungen und suche passende Methode..." } @@ -706,7 +706,7 @@ class AIQueryInterface { const html = `
- ${this.renderHeader('Empfohlener DFIR-Workflow', originalQuery)} + ${this.renderHeader('Untersuchungsansatz', originalQuery)} ${this.renderContextualAnalysis(recommendation, 'workflow')} ${this.renderBackgroundKnowledge(recommendation.background_knowledge)} ${this.renderWorkflowPhases(toolsByPhase, phaseOrder, phaseNames)} @@ -721,7 +721,7 @@ class AIQueryInterface { displayToolResults(recommendation, originalQuery) { const html = `
- ${this.renderHeader('Passende Empfehlungen', originalQuery)} + ${this.renderHeader('Handlungsempfehlung', originalQuery)} ${this.renderContextualAnalysis(recommendation, 'tool')} ${this.renderBackgroundKnowledge(recommendation.background_knowledge)} ${this.renderToolRecommendations(recommendation.recommended_tools)} diff --git a/src/styles/global.css b/src/styles/global.css index ca7ceee..79d1dce 100644 --- a/src/styles/global.css +++ b/src/styles/global.css @@ -2015,6 +2015,7 @@ input[type="checkbox"] { gap: 1rem; max-width: 1200px; margin: 0 auto; + margin-top: 1rem; } .phase-header { diff --git a/src/utils/aiPipeline.ts b/src/utils/aiPipeline.ts index 34dd1ee..5b14b4d 100644 --- a/src/utils/aiPipeline.ts +++ b/src/utils/aiPipeline.ts @@ -357,6 +357,17 @@ class ImprovedMicroTaskAIPipeline { let candidateConcepts: any[] = []; let selectionMethod = 'unknown'; + // WAIT for embeddings initialization if embeddings are enabled + if (process.env.AI_EMBEDDINGS_ENABLED === 'true') { + try { + console.log('[AI PIPELINE] Waiting for embeddings initialization...'); + await embeddingsService.waitForInitialization(); + console.log('[AI PIPELINE] Embeddings ready, proceeding with similarity search'); + } catch (error) { + console.error('[AI PIPELINE] Embeddings initialization failed, falling back to full dataset:', error); + } + } + if (embeddingsService.isEnabled()) { const embeddingsStart = Date.now(); const similarItems = await embeddingsService.findSimilar( @@ -425,7 +436,7 @@ class ImprovedMicroTaskAIPipeline { ); } } else { - console.log(`[AI PIPELINE] Embeddings disabled, using full dataset`); + console.log(`[AI PIPELINE] Embeddings disabled or not ready, using full dataset`); candidateTools = toolsData.tools; candidateConcepts = toolsData.concepts; selectionMethod = 'full_dataset'; diff --git a/src/utils/embeddings.ts b/src/utils/embeddings.ts index b7bbbaa..d70d0ca 100644 --- a/src/utils/embeddings.ts +++ b/src/utils/embeddings.ts @@ -31,6 +31,7 @@ interface 
SimilarityResult extends EmbeddingData { class EmbeddingsService { private embeddings: EmbeddingData[] = []; private isInitialized = false; + private initializationPromise: Promise | null = null; // ADD THIS LINE private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json'); private readonly batchSize: number; private readonly batchDelay: number; @@ -42,7 +43,25 @@ class EmbeddingsService { this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10); } + // REPLACE the existing initialize method with this: async initialize(): Promise { + // If initialization is already in progress, wait for it + if (this.initializationPromise) { + return this.initializationPromise; + } + + // If already initialized, return immediately + if (this.isInitialized) { + return Promise.resolve(); + } + + // Start initialization and store the promise + this.initializationPromise = this.performInitialization(); + return this.initializationPromise; + } + + // ADD THIS NEW METHOD: + private async performInitialization(): Promise { if (!this.enabled) { console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization'); return; @@ -74,9 +93,29 @@ class EmbeddingsService { } catch (error) { console.error('[EMBEDDINGS] Failed to initialize:', error); this.isInitialized = false; + throw error; + } finally { + this.initializationPromise = null; } } + async waitForInitialization(): Promise { + if (!this.enabled) { + return Promise.resolve(); + } + + if (this.isInitialized) { + return Promise.resolve(); + } + + if (this.initializationPromise) { + await this.initializationPromise; + return; + } + + return this.initialize(); + } + private hashData(data: any): string { return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32); } @@ -127,7 +166,6 @@ class EmbeddingsService { 'Content-Type': 'application/json' }; - // API key is optional for Ollama but required for Mistral/OpenAI if (apiKey) { headers['Authorization'] = `Bearer 
${apiKey}`; } @@ -148,12 +186,10 @@ class EmbeddingsService { const data = await response.json(); - // Detect Ollama format if (Array.isArray(data.embeddings)) { return data.embeddings; } - // Detect OpenAI/Mistral format if (Array.isArray(data.data)) { return data.data.map((item: any) => item.embedding); } @@ -170,7 +206,6 @@ class EmbeddingsService { const contents = allItems.map(item => this.createContentString(item)); this.embeddings = []; - // Process in batches to respect rate limits for (let i = 0; i < contents.length; i += this.batchSize) { const batch = contents.slice(i, i + this.batchSize); const batchItems = allItems.slice(i, i + this.batchSize); @@ -198,7 +233,6 @@ class EmbeddingsService { }); }); - // Rate limiting delay between batches if (i + this.batchSize < contents.length) { await new Promise(resolve => setTimeout(resolve, this.batchDelay)); } @@ -213,7 +247,6 @@ class EmbeddingsService { } public async embedText(text: string): Promise { - // Re‑use the private batch helper to avoid auth duplication const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]); return embedding; } @@ -239,25 +272,21 @@ class EmbeddingsService { } try { - // Generate embedding for query const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]); const queryEmbedding = queryEmbeddings[0]; console.log(`[EMBEDDINGS] Computing similarities for ${this.embeddings.length} items`); - // Calculate similarities - properly typed const similarities: SimilarityResult[] = this.embeddings.map(item => ({ ...item, similarity: this.cosineSimilarity(queryEmbedding, item.embedding) })); - // Filter by threshold and sort by similarity (descending - highest first) const results = similarities .filter(item => item.similarity >= threshold) - .sort((a, b) => b.similarity - a.similarity) // CRITICAL: Ensure descending order + .sort((a, b) => b.similarity - a.similarity) .slice(0, maxResults); - // ENHANCED: Verify ordering is correct const 
orderingValid = results.every((item, index) => { if (index === 0) return true; return item.similarity <= results[index - 1].similarity; @@ -270,15 +299,13 @@ class EmbeddingsService { }); } - // ENHANCED: Log top results for debugging console.log(`[EMBEDDINGS] Found ${results.length} similar items (threshold: ${threshold})`); if (results.length > 0) { - console.log('[EMBEDDINGS] Top 5 similarity matches:'); - results.slice(0, 5).forEach((item, idx) => { + console.log('[EMBEDDINGS] Top 10 similarity matches:'); + results.slice(0, 10).forEach((item, idx) => { console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`); }); - // Verify first result is indeed the highest const topSimilarity = results[0].similarity; const hasHigherSimilarity = results.some(item => item.similarity > topSimilarity); if (hasHigherSimilarity) { From f00e2d3cfda065053a733ddc8a783dbf0f9a4ec1 Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Mon, 4 Aug 2025 21:38:31 +0200 Subject: [PATCH 4/4] content --- src/data/tools.yaml | 238 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) diff --git a/src/data/tools.yaml b/src/data/tools.yaml index f3862bd..0c3a185 100644 --- a/src/data/tools.yaml +++ b/src/data/tools.yaml @@ -3367,6 +3367,244 @@ tools: accessType: download license: "MPL\_/ AGPL" knowledgebase: false + - name: ShadowExplorer + icon: 🗂️ + type: software + description: >- + Das schlanke Windows-Tool macht Volume-Shadow-Copy-Snapshots auch in Home-Editionen sichtbar und erlaubt das komfortable Durchstöbern sowie Wiederherstellen früherer Datei-Versionen. Damit lassen sich versehentlich gelöschte oder überschriebene Dateien in Sekunden zurückholen – geeignet für schnelle Triage und klassische Datenträgerforensik. 
+ domains: + - static-investigations + - incident-response + phases: + - examination + - analysis + platforms: + - Windows + related_software: + - OSFMount + - PhotoRec + domain-agnostic-software: null + skillLevel: novice + accessType: download + url: https://www.shadowexplorer.com/ + license: Freeware + knowledgebase: false + tags: + - gui + - shadow-copy + - snapshot-browsing + - file-recovery + - previous-versions + - scenario:file_recovery + - point-in-time-restore + related_concepts: + - Digital Evidence Chain of Custody + + + - name: Sonic Visualiser + icon: 🎵 + type: software + description: >- + Die Open-Source-Audio-Analyse-Suite wird in der Forensik eingesetzt, + um Wave- und Kompressionsformate bis auf Sample-Ebene zu untersuchen. + Spektrogramm-Visualisierung, Zeit-/Frequenz-Annotationen und + Transkriptions-Plugins (Vamp) helfen, Manipulationen wie + Bandpass-Filter, Time-Stretching oder Insert-Edits nachzuweisen. + FFT- und Mel-Spectral-Views decken versteckte Audio-Watermarks oder + Steganografie auf. Export-Funktionen in CSV/JSON erlauben die + Weiterverarbeitung in Python-Notebooks oder SIEM-Pipelines. + Ideal für Voice-Authentication-Checks, Deep-Fake-Erkennung + und Beweisaufbereitung vor Gericht. + skillLevel: intermediate + url: https://www.sonicvisualiser.org/ + domains: + - static-investigations + - fraud-investigation + phases: + - examination + - analysis + - reporting + platforms: + - Windows + - Linux + - macOS + accessType: download + license: GPL-2.0 + knowledgebase: false + tags: + - gui + - audio-forensics + - spectrogram + - plugin-support + - annotation + - csv-export + related_concepts: [] + related_software: + - Audacity + + - name: Dissect + icon: 🧩 + type: software + description: >- + Fox-ITs Python-Framework abstrahiert Windows- und Linux-Speicherabbilder + in virtuelle Objekte (Prozesse, Dateien, Registry, Kernel-Strukturen), + ohne zuvor ein Profil definieren zu müssen. 
Modularer + Hypervisor-Layer erlaubt das Mounten und gleichzeitige Analysieren + mehrerer Memory-Dumps – perfekt für großflächige Incident-Response. + Plugins dekodieren PTEs, handle tables, APC-Queues und liefern + YARA-kompatible Scans. Die Zero-Copy-Architektur beschleunigt Queries auf + Multi-GB-Images signifikant. Unterstützt Windows 11 24H2-Kernel sowie + Linux 6.x-schichten ab Juli 2025. + skillLevel: advanced + url: https://github.com/fox-it/dissect + domains: + - incident-response + - malware-analysis + - static-investigations + phases: + - examination + - analysis + platforms: + - Windows + - Linux + - macOS + accessType: download + license: Apache 2.0 + knowledgebase: false + tags: + - command-line + - memory-analysis + - plugin-support + - python-library + - zero-copy + - profile-less + related_concepts: + - Regular Expressions (Regex) + related_software: + - Volatility 3 + - Rekall + + - name: Docker Explorer + icon: 🐳 + type: software + description: >- + Googles Forensik-Toolkit zerlegt Offline-Docker-Volumes und + Overlay-Dateisysteme ohne laufenden Daemon. Es extrahiert + Container-Config, Image-Layer, ENV-Variablen, Mounted-Secrets + und schreibt Timeline-fähige Metadata-JSONs. Unterstützt btrfs, + overlay2 und zfs Storage-Driver sowie Docker Desktop (macOS/Windows). + Perfekt, um bösartige Images nach Supply-Chain-Attacken zu enttarnen + oder flüchtige Container nach einem Incident nachträglich zu analysieren. 
+ skillLevel: intermediate + url: https://github.com/google/docker-explorer + domains: + - cloud-forensics + - incident-response + - static-investigations + phases: + - data-collection + - examination + - analysis + platforms: + - Linux + - macOS + - Windows + accessType: download + license: Apache 2.0 + knowledgebase: false + tags: + - command-line + - container-forensics + - docker + - timeline + - json-export + - supply-chain + related_concepts: [] + related_software: + - Velociraptor + - osquery + + - name: Ghiro + icon: 🖼️ + type: software + description: >- + Die Web-basierte Bild­forensik-Plattform automatisiert EXIF-Analyse, + Hash-Matching, Error-Level-Evaluation (ELA) und + Steganografie-Erkennung für große Dateibatches. Unterstützt + Gesichts- und NSFW-Detection sowie GPS-Reverse-Geocoding für + Bewegungsprofile. Reports sind gerichtsfest + versioniert, REST-API und Celery-Worker skalieren auf + Millionen Bilder – ideal für CSAM-Ermittlungen oder Fake-News-Prüfung. + skillLevel: intermediate + url: https://getghiro.org/ + domains: + - static-investigations + - fraud-investigation + - mobile-forensics + phases: + - examination + - analysis + - reporting + platforms: + - Web + - Linux + accessType: server-based + license: GPL-2.0 + knowledgebase: false + tags: + - web-interface + - image-forensics + - exif-analysis + - steganography + - nsfw-detection + - batch-processing + related_concepts: + - Hash Functions & Digital Signatures + related_software: + - ExifTool + - PhotoRec + + - name: Sherloq + icon: 🔍 + type: software + description: >- + Das Python-GUI-Toolkit für visuelle Datei-Analyse kombiniert + klassische Reverse-Steganografie-Techniken (LSB, Palette-Tweaking, + DCT-Coefficient-Scanning) mit modernen CV-Algorithmen. + Heatmaps und Histogramm-Diffs zeigen Manipulations-Hotspots, + während eine „Carve-All-Layers“-Funktion versteckte Daten in PNG, + JPEG, BMP, GIF und Audio-Spectra aufspürt. 
Plugins für zsteg, + binwalk und exiftool erweitern die Pipeline. + Eine Must-have-Ergänzung zu Ghidra & friends, wenn + Malware Dateien als Dead-Drop nutzt. + skillLevel: intermediate + url: https://github.com/GuidoBartoli/sherloq + domains: + - malware-analysis + - static-investigations + phases: + - examination + - analysis + platforms: + - Windows + - Linux + - macOS + accessType: download + license: MIT + knowledgebase: false + tags: + - gui + - image-forensics + - steganography + - lsb-extraction + - histogram-analysis + - plugin-support + related_concepts: + - Regular Expressions (Regex) + related_software: + - Ghiro + - CyberChef + - name: Cortex type: software description: >-