fix embeddings truncation
This commit is contained in:
		
							parent
							
								
									3a5e8e88b2
								
							
						
					
					
						commit
						7c3cc7ec9a
					
				
							
								
								
									
										73
									
								
								.env.example
									
									
									
									
									
								
							
							
						
						
									
										73
									
								
								.env.example
									
									
									
									
									
								
							@ -42,32 +42,35 @@ AI_EMBEDDINGS_MODEL=mistral-embed
 | 
			
		||||
# How many similar tools/concepts embeddings search returns as candidates
 | 
			
		||||
# 🔍 This is the FIRST filter - vector similarity matching
 | 
			
		||||
# Lower = faster, less comprehensive | Higher = slower, more comprehensive
 | 
			
		||||
AI_EMBEDDING_CANDIDATES=40
 | 
			
		||||
AI_EMBEDDING_CANDIDATES=50
 | 
			
		||||
 | 
			
		||||
# Minimum similarity score threshold (0.0-1.0)
 | 
			
		||||
# Lower = more results but less relevant | Higher = fewer but more relevant
 | 
			
		||||
AI_SIMILARITY_THRESHOLD=0.3
 | 
			
		||||
 | 
			
		||||
# === AI SELECTION FROM EMBEDDINGS ===
 | 
			
		||||
# When embeddings are enabled, how many top tools to send with full context
 | 
			
		||||
# 🎯 This is the SECOND filter - take best N from embeddings results
 | 
			
		||||
AI_EMBEDDING_SELECTION_LIMIT=30
 | 
			
		||||
AI_EMBEDDING_CONCEPTS_LIMIT=15
 | 
			
		||||
 | 
			
		||||
# === AI SELECTION STAGE ===
 | 
			
		||||
# Maximum tools the AI can select from embedding candidates
 | 
			
		||||
# 🤖 This is the SECOND filter - AI intelligent selection
 | 
			
		||||
# Should be ≤ AI_EMBEDDING_CANDIDATES
 | 
			
		||||
AI_MAX_SELECTED_ITEMS=25
 | 
			
		||||
 | 
			
		||||
# Maximum tools sent to AI for detailed analysis (micro-tasks)
 | 
			
		||||
# 📋 This is the FINAL context size sent to AI models
 | 
			
		||||
# Lower = less AI context, faster responses | Higher = more context, slower
 | 
			
		||||
AI_MAX_TOOLS_TO_ANALYZE=20
 | 
			
		||||
# === EMBEDDINGS EFFICIENCY THRESHOLDS ===
 | 
			
		||||
# Minimum tools required for embeddings to be considered useful
 | 
			
		||||
AI_EMBEDDINGS_MIN_TOOLS=8
 | 
			
		||||
 | 
			
		||||
# Maximum concepts sent to AI for background knowledge selection
 | 
			
		||||
# 📚 Concepts are smaller than tools, so can be higher
 | 
			
		||||
AI_MAX_CONCEPTS_TO_ANALYZE=10
 | 
			
		||||
# Maximum percentage of total tools that embeddings can return to be considered "filtering"
 | 
			
		||||
AI_EMBEDDINGS_MAX_REDUCTION_RATIO=0.75
 | 
			
		||||
 | 
			
		||||
# === CONTEXT FLOW SUMMARY ===
 | 
			
		||||
# 1. Vector Search: 111 total tools → AI_EMBEDDING_CANDIDATES (50) most similar
 | 
			
		||||
# 2. AI Selection: 50 candidates → AI_MAX_SELECTED_ITEMS (25) best matches
 | 
			
		||||
# 3. AI Analysis: 25 selected → AI_MAX_TOOLS_TO_ANALYZE (20) for micro-tasks
 | 
			
		||||
# 4. Final Output: Recommendations based on analyzed subset
 | 
			
		||||
# 3. Final Output: Recommendations based on analyzed subset
 | 
			
		||||
 | 
			
		||||
# ============================================================================
 | 
			
		||||
# 4. AI PERFORMANCE & RATE LIMITING
 | 
			
		||||
@ -107,12 +110,6 @@ AI_MAX_CONTEXT_TOKENS=3000
 | 
			
		||||
# Larger = more context per call | Smaller = faster responses
 | 
			
		||||
AI_MAX_PROMPT_TOKENS=1200
 | 
			
		||||
 | 
			
		||||
# Timeout for individual micro-tasks (milliseconds)
 | 
			
		||||
AI_MICRO_TASK_TIMEOUT_MS=25000
 | 
			
		||||
 | 
			
		||||
# Maximum size of the processing queue
 | 
			
		||||
AI_QUEUE_MAX_SIZE=50
 | 
			
		||||
 | 
			
		||||
# ============================================================================
 | 
			
		||||
# 6. AUTHENTICATION & AUTHORIZATION (OPTIONAL)
 | 
			
		||||
# ============================================================================
 | 
			
		||||
@ -183,15 +180,6 @@ FORENSIC_AUDIT_RETENTION_HOURS=24
 | 
			
		||||
# Maximum audit entries per request
 | 
			
		||||
FORENSIC_AUDIT_MAX_ENTRIES=50
 | 
			
		||||
 | 
			
		||||
# Enable detailed AI pipeline logging
 | 
			
		||||
AI_PIPELINE_DEBUG=false
 | 
			
		||||
 | 
			
		||||
# Enable performance metrics collection
 | 
			
		||||
AI_PERFORMANCE_METRICS=false
 | 
			
		||||
 | 
			
		||||
# Enable detailed micro-task debugging
 | 
			
		||||
AI_MICRO_TASK_DEBUG=false
 | 
			
		||||
 | 
			
		||||
# ============================================================================
 | 
			
		||||
# 10. QUALITY CONTROL & BIAS DETECTION (ADVANCED)
 | 
			
		||||
# ============================================================================
 | 
			
		||||
@ -207,37 +195,6 @@ CONFIDENCE_MINIMUM_THRESHOLD=40
 | 
			
		||||
CONFIDENCE_MEDIUM_THRESHOLD=60
 | 
			
		||||
CONFIDENCE_HIGH_THRESHOLD=80
 | 
			
		||||
 | 
			
		||||
# Bias detection settings
 | 
			
		||||
BIAS_DETECTION_ENABLED=false
 | 
			
		||||
BIAS_POPULARITY_THRESHOLD=0.7
 | 
			
		||||
BIAS_DIVERSITY_MINIMUM=0.6
 | 
			
		||||
BIAS_CELEBRITY_TOOLS=""
 | 
			
		||||
 | 
			
		||||
# Quality control thresholds
 | 
			
		||||
QUALITY_MIN_RESPONSE_LENGTH=50
 | 
			
		||||
QUALITY_MIN_SELECTION_COUNT=1
 | 
			
		||||
QUALITY_MAX_PROCESSING_TIME_MS=30000
 | 
			
		||||
 | 
			
		||||
# ============================================================================
 | 
			
		||||
# 11. USER INTERFACE DEFAULTS (OPTIONAL)
 | 
			
		||||
# ============================================================================
 | 
			
		||||
 | 
			
		||||
# Default UI behavior (users can override)
 | 
			
		||||
UI_SHOW_AUDIT_TRAIL_DEFAULT=false
 | 
			
		||||
UI_SHOW_CONFIDENCE_SCORES=true
 | 
			
		||||
UI_SHOW_BIAS_WARNINGS=true
 | 
			
		||||
UI_AUDIT_TRAIL_COLLAPSIBLE=true
 | 
			
		||||
 | 
			
		||||
# ============================================================================
 | 
			
		||||
# 12. CACHING & PERFORMANCE (OPTIONAL)
 | 
			
		||||
# ============================================================================
 | 
			
		||||
 | 
			
		||||
# Cache AI responses (milliseconds)
 | 
			
		||||
AI_RESPONSE_CACHE_TTL_MS=3600000
 | 
			
		||||
 | 
			
		||||
# Queue cleanup interval (milliseconds)
 | 
			
		||||
AI_QUEUE_CLEANUP_INTERVAL_MS=300000
 | 
			
		||||
 | 
			
		||||
# ============================================================================
 | 
			
		||||
# PERFORMANCE TUNING PRESETS
 | 
			
		||||
# ============================================================================
 | 
			
		||||
@ -245,21 +202,18 @@ AI_QUEUE_CLEANUP_INTERVAL_MS=300000
 | 
			
		||||
# 🚀 FOR FASTER RESPONSES (less comprehensive):
 | 
			
		||||
# AI_EMBEDDING_CANDIDATES=20
 | 
			
		||||
# AI_MAX_SELECTED_ITEMS=15  
 | 
			
		||||
# AI_MAX_TOOLS_TO_ANALYZE=10
 | 
			
		||||
# AI_MICRO_TASK_DELAY_MS=200
 | 
			
		||||
# AI_MAX_CONTEXT_TOKENS=2000
 | 
			
		||||
 | 
			
		||||
# 🎯 FOR BETTER QUALITY (more comprehensive):
 | 
			
		||||
# AI_EMBEDDING_CANDIDATES=60
 | 
			
		||||
# AI_MAX_SELECTED_ITEMS=40
 | 
			
		||||
# AI_MAX_TOOLS_TO_ANALYZE=30
 | 
			
		||||
# AI_MICRO_TASK_DELAY_MS=800
 | 
			
		||||
# AI_MAX_CONTEXT_TOKENS=4000
 | 
			
		||||
 | 
			
		||||
# 🔋 FOR LOW-POWER SYSTEMS (minimal resources):
 | 
			
		||||
# AI_EMBEDDING_CANDIDATES=15
 | 
			
		||||
# AI_MAX_SELECTED_ITEMS=10
 | 
			
		||||
# AI_MAX_TOOLS_TO_ANALYZE=8
 | 
			
		||||
# AI_RATE_LIMIT_MAX_REQUESTS=2
 | 
			
		||||
# AI_MICRO_TASK_DELAY_MS=1000
 | 
			
		||||
 | 
			
		||||
@ -285,7 +239,6 @@ AI_QUEUE_CLEANUP_INTERVAL_MS=300000
 | 
			
		||||
 | 
			
		||||
# 🔍 WITH FULL MONITORING:
 | 
			
		||||
# - Enable FORENSIC_AUDIT_ENABLED=true
 | 
			
		||||
# - Enable AI_PIPELINE_DEBUG=true
 | 
			
		||||
# - Configure audit retention and detail level
 | 
			
		||||
 | 
			
		||||
# ============================================================================
 | 
			
		||||
 | 
			
		||||
@ -1,126 +0,0 @@
 | 
			
		||||
// src/config/forensic.config.ts
 | 
			
		||||
// Centralized configuration for forensic RAG enhancements
 | 
			
		||||
 | 
			
		||||
/**
 * Forensic RAG settings, read from environment variables once when this
 * module is evaluated. An unset or empty variable falls back to the default
 * written next to it; numeric values are parsed with parseInt/parseFloat and
 * are not range-checked here.
 */
export const FORENSIC_CONFIG = (() => {
  const env = process.env;

  // Unset and empty-string values both select the fallback.
  const text = (key: string, fallback: string): string => env[key] || fallback;
  const int = (key: string, fallback: string): number => parseInt(text(key, fallback), 10);
  const float = (key: string, fallback: string): number => parseFloat(text(key, fallback));
  // Opt-in flags are true only for the exact string 'true'.
  const optIn = (key: string): boolean => env[key] === 'true';
  // Opt-out flags are true unless the value is the exact string 'false'.
  const optOut = (key: string): boolean => env[key] !== 'false';

  const audit = {
    enabled: optIn('FORENSIC_AUDIT_ENABLED'),
    detailLevel: text('FORENSIC_AUDIT_DETAIL_LEVEL', 'standard') as 'minimal' | 'standard' | 'verbose',
    retentionHours: int('FORENSIC_AUDIT_RETENTION_HOURS', '72'),
    maxEntriesPerRequest: int('FORENSIC_AUDIT_MAX_ENTRIES', '50')
  };

  const confidence = {
    embeddingsWeight: float('CONFIDENCE_EMBEDDINGS_WEIGHT', '0.3'),
    consensusWeight: float('CONFIDENCE_CONSENSUS_WEIGHT', '0.25'),
    domainMatchWeight: float('CONFIDENCE_DOMAIN_MATCH_WEIGHT', '0.25'),
    freshnessWeight: float('CONFIDENCE_FRESHNESS_WEIGHT', '0.2'),
    minimumThreshold: int('CONFIDENCE_MINIMUM_THRESHOLD', '40'),
    highThreshold: int('CONFIDENCE_HIGH_THRESHOLD', '80'),
    mediumThreshold: int('CONFIDENCE_MEDIUM_THRESHOLD', '60')
  };

  const bias = {
    enabled: optIn('BIAS_DETECTION_ENABLED'),
    popularityThreshold: float('BIAS_POPULARITY_THRESHOLD', '0.7'),
    diversityMinimum: float('BIAS_DIVERSITY_MINIMUM', '0.6'),
    domainMismatchThreshold: float('BIAS_DOMAIN_MISMATCH_THRESHOLD', '0.3'),
    warningThreshold: int('BIAS_WARNING_THRESHOLD', '3'),
    // Comma-separated list; each entry is trimmed.
    celebrityTools: text('BIAS_CELEBRITY_TOOLS', 'Volatility 3,Wireshark,Autopsy,Maltego')
      .split(',')
      .map(entry => entry.trim())
  };

  // Quality thresholds for various metrics
  const quality = {
    minResponseLength: int('QUALITY_MIN_RESPONSE_LENGTH', '50'),
    minSelectionCount: int('QUALITY_MIN_SELECTION_COUNT', '1'),
    maxProcessingTime: int('QUALITY_MAX_PROCESSING_TIME_MS', '30000')
  };

  // Display preferences
  const ui = {
    showAuditTrailByDefault: optIn('UI_SHOW_AUDIT_TRAIL_DEFAULT'),
    showConfidenceScores: optOut('UI_SHOW_CONFIDENCE_SCORES'),
    showBiasWarnings: optOut('UI_SHOW_BIAS_WARNINGS'),
    auditTrailCollapsible: optOut('UI_AUDIT_TRAIL_COLLAPSIBLE')
  };

  return { audit, confidence, bias, quality, ui };
})();
 | 
			
		||||
 | 
			
		||||
// Validation function to ensure configuration is valid
 | 
			
		||||
/**
 * Checks the numeric and enum settings in FORENSIC_CONFIG and collects a
 * human-readable message for every violated rule.
 *
 * Values that failed to parse (NaN) are treated as out of range. A plain
 * `<` / `>` comparison is always false for NaN, so such values would
 * otherwise slip through as valid.
 *
 * @returns `valid` is true when no rule was violated; `errors` lists one
 *          message per violated rule, in the order the rules are checked.
 */
export function validateForensicConfig(): { valid: boolean; errors: string[] } {
  const errors: string[] = [];
  const { audit, confidence, bias } = FORENSIC_CONFIG;

  // True only for finite numbers inside [min, max]; NaN and ±Infinity fail.
  const inRange = (value: number, min: number, max: number): boolean =>
    Number.isFinite(value) && value >= min && value <= max;

  // Validate audit configuration
  if (!inRange(audit.retentionHours, 1, 168)) {
    errors.push('FORENSIC_AUDIT_RETENTION_HOURS must be between 1 and 168 (1 week)');
  }

  if (!['minimal', 'standard', 'verbose'].includes(audit.detailLevel)) {
    errors.push('FORENSIC_AUDIT_DETAIL_LEVEL must be one of: minimal, standard, verbose');
  }

  // Validate confidence weights sum to approximately 1.0
  const weightSum =
    confidence.embeddingsWeight +
    confidence.consensusWeight +
    confidence.domainMatchWeight +
    confidence.freshnessWeight;

  // Written as a negated "within tolerance" test so a NaN sum is rejected.
  if (!(Math.abs(weightSum - 1.0) <= 0.05)) {
    errors.push(`Confidence weights must sum to 1.0 (currently ${weightSum.toFixed(3)})`);
  }

  // Validate threshold ranges
  if (!inRange(confidence.minimumThreshold, 0, 100)) {
    errors.push('CONFIDENCE_MINIMUM_THRESHOLD must be between 0 and 100');
  }

  // Negated form again: a NaN on either side counts as a violation.
  if (!(confidence.highThreshold > confidence.mediumThreshold)) {
    errors.push('CONFIDENCE_HIGH_THRESHOLD must be greater than CONFIDENCE_MEDIUM_THRESHOLD');
  }

  // Validate bias thresholds
  if (!inRange(bias.popularityThreshold, 0, 1)) {
    errors.push('BIAS_POPULARITY_THRESHOLD must be between 0 and 1');
  }

  if (!inRange(bias.diversityMinimum, 0, 1)) {
    errors.push('BIAS_DIVERSITY_MINIMUM must be between 0 and 1');
  }

  return {
    valid: errors.length === 0,
    errors
  };
}
 | 
			
		||||
 | 
			
		||||
// Helper functions for configuration access
 | 
			
		||||
/** Returns the `audit.enabled` flag stored in FORENSIC_CONFIG. */
export function isAuditEnabled(): boolean {
  const { enabled } = FORENSIC_CONFIG.audit;
  return enabled;
}
 | 
			
		||||
 | 
			
		||||
/** Returns the `audit.detailLevel` value stored in FORENSIC_CONFIG. */
export function getAuditDetailLevel(): 'minimal' | 'standard' | 'verbose' {
  const { detailLevel } = FORENSIC_CONFIG.audit;
  return detailLevel;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Returns the three confidence thresholds from FORENSIC_CONFIG under the
 * shorter keys `minimum`, `medium` and `high`.
 */
export function getConfidenceThresholds() {
  const {
    minimumThreshold: minimum,
    mediumThreshold: medium,
    highThreshold: high
  } = FORENSIC_CONFIG.confidence;

  return { minimum, medium, high };
}
 | 
			
		||||
 | 
			
		||||
/** Returns the `bias.enabled` flag stored in FORENSIC_CONFIG. */
export function isBiasDetectionEnabled(): boolean {
  const { enabled } = FORENSIC_CONFIG.bias;
  return enabled;
}
 | 
			
		||||
 | 
			
		||||
// Initialize and validate configuration on module load
 | 
			
		||||
// Runs once when the module is first evaluated: validate, warn on problems,
// then log a short summary of the active settings.
const configValidation = validateForensicConfig();

if (configValidation.valid === false) {
  const { errors } = configValidation;
  console.warn('[FORENSIC CONFIG] Configuration validation failed:', errors);

  // Only a development run is aborted; any other NODE_ENV continues after the warning.
  const runningInDevelopment = process.env.NODE_ENV === 'development';
  if (runningInDevelopment) {
    throw new Error(`Forensic configuration invalid: ${errors.join(', ')}`);
  }
}

{
  const { audit, bias } = FORENSIC_CONFIG;
  console.log('[FORENSIC CONFIG] Configuration loaded:', {
    auditEnabled: audit.enabled,
    confidenceEnabled: true, // Always enabled
    biasDetectionEnabled: bias.enabled,
    detailLevel: audit.detailLevel
  });
}
 | 
			
		||||
@ -78,17 +78,25 @@ class ImprovedMicroTaskAIPipeline {
 | 
			
		||||
  private similarityThreshold: number;
 | 
			
		||||
  private microTaskDelay: number;
 | 
			
		||||
  
 | 
			
		||||
  // NEW: Embedding selection limits (top N from pre-filtered candidates)
 | 
			
		||||
  private embeddingSelectionLimit: number;
 | 
			
		||||
  private embeddingConceptsLimit: number;
 | 
			
		||||
  
 | 
			
		||||
  // NEW: Embeddings efficiency thresholds
 | 
			
		||||
  private embeddingsMinTools: number;
 | 
			
		||||
  private embeddingsMaxReductionRatio: number;
 | 
			
		||||
  
 | 
			
		||||
  private maxContextTokens: number;
 | 
			
		||||
  private maxPromptTokens: number;
 | 
			
		||||
  
 | 
			
		||||
  // NEW: Audit Configuration
 | 
			
		||||
  // Audit Configuration
 | 
			
		||||
  private auditConfig: {
 | 
			
		||||
    enabled: boolean;
 | 
			
		||||
    detailLevel: 'minimal' | 'standard' | 'verbose';
 | 
			
		||||
    retentionHours: number;
 | 
			
		||||
  };
 | 
			
		||||
  
 | 
			
		||||
  // NEW: Temporary audit storage for pre-context operations
 | 
			
		||||
  // Temporary audit storage for pre-context operations
 | 
			
		||||
  private tempAuditEntries: AuditEntry[] = [];
 | 
			
		||||
 | 
			
		||||
  constructor() {
 | 
			
		||||
@ -98,20 +106,38 @@ class ImprovedMicroTaskAIPipeline {
 | 
			
		||||
      model: this.getEnv('AI_ANALYZER_MODEL')
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '60', 10);
 | 
			
		||||
    this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '60', 10); 
 | 
			
		||||
    this.similarityThreshold = 0.3; 
 | 
			
		||||
    // Core pipeline configuration
 | 
			
		||||
    this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '25', 10);
 | 
			
		||||
    this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '50', 10); 
 | 
			
		||||
    this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.3');
 | 
			
		||||
    this.microTaskDelay = parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10);
 | 
			
		||||
    
 | 
			
		||||
    // NEW: Embedding selection limits (top N from pre-filtered candidates)
 | 
			
		||||
    this.embeddingSelectionLimit = parseInt(process.env.AI_EMBEDDING_SELECTION_LIMIT || '30', 10);
 | 
			
		||||
    this.embeddingConceptsLimit = parseInt(process.env.AI_EMBEDDING_CONCEPTS_LIMIT || '15', 10);
 | 
			
		||||
    
 | 
			
		||||
    // NEW: Embeddings efficiency thresholds
 | 
			
		||||
    this.embeddingsMinTools = parseInt(process.env.AI_EMBEDDINGS_MIN_TOOLS || '8', 10);
 | 
			
		||||
    this.embeddingsMaxReductionRatio = parseFloat(process.env.AI_EMBEDDINGS_MAX_REDUCTION_RATIO || '0.75');
 | 
			
		||||
    
 | 
			
		||||
    // Context management
 | 
			
		||||
    this.maxContextTokens = parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10);
 | 
			
		||||
    this.maxPromptTokens = parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10);
 | 
			
		||||
    
 | 
			
		||||
    // NEW: Initialize Audit Configuration
 | 
			
		||||
    // Audit configuration
 | 
			
		||||
    this.auditConfig = {
 | 
			
		||||
      enabled: process.env.FORENSIC_AUDIT_ENABLED === 'true',
 | 
			
		||||
      detailLevel: (process.env.FORENSIC_AUDIT_DETAIL_LEVEL as any) || 'standard',
 | 
			
		||||
      retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10)
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    // Log configuration for debugging
 | 
			
		||||
    console.log('[AI PIPELINE] Configuration loaded:', {
 | 
			
		||||
      embeddingCandidates: this.embeddingCandidates,
 | 
			
		||||
      embeddingSelection: `${this.embeddingSelectionLimit} tools, ${this.embeddingConceptsLimit} concepts`,
 | 
			
		||||
      embeddingsThresholds: `min ${this.embeddingsMinTools} tools, max ${this.embeddingsMaxReductionRatio * 100}% of total`,
 | 
			
		||||
      auditEnabled: this.auditConfig.enabled
 | 
			
		||||
    });
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private getEnv(key: string): string {
 | 
			
		||||
@ -272,50 +298,49 @@ class ImprovedMicroTaskAIPipeline {
 | 
			
		||||
        userQuery, 
 | 
			
		||||
        this.embeddingCandidates, 
 | 
			
		||||
        this.similarityThreshold
 | 
			
		||||
      ) as SimilarityResult[]; // Type assertion for similarity property
 | 
			
		||||
      ) as SimilarityResult[];
 | 
			
		||||
      
 | 
			
		||||
      console.log(`[IMPROVED PIPELINE] Embeddings found ${similarItems.length} similar items`);
 | 
			
		||||
      console.log(`[AI PIPELINE] Embeddings found ${similarItems.length} similar items`);
 | 
			
		||||
      
 | 
			
		||||
      // FIXED: Create lookup maps for O(1) access while preserving original data
 | 
			
		||||
      // Create lookup maps for O(1) access while preserving original data
 | 
			
		||||
      const toolsMap = new Map<string, any>(toolsData.tools.map((tool: any) => [tool.name, tool]));
 | 
			
		||||
      const conceptsMap = new Map<string, any>(toolsData.concepts.map((concept: any) => [concept.name, concept]));
 | 
			
		||||
      
 | 
			
		||||
      // FIXED: Process in similarity order, preserving the ranking
 | 
			
		||||
      // Process in similarity order, preserving the ranking
 | 
			
		||||
      const similarTools = similarItems
 | 
			
		||||
        .filter((item): item is SimilarityResult => item.type === 'tool')
 | 
			
		||||
        .map(item => toolsMap.get(item.name))
 | 
			
		||||
        .filter((tool): tool is any => tool !== undefined); // Proper type guard
 | 
			
		||||
        .filter((tool): tool is any => tool !== undefined);
 | 
			
		||||
      
 | 
			
		||||
      const similarConcepts = similarItems
 | 
			
		||||
        .filter((item): item is SimilarityResult => item.type === 'concept')
 | 
			
		||||
        .map(item => conceptsMap.get(item.name))
 | 
			
		||||
        .filter((concept): concept is any => concept !== undefined); // Proper type guard
 | 
			
		||||
        .filter((concept): concept is any => concept !== undefined);
 | 
			
		||||
      
 | 
			
		||||
      console.log(`[IMPROVED PIPELINE] Similarity-ordered results: ${similarTools.length} tools, ${similarConcepts.length} concepts`);
 | 
			
		||||
      console.log(`[AI PIPELINE] Similarity-ordered results: ${similarTools.length} tools, ${similarConcepts.length} concepts`);
 | 
			
		||||
      
 | 
			
		||||
      // Log the first few tools to verify ordering is preserved
 | 
			
		||||
      if (similarTools.length > 0) {
 | 
			
		||||
        console.log(`[IMPROVED PIPELINE] Top similar tools (in similarity order):`);
 | 
			
		||||
        similarTools.slice(0, 5).forEach((tool, idx) => {
 | 
			
		||||
          const originalSimilarItem = similarItems.find(item => item.name === tool.name);
 | 
			
		||||
          console.log(`  ${idx + 1}. ${tool.name} (similarity: ${originalSimilarItem?.similarity?.toFixed(4) || 'N/A'})`);
 | 
			
		||||
        });
 | 
			
		||||
      }
 | 
			
		||||
      // FIXED: Better threshold logic - only use embeddings if we get meaningful filtering
 | 
			
		||||
      const totalAvailableTools = toolsData.tools.length;
 | 
			
		||||
      const reductionRatio = similarTools.length / totalAvailableTools;
 | 
			
		||||
      
 | 
			
		||||
      if (similarTools.length >= 15) {
 | 
			
		||||
      if (similarTools.length >= this.embeddingsMinTools && reductionRatio <= this.embeddingsMaxReductionRatio) {
 | 
			
		||||
        candidateTools = similarTools;
 | 
			
		||||
        candidateConcepts = similarConcepts;
 | 
			
		||||
        selectionMethod = 'embeddings_candidates';
 | 
			
		||||
        
 | 
			
		||||
        console.log(`[IMPROVED PIPELINE] Using embeddings candidates in similarity order: ${candidateTools.length} tools`);
 | 
			
		||||
        console.log(`[AI PIPELINE] Using embeddings filtering: ${totalAvailableTools} → ${similarTools.length} tools (${(reductionRatio * 100).toFixed(1)}% reduction)`);
 | 
			
		||||
      } else {
 | 
			
		||||
        console.log(`[IMPROVED PIPELINE] Embeddings insufficient (${similarTools.length} < 15), using full dataset`);
 | 
			
		||||
        if (similarTools.length < this.embeddingsMinTools) {
 | 
			
		||||
          console.log(`[AI PIPELINE] Embeddings found too few tools (${similarTools.length} < ${this.embeddingsMinTools}), using full dataset`);
 | 
			
		||||
        } else {
 | 
			
		||||
          console.log(`[AI PIPELINE] Embeddings didn't filter enough (${(reductionRatio * 100).toFixed(1)}% > ${(this.embeddingsMaxReductionRatio * 100).toFixed(1)}%), using full dataset`);
 | 
			
		||||
        }
 | 
			
		||||
        candidateTools = toolsData.tools;
 | 
			
		||||
        candidateConcepts = toolsData.concepts;
 | 
			
		||||
        selectionMethod = 'full_dataset';
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      // NEW: Add Audit Entry for Embeddings Search with ordering verification
 | 
			
		||||
      // Enhanced audit entry with reduction statistics
 | 
			
		||||
      if (this.auditConfig.enabled) {
 | 
			
		||||
        this.addAuditEntry(null, 'retrieval', 'embeddings-search', 
 | 
			
		||||
          { query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates }, 
 | 
			
		||||
@ -323,21 +348,29 @@ class ImprovedMicroTaskAIPipeline {
 | 
			
		||||
            candidatesFound: similarItems.length, 
 | 
			
		||||
            toolsInOrder: similarTools.slice(0, 3).map((t: any) => t.name),
 | 
			
		||||
            conceptsInOrder: similarConcepts.slice(0, 3).map((c: any) => c.name),
 | 
			
		||||
            orderingPreserved: true
 | 
			
		||||
            reductionRatio: reductionRatio,
 | 
			
		||||
            usingEmbeddings: selectionMethod === 'embeddings_candidates',
 | 
			
		||||
            totalAvailable: totalAvailableTools,
 | 
			
		||||
            filtered: similarTools.length
 | 
			
		||||
          },
 | 
			
		||||
          similarTools.length >= 15 ? 85 : 60,
 | 
			
		||||
          selectionMethod === 'embeddings_candidates' ? 85 : 60,
 | 
			
		||||
          embeddingsStart,
 | 
			
		||||
          { selectionMethod, embeddingsEnabled: true, orderingFixed: true }
 | 
			
		||||
          { 
 | 
			
		||||
            selectionMethod, 
 | 
			
		||||
            embeddingsEnabled: true, 
 | 
			
		||||
            reductionAchieved: selectionMethod === 'embeddings_candidates',
 | 
			
		||||
            tokenSavingsExpected: selectionMethod === 'embeddings_candidates'
 | 
			
		||||
          }
 | 
			
		||||
        );
 | 
			
		||||
      }
 | 
			
		||||
    } else {
 | 
			
		||||
      console.log(`[IMPROVED PIPELINE] Embeddings disabled, using full dataset`);
 | 
			
		||||
      console.log(`[AI PIPELINE] Embeddings disabled, using full dataset`);
 | 
			
		||||
      candidateTools = toolsData.tools;
 | 
			
		||||
      candidateConcepts = toolsData.concepts;
 | 
			
		||||
      selectionMethod = 'full_dataset';
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    console.log(`[IMPROVED PIPELINE] AI will analyze ${candidateTools.length} candidate tools (ordering preserved: ${selectionMethod === 'embeddings_candidates'})`);
 | 
			
		||||
    console.log(`[AI PIPELINE] AI will analyze ${candidateTools.length} candidate tools (method: ${selectionMethod})`);
 | 
			
		||||
    const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod);
 | 
			
		||||
    
 | 
			
		||||
    return {
 | 
			
		||||
@ -387,15 +420,37 @@ class ImprovedMicroTaskAIPipeline {
 | 
			
		||||
      related_software: concept.related_software || []
 | 
			
		||||
    }));
 | 
			
		||||
 | 
			
		||||
    // Generate the German prompt with tool data
 | 
			
		||||
    // CORRECTED LOGIC: 
 | 
			
		||||
    let toolsToSend: any[];
 | 
			
		||||
    let conceptsToSend: any[];
 | 
			
		||||
    
 | 
			
		||||
    if (selectionMethod === 'embeddings_candidates') {
 | 
			
		||||
      // WITH EMBEDDINGS: Take top N from pre-filtered candidates
 | 
			
		||||
      toolsToSend = toolsWithFullData.slice(0, this.embeddingSelectionLimit);
 | 
			
		||||
      conceptsToSend = conceptsWithFullData.slice(0, this.embeddingConceptsLimit);
 | 
			
		||||
      
 | 
			
		||||
      console.log(`[AI PIPELINE] Embeddings enabled: sending top ${toolsToSend.length} pre-filtered tools`);
 | 
			
		||||
    } else {
 | 
			
		||||
      // WITHOUT EMBEDDINGS: Send entire compressed database (original behavior)
 | 
			
		||||
      toolsToSend = toolsWithFullData; // ALL tools from database
 | 
			
		||||
      conceptsToSend = conceptsWithFullData; // ALL concepts from database
 | 
			
		||||
      
 | 
			
		||||
      console.log(`[AI PIPELINE] Embeddings disabled: sending entire database (${toolsToSend.length} tools, ${conceptsToSend.length} concepts)`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Generate the German prompt with appropriately selected tool data
 | 
			
		||||
    const basePrompt = getPrompt('toolSelection', mode, userQuery, selectionMethod, this.maxSelectedItems);
 | 
			
		||||
    const prompt = `${basePrompt}
 | 
			
		||||
 | 
			
		||||
VERFÜGBARE TOOLS (mit vollständigen Daten):
 | 
			
		||||
${JSON.stringify(toolsWithFullData.slice(0, 30), null, 2)}
 | 
			
		||||
${JSON.stringify(toolsToSend, null, 2)}
 | 
			
		||||
 | 
			
		||||
VERFÜGBARE KONZEPTE (mit vollständigen Daten):
 | 
			
		||||
${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
 | 
			
		||||
${JSON.stringify(conceptsToSend, null, 2)}`;
 | 
			
		||||
 | 
			
		||||
    // Log token usage for monitoring
 | 
			
		||||
    const estimatedTokens = this.estimateTokens(prompt);
 | 
			
		||||
    console.log(`[AI PIPELINE] Method: ${selectionMethod}, Tools: ${toolsToSend.length}, Tokens: ~${estimatedTokens}`);
 | 
			
		||||
 | 
			
		||||
    try {
 | 
			
		||||
      const response = await this.callAI(prompt, 2500);
 | 
			
		||||
@ -403,16 +458,15 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
 | 
			
		||||
      const result = this.safeParseJSON(response, null);
 | 
			
		||||
      
 | 
			
		||||
      if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
 | 
			
		||||
        console.error('[IMPROVED PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
 | 
			
		||||
        console.error('[AI PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
 | 
			
		||||
        
 | 
			
		||||
        // NEW: Add Audit Entry for Failed Selection
 | 
			
		||||
        if (this.auditConfig.enabled) {
 | 
			
		||||
          this.addAuditEntry(null, 'selection', 'ai-tool-selection-failed',
 | 
			
		||||
            { candidateCount: candidateTools.length, mode, prompt: prompt.slice(0, 200) },
 | 
			
		||||
            { error: 'Invalid JSON structure', response: response.slice(0, 200) },
 | 
			
		||||
            10, // Very low confidence
 | 
			
		||||
            10,
 | 
			
		||||
            selectionStart,
 | 
			
		||||
            { aiModel: this.config.model, selectionMethod }
 | 
			
		||||
            { aiModel: this.config.model, selectionMethod, tokensSent: estimatedTokens, toolsSent: toolsToSend.length }
 | 
			
		||||
          );
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
@ -421,19 +475,15 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
 | 
			
		||||
 | 
			
		||||
      const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
 | 
			
		||||
      if (totalSelected === 0) {
 | 
			
		||||
        console.error('[IMPROVED PIPELINE] AI selection returned no tools');
 | 
			
		||||
        console.error('[AI PIPELINE] AI selection returned no tools');
 | 
			
		||||
        throw new Error('AI selection returned empty selection');
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      console.log(`[IMPROVED PIPELINE] AI selected: ${result.selectedTools.length} tools, ${result.selectedConcepts.length} concepts`);
 | 
			
		||||
      console.log(`[IMPROVED PIPELINE] AI reasoning: ${result.reasoning}`);
 | 
			
		||||
      console.log(`[AI PIPELINE] AI selected: ${result.selectedTools.length} tools, ${result.selectedConcepts.length} concepts from ${toolsToSend.length} candidates`);
 | 
			
		||||
 | 
			
		||||
      const selectedTools = candidateTools.filter(tool => result.selectedTools.includes(tool.name));
 | 
			
		||||
      const selectedConcepts = candidateConcepts.filter(concept => result.selectedConcepts.includes(concept.name));
 | 
			
		||||
      
 | 
			
		||||
      console.log(`[IMPROVED PIPELINE] Final selection: ${selectedTools.length} tools with bias prevention applied`);
 | 
			
		||||
      
 | 
			
		||||
      // NEW: Add Audit Entry for Successful Selection
 | 
			
		||||
      if (this.auditConfig.enabled) {
 | 
			
		||||
        const confidence = this.calculateSelectionConfidence(result, candidateTools.length);
 | 
			
		||||
        
 | 
			
		||||
@ -443,11 +493,12 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
 | 
			
		||||
            selectedToolCount: result.selectedTools.length, 
 | 
			
		||||
            selectedConceptCount: result.selectedConcepts.length,
 | 
			
		||||
            reasoning: result.reasoning?.slice(0, 200) + '...',
 | 
			
		||||
            finalToolNames: selectedTools.map(t => t.name)
 | 
			
		||||
            finalToolNames: selectedTools.map(t => t.name),
 | 
			
		||||
            selectionEfficiency: `${toolsToSend.length} → ${result.selectedTools.length}`
 | 
			
		||||
          },
 | 
			
		||||
          confidence,
 | 
			
		||||
          selectionStart,
 | 
			
		||||
          { aiModel: this.config.model, selectionMethod, promptTokens: this.estimateTokens(prompt) }
 | 
			
		||||
          { aiModel: this.config.model, selectionMethod, promptTokens: estimatedTokens, toolsSent: toolsToSend.length }
 | 
			
		||||
        );
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
@ -457,69 +508,21 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.error('[IMPROVED PIPELINE] AI selection failed:', error);
 | 
			
		||||
      console.error('[AI PIPELINE] AI selection failed:', error);
 | 
			
		||||
      
 | 
			
		||||
      // NEW: Add Audit Entry for Selection Error
 | 
			
		||||
      if (this.auditConfig.enabled) {
 | 
			
		||||
        this.addAuditEntry(null, 'selection', 'ai-tool-selection-error',
 | 
			
		||||
          { candidateCount: candidateTools.length, mode },
 | 
			
		||||
          { error: error.message },
 | 
			
		||||
          5, // Very low confidence
 | 
			
		||||
          5,
 | 
			
		||||
          selectionStart,
 | 
			
		||||
          { aiModel: this.config.model, selectionMethod }
 | 
			
		||||
          { aiModel: this.config.model, selectionMethod, tokensSent: estimatedTokens }
 | 
			
		||||
        );
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      console.log('[IMPROVED PIPELINE] Using emergency keyword-based selection');
 | 
			
		||||
      return this.emergencyKeywordSelection(userQuery, candidateTools, candidateConcepts, mode);
 | 
			
		||||
      throw error;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Fallback selection that ranks candidate tools by plain keyword overlap
   * with the user query.
   *
   * Keywords are the whitespace-separated, lower-cased query words longer than
   * three characters. Each tool scores one point per keyword found in its
   * lower-cased name, description, tags, platforms and domains; tools with a
   * score of zero are dropped and the rest are ordered by descending score.
   *
   * @param userQuery Raw query text.
   * @param candidateTools Tools to rank.
   * @param candidateConcepts Concepts; the first three are returned unranked.
   * @param mode `'workflow'` keeps up to 20 tools, any other value up to 8.
   * @returns The selected tools and the first three candidate concepts.
   */
  private emergencyKeywordSelection(userQuery: string, candidateTools: any[], candidateConcepts: any[], mode: string) {
    const startedAt = Date.now();

    const keywords = userQuery
      .toLowerCase()
      .split(/\s+/)
      .filter(term => term.length > 3);

    const ranked: { tool: any; score: number }[] = [];
    for (const tool of candidateTools) {
      const tagText = (tool.tags || []).join(' ');
      const platformText = (tool.platforms || []).join(' ');
      const domainText = (tool.domains || []).join(' ');
      const haystack = (
        tool.name + ' ' + tool.description + ' ' + tagText + ' ' + platformText + ' ' + domainText
      ).toLowerCase();

      let score = 0;
      for (const keyword of keywords) {
        if (haystack.includes(keyword)) {
          score += 1;
        }
      }

      if (score > 0) {
        ranked.push({ tool, score });
      }
    }
    ranked.sort((left, right) => right.score - left.score);

    const limit = mode === 'workflow' ? 20 : 8;
    const selectedTools = ranked.slice(0, limit).map(entry => entry.tool);

    console.log(`[IMPROVED PIPELINE] Emergency selection: ${selectedTools.length} tools, keywords: ${keywords.slice(0, 5).join(', ')}`);

    // Record the fallback in the audit trail when auditing is switched on.
    if (this.auditConfig.enabled) {
      const auditInput = { keywords: keywords.slice(0, 10), candidateCount: candidateTools.length };
      const auditOutput = {
        selectedCount: selectedTools.length,
        topScores: ranked.slice(0, 5).map(entry => ({ name: entry.tool.name, score: entry.score }))
      };
      this.addAuditEntry(null, 'selection', 'emergency-keyword-selection',
        auditInput,
        auditOutput,
        40, // Moderate confidence for emergency selection
        startedAt,
        { selectionMethod: 'emergency_keyword' }
      );
    }

    return {
      selectedTools,
      selectedConcepts: candidateConcepts.slice(0, 3)
    };
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Resolves after roughly `ms` milliseconds, using `setTimeout`.
   *
   * @param ms Time to wait in milliseconds.
   */
  private async delay(ms: number): Promise<void> {
    await new Promise<void>(wake => {
      setTimeout(wake, ms);
    });
  }
 | 
			
		||||
@ -826,7 +829,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
 | 
			
		||||
    // NEW: Clear any previous temporary audit entries
 | 
			
		||||
    this.tempAuditEntries = [];
 | 
			
		||||
    
 | 
			
		||||
    console.log(`[IMPROVED PIPELINE] Starting ${mode} query processing with context continuity and audit trail`);
 | 
			
		||||
    console.log(`[AI PIPELINE] Starting ${mode} query processing with context continuity and audit trail`);
 | 
			
		||||
 | 
			
		||||
    try {
 | 
			
		||||
      // Stage 1: Get intelligent candidates (embeddings + AI selection)
 | 
			
		||||
@ -848,7 +851,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
 | 
			
		||||
      // NEW: Merge any temporary audit entries from pre-context operations
 | 
			
		||||
      this.mergeTemporaryAuditEntries(context);
 | 
			
		||||
 | 
			
		||||
      console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
 | 
			
		||||
      console.log(`[AI PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
 | 
			
		||||
 | 
			
		||||
      // NEW: Add initial audit entry
 | 
			
		||||
      this.addAuditEntry(context, 'initialization', 'pipeline-start',
 | 
			
		||||
@ -925,9 +928,9 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
 | 
			
		||||
        contextContinuityUsed: true
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
      console.log(`[IMPROVED PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
 | 
			
		||||
      console.log(`[IMPROVED PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);
 | 
			
		||||
      console.log(`[IMPROVED PIPELINE] Audit trail entries: ${context.auditTrail.length}`);
 | 
			
		||||
      console.log(`[AI PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
 | 
			
		||||
      console.log(`[AI PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);
 | 
			
		||||
      console.log(`[AI PIPELINE] Audit trail entries: ${context.auditTrail.length}`);
 | 
			
		||||
 | 
			
		||||
      return {
 | 
			
		||||
        recommendation: {
 | 
			
		||||
@ -939,7 +942,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.error('[IMPROVED PIPELINE] Processing failed:', error);
 | 
			
		||||
      console.error('[AI PIPELINE] Processing failed:', error);
 | 
			
		||||
      
 | 
			
		||||
      // NEW: Ensure temp audit entries are cleared even on error
 | 
			
		||||
      this.tempAuditEntries = [];
 | 
			
		||||
 | 
			
		||||
@ -77,33 +77,8 @@ interface EnhancedCompressedToolsData {
 | 
			
		||||
  domains: any[];
 | 
			
		||||
  phases: any[];
 | 
			
		||||
  'domain-agnostic-software': any[];
 | 
			
		||||
  scenarios?: any[]; // Optional for AI processing
 | 
			
		||||
  scenarios?: any[];
 | 
			
		||||
  skill_levels: any;
 | 
			
		||||
  // Enhanced context for micro-tasks
 | 
			
		||||
  domain_relationships: DomainRelationship[];
 | 
			
		||||
  phase_dependencies: PhaseDependency[];
 | 
			
		||||
  tool_compatibility_matrix: CompatibilityMatrix[];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/** Aggregate statistics describing the tools that belong to one domain. */
interface DomainRelationship {
  /** Identifier of the domain the statistics refer to. */
  domain_id: string;
  /** Number of tools that list this domain. */
  tool_count: number;
  /** Most frequent tags among the domain's tools, most common first. */
  common_tags: string[];
  /** Number of the domain's tools per skill level. */
  skill_distribution: Record<string, number>;
}
 | 
			
		||||
 | 
			
		||||
/** Position of one phase within the ordered list of phases. */
interface PhaseDependency {
  /** Identifier of the phase. */
  phase_id: string;
  /** 1-based position of the phase in the sequence. */
  order: number;
  /** Identifier of the preceding phase, or `null` when there is none. */
  depends_on: string | null;
  /** Identifier of the following phase, or `null` when there is none. */
  enables: string | null;
  /** Whether the phase is flagged as able to run in parallel. */
  is_parallel_capable: boolean;
  /** Coarse duration label such as `'hours-days'`. */
  typical_duration: string;
}
 | 
			
		||||
 | 
			
		||||
/** One named grouping of tool names by a shared characteristic. */
interface CompatibilityMatrix {
  /** Label of the grouping, e.g. `'platform_compatibility'` or `'phase_synergy'`. */
  type: string;
  /** Maps each group key to the names of the tools in that group. */
  groups: Record<string, string[]>;
}
 | 
			
		||||
 | 
			
		||||
let cachedData: ToolsData | null = null;
 | 
			
		||||
@ -146,104 +121,6 @@ function generateDataVersion(data: any): string {
 | 
			
		||||
  return Math.abs(hash).toString(36);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Builds one summary record per domain: how many tools list the domain, the
 * five most frequent tags among those tools, and a count of tools per skill
 * level.
 *
 * Counting is done on prototype-less objects so that a tag or skill level
 * whose name matches an `Object.prototype` member (for example `constructor`
 * or `toString`) is counted like any other key instead of colliding with the
 * inherited member.
 *
 * @param domains Domain records; only `id` is read.
 * @param tools Tool records; `domains`, `tags` and `skillLevel` are read.
 * @returns One `DomainRelationship` per entry of `domains`, in input order.
 */
function generateDomainRelationships(domains: any[], tools: any[]): DomainRelationship[] {
  // Counts occurrences per key on an object without a prototype chain.
  const tally = (keys: unknown[]): Record<string, number> => {
    const counts: Record<string, number> = Object.create(null);
    for (const key of keys) {
      const label = String(key);
      counts[label] = (counts[label] ?? 0) + 1;
    }
    return counts;
  };

  const relationships: DomainRelationship[] = [];

  for (const domain of domains) {
    const domainTools = tools.filter(tool =>
      tool.domains && tool.domains.includes(domain.id)
    );

    const tagCounts = tally(domainTools.flatMap(tool => tool.tags || []));
    const topTags = Object.entries(tagCounts)
      .sort(([, countA], [, countB]) => countB - countA)
      .slice(0, 5)
      .map(([tag]) => tag);

    relationships.push({
      domain_id: domain.id,
      tool_count: domainTools.length,
      common_tags: topTags,
      // Spread into a plain object so callers receive an ordinary record.
      skill_distribution: { ...tally(domainTools.map(tool => tool.skillLevel)) }
    });
  }

  return relationships;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Derives a linear dependency chain from the ordered list of phases.
 *
 * Each phase depends on its predecessor and enables its successor (`null` at
 * either end of the list). The phases `examination` and `analysis` are flagged
 * as parallel-capable, and a coarse duration label is attached per phase id.
 *
 * @param phases Ordered phase records; only `id` is read.
 * @returns One `PhaseDependency` per phase, in input order.
 */
function generatePhaseDependencies(phases: any[]): PhaseDependency[] {
  const durationFor = (phaseId: unknown): string => {
    switch (phaseId) {
      case 'examination':
        return 'hours-weeks';
      case 'analysis':
        return 'days-weeks';
      default:
        // Covers 'data-collection' as well as every unlisted phase.
        return 'hours-days';
    }
  };

  const result: PhaseDependency[] = [];

  for (const [index, phase] of phases.entries()) {
    const phaseId = phase.id;
    const runsInParallel = phaseId === 'examination' || phaseId === 'analysis';

    result.push({
      phase_id: phaseId,
      order: index + 1,
      depends_on: phases[index - 1]?.id || null,
      enables: phases[index + 1]?.id || null,
      is_parallel_capable: runsInParallel,
      typical_duration: durationFor(phaseId)
    });
  }

  return result;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Groups tool names by platform and by phase.
 *
 * The result always has two entries, in this order: `platform_compatibility`
 * (platform -> tool names) and `phase_synergy` (phase -> tool names).
 *
 * Grouping is done on prototype-less objects so that a platform or phase whose
 * name matches an `Object.prototype` member (for example `constructor`) gets
 * its own list instead of colliding with the inherited member. Tools whose
 * `platforms` / `phases` value is missing or not an array are skipped for that
 * grouping.
 *
 * @param tools Tool records; `name`, `platforms` and `phases` are read.
 * @returns The two groupings described above.
 */
function generateToolCompatibilityMatrix(tools: any[]): CompatibilityMatrix[] {
  // Collects tool names under every value of the given array-valued field.
  const groupNamesBy = (field: 'platforms' | 'phases'): Record<string, string[]> => {
    const groups: Record<string, string[]> = Object.create(null);
    for (const tool of tools) {
      const values: unknown = tool[field];
      if (!Array.isArray(values)) continue;
      for (const value of values) {
        const key = String(value);
        (groups[key] ??= []).push(tool.name);
      }
    }
    // Spread into a plain object so callers receive an ordinary record.
    return { ...groups };
  };

  return [
    { type: 'platform_compatibility', groups: groupNamesBy('platforms') },
    { type: 'phase_synergy', groups: groupNamesBy('phases') }
  ];
}
 | 
			
		||||
 | 
			
		||||
async function loadRawData(): Promise<ToolsData> {
 | 
			
		||||
  if (!cachedData) {
 | 
			
		||||
    const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
 | 
			
		||||
@ -337,27 +214,16 @@ export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedT
 | 
			
		||||
        };
 | 
			
		||||
      });
 | 
			
		||||
    
 | 
			
		||||
    // Enhanced: Add rich context data
 | 
			
		||||
    const domainRelationships = generateDomainRelationships(data.domains, compressedTools);
 | 
			
		||||
    const phaseDependencies = generatePhaseDependencies(data.phases);
 | 
			
		||||
    const toolCompatibilityMatrix = generateToolCompatibilityMatrix(compressedTools);
 | 
			
		||||
    
 | 
			
		||||
    cachedCompressedData = {
 | 
			
		||||
      tools: compressedTools,
 | 
			
		||||
      concepts: concepts,
 | 
			
		||||
      domains: data.domains,
 | 
			
		||||
      phases: data.phases,
 | 
			
		||||
      'domain-agnostic-software': data['domain-agnostic-software'],
 | 
			
		||||
      scenarios: data.scenarios, // Include scenarios for context
 | 
			
		||||
      scenarios: data.scenarios,
 | 
			
		||||
      skill_levels: data.skill_levels || {},
 | 
			
		||||
      // Enhanced context for micro-tasks
 | 
			
		||||
      domain_relationships: domainRelationships,
 | 
			
		||||
      phase_dependencies: phaseDependencies,
 | 
			
		||||
      tool_compatibility_matrix: toolCompatibilityMatrix
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    console.log(`[DATA SERVICE] Generated enhanced compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
 | 
			
		||||
    console.log(`[DATA SERVICE] Added context: ${domainRelationships.length} domain relationships, ${phaseDependencies.length} phase dependencies`);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  return cachedCompressedData;
 | 
			
		||||
 | 
			
		||||
@ -157,15 +157,6 @@ class RateLimitedQueue {
 | 
			
		||||
    return status;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Updates the queue's delay value.
   *
   * Values that are not finite or are negative are ignored and the current
   * delay is kept.
   *
   * @param ms New delay in milliseconds.
   */
  setDelay(ms: number): void {
    const isUsable = Number.isFinite(ms) && ms >= 0;
    if (isUsable) {
      this.delayMs = ms;
    }
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Reports the delay value currently stored on the queue.
   *
   * @returns The delay in milliseconds.
   */
  getDelay(): number {
    const { delayMs } = this;
    return delayMs;
  }
 | 
			
		||||
 | 
			
		||||
  private async processQueue(): Promise<void> {
 | 
			
		||||
    if (this.isProcessing) {
 | 
			
		||||
      return;
 | 
			
		||||
 | 
			
		||||
@ -1,8 +1,3 @@
 | 
			
		||||
/**
 | 
			
		||||
 * CONSOLIDATED Tool utility functions for consistent tool operations across the app
 | 
			
		||||
 * Works in both server (Node.js) and client (browser) environments
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
export interface Tool {
 | 
			
		||||
  name: string;
 | 
			
		||||
  type?: 'software' | 'method' | 'concept';
 | 
			
		||||
@ -18,10 +13,6 @@ export interface Tool {
 | 
			
		||||
  related_concepts?: string[];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Creates a URL-safe slug from a tool name
 | 
			
		||||
 * Used for URLs, IDs, and file names consistently across the app
 | 
			
		||||
 */
 | 
			
		||||
export function createToolSlug(toolName: string): string {
 | 
			
		||||
  if (!toolName || typeof toolName !== 'string') {
 | 
			
		||||
    console.warn('[toolHelpers] Invalid toolName provided to createToolSlug:', toolName);
 | 
			
		||||
@ -35,9 +26,6 @@ export function createToolSlug(toolName: string): string {
 | 
			
		||||
    .replace(/^-|-$/g, '');           // Remove leading/trailing hyphens
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * Finds a tool by name or slug from tools array
 | 
			
		||||
 */
 | 
			
		||||
export function findToolByIdentifier(tools: Tool[], identifier: string): Tool | undefined {
 | 
			
		||||
  if (!identifier || !Array.isArray(tools)) return undefined;
 | 
			
		||||
  
 | 
			
		||||
@ -47,23 +35,9 @@ export function findToolByIdentifier(tools: Tool[], identifier: string): Tool |
 | 
			
		||||
  );
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Checks whether a tool has a usable project URL.
 *
 * A URL counts as usable when it is a string that is not empty after trimming
 * whitespace. Missing, `null` and blank values yield `false`. Values that are
 * not strings (which can occur when the record comes from loosely typed parsed
 * data) also yield `false` rather than throwing on `.trim()`.
 *
 * @param tool Tool record to inspect.
 * @returns `true` when `tool.projectUrl` is a non-blank string.
 */
export function isToolHosted(tool: Tool): boolean {
  const url: unknown = tool.projectUrl;
  return typeof url === 'string' && url.trim() !== '';
}
 | 
			
		||||
 | 
			
		||||
/**
 * Classifies a tool into one of five categories.
 *
 * Precedence: the explicit `concept` and `method` types win, then hosted tools
 * (as decided by `isToolHosted`), then any tool with a license other than
 * `'Proprietary'` is `oss`; everything else is `proprietary`.
 *
 * @param tool Tool record to classify.
 * @returns The category label.
 */
export function getToolCategory(tool: Tool): 'concept' | 'method' | 'hosted' | 'oss' | 'proprietary' {
  switch (tool.type) {
    case 'concept':
      return 'concept';
    case 'method':
      return 'method';
    default:
      break;
  }

  if (isToolHosted(tool)) {
    return 'hosted';
  }

  const hasOpenLicense = Boolean(tool.license) && tool.license !== 'Proprietary';
  return hasOpenLicense ? 'oss' : 'proprietary';
}
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user