fine-tuning of confidence
This commit is contained in:
		
							parent
							
								
									183e36b86d
								
							
						
					
					
						commit
						27e64f05ca
					
				
							
								
								
									
										13
									
								
								.env.example
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								.env.example
									
									
									
									
									
								
							@ -190,23 +190,20 @@ FORENSIC_AUDIT_RETENTION_HOURS=24
 | 
				
			|||||||
FORENSIC_AUDIT_MAX_ENTRIES=50
 | 
					FORENSIC_AUDIT_MAX_ENTRIES=50
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# ============================================================================
 | 
					# ============================================================================
 | 
				
			||||||
# 10. CONFIDENCE SCORING SYSTEM (Enhancement 2)
 | 
					# 10. ENHANCED CONFIDENCE SCORING SYSTEM
 | 
				
			||||||
# ============================================================================
 | 
					# ============================================================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Confidence component weights (must sum to 1.0)
 | 
					# Confidence component weights (must sum to 1.0)
 | 
				
			||||||
CONFIDENCE_EMBEDDINGS_WEIGHT=0.3      # Weight for vector similarity quality
 | 
					CONFIDENCE_SEMANTIC_WEIGHT=0.25        # Weight for vector similarity quality  
 | 
				
			||||||
CONFIDENCE_CONSENSUS_WEIGHT=0.25      # Weight for micro-task agreement  
 | 
					CONFIDENCE_SUITABILITY_WEIGHT=0.4      # Weight for AI-determined task fitness
 | 
				
			||||||
CONFIDENCE_DOMAIN_MATCH_WEIGHT=0.25   # Weight for domain alignment
 | 
					CONFIDENCE_CONSISTENCY_WEIGHT=0.2      # Weight for cross-validation agreement  
 | 
				
			||||||
CONFIDENCE_FRESHNESS_WEIGHT=0.2       # Weight for tool freshness/maintenance
 | 
					CONFIDENCE_RELIABILITY_WEIGHT=0.15     # Weight for tool quality indicators
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Confidence thresholds (0-100)
 | 
					# Confidence thresholds (0-100)
 | 
				
			||||||
CONFIDENCE_MINIMUM_THRESHOLD=40        # Below this = weak recommendation
 | 
					CONFIDENCE_MINIMUM_THRESHOLD=40        # Below this = weak recommendation
 | 
				
			||||||
CONFIDENCE_MEDIUM_THRESHOLD=60         # 40-59 = weak, 60-79 = moderate  
 | 
					CONFIDENCE_MEDIUM_THRESHOLD=60         # 40-59 = weak, 60-79 = moderate  
 | 
				
			||||||
CONFIDENCE_HIGH_THRESHOLD=80           # 80+ = strong recommendation
 | 
					CONFIDENCE_HIGH_THRESHOLD=80           # 80+ = strong recommendation
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Domain keywords for confidence scoring (domain:keyword1,keyword2|domain:keyword3,keyword4)
 | 
					 | 
				
			||||||
CONFIDENCE_DOMAIN_KEYWORDS="incident-response:incident,breach,attack,compromise,response|malware-analysis:malware,virus,trojan,reverse,analysis|network-forensics:network,traffic,packet,pcap,wireshark|mobile-forensics:mobile,android,ios,phone,app|cloud-forensics:cloud,aws,azure,saas,paas"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# ============================================================================
 | 
					# ============================================================================
 | 
				
			||||||
# PERFORMANCE TUNING PRESETS
 | 
					# PERFORMANCE TUNING PRESETS
 | 
				
			||||||
# ============================================================================
 | 
					# ============================================================================
 | 
				
			||||||
 | 
				
			|||||||
@ -785,41 +785,41 @@ class AIQueryInterface {
 | 
				
			|||||||
          <div style="display: grid; grid-template-columns: 1fr; gap: 0.625rem; margin-bottom: 0.75rem;">
 | 
					          <div style="display: grid; grid-template-columns: 1fr; gap: 0.625rem; margin-bottom: 0.75rem;">
 | 
				
			||||||
            <div style="background: var(--color-bg-secondary); padding: 0.5rem; border-radius: 0.375rem; border-left: 3px solid var(--color-accent);">
 | 
					            <div style="background: var(--color-bg-secondary); padding: 0.5rem; border-radius: 0.375rem; border-left: 3px solid var(--color-accent);">
 | 
				
			||||||
              <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.25rem;">
 | 
					              <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.25rem;">
 | 
				
			||||||
                <span style="font-weight: 600; font-size: 0.6875rem;">🔍 Ähnlichkeit zur Anfrage</span>
 | 
					                <span style="font-weight: 600; font-size: 0.6875rem;">🔍 Semantische Relevanz</span>
 | 
				
			||||||
                <strong style="color: var(--color-accent);">${confidence.embeddingsQuality}%</strong>
 | 
					                <strong style="color: var(--color-accent);">${confidence.semanticRelevance}%</strong>
 | 
				
			||||||
              </div>
 | 
					              </div>
 | 
				
			||||||
              <div style="font-size: 0.625rem; color: var(--color-text-secondary); line-height: 1.3;">
 | 
					              <div style="font-size: 0.625rem; color: var(--color-text-secondary); line-height: 1.3;">
 | 
				
			||||||
                Wie gut die Tool-Beschreibung zu Ihrer Suchanfrage passt (basierend auf Vektor-Ähnlichkeit)
 | 
					                Wie gut die Tool-Beschreibung semantisch zu Ihrer Anfrage passt (basierend auf Vektor-Ähnlichkeit)
 | 
				
			||||||
              </div>
 | 
					              </div>
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            <div style="background: var(--color-bg-secondary); padding: 0.5rem; border-radius: 0.375rem; border-left: 3px solid var(--color-primary);">
 | 
					            <div style="background: var(--color-bg-secondary); padding: 0.5rem; border-radius: 0.375rem; border-left: 3px solid var(--color-primary);">
 | 
				
			||||||
              <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.25rem;">
 | 
					              <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.25rem;">
 | 
				
			||||||
                <span style="font-weight: 600; font-size: 0.6875rem;">🎯 Domain-Passung</span>
 | 
					                <span style="font-weight: 600; font-size: 0.6875rem;">🎯 Aufgaben-Eignung</span>
 | 
				
			||||||
                <strong style="color: var(--color-primary);">${confidence.domainAlignment}%</strong>
 | 
					                <strong style="color: var(--color-primary);">${confidence.taskSuitability}%</strong>
 | 
				
			||||||
              </div>
 | 
					              </div>
 | 
				
			||||||
              <div style="font-size: 0.625rem; color: var(--color-text-secondary); line-height: 1.3;">
 | 
					              <div style="font-size: 0.625rem; color: var(--color-text-secondary); line-height: 1.3;">
 | 
				
			||||||
                Wie gut das Tool-Einsatzgebiet zu Ihrem forensischen Szenario passt
 | 
					                KI-bewertete Eignung des Tools für Ihre spezifische forensische Aufgabenstellung
 | 
				
			||||||
              </div>
 | 
					              </div>
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            <div style="background: var(--color-bg-secondary); padding: 0.5rem; border-radius: 0.375rem; border-left: 3px solid var(--color-warning);">
 | 
					            <div style="background: var(--color-bg-secondary); padding: 0.5rem; border-radius: 0.375rem; border-left: 3px solid var(--color-warning);">
 | 
				
			||||||
              <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.25rem;">
 | 
					              <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.25rem;">
 | 
				
			||||||
                <span style="font-weight: 600; font-size: 0.6875rem;">🤝 KI-Konsens</span>
 | 
					                <span style="font-weight: 600; font-size: 0.6875rem;">🤝 Methodische Konsistenz</span>
 | 
				
			||||||
                <strong style="color: var(--color-warning);">${confidence.consensus}%</strong>
 | 
					                <strong style="color: var(--color-warning);">${confidence.methodologicalConsistency}%</strong>
 | 
				
			||||||
              </div>
 | 
					              </div>
 | 
				
			||||||
              <div style="font-size: 0.625rem; color: var(--color-text-secondary); line-height: 1.3;">
 | 
					              <div style="font-size: 0.625rem; color: var(--color-text-secondary); line-height: 1.3;">
 | 
				
			||||||
                Wie einig sich die verschiedenen KI-Analyseschritte über dieses Tool sind
 | 
					                Wie einheitlich verschiedene Analyseschritte dieses Tool bewerten (Kreuzvalidierung)
 | 
				
			||||||
              </div>
 | 
					              </div>
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
            
 | 
					            
 | 
				
			||||||
            <div style="background: var(--color-bg-secondary); padding: 0.5rem; border-radius: 0.375rem; border-left: 3px solid var(--color-text-secondary);">
 | 
					            <div style="background: var(--color-bg-secondary); padding: 0.5rem; border-radius: 0.375rem; border-left: 3px solid var(--color-text-secondary);">
 | 
				
			||||||
              <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.25rem;">
 | 
					              <div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 0.25rem;">
 | 
				
			||||||
                <span style="font-weight: 600; font-size: 0.6875rem;">🔄 Aktualität</span>
 | 
					                <span style="font-weight: 600; font-size: 0.6875rem;">🔧 Tool-Zuverlässigkeit</span>
 | 
				
			||||||
                <strong style="color: var(--color-text);">${confidence.freshness}%</strong>
 | 
					                <strong style="color: var(--color-text);">${confidence.toolReliability}%</strong>  
 | 
				
			||||||
              </div>
 | 
					              </div>
 | 
				
			||||||
              <div style="font-size: 0.625rem; color: var(--color-text-secondary); line-height: 1.3;">
 | 
					              <div style="font-size: 0.625rem; color: var(--color-text-secondary); line-height: 1.3;">
 | 
				
			||||||
                Wie aktuell und gut gepflegt das Tool ist (basierend auf Hosting-Status, Knowledge Base, Open Source)
 | 
					                Qualitätsindikatoren: Dokumentation, Wartung, Verfügbarkeit und Benutzerfreundlichkeit
 | 
				
			||||||
              </div>
 | 
					              </div>
 | 
				
			||||||
            </div>
 | 
					            </div>
 | 
				
			||||||
          </div>
 | 
					          </div>
 | 
				
			||||||
@ -827,7 +827,7 @@ class AIQueryInterface {
 | 
				
			|||||||
          ${confidence.strengthIndicators && confidence.strengthIndicators.length > 0 ? `
 | 
					          ${confidence.strengthIndicators && confidence.strengthIndicators.length > 0 ? `
 | 
				
			||||||
            <div style="margin-bottom: 0.75rem; padding: 0.5rem; background: var(--color-oss-bg); border-radius: 0.375rem; border-left: 3px solid var(--color-accent);">
 | 
					            <div style="margin-bottom: 0.75rem; padding: 0.5rem; background: var(--color-oss-bg); border-radius: 0.375rem; border-left: 3px solid var(--color-accent);">
 | 
				
			||||||
              <strong style="color: var(--color-accent); font-size: 0.6875rem; display: flex; align-items: center; gap: 0.25rem; margin-bottom: 0.375rem;">
 | 
					              <strong style="color: var(--color-accent); font-size: 0.6875rem; display: flex; align-items: center; gap: 0.25rem; margin-bottom: 0.375rem;">
 | 
				
			||||||
                <span>✓</span> Was für dieses Tool spricht:
 | 
					                <span>✓</span> Stärken dieser Empfehlung:
 | 
				
			||||||
              </strong>
 | 
					              </strong>
 | 
				
			||||||
              <ul style="margin: 0; padding-left: 1rem; font-size: 0.625rem; line-height: 1.4;">
 | 
					              <ul style="margin: 0; padding-left: 1rem; font-size: 0.625rem; line-height: 1.4;">
 | 
				
			||||||
                ${confidence.strengthIndicators.slice(0, 3).map(s => `<li style="margin-bottom: 0.25rem;">${this.sanitizeText(s)}</li>`).join('')}
 | 
					                ${confidence.strengthIndicators.slice(0, 3).map(s => `<li style="margin-bottom: 0.25rem;">${this.sanitizeText(s)}</li>`).join('')}
 | 
				
			||||||
@ -838,7 +838,7 @@ class AIQueryInterface {
 | 
				
			|||||||
          ${confidence.uncertaintyFactors && confidence.uncertaintyFactors.length > 0 ? `
 | 
					          ${confidence.uncertaintyFactors && confidence.uncertaintyFactors.length > 0 ? `
 | 
				
			||||||
            <div style="padding: 0.5rem; background: var(--color-hosted-bg); border-radius: 0.375rem; border-left: 3px solid var(--color-warning);">
 | 
					            <div style="padding: 0.5rem; background: var(--color-hosted-bg); border-radius: 0.375rem; border-left: 3px solid var(--color-warning);">
 | 
				
			||||||
              <strong style="color: var(--color-warning); font-size: 0.6875rem; display: flex; align-items: center; gap: 0.25rem; margin-bottom: 0.375rem;">
 | 
					              <strong style="color: var(--color-warning); font-size: 0.6875rem; display: flex; align-items: center; gap: 0.25rem; margin-bottom: 0.375rem;">
 | 
				
			||||||
                <span>⚠</span> Unsicherheitsfaktoren:
 | 
					                <span>⚠</span> Mögliche Einschränkungen:
 | 
				
			||||||
              </strong>
 | 
					              </strong>
 | 
				
			||||||
              <ul style="margin: 0; padding-left: 1rem; font-size: 0.625rem; line-height: 1.4;">
 | 
					              <ul style="margin: 0; padding-left: 1rem; font-size: 0.625rem; line-height: 1.4;">
 | 
				
			||||||
                ${confidence.uncertaintyFactors.slice(0, 3).map(f => `<li style="margin-bottom: 0.25rem;">${this.sanitizeText(f)}</li>`).join('')}
 | 
					                ${confidence.uncertaintyFactors.slice(0, 3).map(f => `<li style="margin-bottom: 0.25rem;">${this.sanitizeText(f)}</li>`).join('')}
 | 
				
			||||||
@ -847,7 +847,7 @@ class AIQueryInterface {
 | 
				
			|||||||
          ` : ''}
 | 
					          ` : ''}
 | 
				
			||||||
          
 | 
					          
 | 
				
			||||||
          <div style="margin-top: 0.75rem; padding-top: 0.75rem; border-top: 1px solid var(--color-border); font-size: 0.625rem; color: var(--color-text-secondary); text-align: center;">
 | 
					          <div style="margin-top: 0.75rem; padding-top: 0.75rem; border-top: 1px solid var(--color-border); font-size: 0.625rem; color: var(--color-text-secondary); text-align: center;">
 | 
				
			||||||
            Vertrauensscore basiert auf KI-Analyse • Forensisch validiert
 | 
					            Mehrstufige KI-Analyse mit Kreuzvalidierung
 | 
				
			||||||
          </div>
 | 
					          </div>
 | 
				
			||||||
        </div>
 | 
					        </div>
 | 
				
			||||||
      </span>
 | 
					      </span>
 | 
				
			||||||
 | 
				
			|||||||
@ -147,7 +147,7 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  // Tool evaluation prompt
 | 
					  // Tool evaluation prompt
 | 
				
			||||||
  toolEvaluation: (userQuery: string, tool: any, rank: number) => {
 | 
					  toolEvaluation: (userQuery: string, tool: any, rank: number) => {
 | 
				
			||||||
    return `Bewerten Sie diese Methode/Tool fallbezogen für das spezifische Problem nach forensischen Qualitätskriterien.
 | 
					  return `Sie sind ein DFIR-Experte und bewerten ein forensisches Tool für eine spezifische Aufgabe.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PROBLEM: "${userQuery}"
 | 
					PROBLEM: "${userQuery}"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -155,16 +155,26 @@ TOOL: ${tool.name}
 | 
				
			|||||||
BESCHREIBUNG: ${tool.description}
 | 
					BESCHREIBUNG: ${tool.description}
 | 
				
			||||||
PLATTFORMEN: ${tool.platforms?.join(', ') || 'N/A'}
 | 
					PLATTFORMEN: ${tool.platforms?.join(', ') || 'N/A'}
 | 
				
			||||||
SKILL LEVEL: ${tool.skillLevel}
 | 
					SKILL LEVEL: ${tool.skillLevel}
 | 
				
			||||||
 | 
					DOMAINS: ${tool.domains?.join(', ') || 'N/A'}
 | 
				
			||||||
 | 
					TAGS: ${tool.tags?.join(', ') || 'N/A'}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
 | 
					Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
  "suitability_score": "high|medium|low",
 | 
					  "suitability_score": "high|medium|low",
 | 
				
			||||||
 | 
					  "task_relevance": 85,
 | 
				
			||||||
  "detailed_explanation": "Detaillierte forensische Begründung warum diese Methode/Tool das Problem löst",
 | 
					  "detailed_explanation": "Detaillierte forensische Begründung warum diese Methode/Tool das Problem löst",
 | 
				
			||||||
  "implementation_approach": "Konkrete methodische Schritte zur korrekten Anwendung für dieses spezifische Problem",
 | 
					  "implementation_approach": "Konkrete methodische Schritte zur korrekten Anwendung für dieses spezifische Problem",
 | 
				
			||||||
  "pros": ["Forensischer Vorteil 1", "Validierter Vorteil 2"],
 | 
					  "pros": ["Forensischer Vorteil 1", "Validierter Vorteil 2"],
 | 
				
			||||||
  "cons": ["Methodische Limitation 1", "Potenzielle Schwäche 2"],
 | 
					  "cons": ["Methodische Limitation 1", "Potenzielle Schwäche 2"],
 | 
				
			||||||
 | 
					  "limitations": ["Spezifische Einschränkung 1", "Mögliche Problematik 2"],
 | 
				
			||||||
  "alternatives": "Alternative Ansätze falls diese Methode/Tool nicht optimal ist"
 | 
					  "alternatives": "Alternative Ansätze falls diese Methode/Tool nicht optimal ist"
 | 
				
			||||||
}`;
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					WICHTIG:
 | 
				
			||||||
 | 
					- task_relevance: Numerischer Wert 0-100 wie gut das Tool für DIESE SPEZIFISCHE Aufgabe geeignet ist
 | 
				
			||||||
 | 
					- limitations: Konkrete Einschränkungen oder Situationen wo das Tool NICHT optimal wäre
 | 
				
			||||||
 | 
					- Berücksichtigen Sie den Skill Level vs. Anfrage-Komplexität
 | 
				
			||||||
 | 
					- Bewerten Sie objektiv, nicht beschönigend`;
 | 
				
			||||||
  },
 | 
					  },
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Background knowledge selection prompt
 | 
					  // Background knowledge selection prompt
 | 
				
			||||||
@ -191,7 +201,7 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
 | 
				
			|||||||
  // Final recommendations prompt
 | 
					  // Final recommendations prompt
 | 
				
			||||||
  finalRecommendations: (isWorkflow: boolean, userQuery: string, selectedToolNames: string[]) => {
 | 
					  finalRecommendations: (isWorkflow: boolean, userQuery: string, selectedToolNames: string[]) => {
 | 
				
			||||||
    const prompt = isWorkflow ? 
 | 
					    const prompt = isWorkflow ? 
 | 
				
			||||||
      `Erstellen Sie eine forensisch fundierte Workflow-Empfehlung basierend auf DFIR-Prinzipien.
 | 
					      `Erstellen Sie eine Workflow-Empfehlung basierend auf DFIR-Prinzipien.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SZENARIO: "${userQuery}"
 | 
					SZENARIO: "${userQuery}"
 | 
				
			||||||
AUSGEWÄHLTE TOOLS: ${selectedToolNames.join(', ') || 'Keine Tools ausgewählt'}
 | 
					AUSGEWÄHLTE TOOLS: ${selectedToolNames.join(', ') || 'Keine Tools ausgewählt'}
 | 
				
			||||||
 | 
				
			|||||||
@ -1,7 +1,7 @@
 | 
				
			|||||||
// src/utils/aiPipeline.ts - Enhanced with Audit Trail System
 | 
					// src/utils/aiPipeline.ts - Enhanced with Proper Confidence Scoring
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import { getCompressedToolsDataForAI } from './dataService.js';
 | 
					import { getCompressedToolsDataForAI } from './dataService.js';
 | 
				
			||||||
import { embeddingsService, type EmbeddingData } from './embeddings.js';
 | 
					import { embeddingsService, type EmbeddingData, type SimilarityResult } from './embeddings.js';
 | 
				
			||||||
import { AI_PROMPTS, getPrompt } from '../config/prompts.js';
 | 
					import { AI_PROMPTS, getPrompt } from '../config/prompts.js';
 | 
				
			||||||
import { isToolHosted } from './toolHelpers.js';
 | 
					import { isToolHosted } from './toolHelpers.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -34,11 +34,11 @@ interface AnalysisResult {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
interface AuditEntry {
 | 
					interface AuditEntry {
 | 
				
			||||||
  timestamp: number;
 | 
					  timestamp: number;
 | 
				
			||||||
  phase: string;           // 'retrieval', 'selection', 'micro-task-N'
 | 
					  phase: string;           
 | 
				
			||||||
  action: string;          // 'embeddings-search', 'ai-selection', 'tool-evaluation'
 | 
					  action: string;          
 | 
				
			||||||
  input: any;              // What went into this step
 | 
					  input: any;              
 | 
				
			||||||
  output: any;             // What came out of this step
 | 
					  output: any;             
 | 
				
			||||||
  confidence: number;      // 0-100: How confident we are in this step
 | 
					  confidence: number;      
 | 
				
			||||||
  processingTimeMs: number;
 | 
					  processingTimeMs: number;
 | 
				
			||||||
  metadata: Record<string, any>;
 | 
					  metadata: Record<string, any>;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@ -56,29 +56,27 @@ interface AnalysisContext {
 | 
				
			|||||||
  problemAnalysis?: string;
 | 
					  problemAnalysis?: string;
 | 
				
			||||||
  investigationApproach?: string;
 | 
					  investigationApproach?: string;
 | 
				
			||||||
  criticalConsiderations?: string;
 | 
					  criticalConsiderations?: string;
 | 
				
			||||||
  selectedTools?: Array<{tool: any, phase: string, priority: string, justification?: string}>;
 | 
					  selectedTools?: Array<{tool: any, phase: string, priority: string, justification?: string, taskRelevance?: number, limitations?: string[]}>;
 | 
				
			||||||
  backgroundKnowledge?: Array<{concept: any, relevance: string}>;
 | 
					  backgroundKnowledge?: Array<{concept: any, relevance: string}>;
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  seenToolNames: Set<string>;
 | 
					  seenToolNames: Set<string>;
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  auditTrail: AuditEntry[];
 | 
					  auditTrail: AuditEntry[];
 | 
				
			||||||
}
 | 
					  
 | 
				
			||||||
 | 
					  // Store actual similarity data from embeddings
 | 
				
			||||||
interface SimilarityResult extends EmbeddingData {
 | 
					  embeddingsSimilarities: Map<string, number>;
 | 
				
			||||||
  similarity: number;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
interface ConfidenceMetrics {
 | 
					interface ConfidenceMetrics {
 | 
				
			||||||
  overall: number;           // 0-100: Combined confidence score
 | 
					  overall: number;                    // 0-100: Combined confidence score
 | 
				
			||||||
  embeddingsQuality: number; // How well embeddings matched
 | 
					  semanticRelevance: number;          // How well tool description matches query (from embeddings)
 | 
				
			||||||
  domainAlignment: number;   // How well tools match scenario domain
 | 
					  taskSuitability: number;           // AI-determined fitness for this specific task  
 | 
				
			||||||
  consensus: number;         // How much micro-tasks agree
 | 
					  methodologicalConsistency: number; // How well different analysis steps agree
 | 
				
			||||||
  freshness: number;         // How recent/up-to-date the selection is
 | 
					  toolReliability: number;           // Indicators of tool quality and maintenance
 | 
				
			||||||
  uncertaintyFactors: string[]; // What could make this wrong
 | 
					  uncertaintyFactors: string[];      // Specific reasons why this might not work
 | 
				
			||||||
  strengthIndicators: string[]; // What makes this recommendation strong
 | 
					  strengthIndicators: string[];      // Specific reasons why this is a good choice
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
class ImprovedMicroTaskAIPipeline {
 | 
					class ImprovedMicroTaskAIPipeline {
 | 
				
			||||||
  private config: AIConfig;
 | 
					  private config: AIConfig;
 | 
				
			||||||
  private maxSelectedItems: number;
 | 
					  private maxSelectedItems: number;
 | 
				
			||||||
@ -105,10 +103,10 @@ class ImprovedMicroTaskAIPipeline {
 | 
				
			|||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  private confidenceConfig: {
 | 
					  private confidenceConfig: {
 | 
				
			||||||
    embeddingsWeight: number;
 | 
					    semanticWeight: number;        // Weight for embeddings similarity
 | 
				
			||||||
    consensusWeight: number;
 | 
					    suitabilityWeight: number;     // Weight for AI task fit evaluation
 | 
				
			||||||
    domainMatchWeight: number;
 | 
					    consistencyWeight: number;     // Weight for cross-validation agreement
 | 
				
			||||||
    freshnessWeight: number;
 | 
					    reliabilityWeight: number;     // Weight for tool quality indicators
 | 
				
			||||||
    minimumThreshold: number;
 | 
					    minimumThreshold: number;
 | 
				
			||||||
    mediumThreshold: number;
 | 
					    mediumThreshold: number;
 | 
				
			||||||
    highThreshold: number;
 | 
					    highThreshold: number;
 | 
				
			||||||
@ -146,25 +144,19 @@ class ImprovedMicroTaskAIPipeline {
 | 
				
			|||||||
      retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10)
 | 
					      retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10)
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    console.log('[AI PIPELINE] Configuration loaded:', {
 | 
					    // Updated confidence weights - more focused on AI evaluation
 | 
				
			||||||
      embeddingCandidates: this.embeddingCandidates,
 | 
					 | 
				
			||||||
      embeddingSelection: `${this.embeddingSelectionLimit} tools, ${this.embeddingConceptsLimit} concepts`,
 | 
					 | 
				
			||||||
      noEmbeddingsLimits: `${this.noEmbeddingsToolLimit || 'unlimited'} tools, ${this.noEmbeddingsConceptLimit || 'unlimited'} concepts`,
 | 
					 | 
				
			||||||
      auditEnabled: this.auditConfig.enabled
 | 
					 | 
				
			||||||
    });
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    this.confidenceConfig = {
 | 
					    this.confidenceConfig = {
 | 
				
			||||||
      embeddingsWeight: parseFloat(process.env.CONFIDENCE_EMBEDDINGS_WEIGHT || '0.3'),
 | 
					      semanticWeight: parseFloat(process.env.CONFIDENCE_SEMANTIC_WEIGHT || '0.25'),     // Embeddings similarity
 | 
				
			||||||
      consensusWeight: parseFloat(process.env.CONFIDENCE_CONSENSUS_WEIGHT || '0.25'), 
 | 
					      suitabilityWeight: parseFloat(process.env.CONFIDENCE_SUITABILITY_WEIGHT || '0.4'), // AI task fit evaluation  
 | 
				
			||||||
      domainMatchWeight: parseFloat(process.env.CONFIDENCE_DOMAIN_MATCH_WEIGHT || '0.25'),
 | 
					      consistencyWeight: parseFloat(process.env.CONFIDENCE_CONSISTENCY_WEIGHT || '0.2'), // Cross-validation agreement
 | 
				
			||||||
      freshnessWeight: parseFloat(process.env.CONFIDENCE_FRESHNESS_WEIGHT || '0.2'),
 | 
					      reliabilityWeight: parseFloat(process.env.CONFIDENCE_RELIABILITY_WEIGHT || '0.15'), // Tool quality indicators
 | 
				
			||||||
      minimumThreshold: parseInt(process.env.CONFIDENCE_MINIMUM_THRESHOLD || '40', 10),
 | 
					      minimumThreshold: parseInt(process.env.CONFIDENCE_MINIMUM_THRESHOLD || '40', 10),
 | 
				
			||||||
      mediumThreshold: parseInt(process.env.CONFIDENCE_MEDIUM_THRESHOLD || '60', 10),
 | 
					      mediumThreshold: parseInt(process.env.CONFIDENCE_MEDIUM_THRESHOLD || '60', 10),
 | 
				
			||||||
      highThreshold: parseInt(process.env.CONFIDENCE_HIGH_THRESHOLD || '80', 10)
 | 
					      highThreshold: parseInt(process.env.CONFIDENCE_HIGH_THRESHOLD || '80', 10)
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    console.log('[AI PIPELINE] Confidence scoring enabled:', {
 | 
					    console.log('[AI PIPELINE] Enhanced confidence scoring enabled:', {
 | 
				
			||||||
      weights: `E:${this.confidenceConfig.embeddingsWeight} C:${this.confidenceConfig.consensusWeight} D:${this.confidenceConfig.domainMatchWeight} F:${this.confidenceConfig.freshnessWeight}`,
 | 
					      weights: `Semantic:${this.confidenceConfig.semanticWeight} Suitability:${this.confidenceConfig.suitabilityWeight} Consistency:${this.confidenceConfig.consistencyWeight} Reliability:${this.confidenceConfig.reliabilityWeight}`,
 | 
				
			||||||
      thresholds: `${this.confidenceConfig.minimumThreshold}/${this.confidenceConfig.mediumThreshold}/${this.confidenceConfig.highThreshold}`
 | 
					      thresholds: `${this.confidenceConfig.minimumThreshold}/${this.confidenceConfig.mediumThreshold}/${this.confidenceConfig.highThreshold}`
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
@ -247,8 +239,8 @@ class ImprovedMicroTaskAIPipeline {
 | 
				
			|||||||
    let confidence = 60; // Base confidence
 | 
					    let confidence = 60; // Base confidence
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
 | 
					    if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
 | 
				
			||||||
    else if (selectionRatio <= 0.05) confidence -= 10; // Too few
 | 
					    else if (selectionRatio <= 0.05) confidence -= 10; 
 | 
				
			||||||
    else confidence -= 15; // Too many
 | 
					    else confidence -= 15; 
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    if (hasReasoning) confidence += 15;
 | 
					    if (hasReasoning) confidence += 15;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
@ -357,7 +349,7 @@ class ImprovedMicroTaskAIPipeline {
 | 
				
			|||||||
          const possibleTools = toolMatches
 | 
					          const possibleTools = toolMatches
 | 
				
			||||||
            .map(match => match.replace(/"/g, ''))
 | 
					            .map(match => match.replace(/"/g, ''))
 | 
				
			||||||
            .filter(name => name.length > 2 && !['selectedTools', 'selectedConcepts', 'reasoning'].includes(name))
 | 
					            .filter(name => name.length > 2 && !['selectedTools', 'selectedConcepts', 'reasoning'].includes(name))
 | 
				
			||||||
            .slice(0, 15); // Reasonable limit
 | 
					            .slice(0, 15); 
 | 
				
			||||||
          
 | 
					          
 | 
				
			||||||
          if (possibleTools.length > 0) {
 | 
					          if (possibleTools.length > 0) {
 | 
				
			||||||
            console.log(`[AI PIPELINE] Recovered ${possibleTools.length} possible tool names from broken JSON`);
 | 
					            console.log(`[AI PIPELINE] Recovered ${possibleTools.length} possible tool names from broken JSON`);
 | 
				
			||||||
@ -374,7 +366,7 @@ class ImprovedMicroTaskAIPipeline {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string): boolean {   
 | 
					  private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string, taskRelevance?: number, limitations?: string[]): boolean {   
 | 
				
			||||||
    context.seenToolNames.add(tool.name);
 | 
					    context.seenToolNames.add(tool.name);
 | 
				
			||||||
    if (!context.selectedTools) context.selectedTools = [];
 | 
					    if (!context.selectedTools) context.selectedTools = [];
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
@ -382,18 +374,22 @@ class ImprovedMicroTaskAIPipeline {
 | 
				
			|||||||
      tool,
 | 
					      tool,
 | 
				
			||||||
      phase,
 | 
					      phase,
 | 
				
			||||||
      priority,
 | 
					      priority,
 | 
				
			||||||
      justification
 | 
					      justification,
 | 
				
			||||||
 | 
					      taskRelevance,
 | 
				
			||||||
 | 
					      limitations
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    return true;
 | 
					    return true;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  private async getIntelligentCandidates(userQuery: string, toolsData: any, mode: string) {
 | 
					  private async getIntelligentCandidates(userQuery: string, toolsData: any, mode: string, context: AnalysisContext) {
 | 
				
			||||||
    let candidateTools: any[] = [];
 | 
					    let candidateTools: any[] = [];
 | 
				
			||||||
    let candidateConcepts: any[] = [];
 | 
					    let candidateConcepts: any[] = [];
 | 
				
			||||||
    let selectionMethod = 'unknown';
 | 
					    let selectionMethod = 'unknown';
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    // WAIT for embeddings initialization if embeddings are enabled
 | 
					    // Initialize embeddings similarities storage
 | 
				
			||||||
 | 
					    context.embeddingsSimilarities = new Map<string, number>();
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
    if (process.env.AI_EMBEDDINGS_ENABLED === 'true') {
 | 
					    if (process.env.AI_EMBEDDINGS_ENABLED === 'true') {
 | 
				
			||||||
      try {
 | 
					      try {
 | 
				
			||||||
        console.log('[AI PIPELINE] Waiting for embeddings initialization...');
 | 
					        console.log('[AI PIPELINE] Waiting for embeddings initialization...');
 | 
				
			||||||
@ -414,6 +410,11 @@ class ImprovedMicroTaskAIPipeline {
 | 
				
			|||||||
      
 | 
					      
 | 
				
			||||||
      console.log(`[AI PIPELINE] Embeddings found ${similarItems.length} similar items`);
 | 
					      console.log(`[AI PIPELINE] Embeddings found ${similarItems.length} similar items`);
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
 | 
					      // Store actual similarity scores for confidence calculation
 | 
				
			||||||
 | 
					      similarItems.forEach(item => {
 | 
				
			||||||
 | 
					        context.embeddingsSimilarities.set(item.name, item.similarity);
 | 
				
			||||||
 | 
					      });
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
      const toolsMap = new Map<string, any>(toolsData.tools.map((tool: any) => [tool.name, tool]));
 | 
					      const toolsMap = new Map<string, any>(toolsData.tools.map((tool: any) => [tool.name, tool]));
 | 
				
			||||||
      const conceptsMap = new Map<string, any>(toolsData.concepts.map((concept: any) => [concept.name, concept]));
 | 
					      const conceptsMap = new Map<string, any>(toolsData.concepts.map((concept: any) => [concept.name, concept]));
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
@ -450,7 +451,7 @@ class ImprovedMicroTaskAIPipeline {
 | 
				
			|||||||
      }
 | 
					      }
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      if (this.auditConfig.enabled) {
 | 
					      if (this.auditConfig.enabled) {
 | 
				
			||||||
        this.addAuditEntry(null, 'retrieval', 'embeddings-search', 
 | 
					        this.addAuditEntry(context, 'retrieval', 'embeddings-search', 
 | 
				
			||||||
          { query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates }, 
 | 
					          { query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates }, 
 | 
				
			||||||
          { 
 | 
					          { 
 | 
				
			||||||
            candidatesFound: similarItems.length, 
 | 
					            candidatesFound: similarItems.length, 
 | 
				
			||||||
@ -459,7 +460,8 @@ class ImprovedMicroTaskAIPipeline {
 | 
				
			|||||||
            reductionRatio: reductionRatio,
 | 
					            reductionRatio: reductionRatio,
 | 
				
			||||||
            usingEmbeddings: selectionMethod === 'embeddings_candidates',
 | 
					            usingEmbeddings: selectionMethod === 'embeddings_candidates',
 | 
				
			||||||
            totalAvailable: totalAvailableTools,
 | 
					            totalAvailable: totalAvailableTools,
 | 
				
			||||||
            filtered: similarTools.length
 | 
					            filtered: similarTools.length,
 | 
				
			||||||
 | 
					            avgSimilarity: similarItems.length > 0 ? similarItems.reduce((sum, item) => sum + item.similarity, 0) / similarItems.length : 0
 | 
				
			||||||
          },
 | 
					          },
 | 
				
			||||||
          selectionMethod === 'embeddings_candidates' ? 85 : 60,
 | 
					          selectionMethod === 'embeddings_candidates' ? 85 : 60,
 | 
				
			||||||
          embeddingsStart,
 | 
					          embeddingsStart,
 | 
				
			||||||
@ -479,7 +481,7 @@ class ImprovedMicroTaskAIPipeline {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    console.log(`[AI PIPELINE] AI will analyze ${candidateTools.length} candidate tools (method: ${selectionMethod})`);
 | 
					    console.log(`[AI PIPELINE] AI will analyze ${candidateTools.length} candidate tools (method: ${selectionMethod})`);
 | 
				
			||||||
    const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod);
 | 
					    const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod, context);
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    return {
 | 
					    return {
 | 
				
			||||||
      tools: finalSelection.selectedTools,
 | 
					      tools: finalSelection.selectedTools,
 | 
				
			||||||
@ -495,7 +497,8 @@ class ImprovedMicroTaskAIPipeline {
 | 
				
			|||||||
    candidateTools: any[], 
 | 
					    candidateTools: any[], 
 | 
				
			||||||
    candidateConcepts: any[], 
 | 
					    candidateConcepts: any[], 
 | 
				
			||||||
    mode: string,
 | 
					    mode: string,
 | 
				
			||||||
    selectionMethod: string
 | 
					    selectionMethod: string,
 | 
				
			||||||
 | 
					    context: AnalysisContext
 | 
				
			||||||
  ) {
 | 
					  ) {
 | 
				
			||||||
    const selectionStart = Date.now();
 | 
					    const selectionStart = Date.now();
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
@ -576,7 +579,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
 | 
				
			|||||||
        console.error('[AI PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
 | 
					        console.error('[AI PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        if (this.auditConfig.enabled) {
 | 
					        if (this.auditConfig.enabled) {
 | 
				
			||||||
          this.addAuditEntry(null, 'selection', 'ai-tool-selection-failed',
 | 
					          this.addAuditEntry(context, 'selection', 'ai-tool-selection-failed',
 | 
				
			||||||
            { candidateCount: candidateTools.length, mode, prompt: prompt.slice(0, 200) },
 | 
					            { candidateCount: candidateTools.length, mode, prompt: prompt.slice(0, 200) },
 | 
				
			||||||
            { error: 'Invalid JSON structure', response: response.slice(0, 200) },
 | 
					            { error: 'Invalid JSON structure', response: response.slice(0, 200) },
 | 
				
			||||||
            10,
 | 
					            10,
 | 
				
			||||||
@ -602,7 +605,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
 | 
				
			|||||||
      if (this.auditConfig.enabled) {
 | 
					      if (this.auditConfig.enabled) {
 | 
				
			||||||
        const confidence = this.calculateSelectionConfidence(result, candidateTools.length);
 | 
					        const confidence = this.calculateSelectionConfidence(result, candidateTools.length);
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        this.addAuditEntry(null, 'selection', 'ai-tool-selection',
 | 
					        this.addAuditEntry(context, 'selection', 'ai-tool-selection',
 | 
				
			||||||
          { candidateCount: candidateTools.length, mode, promptLength: prompt.length },
 | 
					          { candidateCount: candidateTools.length, mode, promptLength: prompt.length },
 | 
				
			||||||
          { 
 | 
					          { 
 | 
				
			||||||
            selectedToolCount: result.selectedTools.length, 
 | 
					            selectedToolCount: result.selectedTools.length, 
 | 
				
			||||||
@ -626,7 +629,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
 | 
				
			|||||||
      console.error('[AI PIPELINE] AI selection failed:', error);
 | 
					      console.error('[AI PIPELINE] AI selection failed:', error);
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      if (this.auditConfig.enabled) {
 | 
					      if (this.auditConfig.enabled) {
 | 
				
			||||||
        this.addAuditEntry(null, 'selection', 'ai-tool-selection-error',
 | 
					        this.addAuditEntry(context, 'selection', 'ai-tool-selection-error',
 | 
				
			||||||
          { candidateCount: candidateTools.length, mode },
 | 
					          { candidateCount: candidateTools.length, mode },
 | 
				
			||||||
          { error: error.message },
 | 
					          { error: error.message },
 | 
				
			||||||
          5,
 | 
					          5,
 | 
				
			||||||
@ -700,38 +703,225 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  private calculateRecommendationConfidence(
 | 
					  private calculateRecommendationConfidence(
 | 
				
			||||||
    tool: any, 
 | 
					    tool: any, 
 | 
				
			||||||
    embeddingsSimilarity: number,
 | 
					    context: AnalysisContext,
 | 
				
			||||||
    domainMatch: boolean,
 | 
					    taskRelevance: number = 70,
 | 
				
			||||||
    microTaskAgreement: number,
 | 
					    limitations: string[] = []
 | 
				
			||||||
    context: AnalysisContext
 | 
					 | 
				
			||||||
  ): ConfidenceMetrics {
 | 
					  ): ConfidenceMetrics {
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    const embeddingsQuality = Math.min(100, embeddingsSimilarity * 100 * 2); // Scale 0.5 similarity to 100%
 | 
					    // 1. Semantic Relevance: Real embeddings similarity score
 | 
				
			||||||
    const domainAlignment = domainMatch ? 90 : (tool.domains?.length > 0 ? 60 : 30);
 | 
					    const semanticRelevance = context.embeddingsSimilarities.has(tool.name) ? 
 | 
				
			||||||
    const consensus = Math.min(100, microTaskAgreement * 100);
 | 
					      Math.round(context.embeddingsSimilarities.get(tool.name)! * 100) : 50;
 | 
				
			||||||
    const freshness = this.calculateToolFreshness(tool);
 | 
					 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
 | 
					    // 2. Task Suitability: AI-determined fitness for specific task
 | 
				
			||||||
 | 
					    const taskSuitability = Math.round(taskRelevance);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    // 3. Methodological Consistency: Cross-validation between micro-tasks
 | 
				
			||||||
 | 
					    const methodologicalConsistency = this.calculateCrossValidationScore(tool.name, context);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    // 4. Tool Reliability: Quality indicators
 | 
				
			||||||
 | 
					    const toolReliability = this.calculateToolReliability(tool);
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    // Debug logging
 | 
				
			||||||
 | 
					    console.log(`[CONFIDENCE DEBUG] ${tool.name}:`, {
 | 
				
			||||||
 | 
					      semantic: semanticRelevance,
 | 
				
			||||||
 | 
					      taskSuitability: taskSuitability,
 | 
				
			||||||
 | 
					      consistency: methodologicalConsistency,
 | 
				
			||||||
 | 
					      reliability: toolReliability,
 | 
				
			||||||
 | 
					      hasEmbeddingsSimilarity: context.embeddingsSimilarities.has(tool.name),
 | 
				
			||||||
 | 
					      rawTaskRelevance: taskRelevance
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    // Calculate weighted overall score
 | 
				
			||||||
    const overall = (
 | 
					    const overall = (
 | 
				
			||||||
      embeddingsQuality * this.confidenceConfig.embeddingsWeight +
 | 
					      semanticRelevance * this.confidenceConfig.semanticWeight +
 | 
				
			||||||
      domainAlignment * this.confidenceConfig.domainMatchWeight +
 | 
					      taskSuitability * this.confidenceConfig.suitabilityWeight +
 | 
				
			||||||
      consensus * this.confidenceConfig.consensusWeight +
 | 
					      methodologicalConsistency * this.confidenceConfig.consistencyWeight +
 | 
				
			||||||
      freshness * this.confidenceConfig.freshnessWeight
 | 
					      toolReliability * this.confidenceConfig.reliabilityWeight
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const uncertaintyFactors = this.identifyUncertaintyFactors(tool, context, overall);
 | 
					    const uncertaintyFactors = this.identifySpecificUncertaintyFactors(tool, context, limitations, overall);
 | 
				
			||||||
    const strengthIndicators = this.identifyStrengthIndicators(tool, context, overall);
 | 
					    const strengthIndicators = this.identifySpecificStrengthIndicators(tool, context, overall);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return {
 | 
					    return {
 | 
				
			||||||
      overall: Math.round(overall),
 | 
					      overall: Math.round(overall),
 | 
				
			||||||
      embeddingsQuality: Math.round(embeddingsQuality),
 | 
					      semanticRelevance: Math.round(semanticRelevance),
 | 
				
			||||||
      domainAlignment: Math.round(domainAlignment), 
 | 
					      taskSuitability: Math.round(taskSuitability), 
 | 
				
			||||||
      consensus: Math.round(consensus),
 | 
					      methodologicalConsistency: Math.round(methodologicalConsistency),
 | 
				
			||||||
      freshness: Math.round(freshness),
 | 
					      toolReliability: Math.round(toolReliability),
 | 
				
			||||||
      uncertaintyFactors,
 | 
					      uncertaintyFactors,
 | 
				
			||||||
      strengthIndicators
 | 
					      strengthIndicators
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Scores how consistently a tool was judged across the audited pipeline steps.
					   *
					   * Only audit entries whose phase is `micro-task` or `selection` are considered.
					   * An entry counts as a mention when the tool name appears in the entry output
					   * (`selectedTools`, `finalToolNames` or `toolName`) or as `toolName` in the
					   * entry input.
					   *
					   * @param toolName - Name of the tool to look up in the audit trail.
					   * @param context - Analysis context whose `auditTrail` is scanned.
					   * @returns 60 when the tool is never mentioned; the single entry's confidence
					   *          clamped to 40..85 for exactly one mention; otherwise a rounded blend
					   *          of agreement ratio (70%) and average confidence (30%), clamped to 30..95.
					   */
					  private calculateCrossValidationScore(toolName: string, context: AnalysisContext): number {
					    // True when an audit output payload references the tool in any known shape.
					    const outputMentionsTool = (output: any): boolean => {
					      if (!output || typeof output !== 'object') return false;
					      return (
					        (Array.isArray(output.selectedTools) && output.selectedTools.includes(toolName)) ||
					        (Array.isArray(output.finalToolNames) && output.finalToolNames.includes(toolName)) ||
					        output.toolName === toolName
					      );
					    };

					    let toolMentions = 0;
					    let positiveEvaluations = 0;
					    let confidenceSum = 0;

					    for (const entry of context.auditTrail) {
					      if (entry.phase !== 'micro-task' && entry.phase !== 'selection') continue;

					      // Per-tool evaluation entries pass `{ toolName, rank }` as the audit *input*,
					      // while their output carries only scores, so the input must be checked too.
					      // NOTE(review): assumes audit entries expose that argument as `entry.input` - confirm.
					      const mentioned = outputMentionsTool(entry.output) || entry.input?.toolName === toolName;
					      if (!mentioned) continue;

					      toolMentions++;
					      confidenceSum += entry.confidence;

					      // A mention counts as a positive evaluation from a confidence of 60 upwards.
					      if (entry.confidence >= 60) {
					        positiveEvaluations++;
					      }
					    }

					    console.log(`[AI PIPELINE] Cross-validation for ${toolName}: ${toolMentions} mentions, ${positiveEvaluations} positive, avg confidence: ${toolMentions > 0 ? Math.round(confidenceSum / toolMentions) : 0}`);

					    if (toolMentions === 0) {
					      return 60; // Neutral default when no cross-validation data is available
					    }

					    if (toolMentions === 1) {
					      // A single data point cannot show agreement; use its confidence, but bounded.
					      return Math.min(85, Math.max(40, confidenceSum));
					    }

					    const agreementRatio = positiveEvaluations / toolMentions;
					    const avgConfidence = confidenceSum / toolMentions;
					    const crossValidationScore = (agreementRatio * 0.7 + (avgConfidence / 100) * 0.3) * 100;

					    return Math.round(Math.min(95, Math.max(30, crossValidationScore)));
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Derives a reliability score for a tool from static attributes of its record.
					   *
					   * Starts at 50 and adds or subtracts fixed amounts for documentation, hosting,
					   * license, skill level and platform count; the result is capped at 100.
					   *
					   * @param tool - Tool record; reads `knowledgebase`, `license`, `skillLevel`, `platforms`.
					   * @returns Reliability score, at most 100.
					   */
					  private calculateToolReliability(tool: any): number {
					    const baseScore = 50;

					    // Intermediate/advanced tools get a bonus, expert tools a small penalty.
					    let skillAdjustment = 0;
					    if (tool.skillLevel === 'intermediate' || tool.skillLevel === 'advanced') {
					      skillAdjustment = 10;
					    } else if (tool.skillLevel === 'expert') {
					      skillAdjustment = -5;
					    }

					    const adjustments: number[] = [
					      tool.knowledgebase === true ? 25 : 0,                      // documentation available
					      isToolHosted(tool) ? 20 : 0,                               // hosted instance exists
					      tool.license && tool.license !== 'Proprietary' ? 10 : 0,   // non-proprietary license
					      skillAdjustment,
					      tool.platforms && tool.platforms.length > 1 ? 5 : 0        // more than one platform
					    ];

					    const total = adjustments.reduce((sum, delta) => sum + delta, baseScore);
					    return Math.min(100, total);
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Collects up to four user-facing reasons why a recommendation may be uncertain.
					   *
					   * Sources, in order: AI-reported limitations (at most three), low embeddings
					   * similarity, skill-level mismatch against keywords in the query, required
					   * installation, and a low cross-validation score.
					   *
					   * @param tool - Tool record being assessed.
					   * @param context - Analysis context; reads `embeddingsSimilarities` and `userQuery`.
					   * @param limitations - Limitations reported by the AI evaluation; non-array input
					   *                      and non-string or blank items are ignored.
					   * @param confidence - Overall confidence; currently unused, kept for interface stability.
					   * @returns At most four factor descriptions.
					   */
					  private identifySpecificUncertaintyFactors(tool: any, context: AnalysisContext, limitations: string[], confidence: number): string[] {
					    const factors: string[] = [];

					    // Limitations originate from parsed AI output, so validate the shape first:
					    // spreading a plain string here would push single characters as factors.
					    if (Array.isArray(limitations)) {
					      const usableLimitations = limitations.filter(
					        item => typeof item === 'string' && item.trim().length > 0
					      );
					      factors.push(...usableLimitations.slice(0, 3)); // Limit to top 3
					    }

					    // Fall back to a neutral 0.5 only when no similarity was recorded;
					    // a recorded similarity of 0 must still trigger the warning below.
					    const similarity = context.embeddingsSimilarities.get(tool.name) ?? 0.5;
					    if (similarity < 0.4) {
					      factors.push('Geringe semantische Ähnlichkeit zur Anfrage - Tool-Beschreibung passt möglicherweise nicht optimal');
					    }

					    // Expert tool requested in a scenario worded as urgent
					    if (tool.skillLevel === 'expert' && /schnell|rapid|triage|urgent/i.test(context.userQuery)) {
					      factors.push('Experten-Tool für Eilszenario - möglicherweise zu komplex für schnelle Antworten');
					    }

					    // Novice tool requested in a scenario worded as complex
					    if (tool.skillLevel === 'novice' && /komplex|erweitert|tiefgehend|advanced/i.test(context.userQuery)) {
					      factors.push('Einsteiger-Tool für komplexes Szenario - könnte funktionale Einschränkungen haben');
					    }

					    // Downloadable software without a hosted instance needs setup first
					    if (tool.type === 'software' && !isToolHosted(tool) && tool.accessType === 'download') {
					      factors.push('Installation erforderlich - nicht sofort verfügbar ohne Setup');
					    }

					    // Disagreement between the audited analysis steps
					    const crossValidation = this.calculateCrossValidationScore(tool.name, context);
					    if (crossValidation < 50) {
					      factors.push('Uneinheitliche Bewertung in verschiedenen Analyseschritten - Empfehlung nicht eindeutig');
					    }

					    return factors.slice(0, 4); // Limit to 4 most important factors
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Collects up to four user-facing reasons that support a recommendation.
					   *
					   * Each candidate indicator is paired with the condition that enables it; the
					   * conditions are evaluated in a fixed order and the first four that hold are
					   * returned.
					   *
					   * @param tool - Tool record being assessed.
					   * @param context - Analysis context; reads `embeddingsSimilarities` and `userQuery`.
					   * @param confidence - Overall confidence, compared against the configured high threshold.
					   * @returns At most four indicator descriptions.
					   */
					  private identifySpecificStrengthIndicators(tool: any, context: AnalysisContext, confidence: number): string[] {
					    const similarity = context.embeddingsSimilarities.get(tool.name) || 0.5;
					    const crossValidation = this.calculateCrossValidationScore(tool.name, context);
					    const hasBalancedSkillLevel = tool.skillLevel === 'intermediate' || tool.skillLevel === 'advanced';

					    // [condition, message] pairs, listed in priority order.
					    const candidates: Array<[boolean, string]> = [
					      [confidence >= this.confidenceConfig.highThreshold, 'Hohe Gesamtbewertung durch mehrfache Validierung'],
					      [similarity >= 0.7, 'Sehr gute semantische Übereinstimmung mit Ihrer Anfrage'],
					      [crossValidation >= 80, 'Konsistente Empfehlung über verschiedene Analyseschritte hinweg'],
					      [tool.knowledgebase === true, 'Umfassende Dokumentation und Wissensbasis verfügbar'],
					      [Boolean(isToolHosted(tool)), 'Sofort verfügbar über gehostete Lösung - kein Setup erforderlich'],
					      [hasBalancedSkillLevel, 'Ausgewogenes Verhältnis zwischen Funktionalität und Benutzerfreundlichkeit'],
					      [
					        tool.type === 'method' && /methodik|vorgehen|prozess|ansatz/i.test(context.userQuery),
					        'Methodischer Ansatz passt zu Ihrer prozeduralen Anfrage'
					      ]
					    ];

					    return candidates
					      .filter(([applies]) => applies)
					      .map(([, message]) => message)
					      .slice(0, 4); // Keep only the 4 highest-priority indicators
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
 | 
					  private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
 | 
				
			||||||
    const isWorkflow = context.mode === 'workflow';
 | 
					    const isWorkflow = context.mode === 'workflow';
 | 
				
			||||||
    const prompt = getPrompt('scenarioAnalysis', isWorkflow, context.userQuery);
 | 
					    const prompt = getPrompt('scenarioAnalysis', isWorkflow, context.userQuery);
 | 
				
			||||||
@ -833,27 +1023,49 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
 | 
				
			|||||||
    if (result.success) {
 | 
					    if (result.success) {
 | 
				
			||||||
      const evaluation = this.safeParseJSON(result.content, {
 | 
					      const evaluation = this.safeParseJSON(result.content, {
 | 
				
			||||||
        suitability_score: 'medium',
 | 
					        suitability_score: 'medium',
 | 
				
			||||||
 | 
					        task_relevance: '',
 | 
				
			||||||
        detailed_explanation: 'Evaluation failed',
 | 
					        detailed_explanation: 'Evaluation failed',
 | 
				
			||||||
        implementation_approach: '',
 | 
					        implementation_approach: '',
 | 
				
			||||||
        pros: [],
 | 
					        pros: [],
 | 
				
			||||||
        cons: [],
 | 
					        cons: [],
 | 
				
			||||||
 | 
					        limitations: [],
 | 
				
			||||||
        alternatives: ''
 | 
					        alternatives: ''
 | 
				
			||||||
      });
 | 
					      });
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
 | 
					      // Debug logging to see what we're getting
 | 
				
			||||||
 | 
					      console.log(`[AI PIPELINE] Tool ${tool.name} evaluation:`, {
 | 
				
			||||||
 | 
					        taskRelevance: evaluation.task_relevance,
 | 
				
			||||||
 | 
					        suitabilityScore: evaluation.suitability_score,
 | 
				
			||||||
 | 
					        limitationsCount: evaluation.limitations?.length || 0
 | 
				
			||||||
 | 
					      });
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      // Ensure task_relevance is a number
 | 
				
			||||||
 | 
					      const taskRelevance = typeof evaluation.task_relevance === 'number' ? 
 | 
				
			||||||
 | 
					        evaluation.task_relevance : 
 | 
				
			||||||
 | 
					        parseInt(String(evaluation.task_relevance)) || 70;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      // Store enhanced evaluation data
 | 
				
			||||||
      this.addToolToSelection(context, {
 | 
					      this.addToolToSelection(context, {
 | 
				
			||||||
        ...tool,
 | 
					        ...tool,
 | 
				
			||||||
        evaluation: {
 | 
					        evaluation: {
 | 
				
			||||||
          ...evaluation,
 | 
					          ...evaluation,
 | 
				
			||||||
 | 
					          task_relevance: taskRelevance, // Ensure it's stored as number
 | 
				
			||||||
          rank
 | 
					          rank
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
      }, 'evaluation', evaluation.suitability_score);
 | 
					      }, 'evaluation', evaluation.suitability_score, evaluation.detailed_explanation, 
 | 
				
			||||||
 | 
					      taskRelevance, evaluation.limitations);
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      this.addAuditEntry(context, 'micro-task', 'tool-evaluation',
 | 
					      this.addAuditEntry(context, 'micro-task', 'tool-evaluation',
 | 
				
			||||||
        { toolName: tool.name, rank },
 | 
					        { toolName: tool.name, rank },
 | 
				
			||||||
        { suitabilityScore: evaluation.suitability_score, hasExplanation: !!evaluation.detailed_explanation },
 | 
					        { 
 | 
				
			||||||
 | 
					          suitabilityScore: evaluation.suitability_score, 
 | 
				
			||||||
 | 
					          taskRelevance: taskRelevance, // Use the cleaned number
 | 
				
			||||||
 | 
					          hasExplanation: !!evaluation.detailed_explanation,
 | 
				
			||||||
 | 
					          limitationsIdentified: evaluation.limitations?.length || 0
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
        evaluation.suitability_score === 'high' ? 85 : evaluation.suitability_score === 'medium' ? 70 : 50,
 | 
					        evaluation.suitability_score === 'high' ? 85 : evaluation.suitability_score === 'medium' ? 70 : 50,
 | 
				
			||||||
        Date.now() - result.processingTimeMs,
 | 
					        Date.now() - result.processingTimeMs,
 | 
				
			||||||
        { toolType: tool.type }
 | 
					        { toolType: tool.type, taskRelevanceScore: taskRelevance }
 | 
				
			||||||
      );
 | 
					      );
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
@ -963,28 +1175,31 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {
 | 
					  async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {
 | 
				
			||||||
    const startTime = Date.now();
 | 
					    const startTime = Date.now();
 | 
				
			||||||
    let completedTasks = 0;
 | 
					    let completeTasks = 0;
 | 
				
			||||||
    let failedTasks = 0;
 | 
					    let failedTasks = 0;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    this.tempAuditEntries = [];
 | 
					    this.tempAuditEntries = [];
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    console.log(`[AI PIPELINE] Starting ${mode} query processing with context continuity and audit trail`);
 | 
					    console.log(`[AI PIPELINE] Starting ${mode} query processing with enhanced confidence scoring`);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    try {
 | 
					    try {
 | 
				
			||||||
      const toolsData = await getCompressedToolsDataForAI();
 | 
					      const toolsData = await getCompressedToolsDataForAI();
 | 
				
			||||||
      const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode);
 | 
					 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      const context: AnalysisContext = {
 | 
					      const context: AnalysisContext = {
 | 
				
			||||||
        userQuery,
 | 
					        userQuery,
 | 
				
			||||||
        mode,
 | 
					        mode,
 | 
				
			||||||
        filteredData,
 | 
					        filteredData: {}, // Will be populated by getIntelligentCandidates
 | 
				
			||||||
        contextHistory: [],
 | 
					        contextHistory: [],
 | 
				
			||||||
        maxContextLength: this.maxContextTokens,
 | 
					        maxContextLength: this.maxContextTokens,
 | 
				
			||||||
        currentContextLength: 0,
 | 
					        currentContextLength: 0,
 | 
				
			||||||
        seenToolNames: new Set<string>(),
 | 
					        seenToolNames: new Set<string>(),
 | 
				
			||||||
        auditTrail: []
 | 
					        auditTrail: [],
 | 
				
			||||||
 | 
					        embeddingsSimilarities: new Map<string, number>()
 | 
				
			||||||
      };
 | 
					      };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode, context);
 | 
				
			||||||
 | 
					      context.filteredData = filteredData;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
      this.mergeTemporaryAuditEntries(context);
 | 
					      this.mergeTemporaryAuditEntries(context);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      console.log(`[AI PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
 | 
					      console.log(`[AI PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
 | 
				
			||||||
@ -994,58 +1209,54 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
 | 
				
			|||||||
        { candidateTools: filteredData.tools.length, candidateConcepts: filteredData.concepts.length },
 | 
					        { candidateTools: filteredData.tools.length, candidateConcepts: filteredData.concepts.length },
 | 
				
			||||||
        90, 
 | 
					        90, 
 | 
				
			||||||
        startTime,
 | 
					        startTime,
 | 
				
			||||||
        { auditEnabled: this.auditConfig.enabled }
 | 
					        { auditEnabled: this.auditConfig.enabled, confidenceScoringEnabled: true }
 | 
				
			||||||
      );
 | 
					      );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // MICRO-TASK SEQUENCE
 | 
					      // MICRO-TASK SEQUENCE WITH ENHANCED CONFIDENCE TRACKING
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      // Task 1: Scenario/Problem Analysis
 | 
					 | 
				
			||||||
      const analysisResult = await this.analyzeScenario(context);
 | 
					      const analysisResult = await this.analyzeScenario(context);
 | 
				
			||||||
      if (analysisResult.success) completedTasks++; else failedTasks++;
 | 
					      if (analysisResult.success) completeTasks++; else failedTasks++;
 | 
				
			||||||
      await this.delay(this.microTaskDelay);
 | 
					      await this.delay(this.microTaskDelay);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // Task 2: Investigation/Solution Approach
 | 
					 | 
				
			||||||
      const approachResult = await this.generateApproach(context);
 | 
					      const approachResult = await this.generateApproach(context);
 | 
				
			||||||
      if (approachResult.success) completedTasks++; else failedTasks++;
 | 
					      if (approachResult.success) completeTasks++; else failedTasks++;
 | 
				
			||||||
      await this.delay(this.microTaskDelay);
 | 
					      await this.delay(this.microTaskDelay);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // Task 3: Critical Considerations
 | 
					 | 
				
			||||||
      const considerationsResult = await this.generateCriticalConsiderations(context);
 | 
					      const considerationsResult = await this.generateCriticalConsiderations(context);
 | 
				
			||||||
      if (considerationsResult.success) completedTasks++; else failedTasks++;
 | 
					      if (considerationsResult.success) completeTasks++; else failedTasks++;
 | 
				
			||||||
      await this.delay(this.microTaskDelay);
 | 
					      await this.delay(this.microTaskDelay);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // Task 4: Tool Selection/Evaluation (mode-dependent)
 | 
					 | 
				
			||||||
      if (mode === 'workflow') {
 | 
					      if (mode === 'workflow') {
 | 
				
			||||||
        const phases = toolsData.phases || [];
 | 
					        const phases = toolsData.phases || [];
 | 
				
			||||||
        for (const phase of phases) {
 | 
					        for (const phase of phases) {
 | 
				
			||||||
          const toolSelectionResult = await this.selectToolsForPhase(context, phase);
 | 
					          const toolSelectionResult = await this.selectToolsForPhase(context, phase);
 | 
				
			||||||
          if (toolSelectionResult.success) completedTasks++; else failedTasks++;
 | 
					          if (toolSelectionResult.success) completeTasks++; else failedTasks++;
 | 
				
			||||||
          await this.delay(this.microTaskDelay);
 | 
					          await this.delay(this.microTaskDelay);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
      } else {
 | 
					      } else {
 | 
				
			||||||
        const topTools = filteredData.tools.slice(0, 3);
 | 
					        const topTools = filteredData.tools.slice(0, 3);
 | 
				
			||||||
        for (let i = 0; i < topTools.length; i++) {
 | 
					        for (let i = 0; i < topTools.length; i++) {
 | 
				
			||||||
          const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1);
 | 
					          const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1);
 | 
				
			||||||
          if (evaluationResult.success) completedTasks++; else failedTasks++;
 | 
					          if (evaluationResult.success) completeTasks++; else failedTasks++;
 | 
				
			||||||
          await this.delay(this.microTaskDelay);
 | 
					          await this.delay(this.microTaskDelay);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      const knowledgeResult = await this.selectBackgroundKnowledge(context);
 | 
					      const knowledgeResult = await this.selectBackgroundKnowledge(context);
 | 
				
			||||||
      if (knowledgeResult.success) completedTasks++; else failedTasks++;
 | 
					      if (knowledgeResult.success) completeTasks++; else failedTasks++;
 | 
				
			||||||
      await this.delay(this.microTaskDelay);
 | 
					      await this.delay(this.microTaskDelay);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      const finalResult = await this.generateFinalRecommendations(context);
 | 
					      const finalResult = await this.generateFinalRecommendations(context);
 | 
				
			||||||
      if (finalResult.success) completedTasks++; else failedTasks++;
 | 
					      if (finalResult.success) completeTasks++; else failedTasks++;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      const recommendation = this.buildRecommendation(context, mode, finalResult.content);
 | 
					      const recommendation = this.buildRecommendation(context, mode, finalResult.content);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      this.addAuditEntry(context, 'completion', 'pipeline-end',
 | 
					      this.addAuditEntry(context, 'completion', 'pipeline-end',
 | 
				
			||||||
        { completedTasks, failedTasks },
 | 
					        { completedTasks: completeTasks, failedTasks },
 | 
				
			||||||
        { finalRecommendation: !!recommendation, auditEntriesGenerated: context.auditTrail.length },
 | 
					        { finalRecommendation: !!recommendation, auditEntriesGenerated: context.auditTrail.length },
 | 
				
			||||||
        completedTasks > failedTasks ? 85 : 60,
 | 
					        completeTasks > failedTasks ? 85 : 60,
 | 
				
			||||||
        startTime,
 | 
					        startTime,
 | 
				
			||||||
        { totalProcessingTimeMs: Date.now() - startTime }
 | 
					        { totalProcessingTimeMs: Date.now() - startTime, confidenceScoresGenerated: context.selectedTools?.length || 0 }
 | 
				
			||||||
      );
 | 
					      );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      const processingStats = {
 | 
					      const processingStats = {
 | 
				
			||||||
@ -1054,13 +1265,13 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
 | 
				
			|||||||
        finalSelectedItems: (context.selectedTools?.length || 0) + 
 | 
					        finalSelectedItems: (context.selectedTools?.length || 0) + 
 | 
				
			||||||
                           (context.backgroundKnowledge?.length || 0),
 | 
					                           (context.backgroundKnowledge?.length || 0),
 | 
				
			||||||
        processingTimeMs: Date.now() - startTime,
 | 
					        processingTimeMs: Date.now() - startTime,
 | 
				
			||||||
        microTasksCompleted: completedTasks,
 | 
					        microTasksCompleted: completeTasks,
 | 
				
			||||||
        microTasksFailed: failedTasks,
 | 
					        microTasksFailed: failedTasks,
 | 
				
			||||||
        contextContinuityUsed: true
 | 
					        contextContinuityUsed: true
 | 
				
			||||||
      };
 | 
					      };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      console.log(`[AI PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
 | 
					      console.log(`[AI PIPELINE] Completed: ${completeTasks} tasks, Failed: ${failedTasks} tasks`);
 | 
				
			||||||
      console.log(`[AI PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);
 | 
					      console.log(`[AI PIPELINE] Enhanced confidence scores generated: ${context.selectedTools?.length || 0}`);
 | 
				
			||||||
      console.log(`[AI PIPELINE] Audit trail entries: ${context.auditTrail.length}`);
 | 
					      console.log(`[AI PIPELINE] Audit trail entries: ${context.auditTrail.length}`);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      return {
 | 
					      return {
 | 
				
			||||||
@ -1080,128 +1291,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  private calculateToolFreshness(tool: any): number {
 | 
					 | 
				
			||||||
    // Base freshness score
 | 
					 | 
				
			||||||
    let freshness = 70; // Default for tools without specific freshness data
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    // Boost for tools with knowledge base (more maintained)
 | 
					 | 
				
			||||||
    if (tool.knowledgebase === true) freshness += 20;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    // Boost for hosted tools (actively maintained)
 | 
					 | 
				
			||||||
    if (isToolHosted(tool)) freshness += 15;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    // Slight boost for open source (community maintained)
 | 
					 | 
				
			||||||
    if (tool.license && tool.license !== 'Proprietary') freshness += 5;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    return Math.min(100, freshness);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  private checkDomainMatch(tool: any, userQuery: string): boolean {
 | 
					 | 
				
			||||||
    if (!tool.domains || tool.domains.length === 0) return false;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    const queryLower = userQuery.toLowerCase();
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    // Load domain keywords from environment with fallback
 | 
					 | 
				
			||||||
    const domainKeywordsEnv = process.env.CONFIDENCE_DOMAIN_KEYWORDS || 
 | 
					 | 
				
			||||||
      'incident-response:incident,breach,attack,compromise,response|malware-analysis:malware,virus,trojan,reverse,analysis|network-forensics:network,traffic,packet,pcap,wireshark|mobile-forensics:mobile,android,ios,phone,app|cloud-forensics:cloud,aws,azure,saas,paas';
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    const domainKeywords = domainKeywordsEnv.split('|').reduce((acc, pair) => {
 | 
					 | 
				
			||||||
      const [domain, keywords] = pair.split(':');
 | 
					 | 
				
			||||||
      if (domain && keywords) {
 | 
					 | 
				
			||||||
        acc[domain] = keywords.split(',');
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
      return acc;
 | 
					 | 
				
			||||||
    }, {});
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    return tool.domains.some(domain => {
 | 
					 | 
				
			||||||
      const keywords = domainKeywords[domain] || [domain.replace('-', ' ')];
 | 
					 | 
				
			||||||
      return keywords.some(keyword => queryLower.includes(keyword));
 | 
					 | 
				
			||||||
    });
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  private getMicroTaskAgreement(toolName: string, context: AnalysisContext): number {
 | 
					 | 
				
			||||||
    // Check how many micro-tasks selected this tool
 | 
					 | 
				
			||||||
    const microTaskEntries = context.auditTrail.filter(entry => 
 | 
					 | 
				
			||||||
      entry.phase === 'micro-task' && 
 | 
					 | 
				
			||||||
      entry.action.includes('selection') &&
 | 
					 | 
				
			||||||
      entry.output && 
 | 
					 | 
				
			||||||
      typeof entry.output === 'object' &&
 | 
					 | 
				
			||||||
      Array.isArray(entry.output.selectedTools) &&
 | 
					 | 
				
			||||||
      entry.output.selectedTools.includes(toolName)
 | 
					 | 
				
			||||||
    );
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    const totalMicroTasks = context.auditTrail.filter(entry => 
 | 
					 | 
				
			||||||
      entry.phase === 'micro-task' && entry.action.includes('selection')
 | 
					 | 
				
			||||||
    ).length;
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    return totalMicroTasks > 0 ? microTaskEntries.length / totalMicroTasks : 0.8; // Default high agreement
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  private getEmbeddingsSimilarity(toolName: string, context: AnalysisContext): number {
 | 
					 | 
				
			||||||
    // Extract similarity from audit trail embeddings entry
 | 
					 | 
				
			||||||
    const embeddingsEntry = context.auditTrail.find(entry => 
 | 
					 | 
				
			||||||
      entry.phase === 'retrieval' && entry.action === 'embeddings-search'
 | 
					 | 
				
			||||||
    );
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    if (!embeddingsEntry || !embeddingsEntry.output) return 0.5; // Default medium similarity
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    // Look for similarity data in the output (implementation specific)
 | 
					 | 
				
			||||||
    // This would need to be populated during embeddings search
 | 
					 | 
				
			||||||
    return 0.7; // Placeholder - would need actual similarity data from embeddings
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  private identifyUncertaintyFactors(tool: any, context: AnalysisContext, confidence: number): string[] {
 | 
					 | 
				
			||||||
    const factors: string[] = [];
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    if (confidence < this.confidenceConfig.mediumThreshold) {
 | 
					 | 
				
			||||||
      factors.push('Low overall confidence - consider manual validation');
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    if (!this.checkDomainMatch(tool, context.userQuery)) {
 | 
					 | 
				
			||||||
      factors.push('Domain mismatch detected - tool may not be specifically designed for this scenario');
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    if (tool.skillLevel === 'expert' && /rapid|quick|urgent|triage/i.test(context.userQuery)) {
 | 
					 | 
				
			||||||
      factors.push('Expert-level tool for rapid scenario - may be overcomplicated');
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    if (tool.type === 'software' && !isToolHosted(tool) && !tool.url) {
 | 
					 | 
				
			||||||
      factors.push('Limited access information - availability uncertain');
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    if (tool.skillLevel === 'novice' && /complex|advanced|deep/i.test(context.userQuery)) {
 | 
					 | 
				
			||||||
      factors.push('Novice-level tool for complex scenario - may lack required capabilities');
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    return factors;
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  private identifyStrengthIndicators(tool: any, context: AnalysisContext, confidence: number): string[] {
 | 
					 | 
				
			||||||
    const indicators: string[] = [];
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    if (confidence >= this.confidenceConfig.highThreshold) {
 | 
					 | 
				
			||||||
      indicators.push('High confidence recommendation based on multiple factors');
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    if (this.checkDomainMatch(tool, context.userQuery)) {
 | 
					 | 
				
			||||||
      indicators.push('Strong domain alignment with scenario requirements');
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    if (tool.knowledgebase === true) {
 | 
					 | 
				
			||||||
      indicators.push('Documentation and knowledge base available for guidance');
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    if (isToolHosted(tool)) {
 | 
					 | 
				
			||||||
      indicators.push('Hosted solution available for immediate access');
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    if (tool.type === 'method' && /methodology|approach|process/i.test(context.userQuery)) {
 | 
					 | 
				
			||||||
      indicators.push('Methodological approach matches procedural inquiry');
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    return indicators;
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any {
 | 
					  private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any {
 | 
				
			||||||
    const isWorkflow = mode === 'workflow';
 | 
					    const isWorkflow = mode === 'workflow';
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
@ -1218,13 +1307,12 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    if (isWorkflow) {
 | 
					    if (isWorkflow) {
 | 
				
			||||||
      const recommendedToolsWithConfidence = context.selectedTools?.map(st => {
 | 
					      const recommendedToolsWithConfidence = context.selectedTools?.map(st => {
 | 
				
			||||||
        // Calculate confidence for each tool
 | 
					        // Calculate enhanced confidence for each tool
 | 
				
			||||||
        const confidence = this.calculateRecommendationConfidence(
 | 
					        const confidence = this.calculateRecommendationConfidence(
 | 
				
			||||||
          st.tool,
 | 
					          st.tool,
 | 
				
			||||||
          this.getEmbeddingsSimilarity(st.tool.name, context),
 | 
					          context,
 | 
				
			||||||
          this.checkDomainMatch(st.tool, context.userQuery),
 | 
					          st.taskRelevance || 70,
 | 
				
			||||||
          this.getMicroTaskAgreement(st.tool.name, context),
 | 
					          st.limitations || []
 | 
				
			||||||
          context
 | 
					 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        // Add audit entry for confidence calculation
 | 
					        // Add audit entry for confidence calculation
 | 
				
			||||||
@ -1233,15 +1321,15 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
 | 
				
			|||||||
          { 
 | 
					          { 
 | 
				
			||||||
            overall: confidence.overall,
 | 
					            overall: confidence.overall,
 | 
				
			||||||
            components: {
 | 
					            components: {
 | 
				
			||||||
              embeddings: confidence.embeddingsQuality,
 | 
					              semantic: confidence.semanticRelevance,
 | 
				
			||||||
              domain: confidence.domainAlignment,
 | 
					              suitability: confidence.taskSuitability,
 | 
				
			||||||
              consensus: confidence.consensus,
 | 
					              consistency: confidence.methodologicalConsistency,
 | 
				
			||||||
              freshness: confidence.freshness
 | 
					              reliability: confidence.toolReliability
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
          },
 | 
					          },
 | 
				
			||||||
          confidence.overall,
 | 
					          confidence.overall,
 | 
				
			||||||
          Date.now(),
 | 
					          Date.now(),
 | 
				
			||||||
          { uncertaintyCount: confidence.uncertaintyFactors.length }
 | 
					          { uncertaintyCount: confidence.uncertaintyFactors.length, strengthCount: confidence.strengthIndicators.length }
 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return {
 | 
					        return {
 | 
				
			||||||
@ -1264,10 +1352,9 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
 | 
				
			|||||||
      const recommendedToolsWithConfidence = context.selectedTools?.map(st => {
 | 
					      const recommendedToolsWithConfidence = context.selectedTools?.map(st => {
 | 
				
			||||||
        const confidence = this.calculateRecommendationConfidence(
 | 
					        const confidence = this.calculateRecommendationConfidence(
 | 
				
			||||||
          st.tool,
 | 
					          st.tool,
 | 
				
			||||||
          this.getEmbeddingsSimilarity(st.tool.name, context),
 | 
					          context,
 | 
				
			||||||
          this.checkDomainMatch(st.tool, context.userQuery),
 | 
					          st.taskRelevance || 70,
 | 
				
			||||||
          this.getMicroTaskAgreement(st.tool.name, context),
 | 
					          st.limitations || []
 | 
				
			||||||
          context
 | 
					 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        this.addAuditEntry(context, 'validation', 'confidence-scoring',
 | 
					        this.addAuditEntry(context, 'validation', 'confidence-scoring',
 | 
				
			||||||
@ -1278,7 +1365,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
 | 
				
			|||||||
          },
 | 
					          },
 | 
				
			||||||
          confidence.overall,
 | 
					          confidence.overall,
 | 
				
			||||||
          Date.now(),
 | 
					          Date.now(),
 | 
				
			||||||
          { strengthCount: confidence.strengthIndicators.length }
 | 
					          { strengthCount: confidence.strengthIndicators.length, limitationsCount: confidence.uncertaintyFactors.length }
 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return {
 | 
					        return {
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user