first draft enhancement 2

2025-08-05 13:03:33 +02:00
parent c267681e7d
commit 99117e8e7a
7 changed files with 714 additions and 67 deletions
--- a/src/utils/aiPipeline.ts
+++ b/src/utils/aiPipeline.ts
@@ -3,6 +3,7 @@
 import { getCompressedToolsDataForAI } from './dataService.js';
 import { embeddingsService, type EmbeddingData } from './embeddings.js';
 import { AI_PROMPTS, getPrompt } from '../config/prompts.js';
+import { isToolHosted } from './toolHelpers.js';

 interface AIConfig {
  endpoint: string;
@@ -67,6 +68,16 @@ interface SimilarityResult extends EmbeddingData {
  similarity: number;
 }

+/**
+ * Confidence breakdown attached to a single tool recommendation.
+ * Produced by `calculateRecommendationConfidence`, which rounds every numeric
+ * field to an integer before returning it.
+ */
+interface ConfidenceMetrics {
+  /** Combined confidence score (intended range 0-100): weighted sum of the four component scores below. */
+  overall: number;
+  /** How well embeddings matched: the similarity value scaled so that 0.5 already yields 100. */
+  embeddingsQuality: number;
+  /** How well the tool matches the scenario domain: 90 on a domain match, otherwise 60 if the tool lists any domains, else 30. */
+  domainAlignment: number;
+  /** How much the micro-tasks agree: agreement ratio expressed as a percentage, capped at 100. */
+  consensus: number;
+  /** Heuristic maintenance score computed by `calculateToolFreshness` (knowledge base, hosting and license boosts). */
+  freshness: number;
+  /** Human-readable reasons why this recommendation could be wrong. */
+  uncertaintyFactors: string[];
+  /** Human-readable reasons why this recommendation is strong. */
+  strengthIndicators: string[];
+}
+

 class ImprovedMicroTaskAIPipeline {
  private config: AIConfig;
@@ -92,6 +103,16 @@ class ImprovedMicroTaskAIPipeline {
    detailLevel: 'minimal' | 'standard' | 'verbose';
    retentionHours: number;
  };
+
+  /**
+   * Tunable parameters for confidence scoring. The values are read from the
+   * CONFIDENCE_* environment variables (with built-in defaults) where this
+   * field is assigned.
+   */
+  private confidenceConfig: {
+    // Weights multiplied with the matching component score when the overall
+    // confidence is computed.
+    embeddingsWeight: number;
+    consensusWeight: number;
+    domainMatchWeight: number;
+    freshnessWeight: number;
+    // Score cut-offs. A recommendation is labelled 'strong' at or above
+    // highThreshold, 'moderate' at or above mediumThreshold and 'weak' below.
+    // NOTE(review): minimumThreshold is configured and logged but not otherwise
+    // used in the visible code - confirm the intended use.
+    minimumThreshold: number;
+    mediumThreshold: number;
+    highThreshold: number;
+  };
  
  private tempAuditEntries: AuditEntry[] = [];

@@ -131,6 +152,21 @@ class ImprovedMicroTaskAIPipeline {
      noEmbeddingsLimits: `${this.noEmbeddingsToolLimit || 'unlimited'} tools, ${this.noEmbeddingsConceptLimit || 'unlimited'} concepts`,
      auditEnabled: this.auditConfig.enabled
    });
+
+    this.confidenceConfig = {
+      embeddingsWeight: parseFloat(process.env.CONFIDENCE_EMBEDDINGS_WEIGHT || '0.3'),
+      consensusWeight: parseFloat(process.env.CONFIDENCE_CONSENSUS_WEIGHT || '0.25'), 
+      domainMatchWeight: parseFloat(process.env.CONFIDENCE_DOMAIN_MATCH_WEIGHT || '0.25'),
+      freshnessWeight: parseFloat(process.env.CONFIDENCE_FRESHNESS_WEIGHT || '0.2'),
+      minimumThreshold: parseInt(process.env.CONFIDENCE_MINIMUM_THRESHOLD || '40', 10),
+      mediumThreshold: parseInt(process.env.CONFIDENCE_MEDIUM_THRESHOLD || '60', 10),
+      highThreshold: parseInt(process.env.CONFIDENCE_HIGH_THRESHOLD || '80', 10)
+    };
+    
+    console.log('[AI PIPELINE] Confidence scoring enabled:', {
+      weights: `E:${this.confidenceConfig.embeddingsWeight} C:${this.confidenceConfig.consensusWeight} D:${this.confidenceConfig.domainMatchWeight} F:${this.confidenceConfig.freshnessWeight}`,
+      thresholds: `${this.confidenceConfig.minimumThreshold}/${this.confidenceConfig.mediumThreshold}/${this.confidenceConfig.highThreshold}`
+    });
  }

  private getEnv(key: string): string {
@@ -662,6 +698,40 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
    }
  }

+  /**
+   * Computes the confidence breakdown for one recommended tool.
+   *
+   * Four component scores (embeddings quality, domain alignment, micro-task
+   * consensus, freshness) are normalised to 0-100 and combined with the
+   * weights from `confidenceConfig`. All scores, including the combined one,
+   * are clamped to 0-100 so that negative similarities, NaN inputs or
+   * environment-supplied weights that do not sum to 1 cannot produce values
+   * outside the documented range.
+   *
+   * @param tool - Tool record being scored; `domains` is read here and the
+   *   record is forwarded to the freshness and factor helpers.
+   * @param embeddingsSimilarity - Similarity value; 0.5 or more maps to 100.
+   * @param domainMatch - Whether the tool's domains matched the user query.
+   * @param microTaskAgreement - Share of selection micro-tasks that picked the
+   *   tool, as a ratio (1 maps to 100).
+   * @param context - Analysis context forwarded to the uncertainty and
+   *   strength helpers.
+   * @returns Rounded component scores, the rounded overall score and the
+   *   explanatory uncertainty/strength lists.
+   */
+  private calculateRecommendationConfidence(
+    tool: any,
+    embeddingsSimilarity: number,
+    domainMatch: boolean,
+    microTaskAgreement: number,
+    context: AnalysisContext
+  ): ConfidenceMetrics {
+    // NaN (e.g. from an unparsable weight) counts as 0 instead of propagating
+    // through every downstream comparison and Math.round call.
+    const toPercent = (value: number): number =>
+      Number.isNaN(value) ? 0 : Math.min(100, Math.max(0, value));
+
+    // Doubling means a similarity of 0.5 is already treated as a full match.
+    const embeddingsQuality = toPercent(embeddingsSimilarity * 100 * 2);
+    const domainAlignment = domainMatch ? 90 : (tool.domains?.length > 0 ? 60 : 30);
+    const consensus = toPercent(microTaskAgreement * 100);
+    const freshness = toPercent(this.calculateToolFreshness(tool));
+
+    const { embeddingsWeight, domainMatchWeight, consensusWeight, freshnessWeight } = this.confidenceConfig;
+    const overall = toPercent(
+      embeddingsQuality * embeddingsWeight +
+      domainAlignment * domainMatchWeight +
+      consensus * consensusWeight +
+      freshness * freshnessWeight
+    );
+
+    const uncertaintyFactors = this.identifyUncertaintyFactors(tool, context, overall);
+    const strengthIndicators = this.identifyStrengthIndicators(tool, context, overall);
+
+    return {
+      overall: Math.round(overall),
+      embeddingsQuality: Math.round(embeddingsQuality),
+      domainAlignment: Math.round(domainAlignment),
+      consensus: Math.round(consensus),
+      freshness: Math.round(freshness),
+      uncertaintyFactors,
+      strengthIndicators
+    };
+  }
+
  private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
    const isWorkflow = context.mode === 'workflow';
    const prompt = getPrompt('scenarioAnalysis', isWorkflow, context.userQuery);
@@ -1010,6 +1080,124 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
    }
  }

+  /**
+   * Heuristic "freshness" score for a tool, capped at 100.
+   *
+   * Starts from a baseline of 70 (used when nothing else is known) and adds a
+   * fixed bonus for each maintenance signal present on the tool record.
+   *
+   * @param tool - Tool record; reads `knowledgebase` and `license`, and is
+   *   passed to `isToolHosted`.
+   * @returns Score between 70 and 100.
+   */
+  private calculateToolFreshness(tool: any): number {
+    const baseline = 70;
+
+    // [signal present?, bonus points]
+    const bonuses: Array<[boolean, number]> = [
+      [tool.knowledgebase === true, 20],                                 // has a knowledge base
+      [isToolHosted(tool), 15],                                          // hosted instance available
+      [Boolean(tool.license) && tool.license !== 'Proprietary', 5]       // any non-proprietary license
+    ];
+
+    const score = bonuses.reduce(
+      (total, [applies, points]) => (applies ? total + points : total),
+      baseline
+    );
+
+    return Math.min(100, score);
+  }
+
+  /**
+   * Reports whether any of the tool's domains is reflected in the user query.
+   *
+   * Keywords per domain come from the `CONFIDENCE_DOMAIN_KEYWORDS` environment
+   * variable, formatted as `domain:kw1,kw2|otherDomain:kw3`. A domain without
+   * configured keywords falls back to its own name with hyphens replaced by
+   * spaces. Matching is a case-insensitive substring test against the query.
+   *
+   * Malformed or empty entries in the variable (including the variable being
+   * unset) are ignored rather than throwing, and empty keywords are dropped
+   * because an empty string would match every query.
+   *
+   * @param tool - Tool record; only `domains` is read.
+   * @param userQuery - Raw user query text.
+   * @returns `true` if at least one keyword of at least one domain occurs in
+   *   the query; `false` if the tool has no domains or nothing matches.
+   */
+  private checkDomainMatch(tool: any, userQuery: string): boolean {
+    if (!tool.domains || tool.domains.length === 0) return false;
+
+    const queryLower = userQuery.toLowerCase();
+
+    const domainKeywords = new Map<string, string[]>();
+    for (const pair of (process.env.CONFIDENCE_DOMAIN_KEYWORDS || '').split('|')) {
+      const separatorIndex = pair.indexOf(':');
+      // No ':' or nothing before it: not a usable "domain:keywords" entry.
+      if (separatorIndex <= 0) continue;
+
+      const domain = pair.slice(0, separatorIndex).trim();
+      const keywords = pair
+        .slice(separatorIndex + 1)
+        .split(',')
+        .map(keyword => keyword.trim().toLowerCase())
+        .filter(keyword => keyword.length > 0);
+
+      if (domain.length > 0 && keywords.length > 0) {
+        domainKeywords.set(domain, keywords);
+      }
+    }
+
+    return tool.domains.some((domain: unknown) => {
+      if (typeof domain !== 'string' || domain.length === 0) return false;
+      // Global regex: a string pattern would only replace the first hyphen.
+      const keywords = domainKeywords.get(domain) ?? [domain.replace(/-/g, ' ').toLowerCase()];
+      return keywords.some(keyword => queryLower.includes(keyword));
+    });
+  }
+
+  /**
+   * Measures how many selection micro-tasks in the audit trail picked a tool.
+   *
+   * An audit entry counts as a selection micro-task when its phase is
+   * 'micro-task' and its action contains 'selection'. It counts as agreeing
+   * when its output object carries a `selectedTools` array that includes
+   * `toolName`.
+   *
+   * @param toolName - Name of the tool to look for.
+   * @param context - Analysis context whose `auditTrail` is scanned.
+   * @returns Agreeing entries divided by all selection micro-task entries, or
+   *   0.8 when the trail contains no selection micro-task at all.
+   */
+  private getMicroTaskAgreement(toolName: string, context: AnalysisContext): number {
+    let selectionTasks = 0;
+    let tasksPickingTool = 0;
+
+    for (const entry of context.auditTrail) {
+      const isSelectionTask = entry.phase === 'micro-task' && entry.action.includes('selection');
+      if (!isSelectionTask) continue;
+
+      selectionTasks += 1;
+
+      const pickedTool =
+        entry.output &&
+        typeof entry.output === 'object' &&
+        Array.isArray(entry.output.selectedTools) &&
+        entry.output.selectedTools.includes(toolName);
+      if (pickedTool) tasksPickingTool += 1;
+    }
+
+    // Without any selection micro-task there is nothing to disagree, so a
+    // fixed, fairly high default is reported.
+    if (selectionTasks === 0) return 0.8;
+    return tasksPickingTool / selectionTasks;
+  }
+
+  /**
+   * Returns a similarity estimate for a tool based on the audit trail.
+   *
+   * Placeholder implementation: it only checks whether the first
+   * 'retrieval' / 'embeddings-search' audit entry exists and has an output.
+   * The per-tool similarity is not read yet, so `toolName` is currently
+   * unused; real values would have to be recorded during the embeddings search.
+   *
+   * @param toolName - Tool name (not yet used).
+   * @param context - Analysis context whose `auditTrail` is inspected.
+   * @returns 0.7 when an embeddings search with output was recorded,
+   *   otherwise 0.5.
+   */
+  private getEmbeddingsSimilarity(toolName: string, context: AnalysisContext): number {
+    const searchEntry = context.auditTrail.find(
+      ({ phase, action }) => phase === 'retrieval' && action === 'embeddings-search'
+    );
+
+    const hasSearchOutput = Boolean(searchEntry && searchEntry.output);
+
+    // 0.7: fixed stand-in while no actual similarity data is available.
+    // 0.5: medium default when no embeddings search output was recorded.
+    return hasSearchOutput ? 0.7 : 0.5;
+  }
+
+  /**
+   * Collects human-readable reasons why a recommendation might be unreliable.
+   *
+   * Each rule pairs a condition with its message; messages are returned in
+   * rule order for every condition that holds.
+   *
+   * @param tool - Tool record; reads `skillLevel`, `type` and `url`.
+   * @param context - Analysis context; `userQuery` is matched against keywords.
+   * @param confidence - Overall confidence score, compared with `mediumThreshold`.
+   * @returns Messages of all rules that apply (possibly empty).
+   */
+  private identifyUncertaintyFactors(tool: any, context: AnalysisContext, confidence: number): string[] {
+    const query = context.userQuery;
+
+    const rules: Array<[boolean, string]> = [
+      [
+        confidence < this.confidenceConfig.mediumThreshold,
+        'Low overall confidence - consider manual validation'
+      ],
+      [
+        !this.checkDomainMatch(tool, query),
+        'Domain mismatch detected - tool may not be specifically designed for this scenario'
+      ],
+      [
+        tool.skillLevel === 'expert' && /rapid|quick|urgent|triage/i.test(query),
+        'Expert-level tool for rapid scenario - may be overcomplicated'
+      ],
+      [
+        tool.type === 'software' && !isToolHosted(tool) && !tool.url,
+        'Limited access information - availability uncertain'
+      ],
+      [
+        tool.skillLevel === 'novice' && /complex|advanced|deep/i.test(query),
+        'Novice-level tool for complex scenario - may lack required capabilities'
+      ]
+    ];
+
+    return rules.filter(([applies]) => applies).map(([, message]) => message);
+  }
+
+  /**
+   * Collects human-readable reasons why a recommendation is considered strong.
+   *
+   * @param tool - Tool record; reads `knowledgebase` and `type`, and is passed
+   *   to `isToolHosted` and `checkDomainMatch`.
+   * @param context - Analysis context; `userQuery` is matched against keywords.
+   * @param confidence - Overall confidence score, compared with `highThreshold`.
+   * @returns Messages for every indicator that applies, in check order
+   *   (possibly empty).
+   */
+  private identifyStrengthIndicators(tool: any, context: AnalysisContext, confidence: number): string[] {
+    const indicators: string[] = [];
+    const noteIf = (applies: boolean, message: string): void => {
+      if (applies) indicators.push(message);
+    };
+
+    noteIf(
+      confidence >= this.confidenceConfig.highThreshold,
+      'High confidence recommendation based on multiple factors'
+    );
+    noteIf(
+      this.checkDomainMatch(tool, context.userQuery),
+      'Strong domain alignment with scenario requirements'
+    );
+    noteIf(
+      tool.knowledgebase === true,
+      'Documentation and knowledge base available for guidance'
+    );
+    noteIf(
+      isToolHosted(tool),
+      'Hosted solution available for immediate access'
+    );
+    noteIf(
+      tool.type === 'method' && /methodology|approach|process/i.test(context.userQuery),
+      'Methodological approach matches procedural inquiry'
+    );
+
+    return indicators;
+  }
+
  private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any {
    const isWorkflow = mode === 'workflow';
    
@@ -1025,20 +1213,71 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
    };

    if (isWorkflow) {
-      return {
-        ...base,
-        recommended_tools: context.selectedTools?.map(st => ({
+      const recommendedToolsWithConfidence = context.selectedTools?.map(st => {
+        // Calculate confidence for each tool
+        const confidence = this.calculateRecommendationConfidence(
+          st.tool,
+          this.getEmbeddingsSimilarity(st.tool.name, context),
+          this.checkDomainMatch(st.tool, context.userQuery),
+          this.getMicroTaskAgreement(st.tool.name, context),
+          context
+        );
+        
+        // Add audit entry for confidence calculation
+        this.addAuditEntry(context, 'validation', 'confidence-scoring',
+          { toolName: st.tool.name, phase: st.phase },
+          { 
+            overall: confidence.overall,
+            components: {
+              embeddings: confidence.embeddingsQuality,
+              domain: confidence.domainAlignment,
+              consensus: confidence.consensus,
+              freshness: confidence.freshness
+            }
+          },
+          confidence.overall,
+          Date.now(),
+          { uncertaintyCount: confidence.uncertaintyFactors.length }
+        );
+
+        return {
          name: st.tool.name,
          phase: st.phase,
          priority: st.priority,
-          justification: st.justification || `Empfohlen für ${st.phase}`
-        })) || [],
+          justification: st.justification || `Empfohlen für ${st.phase}`,
+          confidence: confidence,
+          recommendationStrength: confidence.overall >= this.confidenceConfig.highThreshold ? 'strong' : 
+                                confidence.overall >= this.confidenceConfig.mediumThreshold ? 'moderate' : 'weak'
+        };
+      }) || [];
+
+      return {
+        ...base,
+        recommended_tools: recommendedToolsWithConfidence,
        workflow_suggestion: finalContent
      };
    } else {
-      return {
-        ...base,
-        recommended_tools: context.selectedTools?.map(st => ({
+      const recommendedToolsWithConfidence = context.selectedTools?.map(st => {
+        const confidence = this.calculateRecommendationConfidence(
+          st.tool,
+          this.getEmbeddingsSimilarity(st.tool.name, context),
+          this.checkDomainMatch(st.tool, context.userQuery),
+          this.getMicroTaskAgreement(st.tool.name, context),
+          context
+        );
+        
+        this.addAuditEntry(context, 'validation', 'confidence-scoring',
+          { toolName: st.tool.name, rank: st.tool.evaluation?.rank || 1 },
+          { 
+            overall: confidence.overall,
+            suitabilityAlignment: st.priority === 'high' && confidence.overall >= this.confidenceConfig.highThreshold
+          },
+          confidence.overall,
+          Date.now(),
+          { strengthCount: confidence.strengthIndicators.length }
+        );
+
+        return {
          name: st.tool.name,
          rank: st.tool.evaluation?.rank || 1,
          suitability_score: st.priority,
@@ -1046,8 +1285,16 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
          implementation_approach: st.tool.evaluation?.implementation_approach || '',
          pros: st.tool.evaluation?.pros || [],
          cons: st.tool.evaluation?.cons || [],
-          alternatives: st.tool.evaluation?.alternatives || ''
-        })) || [],
+          alternatives: st.tool.evaluation?.alternatives || '',
+          confidence: confidence,
+          recommendationStrength: confidence.overall >= this.confidenceConfig.highThreshold ? 'strong' : 
+                                confidence.overall >= this.confidenceConfig.mediumThreshold ? 'moderate' : 'weak'
+        };
+      }) || [];
+
+      return {
+        ...base,
+        recommended_tools: recommendedToolsWithConfidence,
        additional_considerations: finalContent
      };
    }