fix audit trail
This commit is contained in:
		
							parent
							
								
									3d5d2506e9
								
							
						
					
					
						commit
						28af56d6ef
					
				@ -1131,10 +1131,55 @@ class AIQueryInterface {
 | 
			
		||||
    const lowConfidenceSteps = auditTrail.filter(entry => (entry.confidence || 0) < 60).length;
 | 
			
		||||
    const mediumConfidenceSteps = auditTrail.length - highConfidenceSteps - lowConfidenceSteps;
 | 
			
		||||
 | 
			
		||||
    // FIX 1: Count actual AI decision actions only
 | 
			
		||||
    const aiDecisionCount = auditTrail.filter(entry => entry.action === 'ai-decision').length;
 | 
			
		||||
    const embeddingsUsageCount = auditTrail.filter(entry => entry.metadata?.embeddingsUsed).length;
 | 
			
		||||
    
 | 
			
		||||
    // FIX 2: Count actual similarity search actions, not metadata flags
 | 
			
		||||
    const embeddingsUsageCount = auditTrail.filter(entry => entry.action === 'similarity-search').length;
 | 
			
		||||
    
 | 
			
		||||
    // FIX 3: Maintain tool selection count (this was correct)
 | 
			
		||||
    const toolSelectionCount = auditTrail.filter(entry => entry.action === 'selection-decision').length;
 | 
			
		||||
 | 
			
		||||
    // Additional diagnostic counts for debugging
 | 
			
		||||
    const microTaskCount = auditTrail.filter(entry => 
 | 
			
		||||
      entry.action === 'ai-decision' && entry.metadata?.microTaskType
 | 
			
		||||
    ).length;
 | 
			
		||||
    
 | 
			
		||||
    const phaseToolSelectionCount = auditTrail.filter(entry => 
 | 
			
		||||
      entry.action === 'phase-tool-selection'
 | 
			
		||||
    ).length;
 | 
			
		||||
    
 | 
			
		||||
    const phaseEnhancementCount = auditTrail.filter(entry => 
 | 
			
		||||
      entry.action === 'phase-enhancement'
 | 
			
		||||
    ).length;
 | 
			
		||||
 | 
			
		||||
    // Enhanced insights with diagnostic information
 | 
			
		||||
    const keyInsights = [];
 | 
			
		||||
    const potentialIssues = [];
 | 
			
		||||
    
 | 
			
		||||
    if (embeddingsUsageCount > 0) {
 | 
			
		||||
      keyInsights.push(`Semantische Suche wurde ${embeddingsUsageCount}x erfolgreich eingesetzt`);
 | 
			
		||||
    } else {
 | 
			
		||||
      potentialIssues.push('Keine semantischen Suchen dokumentiert - möglicherweise fehlerhafte Auditierung');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (aiDecisionCount >= 5) {
 | 
			
		||||
      keyInsights.push(`${aiDecisionCount} KI-Entscheidungen mit detaillierter Begründung`);
 | 
			
		||||
    } else {
 | 
			
		||||
      potentialIssues.push(`Nur ${aiDecisionCount} KI-Entscheidungen dokumentiert - erwartet mindestens 5 für Vollständigkeit`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (microTaskCount > 0) {
 | 
			
		||||
      keyInsights.push(`${microTaskCount} spezialisierte Micro-Task-Analysen durchgeführt`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Detect mode-specific patterns for validation
 | 
			
		||||
    if (phaseToolSelectionCount > 0 || phaseEnhancementCount > 0) {
 | 
			
		||||
      keyInsights.push('Workflow-Modus: Phasenspezifische Analyse durchgeführt');
 | 
			
		||||
    } else if (microTaskCount >= 3) {
 | 
			
		||||
      keyInsights.push('Tool-Modus: Detaillierte Einzelbewertungen durchgeführt');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const phaseBreakdown = {};
 | 
			
		||||
    auditTrail.forEach(entry => {
 | 
			
		||||
      const phase = entry.phase || 'unknown';
 | 
			
		||||
@ -1168,76 +1213,21 @@ class AIQueryInterface {
 | 
			
		||||
      analysisQuality = 'poor';
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const keyInsights = [];
 | 
			
		||||
    const embeddingsUsed = auditTrail.some(e => e.metadata?.embeddingsUsed);
 | 
			
		||||
    if (embeddingsUsed) {
 | 
			
		||||
      keyInsights.push('Semantische Suche wurde erfolgreich eingesetzt');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const aiDecisionsWithReasoning = auditTrail.filter(e => 
 | 
			
		||||
      e.action === 'ai-decision' && e.metadata?.reasoning
 | 
			
		||||
    ).length;
 | 
			
		||||
    if (aiDecisionsWithReasoning > 0) {
 | 
			
		||||
      keyInsights.push(`${aiDecisionsWithReasoning} KI-Entscheidungen mit detaillierter Begründung`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (highConfidenceSteps > auditTrail.length * 0.7) {
 | 
			
		||||
      keyInsights.push('Mehrheit der Analyseschritte mit hoher Sicherheit');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const responseQualityEntries = auditTrail.filter(e => 
 | 
			
		||||
      e.metadata?.responseConfidence && e.metadata.finalConfidence
 | 
			
		||||
    );
 | 
			
		||||
    if (responseQualityEntries.length > 0) {
 | 
			
		||||
      const avgResponseQuality = responseQualityEntries.reduce((sum, e) => 
 | 
			
		||||
        sum + (e.metadata.responseConfidence || 0), 0
 | 
			
		||||
      ) / responseQualityEntries.length;
 | 
			
		||||
      
 | 
			
		||||
      if (avgResponseQuality >= 70) {
 | 
			
		||||
        keyInsights.push(`Hohe AI-Antwortqualität (∅ ${Math.round(avgResponseQuality)}%)`);
 | 
			
		||||
      }
 | 
			
		||||
    // Validate expected counts based on mode detection
 | 
			
		||||
    const isWorkflowMode = phaseToolSelectionCount > 0 || phaseEnhancementCount > 0;
 | 
			
		||||
    const expectedMinAI = isWorkflowMode ? 11 : 8; // Workflow: 5 common + 6 phase selections, Tool: 5 common + 3 evaluations
 | 
			
		||||
    const expectedMinEmbeddings = 1; // Both modes should have initial search
 | 
			
		||||
 | 
			
		||||
    if (aiDecisionCount < expectedMinAI) {
 | 
			
		||||
      potentialIssues.push(`${expectedMinAI - aiDecisionCount} fehlende KI-Entscheidungen für ${isWorkflowMode ? 'Workflow' : 'Tool'}-Modus`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const potentialIssues = [];
 | 
			
		||||
    if (lowConfidenceSteps > 2) {
 | 
			
		||||
      potentialIssues.push(`${lowConfidenceSteps} Analyseschritte mit niedriger Konfidenz`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // FIXED: Only detect actual AI incompleteness, not display truncation
 | 
			
		||||
    // The old code incorrectly flagged display truncation as incomplete responses:
 | 
			
		||||
    // OLD (WRONG): e.output.response && e.output.response.includes('...')
 | 
			
		||||
    
 | 
			
		||||
    // NEW (CORRECT): Check metadata.aiResponse for actual incompleteness
 | 
			
		||||
    const incompleteAIResponses = auditTrail.filter(e => 
 | 
			
		||||
      e.action === 'ai-decision' && 
 | 
			
		||||
      e.metadata?.aiResponse && 
 | 
			
		||||
      (
 | 
			
		||||
        // Detect actual AI incompleteness patterns:
 | 
			
		||||
        e.metadata.aiResponse.trim().length < 10 || // Very short response
 | 
			
		||||
        e.metadata.aiResponse.endsWith('...') || // AI itself truncated (rare but possible)
 | 
			
		||||
        e.metadata.aiResponse.includes('[TRUNCATED]') || // Explicit truncation marker
 | 
			
		||||
        e.metadata.aiResponse.includes('I cannot continue') || // AI stopped unexpectedly
 | 
			
		||||
        e.metadata.aiResponse.includes('I need to stop here') || // AI indicated incompleteness
 | 
			
		||||
        e.metadata.aiResponse.includes('[RESPONSE_TOO_LONG]') || // Length limit hit
 | 
			
		||||
        // Also check if the AI response seems cut off mid-sentence
 | 
			
		||||
        (e.metadata.aiResponse.length > 50 && 
 | 
			
		||||
        !e.metadata.aiResponse.trim().match(/[.!?:]$/)) // Doesn't end with proper punctuation
 | 
			
		||||
      )
 | 
			
		||||
    ).length;
 | 
			
		||||
    
 | 
			
		||||
    if (incompleteAIResponses > 0) {
 | 
			
		||||
      potentialIssues.push(`${incompleteAIResponses} möglicherweise unvollständige AI-Antworten`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Additional quality checks
 | 
			
		||||
    const veryShortResponses = auditTrail.filter(e => 
 | 
			
		||||
      e.action === 'ai-decision' && 
 | 
			
		||||
      e.metadata?.aiResponse && 
 | 
			
		||||
      e.metadata.aiResponse.trim().length < 20
 | 
			
		||||
    ).length;
 | 
			
		||||
    
 | 
			
		||||
    if (veryShortResponses > 1) {
 | 
			
		||||
      potentialIssues.push(`${veryShortResponses} ungewöhnlich kurze AI-Antworten`);
 | 
			
		||||
    if (embeddingsUsageCount < expectedMinEmbeddings) {
 | 
			
		||||
      potentialIssues.push(`${expectedMinEmbeddings - embeddingsUsageCount} fehlende semantische Suchen`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return {
 | 
			
		||||
@ -1260,7 +1250,14 @@ class AIQueryInterface {
 | 
			
		||||
      },
 | 
			
		||||
      analysisQuality,
 | 
			
		||||
      keyInsights,
 | 
			
		||||
      potentialIssues
 | 
			
		||||
      potentialIssues,
 | 
			
		||||
      // Debug information
 | 
			
		||||
      debugCounts: {
 | 
			
		||||
        microTaskCount,
 | 
			
		||||
        phaseToolSelectionCount, 
 | 
			
		||||
        phaseEnhancementCount,
 | 
			
		||||
        detectedMode: isWorkflowMode ? 'workflow' : 'tool'
 | 
			
		||||
      }
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1184,6 +1184,37 @@ class AIPipeline {
 | 
			
		||||
    try {
 | 
			
		||||
      const response = await aiService.callMicroTaskAI(contextPrompt);
 | 
			
		||||
      
 | 
			
		||||
      // FIX: Ensure ALL AI calls generate audit entries
 | 
			
		||||
      const toolsDataHash = getDataVersion?.() || 'unknown';
 | 
			
		||||
      const aiConfig = aiService.getConfig();
 | 
			
		||||
      
 | 
			
		||||
      // Calculate response confidence for audit trail
 | 
			
		||||
      const responseConfidence = auditService.calculateAIResponseConfidence(
 | 
			
		||||
        response.content,
 | 
			
		||||
        this.getExpectedLengthForTaskType(taskType),
 | 
			
		||||
        taskType
 | 
			
		||||
      );
 | 
			
		||||
      
 | 
			
		||||
      // FIX: Always add AI decision audit entry for micro-tasks
 | 
			
		||||
      auditService.addAIDecision(
 | 
			
		||||
        this.getPhaseForTaskType(taskType),
 | 
			
		||||
        prompt, // Store original prompt without context
 | 
			
		||||
        response.content,
 | 
			
		||||
        responseConfidence,
 | 
			
		||||
        this.getReasoningForTaskType(taskType, response.content),
 | 
			
		||||
        startTime,
 | 
			
		||||
        {
 | 
			
		||||
          toolsDataHash: toolsDataHash,
 | 
			
		||||
          microTaskType: taskType,
 | 
			
		||||
          aiModel: aiConfig.model,
 | 
			
		||||
          contextLength: contextPrompt.length,
 | 
			
		||||
          originalPromptLength: prompt.length,
 | 
			
		||||
          contextHistoryUsed: context.contextHistory.length > 0,
 | 
			
		||||
          decisionBasis: 'ai-analysis',
 | 
			
		||||
          ...response.usage
 | 
			
		||||
        }
 | 
			
		||||
      );
 | 
			
		||||
      
 | 
			
		||||
      return {
 | 
			
		||||
        taskType,
 | 
			
		||||
        content: response.content,
 | 
			
		||||
@ -1193,6 +1224,29 @@ class AIPipeline {
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      // FIX: Also audit failed AI calls for completeness
 | 
			
		||||
      auditService.addEntry(
 | 
			
		||||
        this.getPhaseForTaskType(taskType),
 | 
			
		||||
        'ai-decision-failed',
 | 
			
		||||
        { 
 | 
			
		||||
          prompt: prompt.slice(0, 200) + '...',
 | 
			
		||||
          taskType: taskType,
 | 
			
		||||
          error: error.message
 | 
			
		||||
        },
 | 
			
		||||
        { 
 | 
			
		||||
          error: error.message,
 | 
			
		||||
          success: false
 | 
			
		||||
        },
 | 
			
		||||
        0, // Zero confidence for failed calls
 | 
			
		||||
        startTime,
 | 
			
		||||
        {
 | 
			
		||||
          toolsDataHash: getDataVersion?.() || 'unknown',
 | 
			
		||||
          microTaskType: taskType,
 | 
			
		||||
          failed: true,
 | 
			
		||||
          decisionBasis: 'ai-analysis'
 | 
			
		||||
        }
 | 
			
		||||
      );
 | 
			
		||||
      
 | 
			
		||||
      return {
 | 
			
		||||
        taskType,
 | 
			
		||||
        content: '',
 | 
			
		||||
@ -1203,6 +1257,51 @@ class AIPipeline {
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Maps a micro-task type to the phase name it is reported under.
   *
   * @param taskType - Micro-task identifier such as 'tool-evaluation'.
   * @returns The phase name registered for the task type. Task types without
   *          a (truthy) entry in the table resolve to 'contextual-analysis'.
   */
  private getPhaseForTaskType(taskType: string): string {
    const fallbackPhase = 'contextual-analysis';

    // Several task types intentionally share one phase.
    const phaseByTaskType: Record<string, string> = {
      'scenario-analysis': 'contextual-analysis',
      'investigation-approach': 'contextual-analysis',
      'critical-considerations': 'contextual-analysis',
      'tool-evaluation': 'tool-evaluation',
      'background-knowledge': 'knowledge-synthesis',
      'final-recommendations': 'synthesis',
      'phase-completion-selection': 'phase-completion',
      'phase-completion-reasoning': 'phase-completion'
    };

    const resolvedPhase = phaseByTaskType[taskType];
    if (resolvedPhase) {
      return resolvedPhase;
    }
    return fallbackPhase;
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Looks up the expected response-length window for a micro-task type.
   *
   * NOTE(review): the unit of min/max is not visible here — presumably
   * characters; confirm against the consumer of this value.
   *
   * @param taskType - Micro-task identifier such as 'scenario-analysis'.
   * @returns An object with `min` and `max` bounds. Task types without an
   *          entry in the table get `{ min: 50, max: 300 }`.
   */
  private getExpectedLengthForTaskType(taskType: string): { min: number; max: number } {
    // Small factory so each table row reads as "lower bound, upper bound".
    const between = (min: number, max: number): { min: number; max: number } => ({ min, max });

    const expectedLengths: Record<string, { min: number; max: number }> = {
      'scenario-analysis': between(100, 500),
      'investigation-approach': between(100, 400),
      'critical-considerations': between(80, 300),
      'tool-evaluation': between(200, 800),
      'background-knowledge': between(50, 300),
      'final-recommendations': between(150, 600),
      'phase-completion-selection': between(50, 200),
      'phase-completion-reasoning': between(100, 300)
    };

    const configured = expectedLengths[taskType];
    return configured ? configured : between(50, 300);
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Builds the German reasoning sentence describing an AI-generated
   * micro-task response.
   *
   * @param taskType - Micro-task identifier; translated to a German display
   *                   name when one is registered, otherwise used verbatim.
   * @param response - The AI response text; only its length is used.
   * @returns A sentence containing the display name and the response length
   *          (the `length` of the string) followed by a fixed description.
   */
  private getReasoningForTaskType(taskType: string, response: string): string {
    // German display names shown in the generated sentence.
    const displayNames: Record<string, string> = {
      'scenario-analysis': 'Szenario-Analyse',
      'investigation-approach': 'Untersuchungsansatz',
      'critical-considerations': 'Kritische Überlegungen',
      'tool-evaluation': 'Tool-Bewertung',
      'background-knowledge': 'Hintergrundwissen-Auswahl',
      'final-recommendations': 'Abschließende Empfehlungen',
      'phase-completion-selection': 'Phasen-Vervollständigung',
      'phase-completion-reasoning': 'Phasen-Begründung'
    };

    const charCount = response.length;
    const registeredName = displayNames[taskType];
    const label = registeredName ? registeredName : taskType;

    return `KI generierte ${label} (${charCount} Zeichen) - forensisch fundierte Analyse mit methodischer Begründung`;
  }
 | 
			
		||||
 | 
			
		||||
  private addToContextHistory(context: PipelineContext, newEntry: string): void {
 | 
			
		||||
    const entryTokens = aiService.estimateTokens(newEntry);
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
@ -99,6 +99,9 @@ class ToolSelector {
 | 
			
		||||
    
 | 
			
		||||
    console.log('[TOOL-SELECTOR] Using embeddings for candidate selection');
 | 
			
		||||
    
 | 
			
		||||
    // FIX: Record the start time for audit trail
 | 
			
		||||
    const embeddingsSearchStart = Date.now();
 | 
			
		||||
    
 | 
			
		||||
    const similarItems = await embeddingsService.findSimilar(
 | 
			
		||||
      userQuery,
 | 
			
		||||
      this.config.embeddingCandidates,
 | 
			
		||||
@ -107,6 +110,27 @@ class ToolSelector {
 | 
			
		||||
    
 | 
			
		||||
    console.log('[TOOL-SELECTOR] Embeddings found', similarItems.length, 'similar items');
 | 
			
		||||
    
 | 
			
		||||
    // FIX: Import and use auditService to record this embeddings search
 | 
			
		||||
    const { auditService } = await import('./auditService.js');
 | 
			
		||||
    const { getDataVersion } = await import('./dataService.js');
 | 
			
		||||
    
 | 
			
		||||
    const toolsDataHash = getDataVersion() || 'unknown';
 | 
			
		||||
    
 | 
			
		||||
    // FIX: Add audit entry for initial embeddings search that happens in BOTH modes
 | 
			
		||||
    auditService.addEmbeddingsSearch(
 | 
			
		||||
      userQuery,
 | 
			
		||||
      similarItems,
 | 
			
		||||
      this.config.similarityThreshold,
 | 
			
		||||
      embeddingsSearchStart,
 | 
			
		||||
      {
 | 
			
		||||
        toolsDataHash: toolsDataHash,
 | 
			
		||||
        selectionPhase: 'initial-candidate-selection',
 | 
			
		||||
        candidateLimit: this.config.embeddingCandidates,
 | 
			
		||||
        mode: mode,
 | 
			
		||||
        reasoning: `Initiale semantische Suche für ${mode}-Modus - Reduzierung der ${toolsData.tools.length} verfügbaren Tools auf ${similarItems.length} relevante Kandidaten`
 | 
			
		||||
      }
 | 
			
		||||
    );
 | 
			
		||||
    
 | 
			
		||||
    similarItems.forEach(item => {
 | 
			
		||||
      context.embeddingsSimilarities.set(item.name, item.similarity);
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user