fix audit trail

2025-08-18 00:08:57 +02:00
parent 3d5d2506e9
commit 28af56d6ef
3 changed files with 186 additions and 66 deletions
--- a/src/components/AIQueryInterface.astro
+++ b/src/components/AIQueryInterface.astro
@@ -1131,10 +1131,55 @@ class AIQueryInterface {
    const lowConfidenceSteps = auditTrail.filter(entry => (entry.confidence || 0) < 60).length;
    const mediumConfidenceSteps = auditTrail.length - highConfidenceSteps - lowConfidenceSteps;
    // FIX 1: Count actual AI decision actions only
    const aiDecisionCount = auditTrail.filter(entry => entry.action === 'ai-decision').length;
-    const embeddingsUsageCount = auditTrail.filter(entry => entry.metadata?.embeddingsUsed).length;
+    
    // FIX 2: Count actual similarity search actions, not metadata flags
    const embeddingsUsageCount = auditTrail.filter(entry => entry.action === 'similarity-search').length;
    // FIX 3: Maintain tool selection count (this was correct)
    const toolSelectionCount = auditTrail.filter(entry => entry.action === 'selection-decision').length;
    // Additional diagnostic counts for debugging
    const microTaskCount = auditTrail.filter(entry => 
      entry.action === 'ai-decision' && entry.metadata?.microTaskType
    ).length;
    const phaseToolSelectionCount = auditTrail.filter(entry => 
      entry.action === 'phase-tool-selection'
    ).length;
    const phaseEnhancementCount = auditTrail.filter(entry => 
      entry.action === 'phase-enhancement'
    ).length;
    // Enhanced insights with diagnostic information
    const keyInsights = [];
    const potentialIssues = [];
    if (embeddingsUsageCount > 0) {
      keyInsights.push(`Semantische Suche wurde ${embeddingsUsageCount}x erfolgreich eingesetzt`);
    } else {
      potentialIssues.push('Keine semantischen Suchen dokumentiert - möglicherweise fehlerhafte Auditierung');
    }
    if (aiDecisionCount >= 5) {
      keyInsights.push(`${aiDecisionCount} KI-Entscheidungen mit detaillierter Begründung`);
    } else {
      potentialIssues.push(`Nur ${aiDecisionCount} KI-Entscheidungen dokumentiert - erwartet mindestens 5 für Vollständigkeit`);
    }
    if (microTaskCount > 0) {
      keyInsights.push(`${microTaskCount} spezialisierte Micro-Task-Analysen durchgeführt`);
    }
    // Detect mode-specific patterns for validation
    if (phaseToolSelectionCount > 0 || phaseEnhancementCount > 0) {
      keyInsights.push('Workflow-Modus: Phasenspezifische Analyse durchgeführt');
    } else if (microTaskCount >= 3) {
      keyInsights.push('Tool-Modus: Detaillierte Einzelbewertungen durchgeführt');
    }
    const phaseBreakdown = {};
    auditTrail.forEach(entry => {
      const phase = entry.phase || 'unknown';
@@ -1168,76 +1213,21 @@ class AIQueryInterface {
      analysisQuality = 'poor';
    }
    const keyInsights = [];
    const embeddingsUsed = auditTrail.some(e => e.metadata?.embeddingsUsed);
    if (embeddingsUsed) {
      keyInsights.push('Semantische Suche wurde erfolgreich eingesetzt');
    }
    const aiDecisionsWithReasoning = auditTrail.filter(e => 
      e.action === 'ai-decision' && e.metadata?.reasoning
    ).length;
    if (aiDecisionsWithReasoning > 0) {
      keyInsights.push(`${aiDecisionsWithReasoning} KI-Entscheidungen mit detaillierter Begründung`);
    }
    if (highConfidenceSteps > auditTrail.length * 0.7) {
      keyInsights.push('Mehrheit der Analyseschritte mit hoher Sicherheit');
    }
-    const responseQualityEntries = auditTrail.filter(e => 
+    // Validate expected counts based on mode detection
-      e.metadata?.responseConfidence && e.metadata.finalConfidence
+    const isWorkflowMode = phaseToolSelectionCount > 0 || phaseEnhancementCount > 0;
-    );
+    const expectedMinAI = isWorkflowMode ? 11 : 8; // Workflow: 5 common + 6 phase selections, Tool: 5 common + 3 evaluations
-    if (responseQualityEntries.length > 0) {
+    const expectedMinEmbeddings = 1; // Both modes should have initial search
      const avgResponseQuality = responseQualityEntries.reduce((sum, e) => 
        sum + (e.metadata.responseConfidence || 0), 0
      ) / responseQualityEntries.length;
-      if (avgResponseQuality >= 70) {
+    if (aiDecisionCount < expectedMinAI) {
-        keyInsights.push(`Hohe AI-Antwortqualität (∅ ${Math.round(avgResponseQuality)}%)`);
+      potentialIssues.push(`${expectedMinAI - aiDecisionCount} fehlende KI-Entscheidungen für ${isWorkflowMode ? 'Workflow' : 'Tool'}-Modus`);
      }
    }
-    const potentialIssues = [];
+    if (embeddingsUsageCount < expectedMinEmbeddings) {
-    if (lowConfidenceSteps > 2) {
+      potentialIssues.push(`${expectedMinEmbeddings - embeddingsUsageCount} fehlende semantische Suchen`);
      potentialIssues.push(`${lowConfidenceSteps} Analyseschritte mit niedriger Konfidenz`);
    }
    // FIXED: Only detect actual AI incompleteness, not display truncation
    // The old code incorrectly flagged display truncation as incomplete responses:
    // OLD (WRONG): e.output.response && e.output.response.includes('...')
    // NEW (CORRECT): Check metadata.aiResponse for actual incompleteness
    const incompleteAIResponses = auditTrail.filter(e => 
      e.action === 'ai-decision' && 
      e.metadata?.aiResponse && 
      (
        // Detect actual AI incompleteness patterns:
        e.metadata.aiResponse.trim().length < 10 || // Very short response
        e.metadata.aiResponse.endsWith('...') || // AI itself truncated (rare but possible)
        e.metadata.aiResponse.includes('[TRUNCATED]') || // Explicit truncation marker
        e.metadata.aiResponse.includes('I cannot continue') || // AI stopped unexpectedly
        e.metadata.aiResponse.includes('I need to stop here') || // AI indicated incompleteness
        e.metadata.aiResponse.includes('[RESPONSE_TOO_LONG]') || // Length limit hit
        // Also check if the AI response seems cut off mid-sentence
        (e.metadata.aiResponse.length > 50 && 
        !e.metadata.aiResponse.trim().match(/[.!?:]$/)) // Doesn't end with proper punctuation
      )
    ).length;
    if (incompleteAIResponses > 0) {
      potentialIssues.push(`${incompleteAIResponses} möglicherweise unvollständige AI-Antworten`);
    }
    // Additional quality checks
    const veryShortResponses = auditTrail.filter(e => 
      e.action === 'ai-decision' && 
      e.metadata?.aiResponse && 
      e.metadata.aiResponse.trim().length < 20
    ).length;
    if (veryShortResponses > 1) {
      potentialIssues.push(`${veryShortResponses} ungewöhnlich kurze AI-Antworten`);
    }
    return {
@@ -1260,7 +1250,14 @@ class AIQueryInterface {
      },
      analysisQuality,
      keyInsights,
-      potentialIssues
+      potentialIssues,
      // Debug information
      debugCounts: {
        microTaskCount,
        phaseToolSelectionCount, 
        phaseEnhancementCount,
        detectedMode: isWorkflowMode ? 'workflow' : 'tool'
      }
    };
  }
--- a/src/utils/aiPipeline.ts
+++ b/src/utils/aiPipeline.ts
@@ -1184,6 +1184,37 @@ class AIPipeline {
    try {
      const response = await aiService.callMicroTaskAI(contextPrompt);
      // FIX: Ensure ALL AI calls generate audit entries
      const toolsDataHash = getDataVersion?.() || 'unknown';
      const aiConfig = aiService.getConfig();
      // Calculate response confidence for audit trail
      const responseConfidence = auditService.calculateAIResponseConfidence(
        response.content,
        this.getExpectedLengthForTaskType(taskType),
        taskType
      );
      // FIX: Always add AI decision audit entry for micro-tasks
      auditService.addAIDecision(
        this.getPhaseForTaskType(taskType),
        prompt, // Store original prompt without context
        response.content,
        responseConfidence,
        this.getReasoningForTaskType(taskType, response.content),
        startTime,
        {
          toolsDataHash: toolsDataHash,
          microTaskType: taskType,
          aiModel: aiConfig.model,
          contextLength: contextPrompt.length,
          originalPromptLength: prompt.length,
          contextHistoryUsed: context.contextHistory.length > 0,
          decisionBasis: 'ai-analysis',
          ...response.usage
        }
      );
      return {
        taskType,
        content: response.content,
@@ -1193,6 +1224,29 @@ class AIPipeline {
      };
    } catch (error) {
      // FIX: Also audit failed AI calls for completeness
      auditService.addEntry(
        this.getPhaseForTaskType(taskType),
        'ai-decision-failed',
        { 
          prompt: prompt.slice(0, 200) + '...',
          taskType: taskType,
          error: error.message
        },
        { 
          error: error.message,
          success: false
        },
        0, // Zero confidence for failed calls
        startTime,
        {
          toolsDataHash: getDataVersion?.() || 'unknown',
          microTaskType: taskType,
          failed: true,
          decisionBasis: 'ai-analysis'
        }
      );
      return {
        taskType,
        content: '',
@@ -1203,6 +1257,51 @@ class AIPipeline {
    }
  }
  /**
   * Resolves the phase name associated with a micro-task type.
   *
   * @param taskType - Micro-task identifier, e.g. `'scenario-analysis'`.
   * @returns The phase mapped to `taskType`, or `'contextual-analysis'` when
   *   the task type has no explicit mapping.
   */
  private getPhaseForTaskType(taskType: string): string {
    // A Map is used rather than an object literal so that names inherited from
    // Object.prototype ('constructor', 'toString', '__proto__', ...) are never
    // mistaken for configured entries and always fall back to the default.
    const phaseByTaskType = new Map<string, string>([
      ['scenario-analysis', 'contextual-analysis'],
      ['investigation-approach', 'contextual-analysis'],
      ['critical-considerations', 'contextual-analysis'],
      ['tool-evaluation', 'tool-evaluation'],
      ['background-knowledge', 'knowledge-synthesis'],
      ['final-recommendations', 'synthesis'],
      ['phase-completion-selection', 'phase-completion'],
      ['phase-completion-reasoning', 'phase-completion']
    ]);
    return phaseByTaskType.get(taskType) ?? 'contextual-analysis';
  }
  /**
   * Resolves the expected response-length bounds for a micro-task type.
   *
   * @param taskType - Micro-task identifier, e.g. `'tool-evaluation'`.
   * @returns A freshly created `{ min, max }` pair for `taskType`, or
   *   `{ min: 50, max: 300 }` when the task type has no explicit entry.
   *   NOTE(review): the unit (characters vs. tokens) is not visible here —
   *   confirm against the consumer of these bounds.
   */
  private getExpectedLengthForTaskType(taskType: string): { min: number; max: number } {
    // A Map is used rather than an object literal so that names inherited from
    // Object.prototype ('constructor', 'toString', '__proto__', ...) cannot be
    // returned in place of a real bounds object.
    const boundsByTaskType = new Map<string, { min: number; max: number }>([
      ['scenario-analysis', { min: 100, max: 500 }],
      ['investigation-approach', { min: 100, max: 400 }],
      ['critical-considerations', { min: 80, max: 300 }],
      ['tool-evaluation', { min: 200, max: 800 }],
      ['background-knowledge', { min: 50, max: 300 }],
      ['final-recommendations', { min: 150, max: 600 }],
      ['phase-completion-selection', { min: 50, max: 200 }],
      ['phase-completion-reasoning', { min: 100, max: 300 }]
    ]);
    return boundsByTaskType.get(taskType) ?? { min: 50, max: 300 };
  }
  /**
   * Builds the German reasoning sentence describing an AI response for a
   * given micro-task type.
   *
   * @param taskType - Micro-task identifier; mapped to a German display name,
   *   falling back to the raw identifier when no name is configured.
   * @param response - The AI response text; only its `length` is used.
   * @returns A sentence containing the task's display name and the response
   *   length.
   */
  private getReasoningForTaskType(taskType: string, response: string): string {
    // A Map is used rather than an object literal so that names inherited from
    // Object.prototype ('constructor', 'toString', ...) are not interpolated
    // into the message in place of the task identifier.
    const displayNameByTaskType = new Map<string, string>([
      ['scenario-analysis', 'Szenario-Analyse'],
      ['investigation-approach', 'Untersuchungsansatz'],
      ['critical-considerations', 'Kritische Überlegungen'],
      ['tool-evaluation', 'Tool-Bewertung'],
      ['background-knowledge', 'Hintergrundwissen-Auswahl'],
      ['final-recommendations', 'Abschließende Empfehlungen'],
      ['phase-completion-selection', 'Phasen-Vervollständigung'],
      ['phase-completion-reasoning', 'Phasen-Begründung']
    ]);

    // `||` (not `??`) mirrors the original fallback for any falsy lookup result.
    const taskName = displayNameByTaskType.get(taskType) || taskType;
    const responseLength = response.length;

    return `KI generierte ${taskName} (${responseLength} Zeichen) - forensisch fundierte Analyse mit methodischer Begründung`;
  }
  private addToContextHistory(context: PipelineContext, newEntry: string): void {
    const entryTokens = aiService.estimateTokens(newEntry);
--- a/src/utils/toolSelector.ts
+++ b/src/utils/toolSelector.ts
@@ -99,6 +99,9 @@ class ToolSelector {
    console.log('[TOOL-SELECTOR] Using embeddings for candidate selection');
    // FIX: Record the start time for audit trail
    const embeddingsSearchStart = Date.now();
    const similarItems = await embeddingsService.findSimilar(
      userQuery,
      this.config.embeddingCandidates,
@@ -107,6 +110,27 @@ class ToolSelector {
    console.log('[TOOL-SELECTOR] Embeddings found', similarItems.length, 'similar items');
    // FIX: Import and use auditService to record this embeddings search
    const { auditService } = await import('./auditService.js');
    const { getDataVersion } = await import('./dataService.js');
    const toolsDataHash = getDataVersion() || 'unknown';
    // FIX: Add audit entry for initial embeddings search that happens in BOTH modes
    auditService.addEmbeddingsSearch(
      userQuery,
      similarItems,
      this.config.similarityThreshold,
      embeddingsSearchStart,
      {
        toolsDataHash: toolsDataHash,
        selectionPhase: 'initial-candidate-selection',
        candidateLimit: this.config.embeddingCandidates,
        mode: mode,
        reasoning: `Initiale semantische Suche für ${mode}-Modus - Reduzierung der ${toolsData.tools.length} verfügbaren Tools auf ${similarItems.length} relevante Kandidaten`
      }
    );
    similarItems.forEach(item => {
      context.embeddingsSimilarities.set(item.name, item.similarity);
    });