enhancement 1: audit trail

2025-08-03 12:41:02 +02:00
parent 57c507915f
commit 6308c03709
6 changed files with 639 additions and 71 deletions
--- a/src/utils/aiPipeline.ts
+++ b/src/utils/aiPipeline.ts
@@ -1,4 +1,4 @@
-// src/utils/aiPipeline.ts 
+// src/utils/aiPipeline.ts - Enhanced with Audit Trail System

 import { getCompressedToolsDataForAI } from './dataService.js';
 import { embeddingsService, type EmbeddingData } from './embeddings.js';
@@ -30,6 +30,19 @@ interface AnalysisResult {
  };
 }

+// NEW: Audit Trail Types
+/**
+ * One recorded step of a pipeline run, as assembled by `addAuditEntry`.
+ */
+interface AuditEntry {
+  timestamp: number;       // Date.now() value taken when the entry is created (epoch milliseconds)
+  phase: string;           // Pipeline stage label, e.g. 'retrieval', 'selection', 'micro-task'
+  action: string;          // Step within the phase, e.g. 'embeddings-search', 'ai-tool-selection', 'tool-evaluation'
+  input: any;              // What went into this step (may be shortened depending on the audit detail level)
+  output: any;             // What came out of this step (shortened the same way as input)
+  confidence: number;      // 0-100: heuristic score supplied by the caller, not a measured probability
+  processingTimeMs: number; // Milliseconds between the caller-supplied start time and entry creation
+  metadata: Record<string, any>; // Additional free-form context
+}
+
+// Enhanced AnalysisContext with Audit Trail
 interface AnalysisContext {
  userQuery: string;
  mode: string;
@@ -47,6 +60,9 @@ interface AnalysisContext {
  backgroundKnowledge?: Array<{concept: any, relevance: string}>;
  
  seenToolNames: Set<string>;
+  
+  // NEW: Audit Trail
+  auditTrail: AuditEntry[];
 }

 class ImprovedMicroTaskAIPipeline {
@@ -58,6 +74,16 @@ class ImprovedMicroTaskAIPipeline {
  
  private maxContextTokens: number;
  private maxPromptTokens: number;
+  
+  // Audit configuration, populated in the constructor from the FORENSIC_AUDIT_* environment variables.
+  private auditConfig: {
+    enabled: boolean; // when false, addAuditEntry records nothing
+    detailLevel: 'minimal' | 'standard' | 'verbose'; // how much of each entry's input/output is kept
+    retentionHours: number; // NOTE(review): parsed from the environment but not read anywhere in the visible code — confirm where retention is enforced
+  };
+  
+  // Holding area for entries recorded with a null context, i.e. before an
+  // AnalysisContext exists. mergeTemporaryAuditEntries moves them to the front
+  // of context.auditTrail.
+  // NOTE(review): the buffer lives on the instance, so overlapping runs on one
+  // pipeline object would presumably share it — confirm how instances are used.
+  private tempAuditEntries: AuditEntry[] = [];

  constructor() {
    this.config = {
@@ -73,6 +99,13 @@ class ImprovedMicroTaskAIPipeline {
    
    this.maxContextTokens = parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10);
    this.maxPromptTokens = parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10);
+    
+    // Audit settings come from the environment. Anything other than the three
+    // known detail levels falls back to 'standard', and a missing, non-numeric
+    // or negative retention value falls back to 72 hours.
+    const requestedDetailLevel = process.env.FORENSIC_AUDIT_DETAIL_LEVEL;
+    const requestedRetentionHours = parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10);
+    this.auditConfig = {
+      enabled: process.env.FORENSIC_AUDIT_ENABLED === 'true',
+      detailLevel:
+        requestedDetailLevel === 'minimal' || requestedDetailLevel === 'verbose'
+          ? requestedDetailLevel
+          : 'standard',
+      retentionHours:
+        Number.isFinite(requestedRetentionHours) && requestedRetentionHours >= 0
+          ? requestedRetentionHours
+          : 72
+    };
  }

  private getEnv(key: string): string {
@@ -83,6 +116,94 @@ class ImprovedMicroTaskAIPipeline {
    return value;
  }

+  /**
+   * Records one audit entry. Does nothing when auditing is disabled.
+   *
+   * @param context    Run context whose `auditTrail` receives the entry. Pass
+   *                   null before a context exists; the entry is then buffered
+   *                   in `tempAuditEntries` until `mergeTemporaryAuditEntries` runs.
+   * @param phase      Pipeline stage label.
+   * @param action     Step label within the phase.
+   * @param input      Step input; passed through `summarizeForAudit`.
+   * @param output     Step output; passed through `summarizeForAudit`.
+   * @param confidence Heuristic score; bounded to the 0-100 range documented on AuditEntry.
+   * @param startTime  `Date.now()` value captured when the step began.
+   * @param metadata   Optional extra context, stored as given.
+   */
+  private addAuditEntry(
+    context: AnalysisContext | null,
+    phase: string,
+    action: string,
+    input: any,
+    output: any,
+    confidence: number,
+    startTime: number,
+    metadata: Record<string, any> = {}
+  ): void {
+    if (!this.auditConfig.enabled) return;
+
+    // Read the clock once so the stored timestamp, the stored duration and the
+    // logged duration always agree.
+    const now = Date.now();
+    const processingTimeMs = Math.max(0, now - startTime);
+
+    // Keep the score inside 0-100; a non-finite score is recorded as 0.
+    const boundedConfidence = Number.isFinite(confidence)
+      ? Math.min(100, Math.max(0, confidence))
+      : 0;
+
+    const auditEntry: AuditEntry = {
+      timestamp: now,
+      phase,
+      action,
+      // summarizeForAudit returns its argument unchanged at the 'verbose' level,
+      // so no separate verbose branch is needed here.
+      input: this.summarizeForAudit(input),
+      output: this.summarizeForAudit(output),
+      confidence: boundedConfidence,
+      processingTimeMs,
+      metadata
+    };
+
+    if (context) {
+      context.auditTrail.push(auditEntry);
+    } else {
+      // No context yet: keep the entry until it can be merged into one.
+      this.tempAuditEntries.push(auditEntry);
+    }
+
+    // Log for debugging when audit is enabled
+    console.log(`[AUDIT] ${phase}/${action}: ${boundedConfidence}% confidence, ${processingTimeMs}ms`);
+  }
+  
+  /**
+   * Moves every buffered pre-context entry to the front of `context.auditTrail`
+   * and then empties the buffer. Returns early when auditing is disabled or
+   * nothing is buffered.
+   */
+  private mergeTemporaryAuditEntries(context: AnalysisContext): void {
+    if (!this.auditConfig.enabled) return;
+
+    const pending = this.tempAuditEntries;
+    if (pending.length === 0) return;
+
+    // Buffered entries were recorded first, so they go ahead of anything
+    // already in the trail.
+    context.auditTrail.unshift(...pending);
+    // `pending` still references the moved entries; only the field is reset.
+    this.tempAuditEntries = [];
+
+    console.log(`[AUDIT] Merged ${pending.length} temporary audit entries into context`);
+  }
+
+  /**
+   * Shortens an audit payload according to `auditConfig.detailLevel`.
+   *
+   * - 'minimal':  strings are cut to 100 characters, arrays to 3 items
+   * - 'standard': strings are cut to 500 characters, arrays to 10 items
+   * - any other level ('verbose'): the value is returned untouched
+   *
+   * The same limits are applied to the fields of plain objects. The call sites
+   * pass object literals such as `{ query, threshold, candidates }`, so limiting
+   * only top-level strings and arrays would leave those payloads at full size.
+   *
+   * @param data Value to shorten; it is never mutated.
+   * @returns Strings and arrays within the limits are returned as-is; longer
+   *          ones are returned as shortened copies with a marker appended.
+   *          Plain objects are returned as rebuilt copies with shortened fields.
+   */
+  private summarizeForAudit(data: any): any {
+    const level = this.auditConfig.detailLevel;
+    if (level !== 'minimal' && level !== 'standard') return data;
+
+    const maxChars = level === 'minimal' ? 100 : 500;
+    const maxItems = level === 'minimal' ? 3 : 10;
+    // Bounds the recursion so cyclic or very deep payloads cannot loop forever.
+    const maxDepth = 4;
+
+    const shorten = (value: any, depth: number): any => {
+      if (typeof value === 'string') {
+        return value.length > maxChars ? value.slice(0, maxChars) + '...[truncated]' : value;
+      }
+      if (Array.isArray(value)) {
+        return value.length > maxItems
+          ? [...value.slice(0, maxItems), `...[${value.length - maxItems} more items]`]
+          : value;
+      }
+
+      // Only plain objects are walked; Date, Map, class instances and the like
+      // pass through unchanged.
+      const isPlainObject =
+        value !== null &&
+        typeof value === 'object' &&
+        Object.getPrototypeOf(value) === Object.prototype;
+      if (!isPlainObject || depth >= maxDepth) return value;
+
+      const copy: Record<string, any> = {};
+      for (const key of Object.keys(value)) {
+        copy[key] = shorten(value[key], depth + 1);
+      }
+      return copy;
+    };
+
+    return shorten(data, 0);
+  }
+
+  /**
+   * Heuristic 25-95 confidence score for an AI tool selection.
+   *
+   * Starts at 60, then adjusts:
+   * - share of candidates selected: +20 when between 5% and 30% (exclusive),
+   *   -10 when 5% or less, -15 when 30% or more
+   * - +15 when the reasoning text is longer than 50 characters
+   * - +5 when at least one concept was selected
+   *
+   * @param result         Parsed AI response; `selectedTools` must be an array.
+   * @param candidateCount Number of tools the AI could choose from.
+   * @returns 30 when `result` has no `selectedTools` array, otherwise the
+   *          adjusted score limited to the range 25-95.
+   */
+  private calculateSelectionConfidence(result: any, candidateCount: number): number {
+    if (!result || !Array.isArray(result.selectedTools)) return 30;
+
+    // With no candidates the ratio would be NaN or Infinity and land in the
+    // "too many" branch; treat it as selecting nothing instead.
+    const selectionRatio = candidateCount > 0 ? result.selectedTools.length / candidateCount : 0;
+    const hasReasoning = typeof result.reasoning === 'string' && result.reasoning.length > 50;
+    const hasConcepts = Array.isArray(result.selectedConcepts) && result.selectedConcepts.length > 0;
+
+    let confidence = 60; // Base confidence
+
+    // Good selection ratio (not too many, not too few)
+    if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
+    else if (selectionRatio <= 0.05) confidence -= 10; // Too few
+    else confidence -= 15; // Too many
+
+    // Has detailed reasoning
+    if (hasReasoning) confidence += 15;
+
+    // At least one concept was selected alongside the tools
+    if (hasConcepts) confidence += 5;
+
+    return Math.min(95, Math.max(25, confidence));
+  }
+
  private estimateTokens(text: string): number {
    return Math.ceil(text.length / 4); 
  }
@@ -140,6 +261,7 @@ class ImprovedMicroTaskAIPipeline {
    let selectionMethod = 'unknown';
    
    if (embeddingsService.isEnabled()) {
+      const embeddingsStart = Date.now();
      const similarItems = await embeddingsService.findSimilar(
        userQuery, 
        this.embeddingCandidates, 
@@ -168,6 +290,17 @@ class ImprovedMicroTaskAIPipeline {
        candidateConcepts = toolsData.concepts;
        selectionMethod = 'full_dataset';
      }
+      
+      // NEW: Add Audit Entry for Embeddings Search
+      if (this.auditConfig.enabled) {
+        this.addAuditEntry(null, 'retrieval', 'embeddings-search', 
+          { query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates }, 
+          { candidatesFound: similarItems.length, toolNames: Array.from(toolNames), conceptNames: Array.from(conceptNames) },
+          similarItems.length >= 15 ? 85 : 60, // Confidence based on result quality
+          embeddingsStart,
+          { selectionMethod, embeddingsEnabled: true }
+        );
+      }
    } else {
      console.log(`[IMPROVED PIPELINE] Embeddings disabled, using full dataset`);
      candidateTools = toolsData.tools;
@@ -194,6 +327,8 @@ class ImprovedMicroTaskAIPipeline {
    mode: string,
    selectionMethod: string
  ) {
+    const selectionStart = Date.now();
+    
    const modeInstruction = mode === 'workflow' 
      ? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases. Select 15-25 tools that cover the full investigation lifecycle.'
      : 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem. Select 3-8 tools that are most relevant and effective.';
@@ -298,6 +433,18 @@ Respond with ONLY this JSON format:
      
      if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
        console.error('[IMPROVED PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
+        
+        // NEW: Add Audit Entry for Failed Selection
+        if (this.auditConfig.enabled) {
+          this.addAuditEntry(null, 'selection', 'ai-tool-selection-failed',
+            { candidateCount: candidateTools.length, mode, prompt: prompt.slice(0, 200) },
+            { error: 'Invalid JSON structure', response: response.slice(0, 200) },
+            10, // Very low confidence
+            selectionStart,
+            { aiModel: this.config.model, selectionMethod }
+          );
+        }
+        
        throw new Error('AI selection failed to return valid tool selection');
      }

@@ -315,6 +462,24 @@ Respond with ONLY this JSON format:
      
      console.log(`[IMPROVED PIPELINE] Final selection: ${selectedTools.length} tools with bias prevention applied`);
      
+      // Audit the successful selection. addAuditEntry checks the enabled flag
+      // itself; the guard here only avoids computing the arguments when
+      // auditing is off.
+      if (this.auditConfig.enabled) {
+        const confidence = this.calculateSelectionConfidence(result, candidateTools.length);
+
+        // Preview of the model's reasoning: empty when none was returned, and
+        // the ellipsis is appended only when text was actually cut off.
+        const reasoningText = typeof result.reasoning === 'string' ? result.reasoning : '';
+        const reasoningPreview = reasoningText.length > 200
+          ? reasoningText.slice(0, 200) + '...'
+          : reasoningText;
+
+        this.addAuditEntry(null, 'selection', 'ai-tool-selection',
+          { candidateCount: candidateTools.length, mode, promptLength: prompt.length },
+          { 
+            selectedToolCount: result.selectedTools.length, 
+            selectedConceptCount: result.selectedConcepts.length,
+            reasoning: reasoningPreview,
+            finalToolNames: selectedTools.map(t => t.name)
+          },
+          confidence,
+          selectionStart,
+          { aiModel: this.config.model, selectionMethod, promptTokens: this.estimateTokens(prompt) }
+        );
+      }
+      
+      
      return {
        selectedTools,
        selectedConcepts
@@ -323,12 +488,25 @@ Respond with ONLY this JSON format:
    } catch (error) {
      console.error('[IMPROVED PIPELINE] AI selection failed:', error);
      
+      // Audit the failure before falling back to keyword-based selection.
+      if (this.auditConfig.enabled) {
+        // A thrown value is not guaranteed to be an Error instance, so do not
+        // assume it has a `message` property.
+        const errorMessage = error instanceof Error ? error.message : String(error);
+
+        this.addAuditEntry(null, 'selection', 'ai-tool-selection-error',
+          { candidateCount: candidateTools.length, mode },
+          { error: errorMessage },
+          5, // Very low confidence
+          selectionStart,
+          { aiModel: this.config.model, selectionMethod }
+        );
+      }
+      
      console.log('[IMPROVED PIPELINE] Using emergency keyword-based selection');
      return this.emergencyKeywordSelection(userQuery, candidateTools, candidateConcepts, mode);
    }
  }

  private emergencyKeywordSelection(userQuery: string, candidateTools: any[], candidateConcepts: any[], mode: string) {
+    const emergencyStart = Date.now();
+    
    const queryLower = userQuery.toLowerCase();
    const keywords = queryLower.split(/\s+/).filter(word => word.length > 3);
    
@@ -354,6 +532,17 @@ Respond with ONLY this JSON format:
    
    console.log(`[IMPROVED PIPELINE] Emergency selection: ${selectedTools.length} tools, keywords: ${keywords.slice(0, 5).join(', ')}`);
    
+    // NEW: Add Audit Entry for Emergency Selection
+    if (this.auditConfig.enabled) {
+      this.addAuditEntry(null, 'selection', 'emergency-keyword-selection',
+        { keywords: keywords.slice(0, 10), candidateCount: candidateTools.length },
+        { selectedCount: selectedTools.length, topScores: scoredTools.slice(0, 5).map(s => ({ name: s.tool.name, score: s.score })) },
+        40, // Moderate confidence for emergency selection
+        emergencyStart,
+        { selectionMethod: 'emergency_keyword' }
+      );
+    }
+    
    return {
      selectedTools,
      selectedConcepts: candidateConcepts.slice(0, 3)
@@ -382,21 +571,43 @@ Respond with ONLY this JSON format:
    try {
      const response = await this.callAI(contextPrompt, maxTokens);
      
-      return {
+      const result = {
        taskType: 'micro-task',
        content: response.trim(),
        processingTimeMs: Date.now() - startTime,
        success: true
      };
+      
+      // NEW: Add Audit Entry for Successful Micro-Task
+      this.addAuditEntry(context, 'micro-task', 'ai-analysis',
+        { promptLength: contextPrompt.length, maxTokens },
+        { responseLength: response.length, contentPreview: response.slice(0, 100) },
+        response.length > 50 ? 80 : 60, // Confidence based on response quality
+        startTime,
+        { aiModel: this.config.model, contextUsed: context.contextHistory.length > 0 }
+      );
+      
+      return result;

    } catch (error) {
-      return {
+      const result = {
        taskType: 'micro-task',
        content: '',
        processingTimeMs: Date.now() - startTime,
        success: false,
        error: error.message
      };
+      
+      // NEW: Add Audit Entry for Failed Micro-Task
+      this.addAuditEntry(context, 'micro-task', 'ai-analysis-failed',
+        { promptLength: contextPrompt.length, maxTokens },
+        { error: error.message },
+        5, // Very low confidence
+        startTime,
+        { aiModel: this.config.model, contextUsed: context.contextHistory.length > 0 }
+      );
+      
+      return result;
    }
  }

@@ -550,6 +761,15 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
            this.addToolToSelection(context, tool, phase.id, sel.priority, sel.justification);
          }
        });
+        
+        // NEW: Add audit entry for tool selection
+        this.addAuditEntry(context, 'micro-task', 'phase-tool-selection',
+          { phase: phase.id, availableTools: phaseTools.length },
+          { validSelections: validSelections.length, selectedTools: validSelections.map(s => s.toolName) },
+          validSelections.length > 0 ? 75 : 30,
+          Date.now() - result.processingTimeMs,
+          { phaseName: phase.name }
+        );
      }
    }
    
@@ -595,6 +815,15 @@ Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit d
          rank
        }
      }, 'evaluation', evaluation.suitability_score);
+      
+      // NEW: Add audit entry for tool evaluation
+      this.addAuditEntry(context, 'micro-task', 'tool-evaluation',
+        { toolName: tool.name, rank },
+        { suitabilityScore: evaluation.suitability_score, hasExplanation: !!evaluation.detailed_explanation },
+        evaluation.suitability_score === 'high' ? 85 : evaluation.suitability_score === 'medium' ? 70 : 50,
+        Date.now() - result.processingTimeMs,
+        { toolType: tool.type }
+      );
    }
    
    return result;
@@ -644,6 +873,15 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
          concept: availableConcepts.find((c: any) => c.name === sel.conceptName),
          relevance: sel.relevance
        }));
+        
+        // NEW: Add audit entry for background knowledge selection
+        this.addAuditEntry(context, 'micro-task', 'background-knowledge-selection',
+          { availableConcepts: availableConcepts.length },
+          { selectedConcepts: context.backgroundKnowledge?.length || 0 },
+          context.backgroundKnowledge && context.backgroundKnowledge.length > 0 ? 75 : 40,
+          Date.now() - result.processingTimeMs,
+          {}
+        );
      }
    }
    
@@ -711,7 +949,10 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
    let completedTasks = 0;
    let failedTasks = 0;
    
-    console.log(`[IMPROVED PIPELINE] Starting ${mode} query processing with context continuity`);
+    // NEW: Clear any previous temporary audit entries
+    this.tempAuditEntries = [];
+    
+    console.log(`[IMPROVED PIPELINE] Starting ${mode} query processing with context continuity and audit trail`);

    try {
      // Stage 1: Get intelligent candidates (embeddings + AI selection)
@@ -725,11 +966,25 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
        contextHistory: [],
        maxContextLength: this.maxContextTokens,
        currentContextLength: 0,
-        seenToolNames: new Set<string>()
+        seenToolNames: new Set<string>(),
+        // NEW: Initialize audit trail
+        auditTrail: []
      };

+      // NEW: Merge any temporary audit entries from pre-context operations
+      this.mergeTemporaryAuditEntries(context);
+
      console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);

+      // NEW: Add initial audit entry
+      this.addAuditEntry(context, 'initialization', 'pipeline-start',
+        { userQuery, mode, toolsDataLoaded: !!toolsData },
+        { candidateTools: filteredData.tools.length, candidateConcepts: filteredData.concepts.length },
+        90, // High confidence for initialization
+        startTime,
+        { auditEnabled: this.auditConfig.enabled }
+      );
+
      // MICRO-TASK SEQUENCE
      
      // Task 1: Scenario/Problem Analysis
@@ -776,6 +1031,15 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
      // Build final recommendation
      const recommendation = this.buildRecommendation(context, mode, finalResult.content);

+      // NEW: Add final audit entry
+      this.addAuditEntry(context, 'completion', 'pipeline-end',
+        { completedTasks, failedTasks },
+        { finalRecommendation: !!recommendation, auditEntriesGenerated: context.auditTrail.length },
+        completedTasks > failedTasks ? 85 : 60,
+        startTime,
+        { totalProcessingTimeMs: Date.now() - startTime }
+      );
+
      const processingStats = {
        embeddingsUsed: embeddingsService.isEnabled(),
        candidatesFromEmbeddings: filteredData.tools.length,
@@ -789,14 +1053,23 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo

      console.log(`[IMPROVED PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
      console.log(`[IMPROVED PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);
+      console.log(`[IMPROVED PIPELINE] Audit trail entries: ${context.auditTrail.length}`);

      return {
-        recommendation,
+        recommendation: {
+          ...recommendation,
+          // NEW: Include audit trail in response
+          auditTrail: this.auditConfig.enabled ? context.auditTrail : undefined
+        },
        processingStats
      };

    } catch (error) {
      console.error('[IMPROVED PIPELINE] Processing failed:', error);
+      
+      // NEW: Ensure temp audit entries are cleared even on error
+      this.tempAuditEntries = [];
+      
      throw error;
    }
  }