enhancement 1: audit trail

This commit is contained in:
overcuriousity
2025-08-03 12:41:02 +02:00
parent 57c507915f
commit 6308c03709
6 changed files with 639 additions and 71 deletions

View File

@@ -1,4 +1,4 @@
// src/utils/aiPipeline.ts
// src/utils/aiPipeline.ts - Enhanced with Audit Trail System
import { getCompressedToolsDataForAI } from './dataService.js';
import { embeddingsService, type EmbeddingData } from './embeddings.js';
@@ -30,6 +30,19 @@ interface AnalysisResult {
};
}
// NEW: Audit Trail Types
/**
 * A single recorded step of the analysis pipeline.
 *
 * Entries are collected in order so that a finished analysis can be
 * inspected step by step afterwards.
 */
interface AuditEntry {
  /** Creation time of the entry in epoch milliseconds. */
  timestamp: number;

  /** Pipeline phase the step belongs to, e.g. 'retrieval', 'selection', 'micro-task-N'. */
  phase: string;

  /** What was done, e.g. 'embeddings-search', 'ai-selection', 'tool-evaluation'. */
  action: string;

  /** What went into this step (possibly shortened, depending on the audit detail level). */
  input: any;

  /** What came out of this step (possibly shortened, depending on the audit detail level). */
  output: any;

  /** How confident the pipeline is in this step, on a 0-100 scale. */
  confidence: number;

  /** Duration attributed to the step, in milliseconds. */
  processingTimeMs: number;

  /** Additional free-form context for the step. */
  metadata: Record<string, any>;
}
// Enhanced AnalysisContext with Audit Trail
interface AnalysisContext {
userQuery: string;
mode: string;
@@ -47,6 +60,9 @@ interface AnalysisContext {
backgroundKnowledge?: Array<{concept: any, relevance: string}>;
seenToolNames: Set<string>;
// NEW: Audit Trail
auditTrail: AuditEntry[];
}
class ImprovedMicroTaskAIPipeline {
@@ -58,6 +74,16 @@ class ImprovedMicroTaskAIPipeline {
private maxContextTokens: number;
private maxPromptTokens: number;
// NEW: Audit Configuration
private auditConfig: {
enabled: boolean;
detailLevel: 'minimal' | 'standard' | 'verbose';
retentionHours: number;
};
// NEW: Temporary audit storage for pre-context operations
private tempAuditEntries: AuditEntry[] = [];
constructor() {
this.config = {
@@ -73,6 +99,13 @@ class ImprovedMicroTaskAIPipeline {
this.maxContextTokens = parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10);
this.maxPromptTokens = parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10);
// NEW: Initialize Audit Configuration
this.auditConfig = {
enabled: process.env.FORENSIC_AUDIT_ENABLED === 'true',
detailLevel: (process.env.FORENSIC_AUDIT_DETAIL_LEVEL as any) || 'standard',
retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10)
};
}
private getEnv(key: string): string {
@@ -83,6 +116,94 @@ class ImprovedMicroTaskAIPipeline {
return value;
}
// NEW: Audit Trail Utility Functions
/**
 * Records one audit entry for a pipeline step.
 *
 * Does nothing when auditing is disabled. With detail level 'verbose' the
 * input/output payloads are stored as passed in; otherwise they are run
 * through `summarizeForAudit` first.
 *
 * @param context    Analysis context whose `auditTrail` receives the entry, or
 *                   `null` when no context exists yet; in that case the entry
 *                   is buffered in `tempAuditEntries` until
 *                   `mergeTemporaryAuditEntries` moves it into a context.
 * @param phase      Pipeline phase label.
 * @param action     Action label within the phase.
 * @param input      Payload describing what went into the step.
 * @param output     Payload describing what came out of the step.
 * @param confidence Confidence for the step (0-100); stored as given.
 * @param startTime  Epoch milliseconds at which the step started; the recorded
 *                   duration is measured from this value.
 * @param metadata   Optional extra context stored alongside the entry.
 */
private addAuditEntry(
  context: AnalysisContext | null,
  phase: string,
  action: string,
  input: any,
  output: any,
  confidence: number,
  startTime: number,
  metadata: Record<string, any> = {}
): void {
  if (!this.auditConfig.enabled) return;

  // Read the clock once so the entry's timestamp, its stored duration and the
  // duration printed in the log line all describe the same instant.
  const now = Date.now();
  const elapsedMs = now - startTime;

  const keepFullPayload = this.auditConfig.detailLevel === 'verbose';

  const auditEntry: AuditEntry = {
    timestamp: now,
    phase,
    action,
    input: keepFullPayload ? input : this.summarizeForAudit(input),
    output: keepFullPayload ? output : this.summarizeForAudit(output),
    confidence,
    processingTimeMs: elapsedMs,
    metadata
  };

  if (context) {
    context.auditTrail.push(auditEntry);
  } else {
    // No context yet: park the entry on the instance for later merging.
    // NOTE(review): this buffer is per-instance state; if one pipeline instance
    // handles overlapping requests their buffered entries would share it — confirm.
    this.tempAuditEntries.push(auditEntry);
  }

  // Log for debugging when audit is enabled
  console.log(`[AUDIT] ${phase}/${action}: ${confidence}% confidence, ${elapsedMs}ms`);
}
// NEW: Merge temporary audit entries into context
/**
 * Moves audit entries that were buffered before a context existed into the
 * given context.
 *
 * The buffered entries are placed in front of whatever the context's trail
 * already holds, and the buffer is emptied afterwards. Nothing happens when
 * auditing is disabled or the buffer is empty.
 *
 * @param context Analysis context whose `auditTrail` receives the entries.
 */
private mergeTemporaryAuditEntries(context: AnalysisContext): void {
  if (!this.auditConfig.enabled) {
    return;
  }

  const pending = this.tempAuditEntries;
  const pendingCount = pending.length;
  if (pendingCount === 0) {
    return;
  }

  // Prepend so entries recorded before the context was created stay first.
  context.auditTrail.unshift(...pending);

  // Start a fresh buffer for the next run.
  this.tempAuditEntries = [];

  console.log(`[AUDIT] Merged ${pendingCount} temporary audit entries into context`);
}
/**
 * Shortens a payload before it is stored in the audit trail.
 *
 * Limits depend on the configured detail level:
 * - 'minimal':  strings longer than 100 characters and arrays longer than 3 items are cut.
 * - 'standard': strings longer than 500 characters and arrays longer than 10 items are cut.
 * - any other level: the payload is returned unchanged.
 *
 * A cut string gets the suffix '...[truncated]'; a cut array gets one extra
 * trailing string element stating how many items were dropped.
 *
 * Plain objects are copied with each property value summarized by the same
 * rules. Audit payloads are normally object literals wrapping the strings and
 * lists of interest, so without this step the limits would never take effect.
 * Array elements and non-plain objects (class instances, Date, Map, Set, ...)
 * are passed through untouched.
 *
 * @param data Payload to shorten.
 * @returns The shortened payload; the input itself is never mutated.
 */
private summarizeForAudit(data: any): any {
  const limitsByLevel: Record<string, { maxChars: number; maxItems: number } | undefined> = {
    minimal: { maxChars: 100, maxItems: 3 },
    standard: { maxChars: 500, maxItems: 10 }
  };

  const limits = limitsByLevel[this.auditConfig.detailLevel];
  if (!limits) return data;

  // Bounds the recursion so deeply nested or self-referencing payloads
  // cannot recurse without end; anything deeper is stored as-is.
  const maxDepth = 4;

  const summarize = (value: any, depth: number): any => {
    if (typeof value === 'string') {
      return value.length > limits.maxChars
        ? value.slice(0, limits.maxChars) + '...[truncated]'
        : value;
    }

    if (Array.isArray(value)) {
      return value.length > limits.maxItems
        ? [...value.slice(0, limits.maxItems), `...[${value.length - limits.maxItems} more items]`]
        : value;
    }

    if (value !== null && typeof value === 'object' && depth < maxDepth) {
      const proto = Object.getPrototypeOf(value);
      const isPlainObject = proto === Object.prototype || proto === null;
      if (isPlainObject) {
        return Object.fromEntries(
          Object.entries(value).map(([key, nested]) => [key, summarize(nested, depth + 1)])
        );
      }
    }

    return value;
  };

  return summarize(data, 0);
}
/**
 * Heuristic confidence score for an AI tool-selection result.
 *
 * Starts from a base of 60 and adjusts it:
 * - +20 when the share of selected tools among the candidates lies strictly
 *   between 5% and 30%; -10 when it is 5% or less; -15 when it is 30% or more.
 * - +15 when `result.reasoning` is longer than 50 characters.
 * - +5 when at least one concept was selected.
 * The final value is clamped to the range 25..95.
 *
 * Returns 30 when no ratio can be formed: `result` is missing,
 * `result.selectedTools` is not an array, or `candidateCount` is not a
 * positive number (a zero count would otherwise produce NaN/Infinity and be
 * scored as "too many tools selected").
 *
 * @param result         Parsed AI selection result.
 * @param candidateCount Number of candidate tools the AI could choose from.
 * @returns Confidence value between 25 and 95.
 */
private calculateSelectionConfidence(result: any, candidateCount: number): number {
  // `!(x > 0)` also rejects NaN, which a plain `x <= 0` check would let through.
  if (!result || !Array.isArray(result.selectedTools) || !(candidateCount > 0)) return 30;

  const selectionRatio = result.selectedTools.length / candidateCount;
  const hasReasoning = result.reasoning && result.reasoning.length > 50;

  let confidence = 60; // Base confidence

  // Good selection ratio (not too many, not too few)
  if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
  else if (selectionRatio <= 0.05) confidence -= 10; // Too few
  else confidence -= 15; // Too many

  // Has detailed reasoning
  if (hasReasoning) confidence += 15;

  // At least one background concept was selected alongside the tools
  if (result.selectedConcepts && result.selectedConcepts.length > 0) confidence += 5;

  return Math.min(95, Math.max(25, confidence));
}
/**
 * Rough token estimate for a piece of text: one token per four characters,
 * rounded up.
 *
 * @param text Text to measure.
 * @returns Estimated token count.
 */
private estimateTokens(text: string): number {
  const charsPerToken = 4;
  const charCount = text.length;
  return Math.ceil(charCount / charsPerToken);
}
@@ -140,6 +261,7 @@ class ImprovedMicroTaskAIPipeline {
let selectionMethod = 'unknown';
if (embeddingsService.isEnabled()) {
const embeddingsStart = Date.now();
const similarItems = await embeddingsService.findSimilar(
userQuery,
this.embeddingCandidates,
@@ -168,6 +290,17 @@ class ImprovedMicroTaskAIPipeline {
candidateConcepts = toolsData.concepts;
selectionMethod = 'full_dataset';
}
// NEW: Add Audit Entry for Embeddings Search
if (this.auditConfig.enabled) {
this.addAuditEntry(null, 'retrieval', 'embeddings-search',
{ query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates },
{ candidatesFound: similarItems.length, toolNames: Array.from(toolNames), conceptNames: Array.from(conceptNames) },
similarItems.length >= 15 ? 85 : 60, // Confidence based on result quality
embeddingsStart,
{ selectionMethod, embeddingsEnabled: true }
);
}
} else {
console.log(`[IMPROVED PIPELINE] Embeddings disabled, using full dataset`);
candidateTools = toolsData.tools;
@@ -194,6 +327,8 @@ class ImprovedMicroTaskAIPipeline {
mode: string,
selectionMethod: string
) {
const selectionStart = Date.now();
const modeInstruction = mode === 'workflow'
? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases. Select 15-25 tools that cover the full investigation lifecycle.'
: 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem. Select 3-8 tools that are most relevant and effective.';
@@ -298,6 +433,18 @@ Respond with ONLY this JSON format:
if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
console.error('[IMPROVED PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
// NEW: Add Audit Entry for Failed Selection
if (this.auditConfig.enabled) {
this.addAuditEntry(null, 'selection', 'ai-tool-selection-failed',
{ candidateCount: candidateTools.length, mode, prompt: prompt.slice(0, 200) },
{ error: 'Invalid JSON structure', response: response.slice(0, 200) },
10, // Very low confidence
selectionStart,
{ aiModel: this.config.model, selectionMethod }
);
}
throw new Error('AI selection failed to return valid tool selection');
}
@@ -315,6 +462,24 @@ Respond with ONLY this JSON format:
console.log(`[IMPROVED PIPELINE] Final selection: ${selectedTools.length} tools with bias prevention applied`);
// NEW: Add Audit Entry for Successful Selection
if (this.auditConfig.enabled) {
const confidence = this.calculateSelectionConfidence(result, candidateTools.length);
this.addAuditEntry(null, 'selection', 'ai-tool-selection',
{ candidateCount: candidateTools.length, mode, promptLength: prompt.length },
{
selectedToolCount: result.selectedTools.length,
selectedConceptCount: result.selectedConcepts.length,
reasoning: result.reasoning?.slice(0, 200) + '...',
finalToolNames: selectedTools.map(t => t.name)
},
confidence,
selectionStart,
{ aiModel: this.config.model, selectionMethod, promptTokens: this.estimateTokens(prompt) }
);
}
return {
selectedTools,
selectedConcepts
@@ -323,12 +488,25 @@ Respond with ONLY this JSON format:
} catch (error) {
console.error('[IMPROVED PIPELINE] AI selection failed:', error);
// NEW: Add Audit Entry for Selection Error
if (this.auditConfig.enabled) {
this.addAuditEntry(null, 'selection', 'ai-tool-selection-error',
{ candidateCount: candidateTools.length, mode },
{ error: error.message },
5, // Very low confidence
selectionStart,
{ aiModel: this.config.model, selectionMethod }
);
}
console.log('[IMPROVED PIPELINE] Using emergency keyword-based selection');
return this.emergencyKeywordSelection(userQuery, candidateTools, candidateConcepts, mode);
}
}
private emergencyKeywordSelection(userQuery: string, candidateTools: any[], candidateConcepts: any[], mode: string) {
const emergencyStart = Date.now();
const queryLower = userQuery.toLowerCase();
const keywords = queryLower.split(/\s+/).filter(word => word.length > 3);
@@ -354,6 +532,17 @@ Respond with ONLY this JSON format:
console.log(`[IMPROVED PIPELINE] Emergency selection: ${selectedTools.length} tools, keywords: ${keywords.slice(0, 5).join(', ')}`);
// NEW: Add Audit Entry for Emergency Selection
if (this.auditConfig.enabled) {
this.addAuditEntry(null, 'selection', 'emergency-keyword-selection',
{ keywords: keywords.slice(0, 10), candidateCount: candidateTools.length },
{ selectedCount: selectedTools.length, topScores: scoredTools.slice(0, 5).map(s => ({ name: s.tool.name, score: s.score })) },
40, // Moderate confidence for emergency selection
emergencyStart,
{ selectionMethod: 'emergency_keyword' }
);
}
return {
selectedTools,
selectedConcepts: candidateConcepts.slice(0, 3)
@@ -382,21 +571,43 @@ Respond with ONLY this JSON format:
try {
const response = await this.callAI(contextPrompt, maxTokens);
return {
const result = {
taskType: 'micro-task',
content: response.trim(),
processingTimeMs: Date.now() - startTime,
success: true
};
// NEW: Add Audit Entry for Successful Micro-Task
this.addAuditEntry(context, 'micro-task', 'ai-analysis',
{ promptLength: contextPrompt.length, maxTokens },
{ responseLength: response.length, contentPreview: response.slice(0, 100) },
response.length > 50 ? 80 : 60, // Confidence based on response quality
startTime,
{ aiModel: this.config.model, contextUsed: context.contextHistory.length > 0 }
);
return result;
} catch (error) {
return {
const result = {
taskType: 'micro-task',
content: '',
processingTimeMs: Date.now() - startTime,
success: false,
error: error.message
};
// NEW: Add Audit Entry for Failed Micro-Task
this.addAuditEntry(context, 'micro-task', 'ai-analysis-failed',
{ promptLength: contextPrompt.length, maxTokens },
{ error: error.message },
5, // Very low confidence
startTime,
{ aiModel: this.config.model, contextUsed: context.contextHistory.length > 0 }
);
return result;
}
}
@@ -550,6 +761,15 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
this.addToolToSelection(context, tool, phase.id, sel.priority, sel.justification);
}
});
// NEW: Add audit entry for tool selection
this.addAuditEntry(context, 'micro-task', 'phase-tool-selection',
{ phase: phase.id, availableTools: phaseTools.length },
{ validSelections: validSelections.length, selectedTools: validSelections.map(s => s.toolName) },
validSelections.length > 0 ? 75 : 30,
Date.now() - result.processingTimeMs,
{ phaseName: phase.name }
);
}
}
@@ -595,6 +815,15 @@ Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit d
rank
}
}, 'evaluation', evaluation.suitability_score);
// NEW: Add audit entry for tool evaluation
this.addAuditEntry(context, 'micro-task', 'tool-evaluation',
{ toolName: tool.name, rank },
{ suitabilityScore: evaluation.suitability_score, hasExplanation: !!evaluation.detailed_explanation },
evaluation.suitability_score === 'high' ? 85 : evaluation.suitability_score === 'medium' ? 70 : 50,
Date.now() - result.processingTimeMs,
{ toolType: tool.type }
);
}
return result;
@@ -644,6 +873,15 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
concept: availableConcepts.find((c: any) => c.name === sel.conceptName),
relevance: sel.relevance
}));
// NEW: Add audit entry for background knowledge selection
this.addAuditEntry(context, 'micro-task', 'background-knowledge-selection',
{ availableConcepts: availableConcepts.length },
{ selectedConcepts: context.backgroundKnowledge?.length || 0 },
context.backgroundKnowledge && context.backgroundKnowledge.length > 0 ? 75 : 40,
Date.now() - result.processingTimeMs,
{}
);
}
}
@@ -711,7 +949,10 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
let completedTasks = 0;
let failedTasks = 0;
console.log(`[IMPROVED PIPELINE] Starting ${mode} query processing with context continuity`);
// NEW: Clear any previous temporary audit entries
this.tempAuditEntries = [];
console.log(`[IMPROVED PIPELINE] Starting ${mode} query processing with context continuity and audit trail`);
try {
// Stage 1: Get intelligent candidates (embeddings + AI selection)
@@ -725,11 +966,25 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
contextHistory: [],
maxContextLength: this.maxContextTokens,
currentContextLength: 0,
seenToolNames: new Set<string>()
seenToolNames: new Set<string>(),
// NEW: Initialize audit trail
auditTrail: []
};
// NEW: Merge any temporary audit entries from pre-context operations
this.mergeTemporaryAuditEntries(context);
console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
// NEW: Add initial audit entry
this.addAuditEntry(context, 'initialization', 'pipeline-start',
{ userQuery, mode, toolsDataLoaded: !!toolsData },
{ candidateTools: filteredData.tools.length, candidateConcepts: filteredData.concepts.length },
90, // High confidence for initialization
startTime,
{ auditEnabled: this.auditConfig.enabled }
);
// MICRO-TASK SEQUENCE
// Task 1: Scenario/Problem Analysis
@@ -776,6 +1031,15 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
// Build final recommendation
const recommendation = this.buildRecommendation(context, mode, finalResult.content);
// NEW: Add final audit entry
this.addAuditEntry(context, 'completion', 'pipeline-end',
{ completedTasks, failedTasks },
{ finalRecommendation: !!recommendation, auditEntriesGenerated: context.auditTrail.length },
completedTasks > failedTasks ? 85 : 60,
startTime,
{ totalProcessingTimeMs: Date.now() - startTime }
);
const processingStats = {
embeddingsUsed: embeddingsService.isEnabled(),
candidatesFromEmbeddings: filteredData.tools.length,
@@ -789,14 +1053,23 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
console.log(`[IMPROVED PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
console.log(`[IMPROVED PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);
console.log(`[IMPROVED PIPELINE] Audit trail entries: ${context.auditTrail.length}`);
return {
recommendation,
recommendation: {
...recommendation,
// NEW: Include audit trail in response
auditTrail: this.auditConfig.enabled ? context.auditTrail : undefined
},
processingStats
};
} catch (error) {
console.error('[IMPROVED PIPELINE] Processing failed:', error);
// NEW: Ensure temp audit entries are cleared even on error
this.tempAuditEntries = [];
throw error;
}
}