audit trail detail

This commit is contained in:
overcuriousity
2025-08-17 16:30:58 +02:00
parent 5c3c308225
commit e63ec367a5
3 changed files with 716 additions and 379 deletions

View File

@@ -1,4 +1,4 @@
// src/utils/aiPipeline.ts - Enhanced with comprehensive audit logging and restored sophisticated logic
// src/utils/aiPipeline.ts - Fixed with accurate audit data and meaningful confidence
import { getCompressedToolsDataForAI, getDataVersion } from './dataService.js';
import { aiService } from './aiService.js';
import { toolSelector, type SelectionContext } from './toolSelector.js';
@@ -95,7 +95,7 @@ class AIPipeline {
}
};
console.log('[AI-PIPELINE] Initialized orchestration pipeline with enhanced logic');
console.log('[AI-PIPELINE] Initialized with improved audit accuracy');
}
async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {
@@ -124,40 +124,26 @@ class AIPipeline {
embeddingsSimilarities: new Map<string, number>()
};
auditService.addEntry(
'initialization',
'pipeline-start',
{
userQuery: this.truncateForAudit(userQuery),
mode,
toolsDataLoaded: !!toolsData,
aiConfig: { model: aiConfig.model }
},
{
totalAvailableTools: toolsData.tools.length,
totalAvailableConcepts: toolsData.concepts.length,
embeddingsEnabled: embeddingsService.isEnabled()
},
90,
startTime,
{
toolsDataHash,
aiModel: aiConfig.model,
embeddingsUsed: embeddingsService.isEnabled(),
pipelineVersion: '2.1-enhanced'
}
);
// Skip initialization audit entry - it doesn't add transparency value
console.log('[AI-PIPELINE] Phase 1: Tool candidate selection');
const candidateSelectionStart = Date.now();
const candidateData = await toolSelector.getIntelligentCandidates(userQuery, toolsData, mode, context);
// Calculate meaningful confidence for tool selection
const selectionConfidence = this.calculateToolSelectionConfidence(
candidateData.tools.length,
toolsData.tools.length,
candidateData.selectionMethod,
candidateData.concepts.length
);
auditService.addToolSelection(
candidateData.tools.map(t => t.name),
toolsData.tools.map(t => t.name),
candidateData.selectionMethod,
85,
selectionConfidence,
candidateSelectionStart,
{
embeddingsUsed: embeddingsService.isEnabled(),
@@ -211,25 +197,7 @@ class AIPipeline {
const recommendation = this.buildRecommendation(context, mode, finalResult.content);
auditService.addEntry(
'completion',
'pipeline-end',
{ completedTasks, failedTasks, totalTokensUsed: this.totalTokensUsed },
{
finalRecommendation: !!recommendation,
auditEntriesGenerated: auditService.getCurrentAuditTrail().length,
selectedToolsCount: context.selectedTools?.length || 0,
backgroundKnowledgeCount: context.backgroundKnowledge?.length || 0
},
completedTasks > failedTasks ? 85 : 60,
startTime,
{
totalProcessingTimeMs: Date.now() - startTime,
aiModel: aiConfig.model,
finalTokenUsage: this.totalTokensUsed,
pipelineEfficiency: completedTasks / (completedTasks + failedTasks)
}
);
// Skip completion audit entry - it doesn't add transparency value
const processingStats = {
embeddingsUsed: embeddingsService.isEnabled(),
@@ -270,21 +238,47 @@ class AIPipeline {
} catch (error) {
console.error('[AI-PIPELINE] Pipeline failed:', error);
auditService.addEntry(
'error',
'pipeline-failure',
{ userQuery: this.truncateForAudit(userQuery), mode },
{ error: error.message, completedTasks, failedTasks },
0,
startTime,
{ errorType: error.constructor.name, totalTokensUsed: this.totalTokensUsed }
);
throw error;
}
}
/**
 * Scores the candidate tool selection on a 40-95 confidence scale.
 *
 * Starts from a base of 50 and adjusts it by the share of the catalogue that
 * was selected, the selection method, whether concepts were selected as well,
 * and the absolute number of selected tools.
 *
 * @param selectedCount - number of tools chosen as candidates
 * @param totalCount - number of tools that were available to choose from
 * @param method - selection method label; a label containing "embeddings" earns a bonus
 * @param conceptsCount - number of concepts selected alongside the tools
 * @returns the confidence, clamped to the range [40, 95]
 */
private calculateToolSelectionConfidence(
  selectedCount: number,
  totalCount: number,
  method: string,
  conceptsCount: number
): number {
  const MIN_CONFIDENCE = 40;
  const MAX_CONFIDENCE = 95;

  // An empty selection must not be rewarded as "very selective", and an empty
  // catalogue would make the ratio NaN/Infinity. Both cases get the floor value.
  // The negated comparison also catches NaN inputs.
  if (!(selectedCount > 0) || !(totalCount > 0)) {
    return MIN_CONFIDENCE;
  }

  let confidence = 50;

  const selectionRatio = selectedCount / totalCount;

  // Good selection ratio (5-20% is optimal)
  if (selectionRatio >= 0.05 && selectionRatio <= 0.20) {
    confidence += 25;
  } else if (selectionRatio < 0.05) {
    confidence += 15; // Very selective
  } else if (selectionRatio > 0.30) {
    confidence -= 15; // Too inclusive
  }
  // Ratios in (0.20, 0.30] are intentionally neutral.

  // Embeddings method bonus
  if (method.includes('embeddings')) {
    confidence += 15;
  }

  // Concepts also selected
  if (conceptsCount > 0) {
    confidence += 10;
  }

  // Reasonable absolute numbers
  if (selectedCount >= 8 && selectedCount <= 25) {
    confidence += 10;
  }

  return Math.min(MAX_CONFIDENCE, Math.max(MIN_CONFIDENCE, confidence));
}
private async processWorkflowMode(
context: PipelineContext,
toolsData: any,
@@ -300,27 +294,44 @@ class AIPipeline {
tool && tool.phases && Array.isArray(tool.phases) && tool.phases.includes(phase.id)
);
if (phaseTools.length === 0) {
console.log(`[AI-PIPELINE] No tools available for phase: ${phase.id}`);
continue;
}
const selections = await toolSelector.selectToolsForPhase(context.userQuery, phase, phaseTools, context);
// Calculate meaningful confidence based on phase selection quality
const phaseConfidence = this.calculatePhaseSelectionConfidence(
selections.length,
phaseTools.length,
phase.id,
selections
);
auditService.addEntry(
'workflow-phase',
'phase-tool-selection',
{
phaseId: phase.id,
phaseName: phase.name,
availableTools: phaseTools.map(t => t.name)
availableTools: phaseTools.map(t => t.name),
toolCount: phaseTools.length
},
{
selectedTools: selections.map(s => s.toolName),
selectionCount: selections.length
selectionCount: selections.length,
avgTaskRelevance: selections.length > 0 ?
Math.round(selections.reduce((sum, s) => sum + (s.taskRelevance || 70), 0) / selections.length) : 0
},
selections.length > 0 ? 80 : 50,
phaseConfidence,
phaseStart,
{
phaseId: phase.id,
availableToolsCount: phaseTools.length,
selectedToolsCount: selections.length,
microTaskType: 'phase-tool-selection'
microTaskType: 'phase-tool-selection',
reasoning: `${selections.length} von ${phaseTools.length} verfügbaren Tools für ${phase.name} ausgewählt - KI bewertete Eignung für spezifische Phasenaufgaben`
}
);
@@ -335,15 +346,24 @@ class AIPipeline {
auditService.addEntry(
'tool-reasoning',
'tool-added-to-phase',
{ toolName: tool.name, phaseId: phase.id, originalTaskRelevance: sel.taskRelevance, moderatedTaskRelevance },
{ justification: sel.justification, limitations: sel.limitations },
{
toolName: tool.name,
phaseId: phase.id,
taskRelevance: moderatedTaskRelevance,
priority: priority
},
{
justification: sel.justification,
limitations: sel.limitations,
addedToPhase: phase.name
},
moderatedTaskRelevance || 70,
phaseStart,
{
toolType: tool.type,
priority,
selectionReasoning: sel.justification,
moderationApplied: sel.taskRelevance !== moderatedTaskRelevance
moderationApplied: sel.taskRelevance !== moderatedTaskRelevance,
reasoning: `${tool.name} als ${priority}-Priorität für ${phase.name} ausgewählt: ${sel.justification?.slice(0, 100)}...`
}
);
}
@@ -360,6 +380,51 @@ class AIPipeline {
return { completed: completedTasks, failed: failedTasks };
}
/**
 * Rates the tool selection made for a single workflow phase.
 *
 * @param selectedCount - number of tools selected for the phase
 * @param availableCount - number of tools that were available for the phase
 * @param phaseId - identifier of the phase; 'acquisition', 'examination' and
 *   'analysis' are treated as critical
 * @param selections - the selection records; each record's `taskRelevance` is
 *   read, with 70 substituted when it is falsy
 * @returns 30 when nothing was selected, otherwise a score capped at 95
 */
private calculatePhaseSelectionConfidence(
  selectedCount: number,
  availableCount: number,
  phaseId: string,
  selections: any[]
): number {
  // No selection is concerning: bail out with the lowest score.
  if (!(selectedCount > 0)) {
    return 30;
  }

  // Base of 60 plus 20 for having selected at least one tool.
  let score = 60 + 20;

  // Share of the phase's tools that was picked (20-50% is considered reasonable).
  const share = selectedCount / availableCount;
  const inReasonableBand = share >= 0.2 && share <= 0.5;
  if (inReasonableBand) {
    score += 15;
  } else if (share < 0.2 && selectedCount >= 1) {
    // Being selective is acceptable, but earns a smaller bonus.
    score += 10;
  }

  // Critical phases should be covered by at least two tools.
  const criticalPhaseIds = new Set(['acquisition', 'examination', 'analysis']);
  if (criticalPhaseIds.has(phaseId) && selectedCount >= 2) {
    score += 10;
  }

  // Mean task relevance across the selections drives the quality bonus.
  let meanRelevance = 0;
  if (selections.length > 0) {
    let relevanceTotal = 0;
    selections.forEach(entry => {
      relevanceTotal = relevanceTotal + (entry.taskRelevance || 70);
    });
    meanRelevance = relevanceTotal / selections.length;
  }

  if (meanRelevance >= 75) {
    score += 10;
  } else if (meanRelevance >= 65) {
    score += 5;
  }

  return Math.max(30, Math.min(95, score));
}
private async processToolMode(
context: PipelineContext,
completedTasks: number,
@@ -405,26 +470,6 @@ class AIPipeline {
console.log('[AI-PIPELINE] Completing underrepresented phases:', underrepresentedPhases.map((p: any) => p.id).join(', '));
auditService.addEntry(
'phase-completion',
'underrepresented-phases-detected',
{
underrepresentedPhases: underrepresentedPhases.map(p => p.id),
currentPhaseDistribution: Array.from(selectedPhases.entries())
},
{
phasesToComplete: underrepresentedPhases.length,
completionStrategy: 'semantic-search-with-ai-reasoning'
},
70,
pipelineStart,
{
totalPhases: phases.length,
adequatelyRepresented: phases.length - underrepresentedPhases.length,
completionMethod: 'sophisticated-ai-reasoning'
}
);
for (const phase of underrepresentedPhases) {
const result = await this.completePhaseWithSemanticSearchAndAI(context, phase, toolsData, pipelineStart);
if (result.success) completedTasks++; else failedTasks++;
@@ -537,6 +582,9 @@ class AIPipeline {
};
}
// This is the fix for "0 tools added" - use the actual valid tools
const actualToolsAdded = validTools.map(tool => tool.name);
for (const tool of validTools) {
console.log('[AI-PIPELINE] Generating AI reasoning for phase completion tool:', tool.name);
@@ -572,25 +620,26 @@ class AIPipeline {
['Nachträgliche Ergänzung via semantische Phasensuche mit KI-Bewertung']
);
auditService.addPhaseCompletion(
phase.id,
[tool.name],
detailedJustification,
phaseStart,
{
toolName: tool.name,
toolType: tool.type,
semanticSimilarity: phaseResults.find(r => r.name === tool.name)?.similarity,
completionReason: 'underrepresented-phase',
originalSelectionMissed: true,
aiReasoningUsed: reasoningResult.success,
moderatedTaskRelevance
}
);
console.log('[AI-PIPELINE] Added phase completion tool with AI reasoning:', tool.name);
}
// Use the actual tools added for audit
auditService.addPhaseCompletion(
phase.id,
actualToolsAdded, // This ensures correct count
selection.completionReasoning || `${actualToolsAdded.length} Tools für ${phase.name} hinzugefügt`,
phaseStart,
{
toolsAdded: actualToolsAdded,
toolType: validTools[0]?.type,
semanticSimilarity: phaseResults.find(r => r.name === validTools[0]?.name)?.similarity,
completionReason: 'underrepresented-phase',
originalSelectionMissed: true,
aiReasoningUsed: true,
moderatedTaskRelevance: 75
}
);
return {
taskType: 'phase-completion',
content: selection.completionReasoning || '',
@@ -601,19 +650,6 @@ class AIPipeline {
} catch (error) {
console.error('[AI-PIPELINE] Phase completion failed for:', phase.id, error);
auditService.addEntry(
'phase-completion',
'completion-failed',
{ phaseId: phase.id, error: error.message },
{ success: false },
20,
phaseStart,
{
errorType: error.constructor.name,
phaseId: phase.id
}
);
return {
taskType: 'phase-completion',
content: '',
@@ -795,14 +831,12 @@ class AIPipeline {
}
}, 'evaluation', priority, evaluation.detailed_explanation, moderatedTaskRelevance, evaluation.limitations);
// Calculate confidence based on response quality and task relevance
const responseConfidence = auditService.calculateAIResponseConfidence(
result.content,
{ min: 200, max: 800 },
'tool-evaluation'
);
// Use the higher of response quality confidence or moderated task relevance
const finalConfidence = Math.max(responseConfidence, moderatedTaskRelevance);
auditService.addAIDecision(
@@ -870,8 +904,9 @@ class AIPipeline {
'background-knowledge'
);
const selectionBonus = context.backgroundKnowledge.length > 0 ? 15 : 0;
const finalConfidence = Math.min(95, responseConfidence + selectionBonus);
// Calculate confidence based on quality of selections
const selectionQualityBonus = this.calculateKnowledgeSelectionBonus(context.backgroundKnowledge, availableConcepts);
const finalConfidence = Math.min(95, responseConfidence + selectionQualityBonus);
auditService.addEntry(
'knowledge-synthesis',
@@ -893,7 +928,7 @@ class AIPipeline {
selectedConceptsCount: context.backgroundKnowledge.length,
selectionRatio: context.backgroundKnowledge.length / availableConcepts.length,
responseConfidence,
selectionBonus,
selectionQualityBonus,
decisionBasis: 'ai-analysis',
reasoning: `Wählte ${context.backgroundKnowledge.length} von ${availableConcepts.length} verfügbaren Konzepten für methodische Fundierung der Empfehlungen`,
aiModel: aiService.getConfig().model,
@@ -906,6 +941,33 @@ class AIPipeline {
return result;
}
/**
 * Computes the confidence bonus earned by the background-knowledge selection.
 *
 * Three independent awards are summed:
 * - 10 when at least one concept was selected,
 * - 15 when 10-30% of the available concepts were selected,
 * - 10 when at least one selection has a relevance text longer than 30 characters.
 *
 * @param selectedKnowledge - the selected concepts with their relevance text
 * @param availableConcepts - all concepts that could have been selected
 * @returns the summed bonus, between 0 and 35
 */
private calculateKnowledgeSelectionBonus(
  selectedKnowledge: Array<{concept: any; relevance: string}>,
  availableConcepts: any[]
): number {
  const pickedCount = selectedKnowledge.length;
  const coverage = pickedCount / availableConcepts.length;

  // Something was selected at all.
  const presenceAward = pickedCount > 0 ? 10 : 0;

  // Coverage inside the 10-30% band.
  const coverageAward = coverage >= 0.1 && coverage <= 0.3 ? 15 : 0;

  // At least one selection comes with a substantial relevance explanation.
  const isWellReasoned = (item: {concept: any; relevance: string}) =>
    item.relevance && item.relevance.length > 30;
  const reasoningAward = selectedKnowledge.some(isWellReasoned) ? 10 : 0;

  return presenceAward + coverageAward + reasoningAward;
}
private async generateFinalRecommendations(context: PipelineContext, pipelineStart: number): Promise<MicroTaskResult> {
console.log('[AI-PIPELINE] Micro-task: Final recommendations');
const taskStart = Date.now();
@@ -921,7 +983,8 @@ class AIPipeline {
'final-recommendations'
);
const contextBonus = selectedToolNames.length >= 3 ? 10 : 0;
// Calculate bonus based on context quality
const contextBonus = this.calculateSynthesisBonus(selectedToolNames, context);
const finalConfidence = Math.min(95, confidence + contextBonus);
auditService.addAIDecision(
@@ -948,6 +1011,28 @@ class AIPipeline {
return result;
}
/**
 * Computes the confidence bonus for the final recommendations from how much
 * context was available when they were generated.
 *
 * @param selectedToolNames - names of the tools selected so far
 * @param context - pipeline context; `backgroundKnowledge`, `scenarioAnalysis`,
 *   `problemAnalysis` and `investigationApproach` are inspected
 * @returns the summed bonus, between 0 and 30
 */
private calculateSynthesisBonus(selectedToolNames: string[], context: PipelineContext): number {
  const enoughTools = selectedToolNames.length >= 3;
  const hasBackgroundKnowledge = Boolean(
    context.backgroundKnowledge && context.backgroundKnowledge.length > 0
  );
  const hasAnalysis = Boolean(context.scenarioAnalysis || context.problemAnalysis);
  const hasApproach = Boolean(context.investigationApproach);

  // Each satisfied criterion contributes its fixed number of points.
  const awards: Array<[boolean, number]> = [
    [enoughTools, 10],
    [hasBackgroundKnowledge, 10],
    [hasAnalysis, 5],
    [hasApproach, 5]
  ];

  return awards.reduce((total, [earned, points]) => (earned ? total + points : total), 0);
}
private buildRecommendation(context: PipelineContext, mode: string, finalContent: string): any {
const isWorkflow = mode === 'workflow';
@@ -1140,12 +1225,6 @@ class AIPipeline {
return 'low';
}
/**
 * Shortens a text for storage in an audit entry.
 *
 * Non-string input is converted with `String()` and returned as is. Strings
 * no longer than `maxLength` are returned unchanged. Longer strings are cut
 * and suffixed with `...[audit-truncated]`.
 *
 * @param text - the text to shorten
 * @param maxLength - maximum number of UTF-16 code units kept before the marker
 * @returns the original or truncated text
 */
private truncateForAudit(text: string, maxLength: number = 200): string {
  if (typeof text !== 'string') return String(text);
  if (text.length <= maxLength) return text;

  let cut = maxLength;
  // If the last kept code unit is a high surrogate, cutting here would split a
  // surrogate pair and leave a malformed character; drop that unit as well.
  const lastKept = text.charCodeAt(cut - 1);
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
    cut -= 1;
  }

  return text.slice(0, cut) + '...[audit-truncated]';
}
private trackTokenUsage(usage?: { promptTokens?: number; completionTokens?: number; totalTokens?: number }): void {
if (usage?.totalTokens) {
this.totalTokensUsed += usage.totalTokens;