fix audit trail
This commit is contained in:
parent
3d5d2506e9
commit
28af56d6ef
@@ -1131,10 +1131,55 @@ class AIQueryInterface {
|
|||||||
const lowConfidenceSteps = auditTrail.filter(entry => (entry.confidence || 0) < 60).length;
|
const lowConfidenceSteps = auditTrail.filter(entry => (entry.confidence || 0) < 60).length;
|
||||||
const mediumConfidenceSteps = auditTrail.length - highConfidenceSteps - lowConfidenceSteps;
|
const mediumConfidenceSteps = auditTrail.length - highConfidenceSteps - lowConfidenceSteps;
|
||||||
|
|
||||||
|
// FIX 1: Count actual AI decision actions only
|
||||||
const aiDecisionCount = auditTrail.filter(entry => entry.action === 'ai-decision').length;
|
const aiDecisionCount = auditTrail.filter(entry => entry.action === 'ai-decision').length;
|
||||||
const embeddingsUsageCount = auditTrail.filter(entry => entry.metadata?.embeddingsUsed).length;
|
|
||||||
|
// FIX 2: Count actual similarity search actions, not metadata flags
|
||||||
|
const embeddingsUsageCount = auditTrail.filter(entry => entry.action === 'similarity-search').length;
|
||||||
|
|
||||||
|
// FIX 3: Maintain tool selection count (this was correct)
|
||||||
const toolSelectionCount = auditTrail.filter(entry => entry.action === 'selection-decision').length;
|
const toolSelectionCount = auditTrail.filter(entry => entry.action === 'selection-decision').length;
|
||||||
|
|
||||||
|
// Additional diagnostic counts for debugging
|
||||||
|
const microTaskCount = auditTrail.filter(entry =>
|
||||||
|
entry.action === 'ai-decision' && entry.metadata?.microTaskType
|
||||||
|
).length;
|
||||||
|
|
||||||
|
const phaseToolSelectionCount = auditTrail.filter(entry =>
|
||||||
|
entry.action === 'phase-tool-selection'
|
||||||
|
).length;
|
||||||
|
|
||||||
|
const phaseEnhancementCount = auditTrail.filter(entry =>
|
||||||
|
entry.action === 'phase-enhancement'
|
||||||
|
).length;
|
||||||
|
|
||||||
|
// Enhanced insights with diagnostic information
|
||||||
|
const keyInsights = [];
|
||||||
|
const potentialIssues = [];
|
||||||
|
|
||||||
|
if (embeddingsUsageCount > 0) {
|
||||||
|
keyInsights.push(`Semantische Suche wurde ${embeddingsUsageCount}x erfolgreich eingesetzt`);
|
||||||
|
} else {
|
||||||
|
potentialIssues.push('Keine semantischen Suchen dokumentiert - möglicherweise fehlerhafte Auditierung');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (aiDecisionCount >= 5) {
|
||||||
|
keyInsights.push(`${aiDecisionCount} KI-Entscheidungen mit detaillierter Begründung`);
|
||||||
|
} else {
|
||||||
|
potentialIssues.push(`Nur ${aiDecisionCount} KI-Entscheidungen dokumentiert - erwartet mindestens 5 für Vollständigkeit`);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (microTaskCount > 0) {
|
||||||
|
keyInsights.push(`${microTaskCount} spezialisierte Micro-Task-Analysen durchgeführt`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detect mode-specific patterns for validation
|
||||||
|
if (phaseToolSelectionCount > 0 || phaseEnhancementCount > 0) {
|
||||||
|
keyInsights.push('Workflow-Modus: Phasenspezifische Analyse durchgeführt');
|
||||||
|
} else if (microTaskCount >= 3) {
|
||||||
|
keyInsights.push('Tool-Modus: Detaillierte Einzelbewertungen durchgeführt');
|
||||||
|
}
|
||||||
|
|
||||||
const phaseBreakdown = {};
|
const phaseBreakdown = {};
|
||||||
auditTrail.forEach(entry => {
|
auditTrail.forEach(entry => {
|
||||||
const phase = entry.phase || 'unknown';
|
const phase = entry.phase || 'unknown';
|
||||||
@@ -1168,76 +1213,21 @@ class AIQueryInterface {
|
|||||||
analysisQuality = 'poor';
|
analysisQuality = 'poor';
|
||||||
}
|
}
|
||||||
|
|
||||||
const keyInsights = [];
|
|
||||||
const embeddingsUsed = auditTrail.some(e => e.metadata?.embeddingsUsed);
|
|
||||||
if (embeddingsUsed) {
|
|
||||||
keyInsights.push('Semantische Suche wurde erfolgreich eingesetzt');
|
|
||||||
}
|
|
||||||
|
|
||||||
const aiDecisionsWithReasoning = auditTrail.filter(e =>
|
|
||||||
e.action === 'ai-decision' && e.metadata?.reasoning
|
|
||||||
).length;
|
|
||||||
if (aiDecisionsWithReasoning > 0) {
|
|
||||||
keyInsights.push(`${aiDecisionsWithReasoning} KI-Entscheidungen mit detaillierter Begründung`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (highConfidenceSteps > auditTrail.length * 0.7) {
|
if (highConfidenceSteps > auditTrail.length * 0.7) {
|
||||||
keyInsights.push('Mehrheit der Analyseschritte mit hoher Sicherheit');
|
keyInsights.push('Mehrheit der Analyseschritte mit hoher Sicherheit');
|
||||||
}
|
}
|
||||||
|
|
||||||
const responseQualityEntries = auditTrail.filter(e =>
|
// Validate expected counts based on mode detection
|
||||||
e.metadata?.responseConfidence && e.metadata.finalConfidence
|
const isWorkflowMode = phaseToolSelectionCount > 0 || phaseEnhancementCount > 0;
|
||||||
);
|
const expectedMinAI = isWorkflowMode ? 11 : 8; // Workflow: 5 common + 6 phase selections, Tool: 5 common + 3 evaluations
|
||||||
if (responseQualityEntries.length > 0) {
|
const expectedMinEmbeddings = 1; // Both modes should have initial search
|
||||||
const avgResponseQuality = responseQualityEntries.reduce((sum, e) =>
|
|
||||||
sum + (e.metadata.responseConfidence || 0), 0
|
|
||||||
) / responseQualityEntries.length;
|
|
||||||
|
|
||||||
if (avgResponseQuality >= 70) {
|
if (aiDecisionCount < expectedMinAI) {
|
||||||
keyInsights.push(`Hohe AI-Antwortqualität (∅ ${Math.round(avgResponseQuality)}%)`);
|
potentialIssues.push(`${expectedMinAI - aiDecisionCount} fehlende KI-Entscheidungen für ${isWorkflowMode ? 'Workflow' : 'Tool'}-Modus`);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const potentialIssues = [];
|
if (embeddingsUsageCount < expectedMinEmbeddings) {
|
||||||
if (lowConfidenceSteps > 2) {
|
potentialIssues.push(`${expectedMinEmbeddings - embeddingsUsageCount} fehlende semantische Suchen`);
|
||||||
potentialIssues.push(`${lowConfidenceSteps} Analyseschritte mit niedriger Konfidenz`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// FIXED: Only detect actual AI incompleteness, not display truncation
|
|
||||||
// The old code incorrectly flagged display truncation as incomplete responses:
|
|
||||||
// OLD (WRONG): e.output.response && e.output.response.includes('...')
|
|
||||||
|
|
||||||
// NEW (CORRECT): Check metadata.aiResponse for actual incompleteness
|
|
||||||
const incompleteAIResponses = auditTrail.filter(e =>
|
|
||||||
e.action === 'ai-decision' &&
|
|
||||||
e.metadata?.aiResponse &&
|
|
||||||
(
|
|
||||||
// Detect actual AI incompleteness patterns:
|
|
||||||
e.metadata.aiResponse.trim().length < 10 || // Very short response
|
|
||||||
e.metadata.aiResponse.endsWith('...') || // AI itself truncated (rare but possible)
|
|
||||||
e.metadata.aiResponse.includes('[TRUNCATED]') || // Explicit truncation marker
|
|
||||||
e.metadata.aiResponse.includes('I cannot continue') || // AI stopped unexpectedly
|
|
||||||
e.metadata.aiResponse.includes('I need to stop here') || // AI indicated incompleteness
|
|
||||||
e.metadata.aiResponse.includes('[RESPONSE_TOO_LONG]') || // Length limit hit
|
|
||||||
// Also check if the AI response seems cut off mid-sentence
|
|
||||||
(e.metadata.aiResponse.length > 50 &&
|
|
||||||
!e.metadata.aiResponse.trim().match(/[.!?:]$/)) // Doesn't end with proper punctuation
|
|
||||||
)
|
|
||||||
).length;
|
|
||||||
|
|
||||||
if (incompleteAIResponses > 0) {
|
|
||||||
potentialIssues.push(`${incompleteAIResponses} möglicherweise unvollständige AI-Antworten`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Additional quality checks
|
|
||||||
const veryShortResponses = auditTrail.filter(e =>
|
|
||||||
e.action === 'ai-decision' &&
|
|
||||||
e.metadata?.aiResponse &&
|
|
||||||
e.metadata.aiResponse.trim().length < 20
|
|
||||||
).length;
|
|
||||||
|
|
||||||
if (veryShortResponses > 1) {
|
|
||||||
potentialIssues.push(`${veryShortResponses} ungewöhnlich kurze AI-Antworten`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -1260,7 +1250,14 @@ class AIQueryInterface {
|
|||||||
},
|
},
|
||||||
analysisQuality,
|
analysisQuality,
|
||||||
keyInsights,
|
keyInsights,
|
||||||
potentialIssues
|
potentialIssues,
|
||||||
|
// Debug information
|
||||||
|
debugCounts: {
|
||||||
|
microTaskCount,
|
||||||
|
phaseToolSelectionCount,
|
||||||
|
phaseEnhancementCount,
|
||||||
|
detectedMode: isWorkflowMode ? 'workflow' : 'tool'
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1184,6 +1184,37 @@ class AIPipeline {
|
|||||||
try {
|
try {
|
||||||
const response = await aiService.callMicroTaskAI(contextPrompt);
|
const response = await aiService.callMicroTaskAI(contextPrompt);
|
||||||
|
|
||||||
|
// FIX: Ensure ALL AI calls generate audit entries
|
||||||
|
const toolsDataHash = getDataVersion?.() || 'unknown';
|
||||||
|
const aiConfig = aiService.getConfig();
|
||||||
|
|
||||||
|
// Calculate response confidence for audit trail
|
||||||
|
const responseConfidence = auditService.calculateAIResponseConfidence(
|
||||||
|
response.content,
|
||||||
|
this.getExpectedLengthForTaskType(taskType),
|
||||||
|
taskType
|
||||||
|
);
|
||||||
|
|
||||||
|
// FIX: Always add AI decision audit entry for micro-tasks
|
||||||
|
auditService.addAIDecision(
|
||||||
|
this.getPhaseForTaskType(taskType),
|
||||||
|
prompt, // Store original prompt without context
|
||||||
|
response.content,
|
||||||
|
responseConfidence,
|
||||||
|
this.getReasoningForTaskType(taskType, response.content),
|
||||||
|
startTime,
|
||||||
|
{
|
||||||
|
toolsDataHash: toolsDataHash,
|
||||||
|
microTaskType: taskType,
|
||||||
|
aiModel: aiConfig.model,
|
||||||
|
contextLength: contextPrompt.length,
|
||||||
|
originalPromptLength: prompt.length,
|
||||||
|
contextHistoryUsed: context.contextHistory.length > 0,
|
||||||
|
decisionBasis: 'ai-analysis',
|
||||||
|
...response.usage
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
taskType,
|
taskType,
|
||||||
content: response.content,
|
content: response.content,
|
||||||
@@ -1193,6 +1224,29 @@ class AIPipeline {
|
|||||||
};
|
};
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
// FIX: Also audit failed AI calls for completeness
|
||||||
|
auditService.addEntry(
|
||||||
|
this.getPhaseForTaskType(taskType),
|
||||||
|
'ai-decision-failed',
|
||||||
|
{
|
||||||
|
prompt: prompt.slice(0, 200) + '...',
|
||||||
|
taskType: taskType,
|
||||||
|
error: error.message
|
||||||
|
},
|
||||||
|
{
|
||||||
|
error: error.message,
|
||||||
|
success: false
|
||||||
|
},
|
||||||
|
0, // Zero confidence for failed calls
|
||||||
|
startTime,
|
||||||
|
{
|
||||||
|
toolsDataHash: getDataVersion?.() || 'unknown',
|
||||||
|
microTaskType: taskType,
|
||||||
|
failed: true,
|
||||||
|
decisionBasis: 'ai-analysis'
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
taskType,
|
taskType,
|
||||||
content: '',
|
content: '',
|
||||||
@@ -1203,6 +1257,51 @@ class AIPipeline {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Looks up the phase label that belongs to a micro-task type.
 *
 * @param taskType - Micro-task identifier such as 'scenario-analysis' or 'tool-evaluation'.
 * @returns The mapped phase label, or 'contextual-analysis' when the task type has no entry.
 */
private getPhaseForTaskType(taskType: string): string {
|
||||||
|
// Static table: several task types share one phase label.
const phaseMap: Record<string, string> = {
|
||||||
|
'scenario-analysis': 'contextual-analysis',
|
||||||
|
'investigation-approach': 'contextual-analysis',
|
||||||
|
'critical-considerations': 'contextual-analysis',
|
||||||
|
'tool-evaluation': 'tool-evaluation',
|
||||||
|
'background-knowledge': 'knowledge-synthesis',
|
||||||
|
'final-recommendations': 'synthesis',
|
||||||
|
'phase-completion-selection': 'phase-completion',
|
||||||
|
'phase-completion-reasoning': 'phase-completion'
|
||||||
|
};
|
||||||
|
// Unlisted task types fall back to 'contextual-analysis'.
// NOTE(review): the lookup goes through a plain object literal, so inherited keys
// (e.g. 'constructor') would bypass the fallback - confirm taskType is always a listed key.
return phaseMap[taskType] || 'contextual-analysis';
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Looks up the expected response-length bounds for a micro-task type.
 *
 * @param taskType - Micro-task identifier such as 'scenario-analysis' or 'tool-evaluation'.
 * @returns An object with `min` and `max` bounds; `{ min: 50, max: 300 }` when the task
 *          type has no entry.
 */
// NOTE(review): the unit of min/max is not stated here (presumably characters) - confirm
// against the consumer of this value.
private getExpectedLengthForTaskType(taskType: string): { min: number; max: number } {
|
||||||
|
// Per-task-type bounds; values are fixed constants.
const lengthMap: Record<string, { min: number; max: number }> = {
|
||||||
|
'scenario-analysis': { min: 100, max: 500 },
|
||||||
|
'investigation-approach': { min: 100, max: 400 },
|
||||||
|
'critical-considerations': { min: 80, max: 300 },
|
||||||
|
'tool-evaluation': { min: 200, max: 800 },
|
||||||
|
'background-knowledge': { min: 50, max: 300 },
|
||||||
|
'final-recommendations': { min: 150, max: 600 },
|
||||||
|
'phase-completion-selection': { min: 50, max: 200 },
|
||||||
|
'phase-completion-reasoning': { min: 100, max: 300 }
|
||||||
|
};
|
||||||
|
// Unlisted task types get the default bounds; a fresh object is created on each call.
return lengthMap[taskType] || { min: 50, max: 300 };
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Builds the German reasoning sentence for a micro-task, naming the task and the
 * length of the response text.
 *
 * @param taskType - Micro-task identifier; translated to a German display name when listed.
 * @param response - The response text; only its `length` is used.
 * @returns A sentence containing the display name and the response length.
 */
private getReasoningForTaskType(taskType: string, response: string): string {
|
||||||
|
// String length in UTF-16 code units; reported as "Zeichen" in the returned sentence.
const responseLength = response.length;
|
||||||
|
// German display names for the known task types.
const taskNames: Record<string, string> = {
|
||||||
|
'scenario-analysis': 'Szenario-Analyse',
|
||||||
|
'investigation-approach': 'Untersuchungsansatz',
|
||||||
|
'critical-considerations': 'Kritische Überlegungen',
|
||||||
|
'tool-evaluation': 'Tool-Bewertung',
|
||||||
|
'background-knowledge': 'Hintergrundwissen-Auswahl',
|
||||||
|
'final-recommendations': 'Abschließende Empfehlungen',
|
||||||
|
'phase-completion-selection': 'Phasen-Vervollständigung',
|
||||||
|
'phase-completion-reasoning': 'Phasen-Begründung'
|
||||||
|
};
|
||||||
|
|
||||||
|
// Unlisted task types are shown by their raw identifier.
const taskName = taskNames[taskType] || taskType;
|
||||||
|
// The sentence wording is fixed; only the task name and the length vary.
return `KI generierte ${taskName} (${responseLength} Zeichen) - forensisch fundierte Analyse mit methodischer Begründung`;
|
||||||
|
}
|
||||||
|
|
||||||
private addToContextHistory(context: PipelineContext, newEntry: string): void {
|
private addToContextHistory(context: PipelineContext, newEntry: string): void {
|
||||||
const entryTokens = aiService.estimateTokens(newEntry);
|
const entryTokens = aiService.estimateTokens(newEntry);
|
||||||
|
|
||||||
|
@@ -99,6 +99,9 @@ class ToolSelector {
|
|||||||
|
|
||||||
console.log('[TOOL-SELECTOR] Using embeddings for candidate selection');
|
console.log('[TOOL-SELECTOR] Using embeddings for candidate selection');
|
||||||
|
|
||||||
|
// FIX: Record the start time for audit trail
|
||||||
|
const embeddingsSearchStart = Date.now();
|
||||||
|
|
||||||
const similarItems = await embeddingsService.findSimilar(
|
const similarItems = await embeddingsService.findSimilar(
|
||||||
userQuery,
|
userQuery,
|
||||||
this.config.embeddingCandidates,
|
this.config.embeddingCandidates,
|
||||||
@@ -107,6 +110,27 @@ class ToolSelector {
|
|||||||
|
|
||||||
console.log('[TOOL-SELECTOR] Embeddings found', similarItems.length, 'similar items');
|
console.log('[TOOL-SELECTOR] Embeddings found', similarItems.length, 'similar items');
|
||||||
|
|
||||||
|
// FIX: Import and use auditService to record this embeddings search
|
||||||
|
const { auditService } = await import('./auditService.js');
|
||||||
|
const { getDataVersion } = await import('./dataService.js');
|
||||||
|
|
||||||
|
const toolsDataHash = getDataVersion() || 'unknown';
|
||||||
|
|
||||||
|
// FIX: Add audit entry for initial embeddings search that happens in BOTH modes
|
||||||
|
auditService.addEmbeddingsSearch(
|
||||||
|
userQuery,
|
||||||
|
similarItems,
|
||||||
|
this.config.similarityThreshold,
|
||||||
|
embeddingsSearchStart,
|
||||||
|
{
|
||||||
|
toolsDataHash: toolsDataHash,
|
||||||
|
selectionPhase: 'initial-candidate-selection',
|
||||||
|
candidateLimit: this.config.embeddingCandidates,
|
||||||
|
mode: mode,
|
||||||
|
reasoning: `Initiale semantische Suche für ${mode}-Modus - Reduzierung der ${toolsData.tools.length} verfügbaren Tools auf ${similarItems.length} relevante Kandidaten`
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
similarItems.forEach(item => {
|
similarItems.forEach(item => {
|
||||||
context.embeddingsSimilarities.set(item.name, item.similarity);
|
context.embeddingsSimilarities.set(item.name, item.similarity);
|
||||||
});
|
});
|
||||||
|
Loading…
x
Reference in New Issue
Block a user