airefactor #19
@ -1131,10 +1131,55 @@ class AIQueryInterface {
|
||||
const lowConfidenceSteps = auditTrail.filter(entry => (entry.confidence || 0) < 60).length;
|
||||
const mediumConfidenceSteps = auditTrail.length - highConfidenceSteps - lowConfidenceSteps;
|
||||
|
||||
// FIX 1: Count actual AI decision actions only
|
||||
const aiDecisionCount = auditTrail.filter(entry => entry.action === 'ai-decision').length;
|
||||
const embeddingsUsageCount = auditTrail.filter(entry => entry.metadata?.embeddingsUsed).length;
|
||||
|
||||
// FIX 2: Count actual similarity search actions, not metadata flags
|
||||
const embeddingsUsageCount = auditTrail.filter(entry => entry.action === 'similarity-search').length;
|
||||
|
||||
// FIX 3: Maintain tool selection count (this was correct)
|
||||
const toolSelectionCount = auditTrail.filter(entry => entry.action === 'selection-decision').length;
|
||||
|
||||
// Additional diagnostic counts for debugging
|
||||
const microTaskCount = auditTrail.filter(entry =>
|
||||
entry.action === 'ai-decision' && entry.metadata?.microTaskType
|
||||
).length;
|
||||
|
||||
const phaseToolSelectionCount = auditTrail.filter(entry =>
|
||||
entry.action === 'phase-tool-selection'
|
||||
).length;
|
||||
|
||||
const phaseEnhancementCount = auditTrail.filter(entry =>
|
||||
entry.action === 'phase-enhancement'
|
||||
).length;
|
||||
|
||||
// Enhanced insights with diagnostic information
|
||||
const keyInsights = [];
|
||||
const potentialIssues = [];
|
||||
|
||||
if (embeddingsUsageCount > 0) {
|
||||
keyInsights.push(`Semantische Suche wurde ${embeddingsUsageCount}x erfolgreich eingesetzt`);
|
||||
} else {
|
||||
potentialIssues.push('Keine semantischen Suchen dokumentiert - möglicherweise fehlerhafte Auditierung');
|
||||
}
|
||||
|
||||
if (aiDecisionCount >= 5) {
|
||||
keyInsights.push(`${aiDecisionCount} KI-Entscheidungen mit detaillierter Begründung`);
|
||||
} else {
|
||||
potentialIssues.push(`Nur ${aiDecisionCount} KI-Entscheidungen dokumentiert - erwartet mindestens 5 für Vollständigkeit`);
|
||||
}
|
||||
|
||||
if (microTaskCount > 0) {
|
||||
keyInsights.push(`${microTaskCount} spezialisierte Micro-Task-Analysen durchgeführt`);
|
||||
}
|
||||
|
||||
// Detect mode-specific patterns for validation
|
||||
if (phaseToolSelectionCount > 0 || phaseEnhancementCount > 0) {
|
||||
keyInsights.push('Workflow-Modus: Phasenspezifische Analyse durchgeführt');
|
||||
} else if (microTaskCount >= 3) {
|
||||
keyInsights.push('Tool-Modus: Detaillierte Einzelbewertungen durchgeführt');
|
||||
}
|
||||
|
||||
const phaseBreakdown = {};
|
||||
auditTrail.forEach(entry => {
|
||||
const phase = entry.phase || 'unknown';
|
||||
@ -1168,76 +1213,21 @@ class AIQueryInterface {
|
||||
analysisQuality = 'poor';
|
||||
}
|
||||
|
||||
const keyInsights = [];
|
||||
const embeddingsUsed = auditTrail.some(e => e.metadata?.embeddingsUsed);
|
||||
if (embeddingsUsed) {
|
||||
keyInsights.push('Semantische Suche wurde erfolgreich eingesetzt');
|
||||
}
|
||||
|
||||
const aiDecisionsWithReasoning = auditTrail.filter(e =>
|
||||
e.action === 'ai-decision' && e.metadata?.reasoning
|
||||
).length;
|
||||
if (aiDecisionsWithReasoning > 0) {
|
||||
keyInsights.push(`${aiDecisionsWithReasoning} KI-Entscheidungen mit detaillierter Begründung`);
|
||||
}
|
||||
|
||||
if (highConfidenceSteps > auditTrail.length * 0.7) {
|
||||
keyInsights.push('Mehrheit der Analyseschritte mit hoher Sicherheit');
|
||||
}
|
||||
|
||||
const responseQualityEntries = auditTrail.filter(e =>
|
||||
e.metadata?.responseConfidence && e.metadata.finalConfidence
|
||||
);
|
||||
if (responseQualityEntries.length > 0) {
|
||||
const avgResponseQuality = responseQualityEntries.reduce((sum, e) =>
|
||||
sum + (e.metadata.responseConfidence || 0), 0
|
||||
) / responseQualityEntries.length;
|
||||
// Validate expected counts based on mode detection
|
||||
const isWorkflowMode = phaseToolSelectionCount > 0 || phaseEnhancementCount > 0;
|
||||
const expectedMinAI = isWorkflowMode ? 11 : 8; // Workflow: 5 common + 6 phase selections, Tool: 5 common + 3 evaluations
|
||||
const expectedMinEmbeddings = 1; // Both modes should have initial search
|
||||
|
||||
if (avgResponseQuality >= 70) {
|
||||
keyInsights.push(`Hohe AI-Antwortqualität (∅ ${Math.round(avgResponseQuality)}%)`);
|
||||
}
|
||||
if (aiDecisionCount < expectedMinAI) {
|
||||
potentialIssues.push(`${expectedMinAI - aiDecisionCount} fehlende KI-Entscheidungen für ${isWorkflowMode ? 'Workflow' : 'Tool'}-Modus`);
|
||||
}
|
||||
|
||||
const potentialIssues = [];
|
||||
if (lowConfidenceSteps > 2) {
|
||||
potentialIssues.push(`${lowConfidenceSteps} Analyseschritte mit niedriger Konfidenz`);
|
||||
}
|
||||
|
||||
// FIXED: Only detect actual AI incompleteness, not display truncation
|
||||
// The old code incorrectly flagged display truncation as incomplete responses:
|
||||
// OLD (WRONG): e.output.response && e.output.response.includes('...')
|
||||
|
||||
// NEW (CORRECT): Check metadata.aiResponse for actual incompleteness
|
||||
const incompleteAIResponses = auditTrail.filter(e =>
|
||||
e.action === 'ai-decision' &&
|
||||
e.metadata?.aiResponse &&
|
||||
(
|
||||
// Detect actual AI incompleteness patterns:
|
||||
e.metadata.aiResponse.trim().length < 10 || // Very short response
|
||||
e.metadata.aiResponse.endsWith('...') || // AI itself truncated (rare but possible)
|
||||
e.metadata.aiResponse.includes('[TRUNCATED]') || // Explicit truncation marker
|
||||
e.metadata.aiResponse.includes('I cannot continue') || // AI stopped unexpectedly
|
||||
e.metadata.aiResponse.includes('I need to stop here') || // AI indicated incompleteness
|
||||
e.metadata.aiResponse.includes('[RESPONSE_TOO_LONG]') || // Length limit hit
|
||||
// Also check if the AI response seems cut off mid-sentence
|
||||
(e.metadata.aiResponse.length > 50 &&
|
||||
!e.metadata.aiResponse.trim().match(/[.!?:]$/)) // Doesn't end with proper punctuation
|
||||
)
|
||||
).length;
|
||||
|
||||
if (incompleteAIResponses > 0) {
|
||||
potentialIssues.push(`${incompleteAIResponses} möglicherweise unvollständige AI-Antworten`);
|
||||
}
|
||||
|
||||
// Additional quality checks
|
||||
const veryShortResponses = auditTrail.filter(e =>
|
||||
e.action === 'ai-decision' &&
|
||||
e.metadata?.aiResponse &&
|
||||
e.metadata.aiResponse.trim().length < 20
|
||||
).length;
|
||||
|
||||
if (veryShortResponses > 1) {
|
||||
potentialIssues.push(`${veryShortResponses} ungewöhnlich kurze AI-Antworten`);
|
||||
if (embeddingsUsageCount < expectedMinEmbeddings) {
|
||||
potentialIssues.push(`${expectedMinEmbeddings - embeddingsUsageCount} fehlende semantische Suchen`);
|
||||
}
|
||||
|
||||
return {
|
||||
@ -1260,7 +1250,14 @@ class AIQueryInterface {
|
||||
},
|
||||
analysisQuality,
|
||||
keyInsights,
|
||||
potentialIssues
|
||||
potentialIssues,
|
||||
// Debug information
|
||||
debugCounts: {
|
||||
microTaskCount,
|
||||
phaseToolSelectionCount,
|
||||
phaseEnhancementCount,
|
||||
detectedMode: isWorkflowMode ? 'workflow' : 'tool'
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -1184,6 +1184,37 @@ class AIPipeline {
|
||||
try {
|
||||
const response = await aiService.callMicroTaskAI(contextPrompt);
|
||||
|
||||
// FIX: Ensure ALL AI calls generate audit entries
|
||||
const toolsDataHash = getDataVersion?.() || 'unknown';
|
||||
const aiConfig = aiService.getConfig();
|
||||
|
||||
// Calculate response confidence for audit trail
|
||||
const responseConfidence = auditService.calculateAIResponseConfidence(
|
||||
response.content,
|
||||
this.getExpectedLengthForTaskType(taskType),
|
||||
taskType
|
||||
);
|
||||
|
||||
// FIX: Always add AI decision audit entry for micro-tasks
|
||||
auditService.addAIDecision(
|
||||
this.getPhaseForTaskType(taskType),
|
||||
prompt, // Store original prompt without context
|
||||
response.content,
|
||||
responseConfidence,
|
||||
this.getReasoningForTaskType(taskType, response.content),
|
||||
startTime,
|
||||
{
|
||||
toolsDataHash: toolsDataHash,
|
||||
microTaskType: taskType,
|
||||
aiModel: aiConfig.model,
|
||||
contextLength: contextPrompt.length,
|
||||
originalPromptLength: prompt.length,
|
||||
contextHistoryUsed: context.contextHistory.length > 0,
|
||||
decisionBasis: 'ai-analysis',
|
||||
...response.usage
|
||||
}
|
||||
);
|
||||
|
||||
return {
|
||||
taskType,
|
||||
content: response.content,
|
||||
@ -1193,6 +1224,29 @@ class AIPipeline {
|
||||
};
|
||||
|
||||
} catch (error) {
|
||||
// FIX: Also audit failed AI calls for completeness
|
||||
auditService.addEntry(
|
||||
this.getPhaseForTaskType(taskType),
|
||||
'ai-decision-failed',
|
||||
{
|
||||
prompt: prompt.slice(0, 200) + '...',
|
||||
taskType: taskType,
|
||||
error: error.message
|
||||
},
|
||||
{
|
||||
error: error.message,
|
||||
success: false
|
||||
},
|
||||
0, // Zero confidence for failed calls
|
||||
startTime,
|
||||
{
|
||||
toolsDataHash: getDataVersion?.() || 'unknown',
|
||||
microTaskType: taskType,
|
||||
failed: true,
|
||||
decisionBasis: 'ai-analysis'
|
||||
}
|
||||
);
|
||||
|
||||
return {
|
||||
taskType,
|
||||
content: '',
|
||||
@ -1203,6 +1257,51 @@ class AIPipeline {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Maps a micro-task type to the phase name under which it is recorded.
 *
 * @param taskType - Micro-task identifier, e.g. 'scenario-analysis'.
 * @returns The phase name for the task type, or 'contextual-analysis' when
 *          the task type is not one of the listed identifiers.
 */
private getPhaseForTaskType(taskType: string): string {
  // A Map has no inherited keys, so identifiers such as 'constructor' or
  // 'toString' fall through to the default instead of resolving to
  // Object.prototype members (which a plain object-literal lookup would return).
  const phaseByTaskType = new Map<string, string>([
    ['scenario-analysis', 'contextual-analysis'],
    ['investigation-approach', 'contextual-analysis'],
    ['critical-considerations', 'contextual-analysis'],
    ['tool-evaluation', 'tool-evaluation'],
    ['background-knowledge', 'knowledge-synthesis'],
    ['final-recommendations', 'synthesis'],
    ['phase-completion-selection', 'phase-completion'],
    ['phase-completion-reasoning', 'phase-completion']
  ]);

  return phaseByTaskType.get(taskType) ?? 'contextual-analysis';
}
|
||||
|
||||
/**
 * Returns the expected response-length bounds for a micro-task type.
 *
 * @param taskType - Micro-task identifier, e.g. 'tool-evaluation'.
 * @returns An object with `min` and `max` bounds; `{ min: 50, max: 300 }`
 *          when the task type is not one of the listed identifiers.
 */
private getExpectedLengthForTaskType(taskType: string): { min: number; max: number } {
  // A Map has no inherited keys, so identifiers such as 'constructor' or
  // 'toString' yield the default bounds rather than an Object.prototype member.
  const lengthByTaskType = new Map<string, { min: number; max: number }>([
    ['scenario-analysis', { min: 100, max: 500 }],
    ['investigation-approach', { min: 100, max: 400 }],
    ['critical-considerations', { min: 80, max: 300 }],
    ['tool-evaluation', { min: 200, max: 800 }],
    ['background-knowledge', { min: 50, max: 300 }],
    ['final-recommendations', { min: 150, max: 600 }],
    ['phase-completion-selection', { min: 50, max: 200 }],
    ['phase-completion-reasoning', { min: 100, max: 300 }]
  ]);

  return lengthByTaskType.get(taskType) ?? { min: 50, max: 300 };
}
|
||||
|
||||
/**
 * Builds the German reasoning sentence describing an AI-generated micro-task
 * result, including the display name of the task and the response length.
 *
 * @param taskType - Micro-task identifier, e.g. 'final-recommendations'.
 * @param response - The AI response text; only its length is used.
 * @returns The reasoning string. When the task type has no display name in
 *          the table below, the raw `taskType` is used in its place.
 */
private getReasoningForTaskType(taskType: string, response: string): string {
  // A Map has no inherited keys, so identifiers such as 'constructor' or
  // 'toString' fall back to the raw task type instead of interpolating an
  // Object.prototype member into the message.
  const displayNameByTaskType = new Map<string, string>([
    ['scenario-analysis', 'Szenario-Analyse'],
    ['investigation-approach', 'Untersuchungsansatz'],
    ['critical-considerations', 'Kritische Überlegungen'],
    ['tool-evaluation', 'Tool-Bewertung'],
    ['background-knowledge', 'Hintergrundwissen-Auswahl'],
    ['final-recommendations', 'Abschließende Empfehlungen'],
    ['phase-completion-selection', 'Phasen-Vervollständigung'],
    ['phase-completion-reasoning', 'Phasen-Begründung']
  ]);

  const taskName = displayNameByTaskType.get(taskType) ?? taskType;
  return `KI generierte ${taskName} (${response.length} Zeichen) - forensisch fundierte Analyse mit methodischer Begründung`;
}
|
||||
|
||||
private addToContextHistory(context: PipelineContext, newEntry: string): void {
|
||||
const entryTokens = aiService.estimateTokens(newEntry);
|
||||
|
||||
|
@ -99,6 +99,9 @@ class ToolSelector {
|
||||
|
||||
console.log('[TOOL-SELECTOR] Using embeddings for candidate selection');
|
||||
|
||||
// FIX: Record the start time for audit trail
|
||||
const embeddingsSearchStart = Date.now();
|
||||
|
||||
const similarItems = await embeddingsService.findSimilar(
|
||||
userQuery,
|
||||
this.config.embeddingCandidates,
|
||||
@ -107,6 +110,27 @@ class ToolSelector {
|
||||
|
||||
console.log('[TOOL-SELECTOR] Embeddings found', similarItems.length, 'similar items');
|
||||
|
||||
// FIX: Import and use auditService to record this embeddings search
|
||||
const { auditService } = await import('./auditService.js');
|
||||
const { getDataVersion } = await import('./dataService.js');
|
||||
|
||||
const toolsDataHash = getDataVersion() || 'unknown';
|
||||
|
||||
// FIX: Add audit entry for initial embeddings search that happens in BOTH modes
|
||||
auditService.addEmbeddingsSearch(
|
||||
userQuery,
|
||||
similarItems,
|
||||
this.config.similarityThreshold,
|
||||
embeddingsSearchStart,
|
||||
{
|
||||
toolsDataHash: toolsDataHash,
|
||||
selectionPhase: 'initial-candidate-selection',
|
||||
candidateLimit: this.config.embeddingCandidates,
|
||||
mode: mode,
|
||||
reasoning: `Initiale semantische Suche für ${mode}-Modus - Reduzierung der ${toolsData.tools.length} verfügbaren Tools auf ${similarItems.length} relevante Kandidaten`
|
||||
}
|
||||
);
|
||||
|
||||
similarItems.forEach(item => {
|
||||
context.embeddingsSimilarities.set(item.name, item.similarity);
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user