airefactor #19

Merged
mstoeck3 merged 25 commits from airefactor into main 2025-08-17 22:59:31 +00:00
3 changed files with 186 additions and 66 deletions
Showing only changes of commit 28af56d6ef - Show all commits

View File

@ -1131,10 +1131,55 @@ class AIQueryInterface {
const lowConfidenceSteps = auditTrail.filter(entry => (entry.confidence || 0) < 60).length; const lowConfidenceSteps = auditTrail.filter(entry => (entry.confidence || 0) < 60).length;
const mediumConfidenceSteps = auditTrail.length - highConfidenceSteps - lowConfidenceSteps; const mediumConfidenceSteps = auditTrail.length - highConfidenceSteps - lowConfidenceSteps;
// FIX 1: Count actual AI decision actions only
const aiDecisionCount = auditTrail.filter(entry => entry.action === 'ai-decision').length; const aiDecisionCount = auditTrail.filter(entry => entry.action === 'ai-decision').length;
const embeddingsUsageCount = auditTrail.filter(entry => entry.metadata?.embeddingsUsed).length;
// FIX 2: Count actual similarity search actions, not metadata flags
const embeddingsUsageCount = auditTrail.filter(entry => entry.action === 'similarity-search').length;
// FIX 3: Maintain tool selection count (this was correct)
const toolSelectionCount = auditTrail.filter(entry => entry.action === 'selection-decision').length; const toolSelectionCount = auditTrail.filter(entry => entry.action === 'selection-decision').length;
// Additional diagnostic counts for debugging
const microTaskCount = auditTrail.filter(entry =>
entry.action === 'ai-decision' && entry.metadata?.microTaskType
).length;
const phaseToolSelectionCount = auditTrail.filter(entry =>
entry.action === 'phase-tool-selection'
).length;
const phaseEnhancementCount = auditTrail.filter(entry =>
entry.action === 'phase-enhancement'
).length;
// Enhanced insights with diagnostic information
const keyInsights = [];
const potentialIssues = [];
if (embeddingsUsageCount > 0) {
keyInsights.push(`Semantische Suche wurde ${embeddingsUsageCount}x erfolgreich eingesetzt`);
} else {
potentialIssues.push('Keine semantischen Suchen dokumentiert - möglicherweise fehlerhafte Auditierung');
}
if (aiDecisionCount >= 5) {
keyInsights.push(`${aiDecisionCount} KI-Entscheidungen mit detaillierter Begründung`);
} else {
potentialIssues.push(`Nur ${aiDecisionCount} KI-Entscheidungen dokumentiert - erwartet mindestens 5 für Vollständigkeit`);
}
if (microTaskCount > 0) {
keyInsights.push(`${microTaskCount} spezialisierte Micro-Task-Analysen durchgeführt`);
}
// Detect mode-specific patterns for validation
if (phaseToolSelectionCount > 0 || phaseEnhancementCount > 0) {
keyInsights.push('Workflow-Modus: Phasenspezifische Analyse durchgeführt');
} else if (microTaskCount >= 3) {
keyInsights.push('Tool-Modus: Detaillierte Einzelbewertungen durchgeführt');
}
const phaseBreakdown = {}; const phaseBreakdown = {};
auditTrail.forEach(entry => { auditTrail.forEach(entry => {
const phase = entry.phase || 'unknown'; const phase = entry.phase || 'unknown';
@ -1168,76 +1213,21 @@ class AIQueryInterface {
analysisQuality = 'poor'; analysisQuality = 'poor';
} }
const keyInsights = [];
const embeddingsUsed = auditTrail.some(e => e.metadata?.embeddingsUsed);
if (embeddingsUsed) {
keyInsights.push('Semantische Suche wurde erfolgreich eingesetzt');
}
const aiDecisionsWithReasoning = auditTrail.filter(e =>
e.action === 'ai-decision' && e.metadata?.reasoning
).length;
if (aiDecisionsWithReasoning > 0) {
keyInsights.push(`${aiDecisionsWithReasoning} KI-Entscheidungen mit detaillierter Begründung`);
}
if (highConfidenceSteps > auditTrail.length * 0.7) { if (highConfidenceSteps > auditTrail.length * 0.7) {
keyInsights.push('Mehrheit der Analyseschritte mit hoher Sicherheit'); keyInsights.push('Mehrheit der Analyseschritte mit hoher Sicherheit');
} }
const responseQualityEntries = auditTrail.filter(e => // Validate expected counts based on mode detection
e.metadata?.responseConfidence && e.metadata.finalConfidence const isWorkflowMode = phaseToolSelectionCount > 0 || phaseEnhancementCount > 0;
); const expectedMinAI = isWorkflowMode ? 11 : 8; // Workflow: 5 common + 6 phase selections, Tool: 5 common + 3 evaluations
if (responseQualityEntries.length > 0) { const expectedMinEmbeddings = 1; // Both modes should have initial search
const avgResponseQuality = responseQualityEntries.reduce((sum, e) =>
sum + (e.metadata.responseConfidence || 0), 0
) / responseQualityEntries.length;
if (avgResponseQuality >= 70) { if (aiDecisionCount < expectedMinAI) {
keyInsights.push(`Hohe AI-Antwortqualität (∅ ${Math.round(avgResponseQuality)}%)`); potentialIssues.push(`${expectedMinAI - aiDecisionCount} fehlende KI-Entscheidungen für ${isWorkflowMode ? 'Workflow' : 'Tool'}-Modus`);
}
} }
const potentialIssues = []; if (embeddingsUsageCount < expectedMinEmbeddings) {
if (lowConfidenceSteps > 2) { potentialIssues.push(`${expectedMinEmbeddings - embeddingsUsageCount} fehlende semantische Suchen`);
potentialIssues.push(`${lowConfidenceSteps} Analyseschritte mit niedriger Konfidenz`);
}
// FIXED: Only detect actual AI incompleteness, not display truncation
// The old code incorrectly flagged display truncation as incomplete responses:
// OLD (WRONG): e.output.response && e.output.response.includes('...')
// NEW (CORRECT): Check metadata.aiResponse for actual incompleteness
const incompleteAIResponses = auditTrail.filter(e =>
e.action === 'ai-decision' &&
e.metadata?.aiResponse &&
(
// Detect actual AI incompleteness patterns:
e.metadata.aiResponse.trim().length < 10 || // Very short response
e.metadata.aiResponse.endsWith('...') || // AI itself truncated (rare but possible)
e.metadata.aiResponse.includes('[TRUNCATED]') || // Explicit truncation marker
e.metadata.aiResponse.includes('I cannot continue') || // AI stopped unexpectedly
e.metadata.aiResponse.includes('I need to stop here') || // AI indicated incompleteness
e.metadata.aiResponse.includes('[RESPONSE_TOO_LONG]') || // Length limit hit
// Also check if the AI response seems cut off mid-sentence
(e.metadata.aiResponse.length > 50 &&
!e.metadata.aiResponse.trim().match(/[.!?:]$/)) // Doesn't end with proper punctuation
)
).length;
if (incompleteAIResponses > 0) {
potentialIssues.push(`${incompleteAIResponses} möglicherweise unvollständige AI-Antworten`);
}
// Additional quality checks
const veryShortResponses = auditTrail.filter(e =>
e.action === 'ai-decision' &&
e.metadata?.aiResponse &&
e.metadata.aiResponse.trim().length < 20
).length;
if (veryShortResponses > 1) {
potentialIssues.push(`${veryShortResponses} ungewöhnlich kurze AI-Antworten`);
} }
return { return {
@ -1260,7 +1250,14 @@ class AIQueryInterface {
}, },
analysisQuality, analysisQuality,
keyInsights, keyInsights,
potentialIssues potentialIssues,
// Debug information
debugCounts: {
microTaskCount,
phaseToolSelectionCount,
phaseEnhancementCount,
detectedMode: isWorkflowMode ? 'workflow' : 'tool'
}
}; };
} }

View File

@ -1184,6 +1184,37 @@ class AIPipeline {
try { try {
const response = await aiService.callMicroTaskAI(contextPrompt); const response = await aiService.callMicroTaskAI(contextPrompt);
// FIX: Ensure ALL AI calls generate audit entries
const toolsDataHash = getDataVersion?.() || 'unknown';
const aiConfig = aiService.getConfig();
// Calculate response confidence for audit trail
const responseConfidence = auditService.calculateAIResponseConfidence(
response.content,
this.getExpectedLengthForTaskType(taskType),
taskType
);
// FIX: Always add AI decision audit entry for micro-tasks
auditService.addAIDecision(
this.getPhaseForTaskType(taskType),
prompt, // Store original prompt without context
response.content,
responseConfidence,
this.getReasoningForTaskType(taskType, response.content),
startTime,
{
toolsDataHash: toolsDataHash,
microTaskType: taskType,
aiModel: aiConfig.model,
contextLength: contextPrompt.length,
originalPromptLength: prompt.length,
contextHistoryUsed: context.contextHistory.length > 0,
decisionBasis: 'ai-analysis',
...response.usage
}
);
return { return {
taskType, taskType,
content: response.content, content: response.content,
@ -1193,6 +1224,29 @@ class AIPipeline {
}; };
} catch (error) { } catch (error) {
// FIX: Also audit failed AI calls for completeness
auditService.addEntry(
this.getPhaseForTaskType(taskType),
'ai-decision-failed',
{
prompt: prompt.slice(0, 200) + '...',
taskType: taskType,
error: error.message
},
{
error: error.message,
success: false
},
0, // Zero confidence for failed calls
startTime,
{
toolsDataHash: getDataVersion?.() || 'unknown',
microTaskType: taskType,
failed: true,
decisionBasis: 'ai-analysis'
}
);
return { return {
taskType, taskType,
content: '', content: '',
@ -1203,6 +1257,51 @@ class AIPipeline {
} }
} }
/**
 * Resolves the phase label under which a micro-task is recorded.
 *
 * The returned value is what this class hands to the audit service as the
 * first argument when it logs an AI decision or a failed AI call.
 *
 * @param taskType - Micro-task identifier, e.g. `'scenario-analysis'`.
 * @returns The phase label mapped to `taskType`, or `'contextual-analysis'`
 *   when `taskType` is not one of the known keys.
 */
private getPhaseForTaskType(taskType: string): string {
  // A Map is used instead of indexing a plain object so that names inherited
  // from Object.prototype ('constructor', 'toString', '__proto__', ...) are
  // treated as unknown task types instead of resolving to a non-string value.
  const phaseByTaskType = new Map<string, string>([
    ['scenario-analysis', 'contextual-analysis'],
    ['investigation-approach', 'contextual-analysis'],
    ['critical-considerations', 'contextual-analysis'],
    ['tool-evaluation', 'tool-evaluation'],
    ['background-knowledge', 'knowledge-synthesis'],
    ['final-recommendations', 'synthesis'],
    ['phase-completion-selection', 'phase-completion'],
    ['phase-completion-reasoning', 'phase-completion'],
  ]);

  return phaseByTaskType.get(taskType) ?? 'contextual-analysis';
}
/**
 * Returns the expected response-length bounds for a micro-task type.
 *
 * The result is passed to the audit service's response-confidence
 * calculation together with the AI response content.
 * NOTE(review): the bounds are presumably measured in characters — confirm
 * against `calculateAIResponseConfidence`.
 *
 * @param taskType - Micro-task identifier, e.g. `'tool-evaluation'`.
 * @returns A fresh `{ min, max }` object for `taskType`, or
 *   `{ min: 50, max: 300 }` when `taskType` is not one of the known keys.
 */
private getExpectedLengthForTaskType(taskType: string): { min: number; max: number } {
  // A Map is used instead of indexing a plain object so that names inherited
  // from Object.prototype ('constructor', 'toString', ...) fall through to
  // the default bounds instead of returning a value without min/max.
  const boundsByTaskType = new Map<string, { min: number; max: number }>([
    ['scenario-analysis', { min: 100, max: 500 }],
    ['investigation-approach', { min: 100, max: 400 }],
    ['critical-considerations', { min: 80, max: 300 }],
    ['tool-evaluation', { min: 200, max: 800 }],
    ['background-knowledge', { min: 50, max: 300 }],
    ['final-recommendations', { min: 150, max: 600 }],
    ['phase-completion-selection', { min: 50, max: 200 }],
    ['phase-completion-reasoning', { min: 100, max: 300 }],
  ]);

  return boundsByTaskType.get(taskType) ?? { min: 50, max: 300 };
}
/**
 * Builds the German reasoning text stored with a micro-task's audit entry.
 *
 * The text names the task (using a German display name when one is known)
 * and reports the length of the AI response.
 *
 * @param taskType - Micro-task identifier, e.g. `'final-recommendations'`.
 * @param response - The AI response content; only its `length` is used.
 * @returns The formatted reasoning sentence. When `taskType` has no known
 *   display name, the raw `taskType` string is used in its place.
 */
private getReasoningForTaskType(taskType: string, response: string): string {
  const responseLength = response.length;

  // A Map is used instead of indexing a plain object so that names inherited
  // from Object.prototype ('constructor', 'toString', ...) are not picked up
  // as display names and stringified into the audit text.
  const displayNameByTaskType = new Map<string, string>([
    ['scenario-analysis', 'Szenario-Analyse'],
    ['investigation-approach', 'Untersuchungsansatz'],
    ['critical-considerations', 'Kritische Überlegungen'],
    ['tool-evaluation', 'Tool-Bewertung'],
    ['background-knowledge', 'Hintergrundwissen-Auswahl'],
    ['final-recommendations', 'Abschließende Empfehlungen'],
    ['phase-completion-selection', 'Phasen-Vervollständigung'],
    ['phase-completion-reasoning', 'Phasen-Begründung'],
  ]);

  // Unknown task types fall back to the raw identifier.
  const taskName = displayNameByTaskType.get(taskType) ?? taskType;

  return `KI generierte ${taskName} (${responseLength} Zeichen) - forensisch fundierte Analyse mit methodischer Begründung`;
}
private addToContextHistory(context: PipelineContext, newEntry: string): void { private addToContextHistory(context: PipelineContext, newEntry: string): void {
const entryTokens = aiService.estimateTokens(newEntry); const entryTokens = aiService.estimateTokens(newEntry);

View File

@ -99,6 +99,9 @@ class ToolSelector {
console.log('[TOOL-SELECTOR] Using embeddings for candidate selection'); console.log('[TOOL-SELECTOR] Using embeddings for candidate selection');
// FIX: Record the start time for audit trail
const embeddingsSearchStart = Date.now();
const similarItems = await embeddingsService.findSimilar( const similarItems = await embeddingsService.findSimilar(
userQuery, userQuery,
this.config.embeddingCandidates, this.config.embeddingCandidates,
@ -107,6 +110,27 @@ class ToolSelector {
console.log('[TOOL-SELECTOR] Embeddings found', similarItems.length, 'similar items'); console.log('[TOOL-SELECTOR] Embeddings found', similarItems.length, 'similar items');
// FIX: Import and use auditService to record this embeddings search
const { auditService } = await import('./auditService.js');
const { getDataVersion } = await import('./dataService.js');
const toolsDataHash = getDataVersion() || 'unknown';
// FIX: Add audit entry for initial embeddings search that happens in BOTH modes
auditService.addEmbeddingsSearch(
userQuery,
similarItems,
this.config.similarityThreshold,
embeddingsSearchStart,
{
toolsDataHash: toolsDataHash,
selectionPhase: 'initial-candidate-selection',
candidateLimit: this.config.embeddingCandidates,
mode: mode,
reasoning: `Initiale semantische Suche für ${mode}-Modus - Reduzierung der ${toolsData.tools.length} verfügbaren Tools auf ${similarItems.length} relevante Kandidaten`
}
);
similarItems.forEach(item => { similarItems.forEach(item => {
context.embeddingsSimilarities.set(item.name, item.similarity); context.embeddingsSimilarities.set(item.name, item.similarity);
}); });