This commit is contained in:
overcuriousity
2025-08-05 22:09:46 +02:00
parent 1c0025796a
commit b515a45e1e
6 changed files with 22 additions and 79 deletions

View File

@@ -63,17 +63,15 @@ interface AnalysisContext {
auditTrail: AuditEntry[];
// Store actual similarity data from embeddings
embeddingsSimilarities: Map<string, number>;
}
/**
 * Per-tool confidence result: three numeric scores plus two lists of
 * human-readable reasons.
 *
 * Where this object is assembled in this file, `overall`,
 * `semanticRelevance` and `taskSuitability` are each passed through
 * `Math.round`, so consumers receive whole numbers.
 *
 * NOTE(review): every member below is listed twice, once with a trailing
 * comment and once without, and `methodologicalConsistency` appears only in
 * the commented group. This looks like the removed and added sides of a diff
 * hunk rendered without +/- markers; confirm against the actual file, since
 * repeated property names in one interface body are reported by the compiler
 * as duplicate identifiers.
 */
interface ConfidenceMetrics {
overall: number; // 0-100: Combined confidence score
semanticRelevance: number; // How well tool description matches query (from embeddings)
taskSuitability: number; // AI-determined fitness for this specific task
methodologicalConsistency: number; // How well different analysis steps agree
uncertaintyFactors: string[]; // Specific reasons why this might not work
strengthIndicators: string[]; // Specific reasons why this is a good choice
// Second listing of the same members, without trailing comments (see NOTE above).
overall: number;
semanticRelevance: number;
taskSuitability: number;
uncertaintyFactors: string[];
strengthIndicators: string[];
}
class ImprovedMicroTaskAIPipeline {
@@ -102,10 +100,10 @@ class ImprovedMicroTaskAIPipeline {
};
private confidenceConfig: {
semanticWeight: number; // Weight for embeddings similarity
suitabilityWeight: number; // Weight for AI task fit evaluation
consistencyWeight: number; // Weight for cross-validation agreement
reliabilityWeight: number; // Weight for tool quality indicators
semanticWeight: number;
suitabilityWeight: number;
consistencyWeight: number;
reliabilityWeight: number;
minimumThreshold: number;
mediumThreshold: number;
highThreshold: number;
@@ -143,10 +141,9 @@ class ImprovedMicroTaskAIPipeline {
retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10)
};
// Updated confidence weights - more focused on AI evaluation
this.confidenceConfig = {
semanticWeight: parseFloat(process.env.CONFIDENCE_SEMANTIC_WEIGHT || '0.3'), // Embeddings similarity
suitabilityWeight: parseFloat(process.env.CONFIDENCE_SUITABILITY_WEIGHT || '0.7'), // AI task fit evaluation
semanticWeight: parseFloat(process.env.CONFIDENCE_SEMANTIC_WEIGHT || '0.3'),
suitabilityWeight: parseFloat(process.env.CONFIDENCE_SUITABILITY_WEIGHT || '0.7'),
consistencyWeight: 0,
reliabilityWeight: 0,
minimumThreshold: parseInt(process.env.CONFIDENCE_MINIMUM_THRESHOLD || '40', 10),
@@ -235,7 +232,7 @@ class ImprovedMicroTaskAIPipeline {
const selectionRatio = result.selectedTools.length / candidateCount;
const hasReasoning = result.reasoning && result.reasoning.length > 50;
let confidence = 60; // Base confidence
let confidence = 60;
if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
else if (selectionRatio <= 0.05) confidence -= 10;
@@ -386,7 +383,6 @@ class ImprovedMicroTaskAIPipeline {
let candidateConcepts: any[] = [];
let selectionMethod = 'unknown';
// Initialize embeddings similarities storage
context.embeddingsSimilarities = new Map<string, number>();
if (process.env.AI_EMBEDDINGS_ENABLED === 'true') {
@@ -409,7 +405,6 @@ class ImprovedMicroTaskAIPipeline {
console.log(`[AI PIPELINE] Embeddings found ${similarItems.length} similar items`);
// Store actual similarity scores for confidence calculation
similarItems.forEach(item => {
context.embeddingsSimilarities.set(item.name, item.similarity);
});
@@ -707,18 +702,14 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
limitations: string[] = []
): ConfidenceMetrics {
// 1. Semantic Relevance: Real embeddings similarity score
const rawSemanticRelevance = context.embeddingsSimilarities.has(tool.name) ?
context.embeddingsSimilarities.get(tool.name)! * 100 : 50;
// 2. Task Suitability: Enhanced with phase-awareness for workflow mode
let enhancedTaskSuitability = taskRelevance;
if (context.mode === 'workflow') {
// In workflow mode, boost score if tool is well-matched to its assigned phase
const toolSelection = context.selectedTools?.find(st => st.tool.name === tool.name);
if (toolSelection && tool.phases && tool.phases.includes(toolSelection.phase)) {
// Boost for phase alignment (but cap at 100)
const phaseBonus = Math.min(15, 100 - taskRelevance);
enhancedTaskSuitability = Math.min(100, taskRelevance + phaseBonus);
@@ -726,7 +717,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
}
}
// Simple weighted combination - no artificial scaling
const overall = (
rawSemanticRelevance * this.confidenceConfig.semanticWeight +
enhancedTaskSuitability * this.confidenceConfig.suitabilityWeight
@@ -747,7 +737,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
overall: Math.round(overall),
semanticRelevance: Math.round(rawSemanticRelevance),
taskSuitability: Math.round(enhancedTaskSuitability),
methodologicalConsistency: 0,
uncertaintyFactors,
strengthIndicators
};
@@ -756,18 +745,15 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
private identifySpecificUncertaintyFactors(tool: any, context: AnalysisContext, limitations: string[], confidence: number): string[] {
const factors: string[] = [];
// Add AI-identified limitations first (most specific)
if (limitations && limitations.length > 0) {
factors.push(...limitations.slice(0, 2)); // Limit to top 2 to leave room for others
factors.push(...limitations.slice(0, 2));
}
// Low semantic similarity
const similarity = context.embeddingsSimilarities.get(tool.name) || 0.5;
if (similarity < 0.7) {
factors.push('Geringe semantische Ähnlichkeit zur Anfrage - Tool-Beschreibung passt möglicherweise nicht optimal');
}
// Skill level vs scenario complexity mismatch
if (tool.skillLevel === 'expert' && /schnell|rapid|triage|urgent|sofort/i.test(context.userQuery)) {
factors.push('Experten-Tool für zeitkritisches Szenario - Setup und Einarbeitung könnten zu lange dauern');
}
@@ -776,35 +762,29 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
factors.push('Einsteiger-Tool für komplexe Analyse - könnte funktionale Limitierungen haben');
}
// Access and deployment concerns
if (tool.type === 'software' && !isToolHosted(tool) && tool.accessType === 'download') {
factors.push('Installation und Setup erforderlich');
}
// License restrictions
if (tool.license === 'Proprietary') {
factors.push('Kommerzielle Software - Lizenzkosten und rechtliche Beschränkungen zu beachten');
}
// Low overall confidence warning
if (confidence < 60) {
factors.push('Moderate Gesamtbewertung - alternative Ansätze sollten ebenfalls betrachtet werden');
}
return factors.slice(0, 4); // Limit to 4 most relevant factors
return factors.slice(0, 4);
}
// NEW: Identify specific strength indicators
private identifySpecificStrengthIndicators(tool: any, context: AnalysisContext, confidence: number): string[] {
const indicators: string[] = [];
// High semantic similarity
const similarity = context.embeddingsSimilarities.get(tool.name) || 0.5;
if (similarity >= 0.7) {
indicators.push('Sehr gute semantische Übereinstimmung mit Ihrer Anfrage');
}
// Quality indicators
if (tool.knowledgebase === true) {
indicators.push('Umfassende Dokumentation und Wissensbasis verfügbar');
}
@@ -813,17 +793,15 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
indicators.push('Sofort verfügbar über gehostete Lösung - kein Setup erforderlich');
}
// Skill level match
if (tool.skillLevel === 'intermediate' || tool.skillLevel === 'advanced') {
indicators.push('Ausgewogenes Verhältnis zwischen Funktionalität und Benutzerfreundlichkeit');
}
// Method alignment
if (tool.type === 'method' && /methodik|vorgehen|prozess|ansatz/i.test(context.userQuery)) {
indicators.push('Methodischer Ansatz passt zu Ihrer prozeduralen Anfrage');
}
return indicators.slice(0, 4); // Limit to 4 most important indicators
return indicators.slice(0, 4);
}
private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
@@ -902,11 +880,9 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
validSelections.forEach((sel: any) => {
const tool = phaseTools.find((t: any) => t.name === sel.toolName);
if (tool) {
// Ensure taskRelevance is a number
const taskRelevance = typeof sel.taskRelevance === 'number' ?
sel.taskRelevance : parseInt(String(sel.taskRelevance)) || 70;
// Derive priority automatically from score
const priority = this.derivePriorityFromScore(taskRelevance);
this.addToolToSelection(context, tool, phase.id, priority, sel.justification, taskRelevance, sel.limitations);
@@ -967,7 +943,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
hasExplanation: !!evaluation.detailed_explanation,
hasImplementationApproach: !!evaluation.implementation_approach,
prosCount: evaluation.pros?.length || 0,
limitationsCount: evaluation.limitations?.length || 0, // ← Updated field name
limitationsCount: evaluation.limitations?.length || 0,
hasLimitations: Array.isArray(evaluation.limitations) && evaluation.limitations.length > 0
},
70,
@@ -1101,7 +1077,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
const context: AnalysisContext = {
userQuery,
mode,
filteredData: {}, // Will be populated by getIntelligentCandidates
filteredData: {},
contextHistory: [],
maxContextLength: this.maxContextTokens,
currentContextLength: 0,
@@ -1124,9 +1100,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
startTime,
{ auditEnabled: this.auditConfig.enabled, confidenceScoringEnabled: true }
);
// MICRO-TASK SEQUENCE WITH ENHANCED CONFIDENCE TRACKING
const analysisResult = await this.analyzeScenario(context);
if (analysisResult.success) completeTasks++; else failedTasks++;
await this.delay(this.microTaskDelay);
@@ -1234,7 +1208,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
components: {
semantic: confidence.semanticRelevance,
suitability: confidence.taskSuitability,
consistency: confidence.methodologicalConsistency
}
},
confidence.overall,
@@ -1286,7 +1259,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
detailed_explanation: st.tool.evaluation?.detailed_explanation || '',
implementation_approach: st.tool.evaluation?.implementation_approach || '',
pros: st.tool.evaluation?.pros || [],
cons: st.tool.evaluation?.limitations || [], // ← FIXED: Use limitations as cons for display
cons: st.tool.evaluation?.limitations || [],
alternatives: st.tool.evaluation?.alternatives || '',
confidence: confidence,
recommendationStrength: confidence.overall >= this.confidenceConfig.highThreshold ? 'strong' :