cleanup
This commit is contained in:
@@ -63,17 +63,15 @@ interface AnalysisContext {
|
||||
|
||||
auditTrail: AuditEntry[];
|
||||
|
||||
// Store actual similarity data from embeddings
|
||||
embeddingsSimilarities: Map<string, number>;
|
||||
}
|
||||
|
||||
interface ConfidenceMetrics {
|
||||
overall: number; // 0-100: Combined confidence score
|
||||
semanticRelevance: number; // How well tool description matches query (from embeddings)
|
||||
taskSuitability: number; // AI-determined fitness for this specific task
|
||||
methodologicalConsistency: number; // How well different analysis steps agree
|
||||
uncertaintyFactors: string[]; // Specific reasons why this might not work
|
||||
strengthIndicators: string[]; // Specific reasons why this is a good choice
|
||||
overall: number;
|
||||
semanticRelevance: number;
|
||||
taskSuitability: number;
|
||||
uncertaintyFactors: string[];
|
||||
strengthIndicators: string[];
|
||||
}
|
||||
|
||||
class ImprovedMicroTaskAIPipeline {
|
||||
@@ -102,10 +100,10 @@ class ImprovedMicroTaskAIPipeline {
|
||||
};
|
||||
|
||||
private confidenceConfig: {
|
||||
semanticWeight: number; // Weight for embeddings similarity
|
||||
suitabilityWeight: number; // Weight for AI task fit evaluation
|
||||
consistencyWeight: number; // Weight for cross-validation agreement
|
||||
reliabilityWeight: number; // Weight for tool quality indicators
|
||||
semanticWeight: number;
|
||||
suitabilityWeight: number;
|
||||
consistencyWeight: number;
|
||||
reliabilityWeight: number;
|
||||
minimumThreshold: number;
|
||||
mediumThreshold: number;
|
||||
highThreshold: number;
|
||||
@@ -143,10 +141,9 @@ class ImprovedMicroTaskAIPipeline {
|
||||
retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10)
|
||||
};
|
||||
|
||||
// Updated confidence weights - more focused on AI evaluation
|
||||
this.confidenceConfig = {
|
||||
semanticWeight: parseFloat(process.env.CONFIDENCE_SEMANTIC_WEIGHT || '0.3'), // Embeddings similarity
|
||||
suitabilityWeight: parseFloat(process.env.CONFIDENCE_SUITABILITY_WEIGHT || '0.7'), // AI task fit evaluation
|
||||
semanticWeight: parseFloat(process.env.CONFIDENCE_SEMANTIC_WEIGHT || '0.3'),
|
||||
suitabilityWeight: parseFloat(process.env.CONFIDENCE_SUITABILITY_WEIGHT || '0.7'),
|
||||
consistencyWeight: 0,
|
||||
reliabilityWeight: 0,
|
||||
minimumThreshold: parseInt(process.env.CONFIDENCE_MINIMUM_THRESHOLD || '40', 10),
|
||||
@@ -235,7 +232,7 @@ class ImprovedMicroTaskAIPipeline {
|
||||
const selectionRatio = result.selectedTools.length / candidateCount;
|
||||
const hasReasoning = result.reasoning && result.reasoning.length > 50;
|
||||
|
||||
let confidence = 60; // Base confidence
|
||||
let confidence = 60;
|
||||
|
||||
if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
|
||||
else if (selectionRatio <= 0.05) confidence -= 10;
|
||||
@@ -386,7 +383,6 @@ class ImprovedMicroTaskAIPipeline {
|
||||
let candidateConcepts: any[] = [];
|
||||
let selectionMethod = 'unknown';
|
||||
|
||||
// Initialize embeddings similarities storage
|
||||
context.embeddingsSimilarities = new Map<string, number>();
|
||||
|
||||
if (process.env.AI_EMBEDDINGS_ENABLED === 'true') {
|
||||
@@ -409,7 +405,6 @@ class ImprovedMicroTaskAIPipeline {
|
||||
|
||||
console.log(`[AI PIPELINE] Embeddings found ${similarItems.length} similar items`);
|
||||
|
||||
// Store actual similarity scores for confidence calculation
|
||||
similarItems.forEach(item => {
|
||||
context.embeddingsSimilarities.set(item.name, item.similarity);
|
||||
});
|
||||
@@ -707,18 +702,14 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
limitations: string[] = []
|
||||
): ConfidenceMetrics {
|
||||
|
||||
// 1. Semantic Relevance: Real embeddings similarity score
|
||||
const rawSemanticRelevance = context.embeddingsSimilarities.has(tool.name) ?
|
||||
context.embeddingsSimilarities.get(tool.name)! * 100 : 50;
|
||||
|
||||
// 2. Task Suitability: Enhanced with phase-awareness for workflow mode
|
||||
let enhancedTaskSuitability = taskRelevance;
|
||||
|
||||
if (context.mode === 'workflow') {
|
||||
// In workflow mode, boost score if tool is well-matched to its assigned phase
|
||||
const toolSelection = context.selectedTools?.find(st => st.tool.name === tool.name);
|
||||
if (toolSelection && tool.phases && tool.phases.includes(toolSelection.phase)) {
|
||||
// Boost for phase alignment (but cap at 100)
|
||||
const phaseBonus = Math.min(15, 100 - taskRelevance);
|
||||
enhancedTaskSuitability = Math.min(100, taskRelevance + phaseBonus);
|
||||
|
||||
@@ -726,7 +717,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
}
|
||||
}
|
||||
|
||||
// Simple weighted combination - no artificial scaling
|
||||
const overall = (
|
||||
rawSemanticRelevance * this.confidenceConfig.semanticWeight +
|
||||
enhancedTaskSuitability * this.confidenceConfig.suitabilityWeight
|
||||
@@ -747,7 +737,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
overall: Math.round(overall),
|
||||
semanticRelevance: Math.round(rawSemanticRelevance),
|
||||
taskSuitability: Math.round(enhancedTaskSuitability),
|
||||
methodologicalConsistency: 0,
|
||||
uncertaintyFactors,
|
||||
strengthIndicators
|
||||
};
|
||||
@@ -756,18 +745,15 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
private identifySpecificUncertaintyFactors(tool: any, context: AnalysisContext, limitations: string[], confidence: number): string[] {
|
||||
const factors: string[] = [];
|
||||
|
||||
// Add AI-identified limitations first (most specific)
|
||||
if (limitations && limitations.length > 0) {
|
||||
factors.push(...limitations.slice(0, 2)); // Limit to top 2 to leave room for others
|
||||
factors.push(...limitations.slice(0, 2));
|
||||
}
|
||||
|
||||
// Low semantic similarity
|
||||
const similarity = context.embeddingsSimilarities.get(tool.name) || 0.5;
|
||||
if (similarity < 0.7) {
|
||||
factors.push('Geringe semantische Ähnlichkeit zur Anfrage - Tool-Beschreibung passt möglicherweise nicht optimal');
|
||||
}
|
||||
|
||||
// Skill level vs scenario complexity mismatch
|
||||
if (tool.skillLevel === 'expert' && /schnell|rapid|triage|urgent|sofort/i.test(context.userQuery)) {
|
||||
factors.push('Experten-Tool für zeitkritisches Szenario - Setup und Einarbeitung könnten zu lange dauern');
|
||||
}
|
||||
@@ -776,35 +762,29 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
factors.push('Einsteiger-Tool für komplexe Analyse - könnte funktionale Limitierungen haben');
|
||||
}
|
||||
|
||||
// Access and deployment concerns
|
||||
if (tool.type === 'software' && !isToolHosted(tool) && tool.accessType === 'download') {
|
||||
factors.push('Installation und Setup erforderlich');
|
||||
}
|
||||
|
||||
// License restrictions
|
||||
if (tool.license === 'Proprietary') {
|
||||
factors.push('Kommerzielle Software - Lizenzkosten und rechtliche Beschränkungen zu beachten');
|
||||
}
|
||||
|
||||
// Low overall confidence warning
|
||||
if (confidence < 60) {
|
||||
factors.push('Moderate Gesamtbewertung - alternative Ansätze sollten ebenfalls betrachtet werden');
|
||||
}
|
||||
|
||||
return factors.slice(0, 4); // Limit to 4 most relevant factors
|
||||
return factors.slice(0, 4);
|
||||
}
|
||||
|
||||
// NEW: Identify specific strength indicators
|
||||
private identifySpecificStrengthIndicators(tool: any, context: AnalysisContext, confidence: number): string[] {
|
||||
const indicators: string[] = [];
|
||||
|
||||
// High semantic similarity
|
||||
const similarity = context.embeddingsSimilarities.get(tool.name) || 0.5;
|
||||
if (similarity >= 0.7) {
|
||||
indicators.push('Sehr gute semantische Übereinstimmung mit Ihrer Anfrage');
|
||||
}
|
||||
|
||||
// Quality indicators
|
||||
if (tool.knowledgebase === true) {
|
||||
indicators.push('Umfassende Dokumentation und Wissensbasis verfügbar');
|
||||
}
|
||||
@@ -813,17 +793,15 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
indicators.push('Sofort verfügbar über gehostete Lösung - kein Setup erforderlich');
|
||||
}
|
||||
|
||||
// Skill level match
|
||||
if (tool.skillLevel === 'intermediate' || tool.skillLevel === 'advanced') {
|
||||
indicators.push('Ausgewogenes Verhältnis zwischen Funktionalität und Benutzerfreundlichkeit');
|
||||
}
|
||||
|
||||
// Method alignment
|
||||
if (tool.type === 'method' && /methodik|vorgehen|prozess|ansatz/i.test(context.userQuery)) {
|
||||
indicators.push('Methodischer Ansatz passt zu Ihrer prozeduralen Anfrage');
|
||||
}
|
||||
|
||||
return indicators.slice(0, 4); // Limit to 4 most important indicators
|
||||
return indicators.slice(0, 4);
|
||||
}
|
||||
|
||||
private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
|
||||
@@ -902,11 +880,9 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
validSelections.forEach((sel: any) => {
|
||||
const tool = phaseTools.find((t: any) => t.name === sel.toolName);
|
||||
if (tool) {
|
||||
// Ensure taskRelevance is a number
|
||||
const taskRelevance = typeof sel.taskRelevance === 'number' ?
|
||||
sel.taskRelevance : parseInt(String(sel.taskRelevance)) || 70;
|
||||
|
||||
// Derive priority automatically from score
|
||||
const priority = this.derivePriorityFromScore(taskRelevance);
|
||||
|
||||
this.addToolToSelection(context, tool, phase.id, priority, sel.justification, taskRelevance, sel.limitations);
|
||||
@@ -967,7 +943,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
hasExplanation: !!evaluation.detailed_explanation,
|
||||
hasImplementationApproach: !!evaluation.implementation_approach,
|
||||
prosCount: evaluation.pros?.length || 0,
|
||||
limitationsCount: evaluation.limitations?.length || 0, // ← Updated field name
|
||||
limitationsCount: evaluation.limitations?.length || 0,
|
||||
hasLimitations: Array.isArray(evaluation.limitations) && evaluation.limitations.length > 0
|
||||
},
|
||||
70,
|
||||
@@ -1101,7 +1077,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
const context: AnalysisContext = {
|
||||
userQuery,
|
||||
mode,
|
||||
filteredData: {}, // Will be populated by getIntelligentCandidates
|
||||
filteredData: {},
|
||||
contextHistory: [],
|
||||
maxContextLength: this.maxContextTokens,
|
||||
currentContextLength: 0,
|
||||
@@ -1124,9 +1100,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
startTime,
|
||||
{ auditEnabled: this.auditConfig.enabled, confidenceScoringEnabled: true }
|
||||
);
|
||||
|
||||
// MICRO-TASK SEQUENCE WITH ENHANCED CONFIDENCE TRACKING
|
||||
|
||||
|
||||
const analysisResult = await this.analyzeScenario(context);
|
||||
if (analysisResult.success) completeTasks++; else failedTasks++;
|
||||
await this.delay(this.microTaskDelay);
|
||||
@@ -1234,7 +1208,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
components: {
|
||||
semantic: confidence.semanticRelevance,
|
||||
suitability: confidence.taskSuitability,
|
||||
consistency: confidence.methodologicalConsistency
|
||||
}
|
||||
},
|
||||
confidence.overall,
|
||||
@@ -1286,7 +1259,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
detailed_explanation: st.tool.evaluation?.detailed_explanation || '',
|
||||
implementation_approach: st.tool.evaluation?.implementation_approach || '',
|
||||
pros: st.tool.evaluation?.pros || [],
|
||||
cons: st.tool.evaluation?.limitations || [], // ← FIXED: Use limitations as cons for display
|
||||
cons: st.tool.evaluation?.limitations || [],
|
||||
alternatives: st.tool.evaluation?.alternatives || '',
|
||||
confidence: confidence,
|
||||
recommendationStrength: confidence.overall >= this.confidenceConfig.highThreshold ? 'strong' :
|
||||
|
||||
Reference in New Issue
Block a user