|
|
|
|
@@ -1,7 +1,7 @@
|
|
|
|
|
// src/utils/aiPipeline.ts - Enhanced with Audit Trail System
|
|
|
|
|
// src/utils/aiPipeline.ts - Enhanced with Proper Confidence Scoring
|
|
|
|
|
|
|
|
|
|
import { getCompressedToolsDataForAI } from './dataService.js';
|
|
|
|
|
import { embeddingsService, type EmbeddingData } from './embeddings.js';
|
|
|
|
|
import { embeddingsService, type EmbeddingData, type SimilarityResult } from './embeddings.js';
|
|
|
|
|
import { AI_PROMPTS, getPrompt } from '../config/prompts.js';
|
|
|
|
|
import { isToolHosted } from './toolHelpers.js';
|
|
|
|
|
|
|
|
|
|
@@ -34,11 +34,11 @@ interface AnalysisResult {
|
|
|
|
|
|
|
|
|
|
/**
 * One recorded step of the analysis pipeline, kept in the audit trail.
 *
 * Each member is declared exactly once; declaring a property twice in the
 * same interface is a duplicate-identifier error.
 */
interface AuditEntry {
  /** When the step was recorded. NOTE(review): presumably epoch milliseconds - confirm against the writer. */
  timestamp: number;
  /** Pipeline phase, e.g. 'retrieval', 'selection', 'micro-task-N'. */
  phase: string;
  /** Action performed in that phase, e.g. 'embeddings-search', 'ai-selection', 'tool-evaluation'. */
  action: string;
  /** What went into this step. */
  input: any;
  /** What came out of this step. */
  output: any;
  /** 0-100: how confident the pipeline is in this step. */
  confidence: number;
  /** Processing time attributed to the step, in milliseconds. */
  processingTimeMs: number;
  /** Free-form additional details attached by the step. */
  metadata: Record<string, any>;
}
|
|
|
|
|
@@ -56,29 +56,27 @@ interface AnalysisContext {
|
|
|
|
|
problemAnalysis?: string;
|
|
|
|
|
investigationApproach?: string;
|
|
|
|
|
criticalConsiderations?: string;
|
|
|
|
|
selectedTools?: Array<{tool: any, phase: string, priority: string, justification?: string}>;
|
|
|
|
|
selectedTools?: Array<{tool: any, phase: string, priority: string, justification?: string, taskRelevance?: number, limitations?: string[]}>;
|
|
|
|
|
backgroundKnowledge?: Array<{concept: any, relevance: string}>;
|
|
|
|
|
|
|
|
|
|
seenToolNames: Set<string>;
|
|
|
|
|
|
|
|
|
|
auditTrail: AuditEntry[];
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
interface SimilarityResult extends EmbeddingData {
|
|
|
|
|
similarity: number;
|
|
|
|
|
// Store actual similarity data from embeddings
|
|
|
|
|
embeddingsSimilarities: Map<string, number>;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
interface ConfidenceMetrics {
|
|
|
|
|
overall: number; // 0-100: Combined confidence score
|
|
|
|
|
embeddingsQuality: number; // How well embeddings matched
|
|
|
|
|
domainAlignment: number; // How well tools match scenario domain
|
|
|
|
|
consensus: number; // How much micro-tasks agree
|
|
|
|
|
freshness: number; // How recent/up-to-date the selection is
|
|
|
|
|
uncertaintyFactors: string[]; // What could make this wrong
|
|
|
|
|
strengthIndicators: string[]; // What makes this recommendation strong
|
|
|
|
|
semanticRelevance: number; // How well tool description matches query (from embeddings)
|
|
|
|
|
taskSuitability: number; // AI-determined fitness for this specific task
|
|
|
|
|
methodologicalConsistency: number; // How well different analysis steps agree
|
|
|
|
|
toolReliability: number; // Indicators of tool quality and maintenance
|
|
|
|
|
uncertaintyFactors: string[]; // Specific reasons why this might not work
|
|
|
|
|
strengthIndicators: string[]; // Specific reasons why this is a good choice
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ImprovedMicroTaskAIPipeline {
|
|
|
|
|
private config: AIConfig;
|
|
|
|
|
private maxSelectedItems: number;
|
|
|
|
|
@@ -105,10 +103,10 @@ class ImprovedMicroTaskAIPipeline {
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
private confidenceConfig: {
|
|
|
|
|
embeddingsWeight: number;
|
|
|
|
|
consensusWeight: number;
|
|
|
|
|
domainMatchWeight: number;
|
|
|
|
|
freshnessWeight: number;
|
|
|
|
|
semanticWeight: number; // Weight for embeddings similarity
|
|
|
|
|
suitabilityWeight: number; // Weight for AI task fit evaluation
|
|
|
|
|
consistencyWeight: number; // Weight for cross-validation agreement
|
|
|
|
|
reliabilityWeight: number; // Weight for tool quality indicators
|
|
|
|
|
minimumThreshold: number;
|
|
|
|
|
mediumThreshold: number;
|
|
|
|
|
highThreshold: number;
|
|
|
|
|
@@ -146,25 +144,19 @@ class ImprovedMicroTaskAIPipeline {
|
|
|
|
|
retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10)
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
console.log('[AI PIPELINE] Configuration loaded:', {
|
|
|
|
|
embeddingCandidates: this.embeddingCandidates,
|
|
|
|
|
embeddingSelection: `${this.embeddingSelectionLimit} tools, ${this.embeddingConceptsLimit} concepts`,
|
|
|
|
|
noEmbeddingsLimits: `${this.noEmbeddingsToolLimit || 'unlimited'} tools, ${this.noEmbeddingsConceptLimit || 'unlimited'} concepts`,
|
|
|
|
|
auditEnabled: this.auditConfig.enabled
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// Updated confidence weights - more focused on AI evaluation
|
|
|
|
|
this.confidenceConfig = {
|
|
|
|
|
embeddingsWeight: parseFloat(process.env.CONFIDENCE_EMBEDDINGS_WEIGHT || '0.3'),
|
|
|
|
|
consensusWeight: parseFloat(process.env.CONFIDENCE_CONSENSUS_WEIGHT || '0.25'),
|
|
|
|
|
domainMatchWeight: parseFloat(process.env.CONFIDENCE_DOMAIN_MATCH_WEIGHT || '0.25'),
|
|
|
|
|
freshnessWeight: parseFloat(process.env.CONFIDENCE_FRESHNESS_WEIGHT || '0.2'),
|
|
|
|
|
semanticWeight: parseFloat(process.env.CONFIDENCE_SEMANTIC_WEIGHT || '0.25'), // Embeddings similarity
|
|
|
|
|
suitabilityWeight: parseFloat(process.env.CONFIDENCE_SUITABILITY_WEIGHT || '0.4'), // AI task fit evaluation
|
|
|
|
|
consistencyWeight: parseFloat(process.env.CONFIDENCE_CONSISTENCY_WEIGHT || '0.2'), // Cross-validation agreement
|
|
|
|
|
reliabilityWeight: parseFloat(process.env.CONFIDENCE_RELIABILITY_WEIGHT || '0.15'), // Tool quality indicators
|
|
|
|
|
minimumThreshold: parseInt(process.env.CONFIDENCE_MINIMUM_THRESHOLD || '40', 10),
|
|
|
|
|
mediumThreshold: parseInt(process.env.CONFIDENCE_MEDIUM_THRESHOLD || '60', 10),
|
|
|
|
|
highThreshold: parseInt(process.env.CONFIDENCE_HIGH_THRESHOLD || '80', 10)
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
console.log('[AI PIPELINE] Confidence scoring enabled:', {
|
|
|
|
|
weights: `E:${this.confidenceConfig.embeddingsWeight} C:${this.confidenceConfig.consensusWeight} D:${this.confidenceConfig.domainMatchWeight} F:${this.confidenceConfig.freshnessWeight}`,
|
|
|
|
|
console.log('[AI PIPELINE] Enhanced confidence scoring enabled:', {
|
|
|
|
|
weights: `Semantic:${this.confidenceConfig.semanticWeight} Suitability:${this.confidenceConfig.suitabilityWeight} Consistency:${this.confidenceConfig.consistencyWeight} Reliability:${this.confidenceConfig.reliabilityWeight}`,
|
|
|
|
|
thresholds: `${this.confidenceConfig.minimumThreshold}/${this.confidenceConfig.mediumThreshold}/${this.confidenceConfig.highThreshold}`
|
|
|
|
|
});
|
|
|
|
|
}
|
|
|
|
|
@@ -247,8 +239,8 @@ class ImprovedMicroTaskAIPipeline {
|
|
|
|
|
let confidence = 60; // Base confidence
|
|
|
|
|
|
|
|
|
|
if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
|
|
|
|
|
else if (selectionRatio <= 0.05) confidence -= 10; // Too few
|
|
|
|
|
else confidence -= 15; // Too many
|
|
|
|
|
else if (selectionRatio <= 0.05) confidence -= 10;
|
|
|
|
|
else confidence -= 15;
|
|
|
|
|
|
|
|
|
|
if (hasReasoning) confidence += 15;
|
|
|
|
|
|
|
|
|
|
@@ -357,7 +349,7 @@ class ImprovedMicroTaskAIPipeline {
|
|
|
|
|
const possibleTools = toolMatches
|
|
|
|
|
.map(match => match.replace(/"/g, ''))
|
|
|
|
|
.filter(name => name.length > 2 && !['selectedTools', 'selectedConcepts', 'reasoning'].includes(name))
|
|
|
|
|
.slice(0, 15); // Reasonable limit
|
|
|
|
|
.slice(0, 15);
|
|
|
|
|
|
|
|
|
|
if (possibleTools.length > 0) {
|
|
|
|
|
console.log(`[AI PIPELINE] Recovered ${possibleTools.length} possible tool names from broken JSON`);
|
|
|
|
|
@@ -374,7 +366,7 @@ class ImprovedMicroTaskAIPipeline {
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string): boolean {
|
|
|
|
|
private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string, taskRelevance?: number, limitations?: string[]): boolean {
|
|
|
|
|
context.seenToolNames.add(tool.name);
|
|
|
|
|
if (!context.selectedTools) context.selectedTools = [];
|
|
|
|
|
|
|
|
|
|
@@ -382,18 +374,22 @@ class ImprovedMicroTaskAIPipeline {
|
|
|
|
|
tool,
|
|
|
|
|
phase,
|
|
|
|
|
priority,
|
|
|
|
|
justification
|
|
|
|
|
justification,
|
|
|
|
|
taskRelevance,
|
|
|
|
|
limitations
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private async getIntelligentCandidates(userQuery: string, toolsData: any, mode: string) {
|
|
|
|
|
private async getIntelligentCandidates(userQuery: string, toolsData: any, mode: string, context: AnalysisContext) {
|
|
|
|
|
let candidateTools: any[] = [];
|
|
|
|
|
let candidateConcepts: any[] = [];
|
|
|
|
|
let selectionMethod = 'unknown';
|
|
|
|
|
|
|
|
|
|
// WAIT for embeddings initialization if embeddings are enabled
|
|
|
|
|
// Initialize embeddings similarities storage
|
|
|
|
|
context.embeddingsSimilarities = new Map<string, number>();
|
|
|
|
|
|
|
|
|
|
if (process.env.AI_EMBEDDINGS_ENABLED === 'true') {
|
|
|
|
|
try {
|
|
|
|
|
console.log('[AI PIPELINE] Waiting for embeddings initialization...');
|
|
|
|
|
@@ -414,6 +410,11 @@ class ImprovedMicroTaskAIPipeline {
|
|
|
|
|
|
|
|
|
|
console.log(`[AI PIPELINE] Embeddings found ${similarItems.length} similar items`);
|
|
|
|
|
|
|
|
|
|
// Store actual similarity scores for confidence calculation
|
|
|
|
|
similarItems.forEach(item => {
|
|
|
|
|
context.embeddingsSimilarities.set(item.name, item.similarity);
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
const toolsMap = new Map<string, any>(toolsData.tools.map((tool: any) => [tool.name, tool]));
|
|
|
|
|
const conceptsMap = new Map<string, any>(toolsData.concepts.map((concept: any) => [concept.name, concept]));
|
|
|
|
|
|
|
|
|
|
@@ -450,7 +451,7 @@ class ImprovedMicroTaskAIPipeline {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (this.auditConfig.enabled) {
|
|
|
|
|
this.addAuditEntry(null, 'retrieval', 'embeddings-search',
|
|
|
|
|
this.addAuditEntry(context, 'retrieval', 'embeddings-search',
|
|
|
|
|
{ query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates },
|
|
|
|
|
{
|
|
|
|
|
candidatesFound: similarItems.length,
|
|
|
|
|
@@ -459,7 +460,8 @@ class ImprovedMicroTaskAIPipeline {
|
|
|
|
|
reductionRatio: reductionRatio,
|
|
|
|
|
usingEmbeddings: selectionMethod === 'embeddings_candidates',
|
|
|
|
|
totalAvailable: totalAvailableTools,
|
|
|
|
|
filtered: similarTools.length
|
|
|
|
|
filtered: similarTools.length,
|
|
|
|
|
avgSimilarity: similarItems.length > 0 ? similarItems.reduce((sum, item) => sum + item.similarity, 0) / similarItems.length : 0
|
|
|
|
|
},
|
|
|
|
|
selectionMethod === 'embeddings_candidates' ? 85 : 60,
|
|
|
|
|
embeddingsStart,
|
|
|
|
|
@@ -479,7 +481,7 @@ class ImprovedMicroTaskAIPipeline {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
console.log(`[AI PIPELINE] AI will analyze ${candidateTools.length} candidate tools (method: ${selectionMethod})`);
|
|
|
|
|
const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod);
|
|
|
|
|
const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod, context);
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
tools: finalSelection.selectedTools,
|
|
|
|
|
@@ -495,7 +497,8 @@ class ImprovedMicroTaskAIPipeline {
|
|
|
|
|
candidateTools: any[],
|
|
|
|
|
candidateConcepts: any[],
|
|
|
|
|
mode: string,
|
|
|
|
|
selectionMethod: string
|
|
|
|
|
selectionMethod: string,
|
|
|
|
|
context: AnalysisContext
|
|
|
|
|
) {
|
|
|
|
|
const selectionStart = Date.now();
|
|
|
|
|
|
|
|
|
|
@@ -576,7 +579,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
|
|
|
|
console.error('[AI PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
|
|
|
|
|
|
|
|
|
|
if (this.auditConfig.enabled) {
|
|
|
|
|
this.addAuditEntry(null, 'selection', 'ai-tool-selection-failed',
|
|
|
|
|
this.addAuditEntry(context, 'selection', 'ai-tool-selection-failed',
|
|
|
|
|
{ candidateCount: candidateTools.length, mode, prompt: prompt.slice(0, 200) },
|
|
|
|
|
{ error: 'Invalid JSON structure', response: response.slice(0, 200) },
|
|
|
|
|
10,
|
|
|
|
|
@@ -602,7 +605,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
|
|
|
|
if (this.auditConfig.enabled) {
|
|
|
|
|
const confidence = this.calculateSelectionConfidence(result, candidateTools.length);
|
|
|
|
|
|
|
|
|
|
this.addAuditEntry(null, 'selection', 'ai-tool-selection',
|
|
|
|
|
this.addAuditEntry(context, 'selection', 'ai-tool-selection',
|
|
|
|
|
{ candidateCount: candidateTools.length, mode, promptLength: prompt.length },
|
|
|
|
|
{
|
|
|
|
|
selectedToolCount: result.selectedTools.length,
|
|
|
|
|
@@ -626,7 +629,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
|
|
|
|
console.error('[AI PIPELINE] AI selection failed:', error);
|
|
|
|
|
|
|
|
|
|
if (this.auditConfig.enabled) {
|
|
|
|
|
this.addAuditEntry(null, 'selection', 'ai-tool-selection-error',
|
|
|
|
|
this.addAuditEntry(context, 'selection', 'ai-tool-selection-error',
|
|
|
|
|
{ candidateCount: candidateTools.length, mode },
|
|
|
|
|
{ error: error.message },
|
|
|
|
|
5,
|
|
|
|
|
@@ -700,38 +703,225 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
|
|
|
|
|
|
|
|
|
private calculateRecommendationConfidence(
|
|
|
|
|
tool: any,
|
|
|
|
|
embeddingsSimilarity: number,
|
|
|
|
|
domainMatch: boolean,
|
|
|
|
|
microTaskAgreement: number,
|
|
|
|
|
context: AnalysisContext
|
|
|
|
|
context: AnalysisContext,
|
|
|
|
|
taskRelevance: number = 70,
|
|
|
|
|
limitations: string[] = []
|
|
|
|
|
): ConfidenceMetrics {
|
|
|
|
|
|
|
|
|
|
const embeddingsQuality = Math.min(100, embeddingsSimilarity * 100 * 2); // Scale 0.5 similarity to 100%
|
|
|
|
|
const domainAlignment = domainMatch ? 90 : (tool.domains?.length > 0 ? 60 : 30);
|
|
|
|
|
const consensus = Math.min(100, microTaskAgreement * 100);
|
|
|
|
|
const freshness = this.calculateToolFreshness(tool);
|
|
|
|
|
// 1. Semantic Relevance: Real embeddings similarity score
|
|
|
|
|
const semanticRelevance = context.embeddingsSimilarities.has(tool.name) ?
|
|
|
|
|
Math.round(context.embeddingsSimilarities.get(tool.name)! * 100) : 50;
|
|
|
|
|
|
|
|
|
|
// 2. Task Suitability: AI-determined fitness for specific task
|
|
|
|
|
const taskSuitability = Math.round(taskRelevance);
|
|
|
|
|
|
|
|
|
|
// 3. Methodological Consistency: Cross-validation between micro-tasks
|
|
|
|
|
const methodologicalConsistency = this.calculateCrossValidationScore(tool.name, context);
|
|
|
|
|
|
|
|
|
|
// 4. Tool Reliability: Quality indicators
|
|
|
|
|
const toolReliability = this.calculateToolReliability(tool);
|
|
|
|
|
|
|
|
|
|
// Debug logging
|
|
|
|
|
console.log(`[CONFIDENCE DEBUG] ${tool.name}:`, {
|
|
|
|
|
semantic: semanticRelevance,
|
|
|
|
|
taskSuitability: taskSuitability,
|
|
|
|
|
consistency: methodologicalConsistency,
|
|
|
|
|
reliability: toolReliability,
|
|
|
|
|
hasEmbeddingsSimilarity: context.embeddingsSimilarities.has(tool.name),
|
|
|
|
|
rawTaskRelevance: taskRelevance
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// Calculate weighted overall score
|
|
|
|
|
const overall = (
|
|
|
|
|
embeddingsQuality * this.confidenceConfig.embeddingsWeight +
|
|
|
|
|
domainAlignment * this.confidenceConfig.domainMatchWeight +
|
|
|
|
|
consensus * this.confidenceConfig.consensusWeight +
|
|
|
|
|
freshness * this.confidenceConfig.freshnessWeight
|
|
|
|
|
semanticRelevance * this.confidenceConfig.semanticWeight +
|
|
|
|
|
taskSuitability * this.confidenceConfig.suitabilityWeight +
|
|
|
|
|
methodologicalConsistency * this.confidenceConfig.consistencyWeight +
|
|
|
|
|
toolReliability * this.confidenceConfig.reliabilityWeight
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
const uncertaintyFactors = this.identifyUncertaintyFactors(tool, context, overall);
|
|
|
|
|
const strengthIndicators = this.identifyStrengthIndicators(tool, context, overall);
|
|
|
|
|
const uncertaintyFactors = this.identifySpecificUncertaintyFactors(tool, context, limitations, overall);
|
|
|
|
|
const strengthIndicators = this.identifySpecificStrengthIndicators(tool, context, overall);
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
overall: Math.round(overall),
|
|
|
|
|
embeddingsQuality: Math.round(embeddingsQuality),
|
|
|
|
|
domainAlignment: Math.round(domainAlignment),
|
|
|
|
|
consensus: Math.round(consensus),
|
|
|
|
|
freshness: Math.round(freshness),
|
|
|
|
|
semanticRelevance: Math.round(semanticRelevance),
|
|
|
|
|
taskSuitability: Math.round(taskSuitability),
|
|
|
|
|
methodologicalConsistency: Math.round(methodologicalConsistency),
|
|
|
|
|
toolReliability: Math.round(toolReliability),
|
|
|
|
|
uncertaintyFactors,
|
|
|
|
|
strengthIndicators
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Estimates how consistently `toolName` was endorsed across recorded pipeline steps.
 *
 * Only audit entries whose phase is 'micro-task' or 'selection' are considered.
 * An entry counts as a mention when its output lists the name in `selectedTools`
 * or `finalToolNames`, when `output.toolName` equals it, or when
 * `input.toolName` equals it.
 *
 * @param toolName - Name of the tool to look up in the audit trail.
 * @param context - Analysis context whose `auditTrail` is read (not modified).
 * @returns 60 when the tool is never mentioned; the single entry's confidence
 *   clamped to [40, 85] when mentioned once; otherwise a rounded blend of the
 *   share of mentions with confidence >= 60 (weight 0.7) and the mean
 *   confidence (weight 0.3), clamped to [30, 95].
 */
private calculateCrossValidationScore(toolName: string, context: AnalysisContext): number {
  const listsTool = (names: unknown): boolean =>
    Array.isArray(names) && names.includes(toolName);

  let toolMentions = 0;
  let positiveEvaluations = 0;
  let confidenceSum = 0;

  for (const entry of context.auditTrail) {
    if (entry.phase !== 'micro-task' && entry.phase !== 'selection') continue;

    const { input, output } = entry;

    const mentionedInOutput =
      !!output &&
      typeof output === 'object' &&
      (listsTool(output.selectedTools) ||
        listsTool(output.finalToolNames) ||
        output.toolName === toolName);

    // Per-tool evaluation entries record the evaluated tool's name in their
    // input rather than their output, so the input has to be checked as well;
    // otherwise those evaluations are never counted.
    const mentionedInInput =
      !!input && typeof input === 'object' && input.toolName === toolName;

    if (!mentionedInOutput && !mentionedInInput) continue;

    toolMentions++;
    confidenceSum += entry.confidence;

    // An entry is treated as a positive evaluation at confidence >= 60.
    if (entry.confidence >= 60) {
      positiveEvaluations++;
    }
  }

  console.log(`[AI PIPELINE] Cross-validation for ${toolName}: ${toolMentions} mentions, ${positiveEvaluations} positive, avg confidence: ${toolMentions > 0 ? Math.round(confidenceSum / toolMentions) : 0}`);

  if (toolMentions === 0) {
    return 60; // Default when no cross-validation data is available
  }

  if (toolMentions === 1) {
    // A single mention cannot show agreement: use its confidence, capped to [40, 85].
    return Math.min(85, Math.max(40, confidenceSum));
  }

  // Several mentions: blend the agreement ratio with the average confidence.
  const agreementRatio = positiveEvaluations / toolMentions;
  const avgConfidence = confidenceSum / toolMentions;
  const crossValidationScore = (agreementRatio * 0.7 + (avgConfidence / 100) * 0.3) * 100;

  return Math.round(Math.min(95, Math.max(30, crossValidationScore)));
}
|
|
|
|
|
|
|
|
|
|
/**
 * Derives a reliability score for a tool from attributes of the tool record.
 *
 * Starts at 50 and applies fixed adjustments:
 * +25 when `knowledgebase` is true, +20 when `isToolHosted(tool)` is truthy,
 * +10 when a license is set and is not 'Proprietary', +10 for skill level
 * 'intermediate' or 'advanced' (-5 for 'expert'), and +5 when more than one
 * platform is listed.
 *
 * @param tool - Tool record to score.
 * @returns The adjusted score, capped at 100.
 */
private calculateToolReliability(tool: any): number {
  const baseScore = 50;

  let skillAdjustment = 0;
  switch (tool.skillLevel) {
    case 'intermediate':
    case 'advanced':
      skillAdjustment = 10;
      break;
    case 'expert':
      // Expert-level tools are penalised slightly as potentially overcomplicated.
      skillAdjustment = -5;
      break;
  }

  const adjustments: number[] = [
    tool.knowledgebase === true ? 25 : 0, // documentation available
    isToolHosted(tool) ? 20 : 0, // hosted deployment
    tool.license && tool.license !== 'Proprietary' ? 10 : 0, // non-proprietary license
    skillAdjustment,
    tool.platforms && tool.platforms.length > 1 ? 5 : 0, // multi-platform
  ];

  const total = adjustments.reduce((sum, delta) => sum + delta, baseScore);
  return Math.min(100, total);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Collects up to four user-facing reasons why a tool recommendation might not fit.
 *
 * Sources, in order: limitations supplied by the caller (at most three),
 * low embeddings similarity (< 0.4), a skill-level mismatch against keywords
 * in the user query, a download-only software tool that is not hosted, and a
 * cross-validation score below 50.
 *
 * @param tool - Tool record being assessed.
 * @param context - Analysis context; provides `embeddingsSimilarities`, `userQuery` and the audit trail.
 * @param limitations - Limitations reported for the tool; non-array values and
 *   non-string or blank items are ignored.
 * @param confidence - Overall confidence score; currently not used by this method.
 * @returns At most four factor messages.
 */
private identifySpecificUncertaintyFactors(tool: any, context: AnalysisContext, limitations: string[], confidence: number): string[] {
  const factors: string[] = [];

  // Limitations originate from parsed model output, so the shape is not
  // guaranteed: spreading a plain string would push single characters.
  if (Array.isArray(limitations)) {
    const usable = limitations.filter(
      (item): item is string => typeof item === 'string' && item.trim().length > 0
    );
    factors.push(...usable.slice(0, 3)); // Limit to top 3
  }

  // Low semantic similarity. `??` keeps a stored similarity of 0; `||` would
  // replace it with the 0.5 default and hide the weakest matches.
  const similarity = context.embeddingsSimilarities.get(tool.name) ?? 0.5;
  if (similarity < 0.4) {
    factors.push('Geringe semantische Ähnlichkeit zur Anfrage - Tool-Beschreibung passt möglicherweise nicht optimal');
  }

  // Skill level mismatch: expert tool for an urgent request.
  if (tool.skillLevel === 'expert' && /schnell|rapid|triage|urgent/i.test(context.userQuery)) {
    factors.push('Experten-Tool für Eilszenario - möglicherweise zu komplex für schnelle Antworten');
  }

  // Skill level mismatch: novice tool for a complex request.
  if (tool.skillLevel === 'novice' && /komplex|erweitert|tiefgehend|advanced/i.test(context.userQuery)) {
    factors.push('Einsteiger-Tool für komplexes Szenario - könnte funktionale Einschränkungen haben');
  }

  // Access limitations: software that must be downloaded and is not hosted.
  if (tool.type === 'software' && !isToolHosted(tool) && tool.accessType === 'download') {
    factors.push('Installation erforderlich - nicht sofort verfügbar ohne Setup');
  }

  // Cross-validation disagreement between analysis steps.
  const crossValidation = this.calculateCrossValidationScore(tool.name, context);
  if (crossValidation < 50) {
    factors.push('Uneinheitliche Bewertung in verschiedenen Analyseschritten - Empfehlung nicht eindeutig');
  }

  return factors.slice(0, 4); // Limit to 4 most important factors
}
|
|
|
|
|
|
|
|
|
|
/**
 * Collects up to four user-facing reasons why a tool recommendation is strong.
 *
 * Candidate reasons are checked in a fixed order: overall confidence at or
 * above the configured high threshold, embeddings similarity >= 0.7 (0.5 is
 * assumed when no similarity is stored), cross-validation score >= 80, an
 * available knowledge base, a hosted deployment, an intermediate/advanced
 * skill level, and a method-type tool for a query containing process keywords.
 *
 * @param tool - Tool record being assessed.
 * @param context - Analysis context; provides `embeddingsSimilarities`, `userQuery` and the audit trail.
 * @param confidence - Overall confidence score compared against `highThreshold`.
 * @returns The messages of the first four applicable reasons.
 */
private identifySpecificStrengthIndicators(tool: any, context: AnalysisContext, confidence: number): string[] {
  // Each pair is [applies, message]; entries are evaluated top to bottom.
  const candidates: Array<[boolean, string]> = [
    [
      confidence >= this.confidenceConfig.highThreshold,
      'Hohe Gesamtbewertung durch mehrfache Validierung',
    ],
    [
      (context.embeddingsSimilarities.get(tool.name) || 0.5) >= 0.7,
      'Sehr gute semantische Übereinstimmung mit Ihrer Anfrage',
    ],
    [
      this.calculateCrossValidationScore(tool.name, context) >= 80,
      'Konsistente Empfehlung über verschiedene Analyseschritte hinweg',
    ],
    [
      tool.knowledgebase === true,
      'Umfassende Dokumentation und Wissensbasis verfügbar',
    ],
    [
      Boolean(isToolHosted(tool)),
      'Sofort verfügbar über gehostete Lösung - kein Setup erforderlich',
    ],
    [
      tool.skillLevel === 'intermediate' || tool.skillLevel === 'advanced',
      'Ausgewogenes Verhältnis zwischen Funktionalität und Benutzerfreundlichkeit',
    ],
    [
      tool.type === 'method' && /methodik|vorgehen|prozess|ansatz/i.test(context.userQuery),
      'Methodischer Ansatz passt zu Ihrer prozeduralen Anfrage',
    ],
  ];

  return candidates
    .filter(([applies]) => applies)
    .map(([, message]) => message)
    .slice(0, 4); // keep the first four applicable indicators
}
|
|
|
|
|
|
|
|
|
|
private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
|
|
|
|
|
const isWorkflow = context.mode === 'workflow';
|
|
|
|
|
const prompt = getPrompt('scenarioAnalysis', isWorkflow, context.userQuery);
|
|
|
|
|
@@ -833,27 +1023,49 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
|
|
|
|
if (result.success) {
|
|
|
|
|
const evaluation = this.safeParseJSON(result.content, {
|
|
|
|
|
suitability_score: 'medium',
|
|
|
|
|
task_relevance: '',
|
|
|
|
|
detailed_explanation: 'Evaluation failed',
|
|
|
|
|
implementation_approach: '',
|
|
|
|
|
pros: [],
|
|
|
|
|
cons: [],
|
|
|
|
|
limitations: [],
|
|
|
|
|
alternatives: ''
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// Debug logging to see what we're getting
|
|
|
|
|
console.log(`[AI PIPELINE] Tool ${tool.name} evaluation:`, {
|
|
|
|
|
taskRelevance: evaluation.task_relevance,
|
|
|
|
|
suitabilityScore: evaluation.suitability_score,
|
|
|
|
|
limitationsCount: evaluation.limitations?.length || 0
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
// Ensure task_relevance is a number
|
|
|
|
|
const taskRelevance = typeof evaluation.task_relevance === 'number' ?
|
|
|
|
|
evaluation.task_relevance :
|
|
|
|
|
parseInt(String(evaluation.task_relevance)) || 70;
|
|
|
|
|
|
|
|
|
|
// Store enhanced evaluation data
|
|
|
|
|
this.addToolToSelection(context, {
|
|
|
|
|
...tool,
|
|
|
|
|
evaluation: {
|
|
|
|
|
...evaluation,
|
|
|
|
|
task_relevance: taskRelevance, // Ensure it's stored as number
|
|
|
|
|
rank
|
|
|
|
|
}
|
|
|
|
|
}, 'evaluation', evaluation.suitability_score);
|
|
|
|
|
}, 'evaluation', evaluation.suitability_score, evaluation.detailed_explanation,
|
|
|
|
|
taskRelevance, evaluation.limitations);
|
|
|
|
|
|
|
|
|
|
this.addAuditEntry(context, 'micro-task', 'tool-evaluation',
|
|
|
|
|
{ toolName: tool.name, rank },
|
|
|
|
|
{ suitabilityScore: evaluation.suitability_score, hasExplanation: !!evaluation.detailed_explanation },
|
|
|
|
|
{
|
|
|
|
|
suitabilityScore: evaluation.suitability_score,
|
|
|
|
|
taskRelevance: taskRelevance, // Use the cleaned number
|
|
|
|
|
hasExplanation: !!evaluation.detailed_explanation,
|
|
|
|
|
limitationsIdentified: evaluation.limitations?.length || 0
|
|
|
|
|
},
|
|
|
|
|
evaluation.suitability_score === 'high' ? 85 : evaluation.suitability_score === 'medium' ? 70 : 50,
|
|
|
|
|
Date.now() - result.processingTimeMs,
|
|
|
|
|
{ toolType: tool.type }
|
|
|
|
|
{ toolType: tool.type, taskRelevanceScore: taskRelevance }
|
|
|
|
|
);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
@@ -963,28 +1175,31 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
|
|
|
|
|
|
|
|
|
async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {
|
|
|
|
|
const startTime = Date.now();
|
|
|
|
|
let completedTasks = 0;
|
|
|
|
|
let completeTasks = 0;
|
|
|
|
|
let failedTasks = 0;
|
|
|
|
|
|
|
|
|
|
this.tempAuditEntries = [];
|
|
|
|
|
|
|
|
|
|
console.log(`[AI PIPELINE] Starting ${mode} query processing with context continuity and audit trail`);
|
|
|
|
|
console.log(`[AI PIPELINE] Starting ${mode} query processing with enhanced confidence scoring`);
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
const toolsData = await getCompressedToolsDataForAI();
|
|
|
|
|
const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode);
|
|
|
|
|
|
|
|
|
|
const context: AnalysisContext = {
|
|
|
|
|
userQuery,
|
|
|
|
|
mode,
|
|
|
|
|
filteredData,
|
|
|
|
|
filteredData: {}, // Will be populated by getIntelligentCandidates
|
|
|
|
|
contextHistory: [],
|
|
|
|
|
maxContextLength: this.maxContextTokens,
|
|
|
|
|
currentContextLength: 0,
|
|
|
|
|
seenToolNames: new Set<string>(),
|
|
|
|
|
auditTrail: []
|
|
|
|
|
auditTrail: [],
|
|
|
|
|
embeddingsSimilarities: new Map<string, number>()
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode, context);
|
|
|
|
|
context.filteredData = filteredData;
|
|
|
|
|
|
|
|
|
|
this.mergeTemporaryAuditEntries(context);
|
|
|
|
|
|
|
|
|
|
console.log(`[AI PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
|
|
|
|
|
@@ -994,58 +1209,54 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
|
|
|
|
{ candidateTools: filteredData.tools.length, candidateConcepts: filteredData.concepts.length },
|
|
|
|
|
90,
|
|
|
|
|
startTime,
|
|
|
|
|
{ auditEnabled: this.auditConfig.enabled }
|
|
|
|
|
{ auditEnabled: this.auditConfig.enabled, confidenceScoringEnabled: true }
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// MICRO-TASK SEQUENCE
|
|
|
|
|
// MICRO-TASK SEQUENCE WITH ENHANCED CONFIDENCE TRACKING
|
|
|
|
|
|
|
|
|
|
// Task 1: Scenario/Problem Analysis
|
|
|
|
|
const analysisResult = await this.analyzeScenario(context);
|
|
|
|
|
if (analysisResult.success) completedTasks++; else failedTasks++;
|
|
|
|
|
if (analysisResult.success) completeTasks++; else failedTasks++;
|
|
|
|
|
await this.delay(this.microTaskDelay);
|
|
|
|
|
|
|
|
|
|
// Task 2: Investigation/Solution Approach
|
|
|
|
|
const approachResult = await this.generateApproach(context);
|
|
|
|
|
if (approachResult.success) completedTasks++; else failedTasks++;
|
|
|
|
|
if (approachResult.success) completeTasks++; else failedTasks++;
|
|
|
|
|
await this.delay(this.microTaskDelay);
|
|
|
|
|
|
|
|
|
|
// Task 3: Critical Considerations
|
|
|
|
|
const considerationsResult = await this.generateCriticalConsiderations(context);
|
|
|
|
|
if (considerationsResult.success) completedTasks++; else failedTasks++;
|
|
|
|
|
if (considerationsResult.success) completeTasks++; else failedTasks++;
|
|
|
|
|
await this.delay(this.microTaskDelay);
|
|
|
|
|
|
|
|
|
|
// Task 4: Tool Selection/Evaluation (mode-dependent)
|
|
|
|
|
if (mode === 'workflow') {
|
|
|
|
|
const phases = toolsData.phases || [];
|
|
|
|
|
for (const phase of phases) {
|
|
|
|
|
const toolSelectionResult = await this.selectToolsForPhase(context, phase);
|
|
|
|
|
if (toolSelectionResult.success) completedTasks++; else failedTasks++;
|
|
|
|
|
if (toolSelectionResult.success) completeTasks++; else failedTasks++;
|
|
|
|
|
await this.delay(this.microTaskDelay);
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
const topTools = filteredData.tools.slice(0, 3);
|
|
|
|
|
for (let i = 0; i < topTools.length; i++) {
|
|
|
|
|
const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1);
|
|
|
|
|
if (evaluationResult.success) completedTasks++; else failedTasks++;
|
|
|
|
|
if (evaluationResult.success) completeTasks++; else failedTasks++;
|
|
|
|
|
await this.delay(this.microTaskDelay);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const knowledgeResult = await this.selectBackgroundKnowledge(context);
|
|
|
|
|
if (knowledgeResult.success) completedTasks++; else failedTasks++;
|
|
|
|
|
if (knowledgeResult.success) completeTasks++; else failedTasks++;
|
|
|
|
|
await this.delay(this.microTaskDelay);
|
|
|
|
|
|
|
|
|
|
const finalResult = await this.generateFinalRecommendations(context);
|
|
|
|
|
if (finalResult.success) completedTasks++; else failedTasks++;
|
|
|
|
|
if (finalResult.success) completeTasks++; else failedTasks++;
|
|
|
|
|
|
|
|
|
|
const recommendation = this.buildRecommendation(context, mode, finalResult.content);
|
|
|
|
|
|
|
|
|
|
this.addAuditEntry(context, 'completion', 'pipeline-end',
|
|
|
|
|
{ completedTasks, failedTasks },
|
|
|
|
|
{ completedTasks: completeTasks, failedTasks },
|
|
|
|
|
{ finalRecommendation: !!recommendation, auditEntriesGenerated: context.auditTrail.length },
|
|
|
|
|
completedTasks > failedTasks ? 85 : 60,
|
|
|
|
|
completeTasks > failedTasks ? 85 : 60,
|
|
|
|
|
startTime,
|
|
|
|
|
{ totalProcessingTimeMs: Date.now() - startTime }
|
|
|
|
|
{ totalProcessingTimeMs: Date.now() - startTime, confidenceScoresGenerated: context.selectedTools?.length || 0 }
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
const processingStats = {
|
|
|
|
|
@@ -1054,13 +1265,13 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
|
|
|
|
finalSelectedItems: (context.selectedTools?.length || 0) +
|
|
|
|
|
(context.backgroundKnowledge?.length || 0),
|
|
|
|
|
processingTimeMs: Date.now() - startTime,
|
|
|
|
|
microTasksCompleted: completedTasks,
|
|
|
|
|
microTasksCompleted: completeTasks,
|
|
|
|
|
microTasksFailed: failedTasks,
|
|
|
|
|
contextContinuityUsed: true
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
console.log(`[AI PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
|
|
|
|
|
console.log(`[AI PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);
|
|
|
|
|
console.log(`[AI PIPELINE] Completed: ${completeTasks} tasks, Failed: ${failedTasks} tasks`);
|
|
|
|
|
console.log(`[AI PIPELINE] Enhanced confidence scores generated: ${context.selectedTools?.length || 0}`);
|
|
|
|
|
console.log(`[AI PIPELINE] Audit trail entries: ${context.auditTrail.length}`);
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
@@ -1080,128 +1291,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Heuristic "freshness" score for a tool, capped at 100.
 *
 * Starts from a base of 70 and adds:
 *  - 20 when `tool.knowledgebase` is strictly `true`,
 *  - 15 when `isToolHosted(tool)` reports true,
 *  - 5 when `tool.license` is set to anything other than 'Proprietary'.
 *
 * @param tool Tool record; `knowledgebase` and `license` are read directly,
 *             and the whole record is passed to `isToolHosted`.
 * @returns The summed score, never above 100.
 */
private calculateToolFreshness(tool: any): number {
  const baseScore = 70;
  const knowledgebaseBonus = tool.knowledgebase === true ? 20 : 0;
  const hostedBonus = isToolHosted(tool) ? 15 : 0;
  const licenseBonus = tool.license && tool.license !== 'Proprietary' ? 5 : 0;

  const total = baseScore + knowledgebaseBonus + hostedBonus + licenseBonus;
  return total > 100 ? 100 : total;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Reports whether any of the tool's declared domains is reflected in the user's query.
 *
 * Domain-to-keyword mappings are read from the CONFIDENCE_DOMAIN_KEYWORDS environment
 * variable in the form `domain:kw1,kw2|other-domain:kw3`, falling back to a built-in
 * list when the variable is unset or empty. A domain that has no mapping is matched by
 * its own name with every hyphen turned into a space.
 *
 * Matching is a case-insensitive substring test: keywords are trimmed and lower-cased,
 * and blank keywords / blank domains are ignored (an empty string is a substring of
 * every query and would otherwise make everything match).
 *
 * @param tool      Tool record; only `tool.domains` is read. Anything other than a
 *                  non-empty array yields `false`.
 * @param userQuery Free-text query to search for keywords.
 * @returns `true` when at least one keyword of at least one domain occurs in the query.
 */
private checkDomainMatch(tool: any, userQuery: string): boolean {
  const domains: unknown = tool?.domains;
  if (!Array.isArray(domains) || domains.length === 0) return false;

  const queryLower = (userQuery ?? '').toLowerCase();

  // Load domain keywords from environment with fallback
  const domainKeywordsEnv = process.env.CONFIDENCE_DOMAIN_KEYWORDS ||
    'incident-response:incident,breach,attack,compromise,response|malware-analysis:malware,virus,trojan,reverse,analysis|network-forensics:network,traffic,packet,pcap,wireshark|mobile-forensics:mobile,android,ios,phone,app|cloud-forensics:cloud,aws,azure,saas,paas';

  // A Map (rather than a plain object) keeps inherited keys such as "constructor"
  // from being mistaken for configured domains.
  const domainKeywords = new Map<string, string[]>();
  for (const pair of domainKeywordsEnv.split('|')) {
    const separatorIndex = pair.indexOf(':');
    if (separatorIndex === -1) continue;

    const domain = pair.slice(0, separatorIndex).trim();
    const keywords = pair
      .slice(separatorIndex + 1)
      .split(',')
      .map((keyword) => keyword.trim().toLowerCase())
      .filter((keyword) => keyword.length > 0);

    if (domain && keywords.length > 0) {
      domainKeywords.set(domain, keywords);
    }
  }

  return domains.some((domain) => {
    if (typeof domain !== 'string' || domain.length === 0) return false;

    // Unmapped domains fall back to their own name, e.g. "ics-scada-forensics" -> "ics scada forensics".
    const keywords = domainKeywords.get(domain) ?? [domain.replace(/-/g, ' ').toLowerCase()];
    return keywords.some((keyword) => queryLower.includes(keyword));
  });
}
|
|
|
|
|
|
|
|
|
|
/**
 * Share of selection micro-task audit entries whose output lists the given tool.
 *
 * An audit entry counts as a selection run when its `phase` is exactly 'micro-task'
 * and its `action` contains 'selection'. Such a run "agrees" when its `output` is an
 * object carrying a `selectedTools` array that includes `toolName`.
 *
 * @param toolName Tool name to look for in each run's `output.selectedTools`.
 * @param context  Analysis context; only `context.auditTrail` is read.
 * @returns agreeing runs / selection runs, or 0.8 when there are no selection runs.
 */
private getMicroTaskAgreement(toolName: string, context: AnalysisContext): number {
  let selectionRuns = 0;
  let runsListingTool = 0;

  for (const entry of context.auditTrail) {
    const isSelectionRun = entry.phase === 'micro-task' && entry.action.includes('selection');
    if (!isSelectionRun) continue;
    selectionRuns += 1;

    const output = entry.output;
    if (
      output &&
      typeof output === 'object' &&
      Array.isArray(output.selectedTools) &&
      output.selectedTools.includes(toolName)
    ) {
      runsListingTool += 1;
    }
  }

  // With nothing to compare against, assume high agreement.
  if (selectionRuns === 0) return 0.8;
  return runsListingTool / selectionRuns;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Placeholder similarity score for a tool, derived from the audit trail.
 *
 * Looks at the first audit entry with phase 'retrieval' and action 'embeddings-search'.
 * Returns 0.5 when there is no such entry or its `output` is falsy, otherwise a fixed
 * 0.7. No per-tool similarity is read yet, so `toolName` does not influence the result.
 *
 * @param toolName Currently unused.
 * @param context  Analysis context; only `context.auditTrail` is read.
 * @returns 0.5 (no usable embeddings entry) or 0.7 (entry with output present).
 */
private getEmbeddingsSimilarity(toolName: string, context: AnalysisContext): number {
  const firstSearchEntry = context.auditTrail.find(
    (entry) => entry.action === 'embeddings-search' && entry.phase === 'retrieval'
  );

  // Placeholder values: real similarity data would have to be recorded
  // during the embeddings search before it could be looked up here.
  return firstSearchEntry?.output ? 0.7 : 0.5;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Collects human-readable caveats for a tool recommendation.
 *
 * Each rule below is evaluated in order; the message of every rule that applies is
 * included in the result, in that same order.
 *
 * @param tool       Tool record; `skillLevel`, `type` and `url` are read, and the record
 *                   is passed to `checkDomainMatch` and `isToolHosted`.
 * @param context    Analysis context; only `context.userQuery` is read.
 * @param confidence Overall confidence, compared against `confidenceConfig.mediumThreshold`.
 * @returns The applicable caveat messages (possibly empty).
 */
private identifyUncertaintyFactors(tool: any, context: AnalysisContext, confidence: number): string[] {
  const rules: Array<[applies: () => boolean, message: string]> = [
    [
      () => confidence < this.confidenceConfig.mediumThreshold,
      'Low overall confidence - consider manual validation',
    ],
    [
      () => !this.checkDomainMatch(tool, context.userQuery),
      'Domain mismatch detected - tool may not be specifically designed for this scenario',
    ],
    [
      () => tool.skillLevel === 'expert' && /rapid|quick|urgent|triage/i.test(context.userQuery),
      'Expert-level tool for rapid scenario - may be overcomplicated',
    ],
    [
      () => tool.type === 'software' && !isToolHosted(tool) && !tool.url,
      'Limited access information - availability uncertain',
    ],
    [
      () => tool.skillLevel === 'novice' && /complex|advanced|deep/i.test(context.userQuery),
      'Novice-level tool for complex scenario - may lack required capabilities',
    ],
  ];

  const factors: string[] = [];
  for (const [applies, message] of rules) {
    if (applies()) factors.push(message);
  }
  return factors;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Collects human-readable positives for a tool recommendation.
 *
 * The checks run top to bottom and each one that holds contributes its message,
 * so the result keeps the order shown here.
 *
 * @param tool       Tool record; `knowledgebase` and `type` are read, and the record is
 *                   passed to `checkDomainMatch` and `isToolHosted`.
 * @param context    Analysis context; only `context.userQuery` is read.
 * @param confidence Overall confidence, compared against `confidenceConfig.highThreshold`.
 * @returns The applicable strength messages (possibly empty).
 */
private identifyStrengthIndicators(tool: any, context: AnalysisContext, confidence: number): string[] {
  const when = (holds: boolean, message: string): string[] => (holds ? [message] : []);

  return [
    ...when(
      confidence >= this.confidenceConfig.highThreshold,
      'High confidence recommendation based on multiple factors'
    ),
    ...when(
      this.checkDomainMatch(tool, context.userQuery),
      'Strong domain alignment with scenario requirements'
    ),
    ...when(
      tool.knowledgebase === true,
      'Documentation and knowledge base available for guidance'
    ),
    ...when(
      isToolHosted(tool),
      'Hosted solution available for immediate access'
    ),
    ...when(
      tool.type === 'method' && /methodology|approach|process/i.test(context.userQuery),
      'Methodological approach matches procedural inquiry'
    ),
  ];
}
|
|
|
|
|
|
|
|
|
|
private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any {
|
|
|
|
|
const isWorkflow = mode === 'workflow';
|
|
|
|
|
|
|
|
|
|
@@ -1218,13 +1307,12 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
|
|
|
|
|
|
|
|
|
if (isWorkflow) {
|
|
|
|
|
const recommendedToolsWithConfidence = context.selectedTools?.map(st => {
|
|
|
|
|
// Calculate confidence for each tool
|
|
|
|
|
// Calculate enhanced confidence for each tool
|
|
|
|
|
const confidence = this.calculateRecommendationConfidence(
|
|
|
|
|
st.tool,
|
|
|
|
|
this.getEmbeddingsSimilarity(st.tool.name, context),
|
|
|
|
|
this.checkDomainMatch(st.tool, context.userQuery),
|
|
|
|
|
this.getMicroTaskAgreement(st.tool.name, context),
|
|
|
|
|
context
|
|
|
|
|
context,
|
|
|
|
|
st.taskRelevance || 70,
|
|
|
|
|
st.limitations || []
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
// Add audit entry for confidence calculation
|
|
|
|
|
@@ -1233,15 +1321,15 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
|
|
|
|
{
|
|
|
|
|
overall: confidence.overall,
|
|
|
|
|
components: {
|
|
|
|
|
embeddings: confidence.embeddingsQuality,
|
|
|
|
|
domain: confidence.domainAlignment,
|
|
|
|
|
consensus: confidence.consensus,
|
|
|
|
|
freshness: confidence.freshness
|
|
|
|
|
semantic: confidence.semanticRelevance,
|
|
|
|
|
suitability: confidence.taskSuitability,
|
|
|
|
|
consistency: confidence.methodologicalConsistency,
|
|
|
|
|
reliability: confidence.toolReliability
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
confidence.overall,
|
|
|
|
|
Date.now(),
|
|
|
|
|
{ uncertaintyCount: confidence.uncertaintyFactors.length }
|
|
|
|
|
{ uncertaintyCount: confidence.uncertaintyFactors.length, strengthCount: confidence.strengthIndicators.length }
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
@@ -1264,10 +1352,9 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
|
|
|
|
const recommendedToolsWithConfidence = context.selectedTools?.map(st => {
|
|
|
|
|
const confidence = this.calculateRecommendationConfidence(
|
|
|
|
|
st.tool,
|
|
|
|
|
this.getEmbeddingsSimilarity(st.tool.name, context),
|
|
|
|
|
this.checkDomainMatch(st.tool, context.userQuery),
|
|
|
|
|
this.getMicroTaskAgreement(st.tool.name, context),
|
|
|
|
|
context
|
|
|
|
|
context,
|
|
|
|
|
st.taskRelevance || 70,
|
|
|
|
|
st.limitations || []
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
this.addAuditEntry(context, 'validation', 'confidence-scoring',
|
|
|
|
|
@@ -1278,7 +1365,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
|
|
|
|
},
|
|
|
|
|
confidence.overall,
|
|
|
|
|
Date.now(),
|
|
|
|
|
{ strengthCount: confidence.strengthIndicators.length }
|
|
|
|
|
{ strengthCount: confidence.strengthIndicators.length, limitationsCount: confidence.uncertaintyFactors.length }
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
|