first draft enhancement 2
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
import { getCompressedToolsDataForAI } from './dataService.js';
|
||||
import { embeddingsService, type EmbeddingData } from './embeddings.js';
|
||||
import { AI_PROMPTS, getPrompt } from '../config/prompts.js';
|
||||
import { isToolHosted } from './toolHelpers.js';
|
||||
|
||||
interface AIConfig {
|
||||
endpoint: string;
|
||||
@@ -67,6 +68,16 @@ interface SimilarityResult extends EmbeddingData {
|
||||
similarity: number;
|
||||
}
|
||||
|
||||
/**
 * Confidence breakdown attached to a single tool recommendation.
 */
interface ConfidenceMetrics {
  /** Combined confidence score on a 0-100 scale. */
  overall: number;
  /** How well the embeddings matched. */
  embeddingsQuality: number;
  /** How well the tool matches the scenario domain. */
  domainAlignment: number;
  /** How much the micro-tasks agree on this tool. */
  consensus: number;
  /** How recent / up-to-date the selection is. */
  freshness: number;
  /** Reasons this recommendation could be wrong. */
  uncertaintyFactors: string[];
  /** Reasons this recommendation is considered strong. */
  strengthIndicators: string[];
}
|
||||
|
||||
|
||||
class ImprovedMicroTaskAIPipeline {
|
||||
private config: AIConfig;
|
||||
@@ -92,6 +103,16 @@ class ImprovedMicroTaskAIPipeline {
|
||||
detailLevel: 'minimal' | 'standard' | 'verbose';
|
||||
retentionHours: number;
|
||||
};
|
||||
|
||||
/**
 * Tunables for confidence scoring. The constructor populates every field
 * from a CONFIDENCE_* environment variable with a built-in default.
 */
private confidenceConfig: {
  /** Multiplier applied to the embeddings-quality component (default 0.3). */
  embeddingsWeight: number;
  /** Multiplier applied to the micro-task consensus component (default 0.25). */
  consensusWeight: number;
  /** Multiplier applied to the domain-alignment component (default 0.25). */
  domainMatchWeight: number;
  /** Multiplier applied to the freshness component (default 0.2). */
  freshnessWeight: number;
  /** Lowest threshold of the three (default 40). */
  minimumThreshold: number;
  /** Scores below this are flagged as low confidence (default 60). */
  mediumThreshold: number;
  /** Scores at or above this are flagged as high confidence (default 80). */
  highThreshold: number;
};
|
||||
|
||||
private tempAuditEntries: AuditEntry[] = [];
|
||||
|
||||
@@ -131,6 +152,21 @@ class ImprovedMicroTaskAIPipeline {
|
||||
noEmbeddingsLimits: `${this.noEmbeddingsToolLimit || 'unlimited'} tools, ${this.noEmbeddingsConceptLimit || 'unlimited'} concepts`,
|
||||
auditEnabled: this.auditConfig.enabled
|
||||
});
|
||||
|
||||
this.confidenceConfig = {
|
||||
embeddingsWeight: parseFloat(process.env.CONFIDENCE_EMBEDDINGS_WEIGHT || '0.3'),
|
||||
consensusWeight: parseFloat(process.env.CONFIDENCE_CONSENSUS_WEIGHT || '0.25'),
|
||||
domainMatchWeight: parseFloat(process.env.CONFIDENCE_DOMAIN_MATCH_WEIGHT || '0.25'),
|
||||
freshnessWeight: parseFloat(process.env.CONFIDENCE_FRESHNESS_WEIGHT || '0.2'),
|
||||
minimumThreshold: parseInt(process.env.CONFIDENCE_MINIMUM_THRESHOLD || '40', 10),
|
||||
mediumThreshold: parseInt(process.env.CONFIDENCE_MEDIUM_THRESHOLD || '60', 10),
|
||||
highThreshold: parseInt(process.env.CONFIDENCE_HIGH_THRESHOLD || '80', 10)
|
||||
};
|
||||
|
||||
console.log('[AI PIPELINE] Confidence scoring enabled:', {
|
||||
weights: `E:${this.confidenceConfig.embeddingsWeight} C:${this.confidenceConfig.consensusWeight} D:${this.confidenceConfig.domainMatchWeight} F:${this.confidenceConfig.freshnessWeight}`,
|
||||
thresholds: `${this.confidenceConfig.minimumThreshold}/${this.confidenceConfig.mediumThreshold}/${this.confidenceConfig.highThreshold}`
|
||||
});
|
||||
}
|
||||
|
||||
private getEnv(key: string): string {
|
||||
@@ -662,6 +698,40 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Combines four component scores into a weighted confidence rating for one
 * recommended tool.
 *
 * Every component and the combined score are clamped to the 0-100 range, and
 * non-finite values (e.g. from an unparsable weight) collapse to 0, so the
 * result always honours the 0-100 contract of `ConfidenceMetrics.overall`.
 *
 * @param tool - Tool record being scored.
 * @param embeddingsSimilarity - Similarity score; it is doubled, so 0.5 and above saturate at 100.
 * @param domainMatch - Whether the tool's domain matched the scenario.
 * @param microTaskAgreement - Fraction of micro-tasks that selected the tool.
 * @param context - Analysis context, forwarded to the uncertainty/strength checks.
 * @returns Rounded component scores, the rounded overall score, and the
 *          uncertainty and strength notes derived from the overall score.
 */
private calculateRecommendationConfidence(
  tool: any,
  embeddingsSimilarity: number,
  domainMatch: boolean,
  microTaskAgreement: number,
  context: AnalysisContext
): ConfidenceMetrics {
  // Keeps a score inside [0, 100]; NaN/Infinity become 0 instead of leaking out.
  const clampPercent = (value: number): number =>
    Number.isFinite(value) ? Math.min(100, Math.max(0, value)) : 0;

  // Similarity is scaled by 2 so that a similarity of 0.5 already counts as 100%.
  const embeddingsQuality = clampPercent(embeddingsSimilarity * 100 * 2);

  // 90 for a domain match, 60 when the tool at least declares domains, 30 otherwise.
  const hasDomains = (tool.domains?.length ?? 0) > 0;
  const domainAlignment = domainMatch ? 90 : (hasDomains ? 60 : 30);

  const consensus = clampPercent(microTaskAgreement * 100);
  const freshness = clampPercent(this.calculateToolFreshness(tool));

  const { embeddingsWeight, domainMatchWeight, consensusWeight, freshnessWeight } = this.confidenceConfig;

  // Weights come from the environment and are not guaranteed to sum to 1,
  // so the weighted sum is clamped as well.
  const overall = clampPercent(
    embeddingsQuality * embeddingsWeight +
    domainAlignment * domainMatchWeight +
    consensus * consensusWeight +
    freshness * freshnessWeight
  );

  const uncertaintyFactors = this.identifyUncertaintyFactors(tool, context, overall);
  const strengthIndicators = this.identifyStrengthIndicators(tool, context, overall);

  return {
    overall: Math.round(overall),
    embeddingsQuality: Math.round(embeddingsQuality),
    domainAlignment: Math.round(domainAlignment),
    consensus: Math.round(consensus),
    freshness: Math.round(freshness),
    uncertaintyFactors,
    strengthIndicators
  };
}
|
||||
|
||||
private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
|
||||
const isWorkflow = context.mode === 'workflow';
|
||||
const prompt = getPrompt('scenarioAnalysis', isWorkflow, context.userQuery);
|
||||
@@ -1010,6 +1080,124 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Heuristic freshness score for a tool, capped at 100.
 *
 * Starts from a base of 70 and adds a fixed bonus for each attribute that is
 * treated as a sign of maintenance: a knowledge base (+20), being hosted
 * (+15), and any license other than 'Proprietary' (+5).
 *
 * @param tool - Tool record; reads `knowledgebase` and `license`.
 * @returns Score between 70 and 100.
 */
private calculateToolFreshness(tool: any): number {
  const baseScore = 70;

  // [applies, bonus] pairs, evaluated in the same order as listed.
  const bonuses: Array<[boolean, number]> = [
    [tool.knowledgebase === true, 20],
    [Boolean(isToolHosted(tool)), 15],
    [Boolean(tool.license && tool.license !== 'Proprietary'), 5]
  ];

  const total = bonuses.reduce(
    (sum, [applies, bonus]) => (applies ? sum + bonus : sum),
    baseScore
  );

  return Math.min(100, total);
}
|
||||
|
||||
/**
 * Reports whether any of the tool's domains is mentioned in the user query.
 *
 * Keywords per domain are read from the CONFIDENCE_DOMAIN_KEYWORDS environment
 * variable, formatted as `domain:kw1,kw2|other-domain:kw3`. Segments that are
 * empty or lack a `domain:` prefix are ignored, so an unset variable is valid.
 * Keywords are trimmed and lowercased; empty keywords are dropped because an
 * empty string would match every query. A domain without usable configured
 * keywords falls back to its own name with all hyphens replaced by spaces.
 * Matching is a case-insensitive substring test.
 *
 * @param tool - Tool record; only its `domains` array is read.
 * @param userQuery - Free-text query to search for domain keywords.
 * @returns `true` when at least one keyword of at least one domain occurs in
 *          the query; `false` otherwise, including when the tool declares no domains.
 */
private checkDomainMatch(tool: any, userQuery: string): boolean {
  if (!Array.isArray(tool?.domains) || tool.domains.length === 0) return false;

  const queryLower = (userQuery ?? '').toLowerCase();

  // A Map avoids accidental hits on Object.prototype keys such as "constructor".
  const domainKeywords = new Map<string, string[]>();
  for (const pair of (process.env.CONFIDENCE_DOMAIN_KEYWORDS || '').split('|')) {
    const [rawDomain, rawKeywords] = pair.split(':');
    const domain = (rawDomain ?? '').trim();
    if (domain.length === 0 || rawKeywords === undefined) continue;

    const keywords = rawKeywords
      .split(',')
      .map(keyword => keyword.trim().toLowerCase())
      .filter(keyword => keyword.length > 0);

    if (keywords.length > 0) domainKeywords.set(domain, keywords);
  }

  return tool.domains.some((domain: unknown) => {
    if (typeof domain !== 'string' || domain.length === 0) return false;

    // Fallback: the domain name itself, e.g. "incident-response" -> "incident response".
    const keywords = domainKeywords.get(domain) ?? [domain.replace(/-/g, ' ').toLowerCase()];
    return keywords.some(keyword => queryLower.includes(keyword));
  });
}
|
||||
|
||||
/**
 * Fraction of micro-task selection steps in the audit trail whose output
 * lists the given tool among its `selectedTools`.
 *
 * A selection step is an audit entry with phase 'micro-task' whose action
 * contains 'selection'.
 *
 * @param toolName - Name to look for in each step's `selectedTools` array.
 * @param context - Analysis context; only `auditTrail` is read.
 * @returns A ratio in [0, 1], or 0.8 when the trail has no selection steps.
 */
private getMicroTaskAgreement(toolName: string, context: AnalysisContext): number {
  let selectionSteps = 0;
  let stepsPickingTool = 0;

  context.auditTrail.forEach(entry => {
    const isSelectionStep = entry.phase === 'micro-task' && entry.action.includes('selection');
    if (!isSelectionStep) return;

    selectionSteps += 1;

    const output = entry.output;
    if (
      output &&
      typeof output === 'object' &&
      Array.isArray(output.selectedTools) &&
      output.selectedTools.includes(toolName)
    ) {
      stepsPickingTool += 1;
    }
  });

  // Without any selection steps, assume high agreement by default.
  if (selectionSteps === 0) return 0.8;

  return stepsPickingTool / selectionSteps;
}
|
||||
|
||||
/**
 * Similarity estimate for a tool, based on whether an embeddings search was
 * recorded in the audit trail.
 *
 * This is a placeholder: it does not read per-tool similarity values and
 * `toolName` is currently not consulted. Real similarity data would have to
 * be recorded during the embeddings search before it can be returned here.
 *
 * @param toolName - Tool to score (unused for now).
 * @param context - Analysis context; only `auditTrail` is read.
 * @returns 0.7 when the first 'retrieval' / 'embeddings-search' entry has an
 *          output, otherwise 0.5.
 */
private getEmbeddingsSimilarity(toolName: string, context: AnalysisContext): number {
  const defaultSimilarity = 0.5;
  const placeholderSimilarity = 0.7;

  const searchEntry = context.auditTrail.find(
    ({ phase, action }) => phase === 'retrieval' && action === 'embeddings-search'
  );

  return searchEntry?.output ? placeholderSimilarity : defaultSimilarity;
}
|
||||
|
||||
/**
 * Collects human-readable caveats for a recommendation.
 *
 * Each rule pairs a condition with a message; messages are returned in rule
 * order for every condition that holds.
 *
 * @param tool - Tool record; reads `skillLevel`, `type` and `url`.
 * @param context - Analysis context; only `userQuery` is read.
 * @param confidence - Overall confidence score compared against the medium threshold.
 * @returns Caveat messages, possibly empty.
 */
private identifyUncertaintyFactors(tool: any, context: AnalysisContext, confidence: number): string[] {
  const query = context.userQuery;

  const rules: Array<[boolean, string]> = [
    [
      confidence < this.confidenceConfig.mediumThreshold,
      'Low overall confidence - consider manual validation'
    ],
    [
      !this.checkDomainMatch(tool, query),
      'Domain mismatch detected - tool may not be specifically designed for this scenario'
    ],
    [
      Boolean(tool.skillLevel === 'expert' && /rapid|quick|urgent|triage/i.test(query)),
      'Expert-level tool for rapid scenario - may be overcomplicated'
    ],
    [
      Boolean(tool.type === 'software' && !isToolHosted(tool) && !tool.url),
      'Limited access information - availability uncertain'
    ],
    [
      Boolean(tool.skillLevel === 'novice' && /complex|advanced|deep/i.test(query)),
      'Novice-level tool for complex scenario - may lack required capabilities'
    ]
  ];

  return rules.filter(([applies]) => applies).map(([, message]) => message);
}
|
||||
|
||||
/**
 * Collects human-readable reasons that support a recommendation.
 *
 * Each rule pairs a condition with a message; messages are returned in rule
 * order for every condition that holds.
 *
 * @param tool - Tool record; reads `knowledgebase` and `type`.
 * @param context - Analysis context; only `userQuery` is read.
 * @param confidence - Overall confidence score compared against the high threshold.
 * @returns Supporting messages, possibly empty.
 */
private identifyStrengthIndicators(tool: any, context: AnalysisContext, confidence: number): string[] {
  const query = context.userQuery;

  const rules: Array<[boolean, string]> = [
    [
      confidence >= this.confidenceConfig.highThreshold,
      'High confidence recommendation based on multiple factors'
    ],
    [
      Boolean(this.checkDomainMatch(tool, query)),
      'Strong domain alignment with scenario requirements'
    ],
    [
      tool.knowledgebase === true,
      'Documentation and knowledge base available for guidance'
    ],
    [
      Boolean(isToolHosted(tool)),
      'Hosted solution available for immediate access'
    ],
    [
      Boolean(tool.type === 'method' && /methodology|approach|process/i.test(query)),
      'Methodological approach matches procedural inquiry'
    ]
  ];

  return rules.filter(([applies]) => applies).map(([, message]) => message);
}
|
||||
|
||||
private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any {
|
||||
const isWorkflow = mode === 'workflow';
|
||||
|
||||
@@ -1025,20 +1213,71 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
};
|
||||
|
||||
if (isWorkflow) {
|
||||
return {
|
||||
...base,
|
||||
recommended_tools: context.selectedTools?.map(st => ({
|
||||
const recommendedToolsWithConfidence = context.selectedTools?.map(st => {
|
||||
// Calculate confidence for each tool
|
||||
const confidence = this.calculateRecommendationConfidence(
|
||||
st.tool,
|
||||
this.getEmbeddingsSimilarity(st.tool.name, context),
|
||||
this.checkDomainMatch(st.tool, context.userQuery),
|
||||
this.getMicroTaskAgreement(st.tool.name, context),
|
||||
context
|
||||
);
|
||||
|
||||
// Add audit entry for confidence calculation
|
||||
this.addAuditEntry(context, 'validation', 'confidence-scoring',
|
||||
{ toolName: st.tool.name, phase: st.phase },
|
||||
{
|
||||
overall: confidence.overall,
|
||||
components: {
|
||||
embeddings: confidence.embeddingsQuality,
|
||||
domain: confidence.domainAlignment,
|
||||
consensus: confidence.consensus,
|
||||
freshness: confidence.freshness
|
||||
}
|
||||
},
|
||||
confidence.overall,
|
||||
Date.now(),
|
||||
{ uncertaintyCount: confidence.uncertaintyFactors.length }
|
||||
);
|
||||
|
||||
return {
|
||||
name: st.tool.name,
|
||||
phase: st.phase,
|
||||
priority: st.priority,
|
||||
justification: st.justification || `Empfohlen für ${st.phase}`
|
||||
})) || [],
|
||||
justification: st.justification || `Empfohlen für ${st.phase}`,
|
||||
confidence: confidence,
|
||||
recommendationStrength: confidence.overall >= this.confidenceConfig.highThreshold ? 'strong' :
|
||||
confidence.overall >= this.confidenceConfig.mediumThreshold ? 'moderate' : 'weak'
|
||||
};
|
||||
}) || [];
|
||||
|
||||
return {
|
||||
...base,
|
||||
recommended_tools: recommendedToolsWithConfidence,
|
||||
workflow_suggestion: finalContent
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
...base,
|
||||
recommended_tools: context.selectedTools?.map(st => ({
|
||||
const recommendedToolsWithConfidence = context.selectedTools?.map(st => {
|
||||
const confidence = this.calculateRecommendationConfidence(
|
||||
st.tool,
|
||||
this.getEmbeddingsSimilarity(st.tool.name, context),
|
||||
this.checkDomainMatch(st.tool, context.userQuery),
|
||||
this.getMicroTaskAgreement(st.tool.name, context),
|
||||
context
|
||||
);
|
||||
|
||||
this.addAuditEntry(context, 'validation', 'confidence-scoring',
|
||||
{ toolName: st.tool.name, rank: st.tool.evaluation?.rank || 1 },
|
||||
{
|
||||
overall: confidence.overall,
|
||||
suitabilityAlignment: st.priority === 'high' && confidence.overall >= this.confidenceConfig.highThreshold
|
||||
},
|
||||
confidence.overall,
|
||||
Date.now(),
|
||||
{ strengthCount: confidence.strengthIndicators.length }
|
||||
);
|
||||
|
||||
return {
|
||||
name: st.tool.name,
|
||||
rank: st.tool.evaluation?.rank || 1,
|
||||
suitability_score: st.priority,
|
||||
@@ -1046,8 +1285,16 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
implementation_approach: st.tool.evaluation?.implementation_approach || '',
|
||||
pros: st.tool.evaluation?.pros || [],
|
||||
cons: st.tool.evaluation?.cons || [],
|
||||
alternatives: st.tool.evaluation?.alternatives || ''
|
||||
})) || [],
|
||||
alternatives: st.tool.evaluation?.alternatives || '',
|
||||
confidence: confidence,
|
||||
recommendationStrength: confidence.overall >= this.confidenceConfig.highThreshold ? 'strong' :
|
||||
confidence.overall >= this.confidenceConfig.mediumThreshold ? 'moderate' : 'weak'
|
||||
};
|
||||
}) || [];
|
||||
|
||||
return {
|
||||
...base,
|
||||
recommended_tools: recommendedToolsWithConfidence,
|
||||
additional_considerations: finalContent
|
||||
};
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user