first draft enhancement 2

This commit is contained in:
overcuriousity
2025-08-05 13:03:33 +02:00
parent c267681e7d
commit 99117e8e7a
7 changed files with 714 additions and 67 deletions

View File

@@ -3,6 +3,7 @@
import { getCompressedToolsDataForAI } from './dataService.js';
import { embeddingsService, type EmbeddingData } from './embeddings.js';
import { AI_PROMPTS, getPrompt } from '../config/prompts.js';
import { isToolHosted } from './toolHelpers.js';
interface AIConfig {
endpoint: string;
@@ -67,6 +68,16 @@ interface SimilarityResult extends EmbeddingData {
similarity: number;
}
/**
 * Confidence breakdown attached to a single tool recommendation.
 *
 * All numeric fields are rounded scores. `overall` is the weighted sum of the
 * four component scores, using the weights held in the pipeline's
 * confidence configuration.
 */
interface ConfidenceMetrics {
overall: number; // 0-100: weighted combination of the four component scores below
embeddingsQuality: number; // How well the embeddings matched (similarity scaled so that 0.5 maps to 100)
domainAlignment: number; // How well the tool's domains match the scenario domain
consensus: number; // Share of selection micro-tasks that picked this tool, as a percentage
freshness: number; // Heuristic for how maintained/up-to-date the tool appears to be
uncertaintyFactors: string[]; // Human-readable reasons the recommendation could be wrong
strengthIndicators: string[]; // Human-readable reasons the recommendation is strong
}
class ImprovedMicroTaskAIPipeline {
private config: AIConfig;
@@ -92,6 +103,16 @@ class ImprovedMicroTaskAIPipeline {
detailLevel: 'minimal' | 'standard' | 'verbose';
retentionHours: number;
};
/**
 * Tuning parameters for confidence scoring.
 *
 * The four `*Weight` values are the multipliers applied to the matching
 * component scores when the overall confidence is computed. The thresholds
 * are compared against the overall score: at or above `highThreshold` a
 * recommendation is labelled 'strong', at or above `mediumThreshold`
 * 'moderate', otherwise 'weak'.
 * NOTE(review): `minimumThreshold` is not read in the code visible here -
 * confirm where (or whether) it is enforced.
 */
private confidenceConfig: {
embeddingsWeight: number;
consensusWeight: number;
domainMatchWeight: number;
freshnessWeight: number;
minimumThreshold: number;
mediumThreshold: number;
highThreshold: number;
};
private tempAuditEntries: AuditEntry[] = [];
@@ -131,6 +152,21 @@ class ImprovedMicroTaskAIPipeline {
noEmbeddingsLimits: `${this.noEmbeddingsToolLimit || 'unlimited'} tools, ${this.noEmbeddingsConceptLimit || 'unlimited'} concepts`,
auditEnabled: this.auditConfig.enabled
});
this.confidenceConfig = {
embeddingsWeight: parseFloat(process.env.CONFIDENCE_EMBEDDINGS_WEIGHT || '0.3'),
consensusWeight: parseFloat(process.env.CONFIDENCE_CONSENSUS_WEIGHT || '0.25'),
domainMatchWeight: parseFloat(process.env.CONFIDENCE_DOMAIN_MATCH_WEIGHT || '0.25'),
freshnessWeight: parseFloat(process.env.CONFIDENCE_FRESHNESS_WEIGHT || '0.2'),
minimumThreshold: parseInt(process.env.CONFIDENCE_MINIMUM_THRESHOLD || '40', 10),
mediumThreshold: parseInt(process.env.CONFIDENCE_MEDIUM_THRESHOLD || '60', 10),
highThreshold: parseInt(process.env.CONFIDENCE_HIGH_THRESHOLD || '80', 10)
};
console.log('[AI PIPELINE] Confidence scoring enabled:', {
weights: `E:${this.confidenceConfig.embeddingsWeight} C:${this.confidenceConfig.consensusWeight} D:${this.confidenceConfig.domainMatchWeight} F:${this.confidenceConfig.freshnessWeight}`,
thresholds: `${this.confidenceConfig.minimumThreshold}/${this.confidenceConfig.mediumThreshold}/${this.confidenceConfig.highThreshold}`
});
}
private getEnv(key: string): string {
@@ -662,6 +698,40 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
}
}
/**
 * Computes the confidence breakdown for one recommended tool.
 *
 * Each component is scored on a 0-100 scale and the components are combined
 * into `overall` as a weighted sum using the weights in
 * `this.confidenceConfig`. Every score is clamped to 0-100 so that negative
 * similarities, misconfigured weights or NaN inputs cannot leak out-of-range
 * values to consumers.
 *
 * @param tool - Tool record; its `domains` list is read here and the whole
 *   record is forwarded to the freshness and factor helpers.
 * @param embeddingsSimilarity - Similarity value; 0.5 or higher maps to 100.
 * @param domainMatch - Whether the tool's domains matched the user query.
 * @param microTaskAgreement - Fraction of micro-tasks that selected the tool;
 *   1 maps to 100.
 * @param context - Analysis context, forwarded to the factor helpers.
 * @returns The rounded component scores, the rounded overall score and the
 *   uncertainty/strength explanations.
 */
private calculateRecommendationConfidence(
  tool: any,
  embeddingsSimilarity: number,
  domainMatch: boolean,
  microTaskAgreement: number,
  context: AnalysisContext
): ConfidenceMetrics {
  // Bound a raw score to the 0-100 range; NaN collapses to 0.
  const toScore = (value: number): number =>
    Number.isNaN(value) ? 0 : Math.min(100, Math.max(0, value));

  // A similarity of 0.5 is treated as a perfect match, hence the factor 2.
  const embeddingsQuality = toScore(embeddingsSimilarity * 100 * 2);

  // A tool that declares domains but does not match still beats a tool
  // that declares none at all.
  const declaresDomains = (tool.domains?.length ?? 0) > 0;
  const domainAlignment = domainMatch ? 90 : declaresDomains ? 60 : 30;

  const consensus = toScore(microTaskAgreement * 100);
  const freshness = toScore(this.calculateToolFreshness(tool));

  const { embeddingsWeight, domainMatchWeight, consensusWeight, freshnessWeight } =
    this.confidenceConfig;

  // Weights come from the environment and are not guaranteed to sum to 1,
  // so the weighted sum is clamped as well.
  const overall = toScore(
    embeddingsQuality * embeddingsWeight +
      domainAlignment * domainMatchWeight +
      consensus * consensusWeight +
      freshness * freshnessWeight
  );

  const uncertaintyFactors = this.identifyUncertaintyFactors(tool, context, overall);
  const strengthIndicators = this.identifyStrengthIndicators(tool, context, overall);

  return {
    overall: Math.round(overall),
    embeddingsQuality: Math.round(embeddingsQuality),
    domainAlignment: Math.round(domainAlignment),
    consensus: Math.round(consensus),
    freshness: Math.round(freshness),
    uncertaintyFactors,
    strengthIndicators
  };
}
private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
const isWorkflow = context.mode === 'workflow';
const prompt = getPrompt('scenarioAnalysis', isWorkflow, context.userQuery);
@@ -1010,6 +1080,124 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
}
}
/**
 * Heuristic "freshness" score for a tool, capped at 100.
 *
 * Starts from a baseline of 70 and adds bonuses for signals the code treats
 * as signs of maintenance: a knowledge base entry (+20), being hosted (+15)
 * and having a license other than 'Proprietary' (+5).
 *
 * @param tool - Tool record; reads `knowledgebase` and `license`, and is
 *   passed to `isToolHosted`.
 * @returns A score between 70 and 100.
 */
private calculateToolFreshness(tool: any): number {
  const baseline = 70;
  const knowledgeBaseBonus = tool.knowledgebase === true ? 20 : 0;
  const hostedBonus = isToolHosted(tool) ? 15 : 0;
  const licenseBonus = tool.license && tool.license !== 'Proprietary' ? 5 : 0;

  const total = baseline + knowledgeBaseBonus + hostedBonus + licenseBonus;
  return total > 100 ? 100 : total;
}
/**
 * Reports whether any of the tool's domains is mentioned in the user query.
 *
 * Keywords per domain are read from the `CONFIDENCE_DOMAIN_KEYWORDS`
 * environment variable, formatted as `domain:kw1,kw2|other-domain:kw3`.
 * A domain without a configured (non-empty) keyword list falls back to its
 * own name with hyphens replaced by spaces. Matching is a case-insensitive
 * substring test against the query.
 *
 * @param tool - Tool record; only `domains` is read.
 * @param userQuery - Free-text query entered by the user.
 * @returns `true` if at least one keyword of at least one domain occurs in
 *   the query; `false` otherwise, including when the tool has no domains.
 */
private checkDomainMatch(tool: any, userQuery: string): boolean {
  const domains: unknown = tool?.domains;
  if (!Array.isArray(domains) || domains.length === 0) return false;

  const queryLower = (userQuery ?? '').toLowerCase();

  // A Map (rather than a plain object) keeps domain names such as
  // "constructor" from resolving to inherited Object.prototype members.
  const keywordsByDomain = new Map<string, string[]>();
  for (const pair of (process.env.CONFIDENCE_DOMAIN_KEYWORDS || '').split('|')) {
    const separatorIndex = pair.indexOf(':');
    // Skip empty or malformed segments. An unset variable yields a single
    // empty segment, which previously caused a TypeError.
    if (separatorIndex <= 0) continue;

    const domain = pair.slice(0, separatorIndex).trim();
    const keywords = pair
      .slice(separatorIndex + 1)
      .split(',')
      .map(keyword => keyword.trim().toLowerCase())
      // An empty keyword would match every query via includes('').
      .filter(keyword => keyword.length > 0);

    if (domain.length > 0 && keywords.length > 0) {
      keywordsByDomain.set(domain, keywords);
    }
  }

  return domains.some(domain => {
    if (typeof domain !== 'string' || domain.trim().length === 0) return false;
    // Replace every hyphen, not just the first, and compare in lower case
    // because the query has been lower-cased.
    const keywords =
      keywordsByDomain.get(domain) ?? [domain.replace(/-/g, ' ').toLowerCase()];
    return keywords.some(keyword => queryLower.includes(keyword));
  });
}
/**
 * Fraction of selection micro-tasks in the audit trail that picked a tool.
 *
 * An audit entry counts as a selection micro-task when its phase is
 * 'micro-task' and its action contains 'selection'. It counts as agreeing
 * when its output is an object whose `selectedTools` array includes
 * `toolName`.
 *
 * @param toolName - Name of the tool to look for.
 * @param context - Analysis context whose `auditTrail` is scanned.
 * @returns agreeing / total selection micro-tasks, or 0.8 when the audit
 *   trail contains no selection micro-tasks at all.
 */
private getMicroTaskAgreement(toolName: string, context: AnalysisContext): number {
  let selectionTasks = 0;
  let agreeingTasks = 0;

  for (const entry of context.auditTrail) {
    if (entry.phase !== 'micro-task' || !entry.action.includes('selection')) {
      continue;
    }
    selectionTasks += 1;

    const pickedThisTool =
      entry.output &&
      typeof entry.output === 'object' &&
      Array.isArray(entry.output.selectedTools) &&
      entry.output.selectedTools.includes(toolName);
    if (pickedThisTool) {
      agreeingTasks += 1;
    }
  }

  // With nothing to compare against, assume a high level of agreement.
  if (selectionTasks === 0) {
    return 0.8;
  }
  return agreeingTasks / selectionTasks;
}
/**
 * Placeholder lookup of a tool's embeddings similarity.
 *
 * Currently this only checks whether the audit trail holds a 'retrieval' /
 * 'embeddings-search' entry with a truthy output. It returns the fixed value
 * 0.7 if so and 0.5 otherwise. No per-tool similarity is read yet, so
 * `toolName` is unused.
 * TODO: return the real similarity once the embeddings search records it.
 *
 * @param toolName - Name of the tool (not used by the placeholder).
 * @param context - Analysis context whose `auditTrail` is inspected.
 * @returns 0.7 when an embeddings search with output was recorded, else 0.5.
 */
private getEmbeddingsSimilarity(toolName: string, context: AnalysisContext): number {
  const searchEntry = context.auditTrail.find(
    candidate => candidate.phase === 'retrieval' && candidate.action === 'embeddings-search'
  );

  const searchProducedOutput = Boolean(searchEntry && searchEntry.output);
  return searchProducedOutput ? 0.7 : 0.5;
}
/**
 * Collects human-readable reasons why a recommendation might be unreliable.
 *
 * The rules are evaluated in a fixed order and every rule that applies
 * contributes its message, so the result keeps that order.
 *
 * @param tool - Tool record; reads `skillLevel`, `type` and `url`.
 * @param context - Analysis context; `userQuery` is matched against keywords.
 * @param confidence - Overall confidence score, compared with the medium
 *   threshold.
 * @returns The messages of all rules that apply (possibly empty).
 */
private identifyUncertaintyFactors(tool: any, context: AnalysisContext, confidence: number): string[] {
  const rules: Array<[boolean, string]> = [
    [
      confidence < this.confidenceConfig.mediumThreshold,
      'Low overall confidence - consider manual validation'
    ],
    [
      !this.checkDomainMatch(tool, context.userQuery),
      'Domain mismatch detected - tool may not be specifically designed for this scenario'
    ],
    [
      tool.skillLevel === 'expert' && /rapid|quick|urgent|triage/i.test(context.userQuery),
      'Expert-level tool for rapid scenario - may be overcomplicated'
    ],
    [
      tool.type === 'software' && !isToolHosted(tool) && !tool.url,
      'Limited access information - availability uncertain'
    ],
    [
      tool.skillLevel === 'novice' && /complex|advanced|deep/i.test(context.userQuery),
      'Novice-level tool for complex scenario - may lack required capabilities'
    ]
  ];

  return rules.filter(([applies]) => applies).map(([, message]) => message);
}
/**
 * Collects human-readable reasons why a recommendation is considered strong.
 *
 * The rules are evaluated in a fixed order and every rule that applies
 * contributes its message, so the result keeps that order.
 *
 * @param tool - Tool record; reads `knowledgebase` and `type`.
 * @param context - Analysis context; `userQuery` is matched against keywords.
 * @param confidence - Overall confidence score, compared with the high
 *   threshold.
 * @returns The messages of all rules that apply (possibly empty).
 */
private identifyStrengthIndicators(tool: any, context: AnalysisContext, confidence: number): string[] {
  const rules: Array<[boolean, string]> = [
    [
      confidence >= this.confidenceConfig.highThreshold,
      'High confidence recommendation based on multiple factors'
    ],
    [
      this.checkDomainMatch(tool, context.userQuery),
      'Strong domain alignment with scenario requirements'
    ],
    [
      tool.knowledgebase === true,
      'Documentation and knowledge base available for guidance'
    ],
    [
      Boolean(isToolHosted(tool)),
      'Hosted solution available for immediate access'
    ],
    [
      tool.type === 'method' && /methodology|approach|process/i.test(context.userQuery),
      'Methodological approach matches procedural inquiry'
    ]
  ];

  return rules.filter(([applies]) => applies).map(([, message]) => message);
}
private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any {
const isWorkflow = mode === 'workflow';
@@ -1025,20 +1213,71 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
};
if (isWorkflow) {
return {
...base,
recommended_tools: context.selectedTools?.map(st => ({
const recommendedToolsWithConfidence = context.selectedTools?.map(st => {
// Calculate confidence for each tool
const confidence = this.calculateRecommendationConfidence(
st.tool,
this.getEmbeddingsSimilarity(st.tool.name, context),
this.checkDomainMatch(st.tool, context.userQuery),
this.getMicroTaskAgreement(st.tool.name, context),
context
);
// Add audit entry for confidence calculation
this.addAuditEntry(context, 'validation', 'confidence-scoring',
{ toolName: st.tool.name, phase: st.phase },
{
overall: confidence.overall,
components: {
embeddings: confidence.embeddingsQuality,
domain: confidence.domainAlignment,
consensus: confidence.consensus,
freshness: confidence.freshness
}
},
confidence.overall,
Date.now(),
{ uncertaintyCount: confidence.uncertaintyFactors.length }
);
return {
name: st.tool.name,
phase: st.phase,
priority: st.priority,
justification: st.justification || `Empfohlen für ${st.phase}`
})) || [],
justification: st.justification || `Empfohlen für ${st.phase}`,
confidence: confidence,
recommendationStrength: confidence.overall >= this.confidenceConfig.highThreshold ? 'strong' :
confidence.overall >= this.confidenceConfig.mediumThreshold ? 'moderate' : 'weak'
};
}) || [];
return {
...base,
recommended_tools: recommendedToolsWithConfidence,
workflow_suggestion: finalContent
};
} else {
return {
...base,
recommended_tools: context.selectedTools?.map(st => ({
const recommendedToolsWithConfidence = context.selectedTools?.map(st => {
const confidence = this.calculateRecommendationConfidence(
st.tool,
this.getEmbeddingsSimilarity(st.tool.name, context),
this.checkDomainMatch(st.tool, context.userQuery),
this.getMicroTaskAgreement(st.tool.name, context),
context
);
this.addAuditEntry(context, 'validation', 'confidence-scoring',
{ toolName: st.tool.name, rank: st.tool.evaluation?.rank || 1 },
{
overall: confidence.overall,
suitabilityAlignment: st.priority === 'high' && confidence.overall >= this.confidenceConfig.highThreshold
},
confidence.overall,
Date.now(),
{ strengthCount: confidence.strengthIndicators.length }
);
return {
name: st.tool.name,
rank: st.tool.evaluation?.rank || 1,
suitability_score: st.priority,
@@ -1046,8 +1285,16 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
implementation_approach: st.tool.evaluation?.implementation_approach || '',
pros: st.tool.evaluation?.pros || [],
cons: st.tool.evaluation?.cons || [],
alternatives: st.tool.evaluation?.alternatives || ''
})) || [],
alternatives: st.tool.evaluation?.alternatives || '',
confidence: confidence,
recommendationStrength: confidence.overall >= this.confidenceConfig.highThreshold ? 'strong' :
confidence.overall >= this.confidenceConfig.mediumThreshold ? 'moderate' : 'weak'
};
}) || [];
return {
...base,
recommended_tools: recommendedToolsWithConfidence,
additional_considerations: finalContent
};
}