fix embeddings truncation
This commit is contained in:
@@ -1,126 +0,0 @@
|
||||
// src/config/forensic.config.ts
|
||||
// Centralized configuration for forensic RAG enhancements
|
||||
|
||||
/**
 * Settings for the forensic RAG enhancements, read once from `process.env`
 * when this module is evaluated.
 *
 * Every numeric or list setting falls back to its built-in default when the
 * environment variable is unset or empty. Values are not range-checked here;
 * an unparsable number ends up as NaN.
 */
export const FORENSIC_CONFIG = (() => {
  const env = process.env;

  // Readers share the same "unset or empty -> default" rule (`||`, not `??`).
  const asInt = (key: string, fallback: string): number => parseInt(env[key] || fallback, 10);
  const asFloat = (key: string, fallback: string): number => parseFloat(env[key] || fallback);
  // Opt-in flag: true only for the exact string 'true'.
  const isOn = (key: string): boolean => env[key] === 'true';
  // Opt-out flag: true unless the variable is the exact string 'false'.
  const isNotOff = (key: string): boolean => env[key] !== 'false';

  return {
    audit: {
      enabled: isOn('FORENSIC_AUDIT_ENABLED'),
      detailLevel: (env.FORENSIC_AUDIT_DETAIL_LEVEL as 'minimal' | 'standard' | 'verbose') || 'standard',
      retentionHours: asInt('FORENSIC_AUDIT_RETENTION_HOURS', '72'),
      maxEntriesPerRequest: asInt('FORENSIC_AUDIT_MAX_ENTRIES', '50')
    },
    confidence: {
      embeddingsWeight: asFloat('CONFIDENCE_EMBEDDINGS_WEIGHT', '0.3'),
      consensusWeight: asFloat('CONFIDENCE_CONSENSUS_WEIGHT', '0.25'),
      domainMatchWeight: asFloat('CONFIDENCE_DOMAIN_MATCH_WEIGHT', '0.25'),
      freshnessWeight: asFloat('CONFIDENCE_FRESHNESS_WEIGHT', '0.2'),
      minimumThreshold: asInt('CONFIDENCE_MINIMUM_THRESHOLD', '40'),
      highThreshold: asInt('CONFIDENCE_HIGH_THRESHOLD', '80'),
      mediumThreshold: asInt('CONFIDENCE_MEDIUM_THRESHOLD', '60')
    },
    bias: {
      enabled: isOn('BIAS_DETECTION_ENABLED'),
      popularityThreshold: asFloat('BIAS_POPULARITY_THRESHOLD', '0.7'),
      diversityMinimum: asFloat('BIAS_DIVERSITY_MINIMUM', '0.6'),
      domainMismatchThreshold: asFloat('BIAS_DOMAIN_MISMATCH_THRESHOLD', '0.3'),
      warningThreshold: asInt('BIAS_WARNING_THRESHOLD', '3'),
      // Comma-separated list; each entry is trimmed.
      celebrityTools: (env.BIAS_CELEBRITY_TOOLS || 'Volatility 3,Wireshark,Autopsy,Maltego')
        .split(',')
        .map(t => t.trim())
    },
    // Quality thresholds for various metrics
    quality: {
      minResponseLength: asInt('QUALITY_MIN_RESPONSE_LENGTH', '50'),
      minSelectionCount: asInt('QUALITY_MIN_SELECTION_COUNT', '1'),
      maxProcessingTime: asInt('QUALITY_MAX_PROCESSING_TIME_MS', '30000')
    },
    // Display preferences
    ui: {
      showAuditTrailByDefault: isOn('UI_SHOW_AUDIT_TRAIL_DEFAULT'),
      showConfidenceScores: isNotOff('UI_SHOW_CONFIDENCE_SCORES'),
      showBiasWarnings: isNotOff('UI_SHOW_BIAS_WARNINGS'),
      auditTrailCollapsible: isNotOff('UI_AUDIT_TRAIL_COLLAPSIBLE')
    }
  };
})();
|
||||
|
||||
/**
 * Checks the values in FORENSIC_CONFIG against their allowed ranges.
 *
 * Every numeric check is written as a positive condition ("is inside the
 * range") and then negated. Comparisons against NaN are always false, so a
 * value that failed to parse (NaN) is reported as an error instead of
 * slipping through a `value < min || value > max` style test.
 *
 * @returns `valid` is true when no problems were found; `errors` lists one
 *          human-readable message per violated rule, in check order.
 */
export function validateForensicConfig(): { valid: boolean; errors: string[] } {
  const errors: string[] = [];
  const { audit, confidence, bias } = FORENSIC_CONFIG;

  // False for NaN as well as for out-of-range values.
  const isWithin = (value: number, min: number, max: number): boolean =>
    value >= min && value <= max;

  // Validate audit configuration
  if (!isWithin(audit.retentionHours, 1, 168)) {
    errors.push('FORENSIC_AUDIT_RETENTION_HOURS must be between 1 and 168 (1 week)');
  }

  if (!['minimal', 'standard', 'verbose'].includes(audit.detailLevel)) {
    errors.push('FORENSIC_AUDIT_DETAIL_LEVEL must be one of: minimal, standard, verbose');
  }

  // Validate confidence weights sum to approximately 1.0 (tolerance 0.05)
  const weightSum =
    confidence.embeddingsWeight +
    confidence.consensusWeight +
    confidence.domainMatchWeight +
    confidence.freshnessWeight;

  if (!(Math.abs(weightSum - 1.0) <= 0.05)) {
    errors.push(`Confidence weights must sum to 1.0 (currently ${weightSum.toFixed(3)})`);
  }

  // Validate threshold ranges
  if (!isWithin(confidence.minimumThreshold, 0, 100)) {
    errors.push('CONFIDENCE_MINIMUM_THRESHOLD must be between 0 and 100');
  }

  if (!(confidence.highThreshold > confidence.mediumThreshold)) {
    errors.push('CONFIDENCE_HIGH_THRESHOLD must be greater than CONFIDENCE_MEDIUM_THRESHOLD');
  }

  // Validate bias thresholds
  if (!isWithin(bias.popularityThreshold, 0, 1)) {
    errors.push('BIAS_POPULARITY_THRESHOLD must be between 0 and 1');
  }

  if (!isWithin(bias.diversityMinimum, 0, 1)) {
    errors.push('BIAS_DIVERSITY_MINIMUM must be between 0 and 1');
  }

  return {
    valid: errors.length === 0,
    errors
  };
}
|
||||
|
||||
// Helper functions for configuration access
|
||||
/** Returns the `audit.enabled` flag from FORENSIC_CONFIG. */
export function isAuditEnabled(): boolean {
  const { enabled } = FORENSIC_CONFIG.audit;
  return enabled;
}
|
||||
|
||||
/** Returns the configured audit detail level (`audit.detailLevel` in FORENSIC_CONFIG). */
export function getAuditDetailLevel(): 'minimal' | 'standard' | 'verbose' {
  const { detailLevel } = FORENSIC_CONFIG.audit;
  return detailLevel;
}
|
||||
|
||||
/**
 * Returns the three confidence thresholds from FORENSIC_CONFIG under the
 * shorter keys `minimum`, `medium` and `high`.
 */
export function getConfidenceThresholds() {
  const {
    minimumThreshold: minimum,
    mediumThreshold: medium,
    highThreshold: high
  } = FORENSIC_CONFIG.confidence;

  return { minimum, medium, high };
}
|
||||
|
||||
/** Returns the `bias.enabled` flag from FORENSIC_CONFIG. */
export function isBiasDetectionEnabled(): boolean {
  const { enabled } = FORENSIC_CONFIG.bias;
  return enabled;
}
|
||||
|
||||
// Runs once when the module is loaded: validate the configuration, report any
// problems, and log a short summary of the active feature switches.
const configValidation = validateForensicConfig();

if (configValidation.valid === false) {
  const problems = configValidation.errors;
  console.warn('[FORENSIC CONFIG] Configuration validation failed:', problems);

  // Only a development environment treats an invalid configuration as fatal;
  // elsewhere the warning above is the only consequence.
  const isDevelopment = process.env.NODE_ENV === 'development';
  if (isDevelopment) {
    throw new Error(`Forensic configuration invalid: ${problems.join(', ')}`);
  }
}

{
  const { audit, bias } = FORENSIC_CONFIG;
  console.log('[FORENSIC CONFIG] Configuration loaded:', {
    auditEnabled: audit.enabled,
    confidenceEnabled: true, // Always enabled
    biasDetectionEnabled: bias.enabled,
    detailLevel: audit.detailLevel
  });
}
|
||||
@@ -78,17 +78,25 @@ class ImprovedMicroTaskAIPipeline {
|
||||
private similarityThreshold: number;
|
||||
private microTaskDelay: number;
|
||||
|
||||
// NEW: Embedding selection limits (top N from pre-filtered candidates)
|
||||
private embeddingSelectionLimit: number;
|
||||
private embeddingConceptsLimit: number;
|
||||
|
||||
// NEW: Embeddings efficiency thresholds
|
||||
private embeddingsMinTools: number;
|
||||
private embeddingsMaxReductionRatio: number;
|
||||
|
||||
private maxContextTokens: number;
|
||||
private maxPromptTokens: number;
|
||||
|
||||
// NEW: Audit Configuration
|
||||
// Audit Configuration
|
||||
private auditConfig: {
|
||||
enabled: boolean;
|
||||
detailLevel: 'minimal' | 'standard' | 'verbose';
|
||||
retentionHours: number;
|
||||
};
|
||||
|
||||
// NEW: Temporary audit storage for pre-context operations
|
||||
// Temporary audit storage for pre-context operations
|
||||
private tempAuditEntries: AuditEntry[] = [];
|
||||
|
||||
constructor() {
|
||||
@@ -98,20 +106,38 @@ class ImprovedMicroTaskAIPipeline {
|
||||
model: this.getEnv('AI_ANALYZER_MODEL')
|
||||
};
|
||||
|
||||
this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '60', 10);
|
||||
this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '60', 10);
|
||||
this.similarityThreshold = 0.3;
|
||||
// Core pipeline configuration
|
||||
this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '25', 10);
|
||||
this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '50', 10);
|
||||
this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.3');
|
||||
this.microTaskDelay = parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10);
|
||||
|
||||
// NEW: Embedding selection limits (top N from pre-filtered candidates)
|
||||
this.embeddingSelectionLimit = parseInt(process.env.AI_EMBEDDING_SELECTION_LIMIT || '30', 10);
|
||||
this.embeddingConceptsLimit = parseInt(process.env.AI_EMBEDDING_CONCEPTS_LIMIT || '15', 10);
|
||||
|
||||
// NEW: Embeddings efficiency thresholds
|
||||
this.embeddingsMinTools = parseInt(process.env.AI_EMBEDDINGS_MIN_TOOLS || '8', 10);
|
||||
this.embeddingsMaxReductionRatio = parseFloat(process.env.AI_EMBEDDINGS_MAX_REDUCTION_RATIO || '0.75');
|
||||
|
||||
// Context management
|
||||
this.maxContextTokens = parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10);
|
||||
this.maxPromptTokens = parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10);
|
||||
|
||||
// NEW: Initialize Audit Configuration
|
||||
// Audit configuration
|
||||
this.auditConfig = {
|
||||
enabled: process.env.FORENSIC_AUDIT_ENABLED === 'true',
|
||||
detailLevel: (process.env.FORENSIC_AUDIT_DETAIL_LEVEL as any) || 'standard',
|
||||
retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10)
|
||||
};
|
||||
|
||||
// Log configuration for debugging
|
||||
console.log('[AI PIPELINE] Configuration loaded:', {
|
||||
embeddingCandidates: this.embeddingCandidates,
|
||||
embeddingSelection: `${this.embeddingSelectionLimit} tools, ${this.embeddingConceptsLimit} concepts`,
|
||||
embeddingsThresholds: `min ${this.embeddingsMinTools} tools, max ${this.embeddingsMaxReductionRatio * 100}% of total`,
|
||||
auditEnabled: this.auditConfig.enabled
|
||||
});
|
||||
}
|
||||
|
||||
private getEnv(key: string): string {
|
||||
@@ -272,50 +298,49 @@ class ImprovedMicroTaskAIPipeline {
|
||||
userQuery,
|
||||
this.embeddingCandidates,
|
||||
this.similarityThreshold
|
||||
) as SimilarityResult[]; // Type assertion for similarity property
|
||||
) as SimilarityResult[];
|
||||
|
||||
console.log(`[IMPROVED PIPELINE] Embeddings found ${similarItems.length} similar items`);
|
||||
console.log(`[AI PIPELINE] Embeddings found ${similarItems.length} similar items`);
|
||||
|
||||
// FIXED: Create lookup maps for O(1) access while preserving original data
|
||||
// Create lookup maps for O(1) access while preserving original data
|
||||
const toolsMap = new Map<string, any>(toolsData.tools.map((tool: any) => [tool.name, tool]));
|
||||
const conceptsMap = new Map<string, any>(toolsData.concepts.map((concept: any) => [concept.name, concept]));
|
||||
|
||||
// FIXED: Process in similarity order, preserving the ranking
|
||||
// Process in similarity order, preserving the ranking
|
||||
const similarTools = similarItems
|
||||
.filter((item): item is SimilarityResult => item.type === 'tool')
|
||||
.map(item => toolsMap.get(item.name))
|
||||
.filter((tool): tool is any => tool !== undefined); // Proper type guard
|
||||
.filter((tool): tool is any => tool !== undefined);
|
||||
|
||||
const similarConcepts = similarItems
|
||||
.filter((item): item is SimilarityResult => item.type === 'concept')
|
||||
.map(item => conceptsMap.get(item.name))
|
||||
.filter((concept): concept is any => concept !== undefined); // Proper type guard
|
||||
.filter((concept): concept is any => concept !== undefined);
|
||||
|
||||
console.log(`[IMPROVED PIPELINE] Similarity-ordered results: ${similarTools.length} tools, ${similarConcepts.length} concepts`);
|
||||
console.log(`[AI PIPELINE] Similarity-ordered results: ${similarTools.length} tools, ${similarConcepts.length} concepts`);
|
||||
|
||||
// Log the first few tools to verify ordering is preserved
|
||||
if (similarTools.length > 0) {
|
||||
console.log(`[IMPROVED PIPELINE] Top similar tools (in similarity order):`);
|
||||
similarTools.slice(0, 5).forEach((tool, idx) => {
|
||||
const originalSimilarItem = similarItems.find(item => item.name === tool.name);
|
||||
console.log(` ${idx + 1}. ${tool.name} (similarity: ${originalSimilarItem?.similarity?.toFixed(4) || 'N/A'})`);
|
||||
});
|
||||
}
|
||||
// FIXED: Better threshold logic - only use embeddings if we get meaningful filtering
|
||||
const totalAvailableTools = toolsData.tools.length;
|
||||
const reductionRatio = similarTools.length / totalAvailableTools;
|
||||
|
||||
if (similarTools.length >= 15) {
|
||||
if (similarTools.length >= this.embeddingsMinTools && reductionRatio <= this.embeddingsMaxReductionRatio) {
|
||||
candidateTools = similarTools;
|
||||
candidateConcepts = similarConcepts;
|
||||
selectionMethod = 'embeddings_candidates';
|
||||
|
||||
console.log(`[IMPROVED PIPELINE] Using embeddings candidates in similarity order: ${candidateTools.length} tools`);
|
||||
console.log(`[AI PIPELINE] Using embeddings filtering: ${totalAvailableTools} → ${similarTools.length} tools (${(reductionRatio * 100).toFixed(1)}% reduction)`);
|
||||
} else {
|
||||
console.log(`[IMPROVED PIPELINE] Embeddings insufficient (${similarTools.length} < 15), using full dataset`);
|
||||
if (similarTools.length < this.embeddingsMinTools) {
|
||||
console.log(`[AI PIPELINE] Embeddings found too few tools (${similarTools.length} < ${this.embeddingsMinTools}), using full dataset`);
|
||||
} else {
|
||||
console.log(`[AI PIPELINE] Embeddings didn't filter enough (${(reductionRatio * 100).toFixed(1)}% > ${(this.embeddingsMaxReductionRatio * 100).toFixed(1)}%), using full dataset`);
|
||||
}
|
||||
candidateTools = toolsData.tools;
|
||||
candidateConcepts = toolsData.concepts;
|
||||
selectionMethod = 'full_dataset';
|
||||
}
|
||||
|
||||
// NEW: Add Audit Entry for Embeddings Search with ordering verification
|
||||
// Enhanced audit entry with reduction statistics
|
||||
if (this.auditConfig.enabled) {
|
||||
this.addAuditEntry(null, 'retrieval', 'embeddings-search',
|
||||
{ query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates },
|
||||
@@ -323,21 +348,29 @@ class ImprovedMicroTaskAIPipeline {
|
||||
candidatesFound: similarItems.length,
|
||||
toolsInOrder: similarTools.slice(0, 3).map((t: any) => t.name),
|
||||
conceptsInOrder: similarConcepts.slice(0, 3).map((c: any) => c.name),
|
||||
orderingPreserved: true
|
||||
reductionRatio: reductionRatio,
|
||||
usingEmbeddings: selectionMethod === 'embeddings_candidates',
|
||||
totalAvailable: totalAvailableTools,
|
||||
filtered: similarTools.length
|
||||
},
|
||||
similarTools.length >= 15 ? 85 : 60,
|
||||
selectionMethod === 'embeddings_candidates' ? 85 : 60,
|
||||
embeddingsStart,
|
||||
{ selectionMethod, embeddingsEnabled: true, orderingFixed: true }
|
||||
{
|
||||
selectionMethod,
|
||||
embeddingsEnabled: true,
|
||||
reductionAchieved: selectionMethod === 'embeddings_candidates',
|
||||
tokenSavingsExpected: selectionMethod === 'embeddings_candidates'
|
||||
}
|
||||
);
|
||||
}
|
||||
} else {
|
||||
console.log(`[IMPROVED PIPELINE] Embeddings disabled, using full dataset`);
|
||||
console.log(`[AI PIPELINE] Embeddings disabled, using full dataset`);
|
||||
candidateTools = toolsData.tools;
|
||||
candidateConcepts = toolsData.concepts;
|
||||
selectionMethod = 'full_dataset';
|
||||
}
|
||||
|
||||
console.log(`[IMPROVED PIPELINE] AI will analyze ${candidateTools.length} candidate tools (ordering preserved: ${selectionMethod === 'embeddings_candidates'})`);
|
||||
console.log(`[AI PIPELINE] AI will analyze ${candidateTools.length} candidate tools (method: ${selectionMethod})`);
|
||||
const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod);
|
||||
|
||||
return {
|
||||
@@ -387,15 +420,37 @@ class ImprovedMicroTaskAIPipeline {
|
||||
related_software: concept.related_software || []
|
||||
}));
|
||||
|
||||
// Generate the German prompt with tool data
|
||||
// CORRECTED LOGIC:
|
||||
let toolsToSend: any[];
|
||||
let conceptsToSend: any[];
|
||||
|
||||
if (selectionMethod === 'embeddings_candidates') {
|
||||
// WITH EMBEDDINGS: Take top N from pre-filtered candidates
|
||||
toolsToSend = toolsWithFullData.slice(0, this.embeddingSelectionLimit);
|
||||
conceptsToSend = conceptsWithFullData.slice(0, this.embeddingConceptsLimit);
|
||||
|
||||
console.log(`[AI PIPELINE] Embeddings enabled: sending top ${toolsToSend.length} pre-filtered tools`);
|
||||
} else {
|
||||
// WITHOUT EMBEDDINGS: Send entire compressed database (original behavior)
|
||||
toolsToSend = toolsWithFullData; // ALL tools from database
|
||||
conceptsToSend = conceptsWithFullData; // ALL concepts from database
|
||||
|
||||
console.log(`[AI PIPELINE] Embeddings disabled: sending entire database (${toolsToSend.length} tools, ${conceptsToSend.length} concepts)`);
|
||||
}
|
||||
|
||||
// Generate the German prompt with appropriately selected tool data
|
||||
const basePrompt = getPrompt('toolSelection', mode, userQuery, selectionMethod, this.maxSelectedItems);
|
||||
const prompt = `${basePrompt}
|
||||
|
||||
VERFÜGBARE TOOLS (mit vollständigen Daten):
|
||||
${JSON.stringify(toolsWithFullData.slice(0, 30), null, 2)}
|
||||
${JSON.stringify(toolsToSend, null, 2)}
|
||||
|
||||
VERFÜGBARE KONZEPTE (mit vollständigen Daten):
|
||||
${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
||||
${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||
|
||||
// Log token usage for monitoring
|
||||
const estimatedTokens = this.estimateTokens(prompt);
|
||||
console.log(`[AI PIPELINE] Method: ${selectionMethod}, Tools: ${toolsToSend.length}, Tokens: ~${estimatedTokens}`);
|
||||
|
||||
try {
|
||||
const response = await this.callAI(prompt, 2500);
|
||||
@@ -403,16 +458,15 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
||||
const result = this.safeParseJSON(response, null);
|
||||
|
||||
if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
|
||||
console.error('[IMPROVED PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
|
||||
console.error('[AI PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
|
||||
|
||||
// NEW: Add Audit Entry for Failed Selection
|
||||
if (this.auditConfig.enabled) {
|
||||
this.addAuditEntry(null, 'selection', 'ai-tool-selection-failed',
|
||||
{ candidateCount: candidateTools.length, mode, prompt: prompt.slice(0, 200) },
|
||||
{ error: 'Invalid JSON structure', response: response.slice(0, 200) },
|
||||
10, // Very low confidence
|
||||
10,
|
||||
selectionStart,
|
||||
{ aiModel: this.config.model, selectionMethod }
|
||||
{ aiModel: this.config.model, selectionMethod, tokensSent: estimatedTokens, toolsSent: toolsToSend.length }
|
||||
);
|
||||
}
|
||||
|
||||
@@ -421,19 +475,15 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
||||
|
||||
const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
|
||||
if (totalSelected === 0) {
|
||||
console.error('[IMPROVED PIPELINE] AI selection returned no tools');
|
||||
console.error('[AI PIPELINE] AI selection returned no tools');
|
||||
throw new Error('AI selection returned empty selection');
|
||||
}
|
||||
|
||||
console.log(`[IMPROVED PIPELINE] AI selected: ${result.selectedTools.length} tools, ${result.selectedConcepts.length} concepts`);
|
||||
console.log(`[IMPROVED PIPELINE] AI reasoning: ${result.reasoning}`);
|
||||
console.log(`[AI PIPELINE] AI selected: ${result.selectedTools.length} tools, ${result.selectedConcepts.length} concepts from ${toolsToSend.length} candidates`);
|
||||
|
||||
const selectedTools = candidateTools.filter(tool => result.selectedTools.includes(tool.name));
|
||||
const selectedConcepts = candidateConcepts.filter(concept => result.selectedConcepts.includes(concept.name));
|
||||
|
||||
console.log(`[IMPROVED PIPELINE] Final selection: ${selectedTools.length} tools with bias prevention applied`);
|
||||
|
||||
// NEW: Add Audit Entry for Successful Selection
|
||||
if (this.auditConfig.enabled) {
|
||||
const confidence = this.calculateSelectionConfidence(result, candidateTools.length);
|
||||
|
||||
@@ -443,11 +493,12 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
||||
selectedToolCount: result.selectedTools.length,
|
||||
selectedConceptCount: result.selectedConcepts.length,
|
||||
reasoning: result.reasoning?.slice(0, 200) + '...',
|
||||
finalToolNames: selectedTools.map(t => t.name)
|
||||
finalToolNames: selectedTools.map(t => t.name),
|
||||
selectionEfficiency: `${toolsToSend.length} → ${result.selectedTools.length}`
|
||||
},
|
||||
confidence,
|
||||
selectionStart,
|
||||
{ aiModel: this.config.model, selectionMethod, promptTokens: this.estimateTokens(prompt) }
|
||||
{ aiModel: this.config.model, selectionMethod, promptTokens: estimatedTokens, toolsSent: toolsToSend.length }
|
||||
);
|
||||
}
|
||||
|
||||
@@ -457,69 +508,21 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
||||
};
|
||||
|
||||
} catch (error) {
|
||||
console.error('[IMPROVED PIPELINE] AI selection failed:', error);
|
||||
console.error('[AI PIPELINE] AI selection failed:', error);
|
||||
|
||||
// NEW: Add Audit Entry for Selection Error
|
||||
if (this.auditConfig.enabled) {
|
||||
this.addAuditEntry(null, 'selection', 'ai-tool-selection-error',
|
||||
{ candidateCount: candidateTools.length, mode },
|
||||
{ error: error.message },
|
||||
5, // Very low confidence
|
||||
5,
|
||||
selectionStart,
|
||||
{ aiModel: this.config.model, selectionMethod }
|
||||
{ aiModel: this.config.model, selectionMethod, tokensSent: estimatedTokens }
|
||||
);
|
||||
}
|
||||
|
||||
console.log('[IMPROVED PIPELINE] Using emergency keyword-based selection');
|
||||
return this.emergencyKeywordSelection(userQuery, candidateTools, candidateConcepts, mode);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Fallback selection that ranks candidate tools by how many query keywords
 * occur in their text, without calling the AI.
 *
 * Keywords are the whitespace-separated words of the lower-cased query that
 * are longer than three characters. A tool's text is its name, description,
 * tags, platforms and domains, lower-cased. Tools with no keyword hit are
 * dropped; the rest are sorted by hit count, highest first.
 *
 * @param userQuery         free-text query to derive keywords from
 * @param candidateTools    tools to score
 * @param candidateConcepts concepts; the first three are returned unranked
 * @param mode              'workflow' allows up to 20 tools, anything else up to 8
 * @returns the selected tools and the first three candidate concepts
 */
private emergencyKeywordSelection(userQuery: string, candidateTools: any[], candidateConcepts: any[], mode: string) {
  const emergencyStart = Date.now();

  const keywords = userQuery
    .toLowerCase()
    .split(/\s+/)
    .filter(word => word.length > 3);

  const scored = candidateTools.map(tool => {
    const tagText = (tool.tags || []).join(' ');
    const platformText = (tool.platforms || []).join(' ');
    const domainText = (tool.domains || []).join(' ');
    const haystack = (
      tool.name + ' ' + tool.description + ' ' + tagText + ' ' + platformText + ' ' + domainText
    ).toLowerCase();

    // One point per keyword found anywhere in the tool text.
    let score = 0;
    for (const keyword of keywords) {
      if (haystack.includes(keyword)) {
        score += 1;
      }
    }

    return { tool, score };
  });

  const scoredTools = scored.filter(entry => entry.score > 0);
  scoredTools.sort((left, right) => right.score - left.score);

  const limit = mode === 'workflow' ? 20 : 8;
  const selectedTools = scoredTools.slice(0, limit).map(entry => entry.tool);

  console.log(`[IMPROVED PIPELINE] Emergency selection: ${selectedTools.length} tools, keywords: ${keywords.slice(0, 5).join(', ')}`);

  // Record the fallback in the audit trail when auditing is switched on.
  if (this.auditConfig.enabled) {
    const topScores = scoredTools
      .slice(0, 5)
      .map(s => ({ name: s.tool.name, score: s.score }));

    this.addAuditEntry(null, 'selection', 'emergency-keyword-selection',
      { keywords: keywords.slice(0, 10), candidateCount: candidateTools.length },
      { selectedCount: selectedTools.length, topScores },
      40, // Moderate confidence for emergency selection
      emergencyStart,
      { selectionMethod: 'emergency_keyword' }
    );
  }

  return {
    selectedTools,
    selectedConcepts: candidateConcepts.slice(0, 3)
  };
}
|
||||
|
||||
/**
 * Resolves after `ms` milliseconds have elapsed (timer-based pause).
 *
 * @param ms number of milliseconds passed to `setTimeout`
 */
private async delay(ms: number): Promise<void> {
  const timer = new Promise<void>(wake => {
    setTimeout(wake, ms);
  });
  return timer;
}
|
||||
@@ -826,7 +829,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
||||
// NEW: Clear any previous temporary audit entries
|
||||
this.tempAuditEntries = [];
|
||||
|
||||
console.log(`[IMPROVED PIPELINE] Starting ${mode} query processing with context continuity and audit trail`);
|
||||
console.log(`[AI PIPELINE] Starting ${mode} query processing with context continuity and audit trail`);
|
||||
|
||||
try {
|
||||
// Stage 1: Get intelligent candidates (embeddings + AI selection)
|
||||
@@ -848,7 +851,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
||||
// NEW: Merge any temporary audit entries from pre-context operations
|
||||
this.mergeTemporaryAuditEntries(context);
|
||||
|
||||
console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
|
||||
console.log(`[AI PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
|
||||
|
||||
// NEW: Add initial audit entry
|
||||
this.addAuditEntry(context, 'initialization', 'pipeline-start',
|
||||
@@ -925,9 +928,9 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
||||
contextContinuityUsed: true
|
||||
};
|
||||
|
||||
console.log(`[IMPROVED PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
|
||||
console.log(`[IMPROVED PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);
|
||||
console.log(`[IMPROVED PIPELINE] Audit trail entries: ${context.auditTrail.length}`);
|
||||
console.log(`[AI PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
|
||||
console.log(`[AI PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);
|
||||
console.log(`[AI PIPELINE] Audit trail entries: ${context.auditTrail.length}`);
|
||||
|
||||
return {
|
||||
recommendation: {
|
||||
@@ -939,7 +942,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
||||
};
|
||||
|
||||
} catch (error) {
|
||||
console.error('[IMPROVED PIPELINE] Processing failed:', error);
|
||||
console.error('[AI PIPELINE] Processing failed:', error);
|
||||
|
||||
// NEW: Ensure temp audit entries are cleared even on error
|
||||
this.tempAuditEntries = [];
|
||||
|
||||
@@ -77,33 +77,8 @@ interface EnhancedCompressedToolsData {
|
||||
domains: any[];
|
||||
phases: any[];
|
||||
'domain-agnostic-software': any[];
|
||||
scenarios?: any[]; // Optional for AI processing
|
||||
scenarios?: any[];
|
||||
skill_levels: any;
|
||||
// Enhanced context for micro-tasks
|
||||
domain_relationships: DomainRelationship[];
|
||||
phase_dependencies: PhaseDependency[];
|
||||
tool_compatibility_matrix: CompatibilityMatrix[];
|
||||
}
|
||||
|
||||
/**
 * Summary of the tools that belong to one domain, as produced by
 * generateDomainRelationships.
 */
interface DomainRelationship {
  /** Identifier of the domain being summarised. */
  domain_id: string;
  /** Number of tools whose `domains` list contains this domain. */
  tool_count: number;
  /** Most frequent tags among those tools (at most five), most frequent first. */
  common_tags: string[];
  /** Number of tools per skill level, keyed by the tool's `skillLevel`. */
  skill_distribution: Record<string, number>;
}
|
||||
|
||||
/**
 * Position of one phase within the ordered phase list, as produced by
 * generatePhaseDependencies.
 */
interface PhaseDependency {
  /** Identifier of the phase. */
  phase_id: string;
  /** 1-based position of the phase in the list. */
  order: number;
  /** Id of the preceding phase, or null when there is none. */
  depends_on: string | null;
  /** Id of the following phase, or null when there is none. */
  enables: string | null;
  /** Whether the phase is flagged as able to run in parallel. */
  is_parallel_capable: boolean;
  /** Coarse duration label such as 'hours-days'. */
  typical_duration: string;
}
|
||||
|
||||
/**
 * One grouping of tool names by a shared characteristic, as produced by
 * generateToolCompatibilityMatrix.
 */
interface CompatibilityMatrix {
  /** Name of the grouping, e.g. 'platform_compatibility' or 'phase_synergy'. */
  type: string;
  /** Tool names per group key (a platform or a phase). */
  groups: Record<string, string[]>;
}
|
||||
|
||||
let cachedData: ToolsData | null = null;
|
||||
@@ -146,104 +121,6 @@ function generateDataVersion(data: any): string {
|
||||
return Math.abs(hash).toString(36);
|
||||
}
|
||||
|
||||
/**
 * Builds one summary per domain: how many tools list the domain, the five
 * most frequent tags among those tools, and the number of tools per skill
 * level.
 *
 * @param domains domain records; only `id` is read
 * @param tools   tool records; `domains`, `tags` and `skillLevel` are read
 * @returns one DomainRelationship per entry of `domains`, in input order
 */
function generateDomainRelationships(domains: any[], tools: any[]): DomainRelationship[] {
  // Counts occurrences on a prototype-less accumulator. With a plain `{}`,
  // a key such as 'constructor' or 'toString' would start from the inherited
  // function instead of 0 and produce a garbage count. The result is copied
  // into an ordinary object so callers get a normal Record with the same
  // key order as before.
  const tally = (keys: string[]): Record<string, number> => {
    const counts: Record<string, number> = Object.create(null);
    for (const key of keys) {
      counts[key] = (counts[key] ?? 0) + 1;
    }
    return { ...counts };
  };

  const relationships: DomainRelationship[] = [];

  for (const domain of domains) {
    const domainTools = tools.filter(tool =>
      tool.domains && tool.domains.includes(domain.id)
    );

    const tagCounts = tally(domainTools.flatMap(tool => tool.tags || []));

    // Highest count first; the sort is stable, so ties keep enumeration order.
    const topTags = Object.entries(tagCounts)
      .sort(([, a], [, b]) => b - a)
      .slice(0, 5)
      .map(([tag]) => tag);

    relationships.push({
      domain_id: domain.id,
      tool_count: domainTools.length,
      common_tags: topTags,
      // A tool without a skillLevel is counted under the key 'undefined'.
      skill_distribution: tally(domainTools.map(tool => String(tool.skillLevel)))
    });
  }

  return relationships;
}
|
||||
|
||||
/**
 * Describes each phase's place in the ordered phase list: its 1-based
 * position, the ids of its neighbours, a parallel-capable flag and a coarse
 * duration label.
 *
 * @param phases phase records in execution order; only `id` is read
 * @returns one PhaseDependency per phase, in input order
 */
function generatePhaseDependencies(phases: any[]): PhaseDependency[] {
  // 'data-collection' and every unlisted phase share the 'hours-days' label.
  const durationFor = (phaseId: unknown): string => {
    switch (phaseId) {
      case 'examination':
        return 'hours-weeks';
      case 'analysis':
        return 'days-weeks';
      default:
        return 'hours-days';
    }
  };

  const result: PhaseDependency[] = [];

  for (const [index, phase] of phases.entries()) {
    const before = phases[index - 1];
    const after = phases[index + 1];

    result.push({
      phase_id: phase.id,
      order: index + 1,
      depends_on: before?.id || null,
      enables: after?.id || null,
      // Some phases can run in parallel
      is_parallel_capable: phase.id === 'examination' || phase.id === 'analysis',
      typical_duration: durationFor(phase.id)
    });
  }

  return result;
}
|
||||
|
||||
/**
 * Groups tool names by platform and by phase.
 *
 * @param tools tool records; `name`, `platforms` and `phases` are read
 * @returns two entries, in this order: 'platform_compatibility' (tool names
 *          per platform) and 'phase_synergy' (tool names per phase)
 */
function generateToolCompatibilityMatrix(tools: any[]): CompatibilityMatrix[] {
  // Collects tool names under each value of the given list field. The
  // accumulator has no prototype: on a plain `{}` a key like 'constructor'
  // looks already present (inherited), so no array would be created and the
  // following push would throw. The result is copied into an ordinary object
  // before it is returned.
  const groupNamesBy = (field: 'platforms' | 'phases'): Record<string, string[]> => {
    const groups: Record<string, string[]> = Object.create(null);

    for (const tool of tools) {
      if (!tool[field]) continue;

      tool[field].forEach((key: string) => {
        if (!groups[key]) groups[key] = [];
        groups[key].push(tool.name);
      });
    }

    return { ...groups };
  };

  return [
    { type: 'platform_compatibility', groups: groupNamesBy('platforms') },
    { type: 'phase_synergy', groups: groupNamesBy('phases') }
  ];
}
|
||||
|
||||
async function loadRawData(): Promise<ToolsData> {
|
||||
if (!cachedData) {
|
||||
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
|
||||
@@ -337,27 +214,16 @@ export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedT
|
||||
};
|
||||
});
|
||||
|
||||
// Enhanced: Add rich context data
|
||||
const domainRelationships = generateDomainRelationships(data.domains, compressedTools);
|
||||
const phaseDependencies = generatePhaseDependencies(data.phases);
|
||||
const toolCompatibilityMatrix = generateToolCompatibilityMatrix(compressedTools);
|
||||
|
||||
cachedCompressedData = {
|
||||
tools: compressedTools,
|
||||
concepts: concepts,
|
||||
domains: data.domains,
|
||||
phases: data.phases,
|
||||
'domain-agnostic-software': data['domain-agnostic-software'],
|
||||
scenarios: data.scenarios, // Include scenarios for context
|
||||
scenarios: data.scenarios,
|
||||
skill_levels: data.skill_levels || {},
|
||||
// Enhanced context for micro-tasks
|
||||
domain_relationships: domainRelationships,
|
||||
phase_dependencies: phaseDependencies,
|
||||
tool_compatibility_matrix: toolCompatibilityMatrix
|
||||
};
|
||||
|
||||
console.log(`[DATA SERVICE] Generated enhanced compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
|
||||
console.log(`[DATA SERVICE] Added context: ${domainRelationships.length} domain relationships, ${phaseDependencies.length} phase dependencies`);
|
||||
}
|
||||
|
||||
return cachedCompressedData;
|
||||
|
||||
@@ -157,15 +157,6 @@ class RateLimitedQueue {
|
||||
return status;
|
||||
}
|
||||
|
||||
/**
 * Updates the stored delay (`delayMs`).
 * Values that are not finite numbers, or that are negative, are ignored and
 * the previous delay is kept.
 *
 * @param ms new delay in milliseconds
 */
setDelay(ms: number): void {
  const isUsable = Number.isFinite(ms) && ms >= 0;
  if (isUsable) {
    this.delayMs = ms;
  }
}
|
||||
|
||||
/** Returns the currently stored delay (`delayMs`). */
getDelay(): number {
  const { delayMs } = this;
  return delayMs;
}
|
||||
|
||||
private async processQueue(): Promise<void> {
|
||||
if (this.isProcessing) {
|
||||
return;
|
||||
|
||||
@@ -1,8 +1,3 @@
|
||||
/**
|
||||
* CONSOLIDATED Tool utility functions for consistent tool operations across the app
|
||||
* Works in both server (Node.js) and client (browser) environments
|
||||
*/
|
||||
|
||||
export interface Tool {
|
||||
name: string;
|
||||
type?: 'software' | 'method' | 'concept';
|
||||
@@ -18,10 +13,6 @@ export interface Tool {
|
||||
related_concepts?: string[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a URL-safe slug from a tool name
|
||||
* Used for URLs, IDs, and file names consistently across the app
|
||||
*/
|
||||
export function createToolSlug(toolName: string): string {
|
||||
if (!toolName || typeof toolName !== 'string') {
|
||||
console.warn('[toolHelpers] Invalid toolName provided to createToolSlug:', toolName);
|
||||
@@ -35,9 +26,6 @@ export function createToolSlug(toolName: string): string {
|
||||
.replace(/^-|-$/g, ''); // Remove leading/trailing hyphens
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds a tool by name or slug from tools array
|
||||
*/
|
||||
export function findToolByIdentifier(tools: Tool[], identifier: string): Tool | undefined {
|
||||
if (!identifier || !Array.isArray(tools)) return undefined;
|
||||
|
||||
@@ -47,23 +35,9 @@ export function findToolByIdentifier(tools: Tool[], identifier: string): Tool |
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Tells whether a tool carries a usable project URL.
 * A URL that is missing, null, empty or whitespace-only does not count.
 */
export function isToolHosted(tool: Tool): boolean {
  const url = tool.projectUrl;

  if (url === undefined || url === null || url === "") {
    return false;
  }

  return url.trim() !== "";
}
|
||||
|
||||
/**
 * Classifies a tool for styling and logic decisions.
 *
 * Checks run in this order: an explicit 'concept' or 'method' type wins,
 * then a hosted project URL, then any license other than 'Proprietary'
 * (→ 'oss'); everything else is 'proprietary'.
 */
export function getToolCategory(tool: Tool): 'concept' | 'method' | 'hosted' | 'oss' | 'proprietary' {
  if (tool.type === 'concept' || tool.type === 'method') {
    return tool.type;
  }

  if (isToolHosted(tool)) {
    return 'hosted';
  }

  // A missing or empty license counts as proprietary.
  const hasOpenLicense = Boolean(tool.license) && tool.license !== 'Proprietary';
  return hasOpenLicense ? 'oss' : 'proprietary';
}
|
||||
Reference in New Issue
Block a user