Merge branch 'forensic-ai' of https://git.cc24.dev/mstoeck3/forensic-pathways into forensic-ai
This commit is contained in:
commit
c267681e7d
111
.env.example
111
.env.example
@ -42,32 +42,40 @@ AI_EMBEDDINGS_MODEL=mistral-embed
|
|||||||
# How many similar tools/concepts embeddings search returns as candidates
|
# How many similar tools/concepts embeddings search returns as candidates
|
||||||
# 🔍 This is the FIRST filter - vector similarity matching
|
# 🔍 This is the FIRST filter - vector similarity matching
|
||||||
# Lower = faster, less comprehensive | Higher = slower, more comprehensive
|
# Lower = faster, less comprehensive | Higher = slower, more comprehensive
|
||||||
AI_EMBEDDING_CANDIDATES=40
|
AI_EMBEDDING_CANDIDATES=50
|
||||||
|
|
||||||
# Minimum similarity score threshold (0.0-1.0)
|
# Minimum similarity score threshold (0.0-1.0)
|
||||||
# Lower = more results but less relevant | Higher = fewer but more relevant
|
# Lower = more results but less relevant | Higher = fewer but more relevant
|
||||||
AI_SIMILARITY_THRESHOLD=0.3
|
AI_SIMILARITY_THRESHOLD=0.3
|
||||||
|
|
||||||
|
# === AI SELECTION FROM EMBEDDINGS ===
|
||||||
|
# When embeddings are enabled, how many top tools to send with full context
|
||||||
|
# 🎯 This is the SECOND filter - take best N from embeddings results
|
||||||
|
AI_EMBEDDING_SELECTION_LIMIT=30
|
||||||
|
AI_EMBEDDING_CONCEPTS_LIMIT=15
|
||||||
|
|
||||||
|
# Maximum tools/concepts sent to AI when embeddings are DISABLED
|
||||||
|
# Set to 0 for no limit (WARNING: may cause token overflow with large datasets)
|
||||||
|
AI_NO_EMBEDDINGS_TOOL_LIMIT=0
|
||||||
|
AI_NO_EMBEDDINGS_CONCEPT_LIMIT=0
|
||||||
|
|
||||||
# === AI SELECTION STAGE ===
|
# === AI SELECTION STAGE ===
|
||||||
# Maximum tools the AI can select from embedding candidates
|
# Maximum tools the AI can select from embedding candidates
|
||||||
# 🤖 This is the SECOND filter - AI intelligent selection
|
# 🤖 This is the SECOND filter - AI intelligent selection
|
||||||
# Should be ≤ AI_EMBEDDING_CANDIDATES
|
# Should be ≤ AI_EMBEDDING_CANDIDATES
|
||||||
AI_MAX_SELECTED_ITEMS=25
|
AI_MAX_SELECTED_ITEMS=25
|
||||||
|
|
||||||
# Maximum tools sent to AI for detailed analysis (micro-tasks)
|
# === EMBEDDINGS EFFICIENCY THRESHOLDS ===
|
||||||
# 📋 This is the FINAL context size sent to AI models
|
# Minimum tools required for embeddings to be considered useful
|
||||||
# Lower = less AI context, faster responses | Higher = more context, slower
|
AI_EMBEDDINGS_MIN_TOOLS=8
|
||||||
AI_MAX_TOOLS_TO_ANALYZE=20
|
|
||||||
|
|
||||||
# Maximum concepts sent to AI for background knowledge selection
|
# Maximum ratio (0.0-1.0) of total tools that embeddings can return to still be considered "filtering"
|
||||||
# 📚 Concepts are smaller than tools, so can be higher
|
AI_EMBEDDINGS_MAX_REDUCTION_RATIO=0.75
|
||||||
AI_MAX_CONCEPTS_TO_ANALYZE=10
|
|
||||||
|
|
||||||
# === CONTEXT FLOW SUMMARY ===
|
# === CONTEXT FLOW SUMMARY ===
|
||||||
# 1. Vector Search: 111 total tools → AI_EMBEDDING_CANDIDATES (40) most similar
|
# 1. Vector Search: 111 total tools → AI_EMBEDDING_CANDIDATES (50) most similar
|
||||||
# 2. AI Selection: 40 candidates → AI_MAX_SELECTED_ITEMS (25) best matches
|
# 2. AI Selection: 50 candidates → AI_MAX_SELECTED_ITEMS (25) best matches
|
||||||
# 3. AI Analysis: 25 selected → AI_MAX_TOOLS_TO_ANALYZE (20) for micro-tasks
|
# 3. Final Output: Recommendations based on analyzed subset
|
||||||
# 4. Final Output: Recommendations based on analyzed subset
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# 4. AI PERFORMANCE & RATE LIMITING
|
# 4. AI PERFORMANCE & RATE LIMITING
|
||||||
@ -95,23 +103,21 @@ AI_EMBEDDINGS_BATCH_SIZE=10
|
|||||||
# Delay between embedding batches (milliseconds)
|
# Delay between embedding batches (milliseconds)
|
||||||
AI_EMBEDDINGS_BATCH_DELAY_MS=1000
|
AI_EMBEDDINGS_BATCH_DELAY_MS=1000
|
||||||
|
|
||||||
|
# Maximum tools sent to AI for detailed analysis (micro-tasks)
|
||||||
|
AI_MAX_TOOLS_TO_ANALYZE=20
|
||||||
|
AI_MAX_CONCEPTS_TO_ANALYZE=10
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# 5. AI CONTEXT & TOKEN MANAGEMENT
|
# 5. AI CONTEXT & TOKEN MANAGEMENT
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
# Maximum context tokens to maintain across micro-tasks
|
# Maximum context tokens to maintain across micro-tasks
|
||||||
# Controls how much conversation history is preserved between AI calls
|
# Controls how much conversation history is preserved between AI calls
|
||||||
AI_MAX_CONTEXT_TOKENS=3000
|
AI_MAX_CONTEXT_TOKENS=4000
|
||||||
|
|
||||||
# Maximum tokens per individual AI prompt
|
# Maximum tokens per individual AI prompt
|
||||||
# Larger = more context per call | Smaller = faster responses
|
# Larger = more context per call | Smaller = faster responses
|
||||||
AI_MAX_PROMPT_TOKENS=1200
|
AI_MAX_PROMPT_TOKENS=1500
|
||||||
|
|
||||||
# Timeout for individual micro-tasks (milliseconds)
|
|
||||||
AI_MICRO_TASK_TIMEOUT_MS=25000
|
|
||||||
|
|
||||||
# Maximum size of the processing queue
|
|
||||||
AI_QUEUE_MAX_SIZE=50
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# 6. AUTHENTICATION & AUTHORIZATION (OPTIONAL)
|
# 6. AUTHENTICATION & AUTHORIZATION (OPTIONAL)
|
||||||
@ -172,7 +178,7 @@ GIT_API_TOKEN=your-git-api-token
|
|||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
# Enable detailed audit trail of AI decision-making
|
# Enable detailed audit trail of AI decision-making
|
||||||
FORENSIC_AUDIT_ENABLED=false
|
FORENSIC_AUDIT_ENABLED=true
|
||||||
|
|
||||||
# Audit detail level: minimal, standard, verbose
|
# Audit detail level: minimal, standard, verbose
|
||||||
FORENSIC_AUDIT_DETAIL_LEVEL=standard
|
FORENSIC_AUDIT_DETAIL_LEVEL=standard
|
||||||
@ -183,15 +189,6 @@ FORENSIC_AUDIT_RETENTION_HOURS=24
|
|||||||
# Maximum audit entries per request
|
# Maximum audit entries per request
|
||||||
FORENSIC_AUDIT_MAX_ENTRIES=50
|
FORENSIC_AUDIT_MAX_ENTRIES=50
|
||||||
|
|
||||||
# Enable detailed AI pipeline logging
|
|
||||||
AI_PIPELINE_DEBUG=false
|
|
||||||
|
|
||||||
# Enable performance metrics collection
|
|
||||||
AI_PERFORMANCE_METRICS=false
|
|
||||||
|
|
||||||
# Enable detailed micro-task debugging
|
|
||||||
AI_MICRO_TASK_DEBUG=false
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# 10. QUALITY CONTROL & BIAS DETECTION (ADVANCED)
|
# 10. QUALITY CONTROL & BIAS DETECTION (ADVANCED)
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
@ -207,61 +204,20 @@ CONFIDENCE_MINIMUM_THRESHOLD=40
|
|||||||
CONFIDENCE_MEDIUM_THRESHOLD=60
|
CONFIDENCE_MEDIUM_THRESHOLD=60
|
||||||
CONFIDENCE_HIGH_THRESHOLD=80
|
CONFIDENCE_HIGH_THRESHOLD=80
|
||||||
|
|
||||||
# Bias detection settings
|
|
||||||
BIAS_DETECTION_ENABLED=false
|
|
||||||
BIAS_POPULARITY_THRESHOLD=0.7
|
|
||||||
BIAS_DIVERSITY_MINIMUM=0.6
|
|
||||||
BIAS_CELEBRITY_TOOLS=""
|
|
||||||
|
|
||||||
# Quality control thresholds
|
|
||||||
QUALITY_MIN_RESPONSE_LENGTH=50
|
|
||||||
QUALITY_MIN_SELECTION_COUNT=1
|
|
||||||
QUALITY_MAX_PROCESSING_TIME_MS=30000
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# 11. USER INTERFACE DEFAULTS (OPTIONAL)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Default UI behavior (users can override)
|
|
||||||
UI_SHOW_AUDIT_TRAIL_DEFAULT=false
|
|
||||||
UI_SHOW_CONFIDENCE_SCORES=true
|
|
||||||
UI_SHOW_BIAS_WARNINGS=true
|
|
||||||
UI_AUDIT_TRAIL_COLLAPSIBLE=true
|
|
||||||
|
|
||||||
# ============================================================================
|
|
||||||
# 12. CACHING & PERFORMANCE (OPTIONAL)
|
|
||||||
# ============================================================================
|
|
||||||
|
|
||||||
# Cache AI responses (milliseconds)
|
|
||||||
AI_RESPONSE_CACHE_TTL_MS=3600000
|
|
||||||
|
|
||||||
# Queue cleanup interval (milliseconds)
|
|
||||||
AI_QUEUE_CLEANUP_INTERVAL_MS=300000
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# PERFORMANCE TUNING PRESETS
|
# PERFORMANCE TUNING PRESETS
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
# 🚀 FOR FASTER RESPONSES (less comprehensive):
|
# 🚀 FOR FASTER RESPONSES (prevent token overflow):
|
||||||
# AI_EMBEDDING_CANDIDATES=20
|
# AI_NO_EMBEDDINGS_TOOL_LIMIT=25
|
||||||
# AI_MAX_SELECTED_ITEMS=15
|
# AI_NO_EMBEDDINGS_CONCEPT_LIMIT=10
|
||||||
# AI_MAX_TOOLS_TO_ANALYZE=10
|
|
||||||
# AI_MICRO_TASK_DELAY_MS=200
|
|
||||||
# AI_MAX_CONTEXT_TOKENS=2000
|
|
||||||
|
|
||||||
# 🎯 FOR BETTER QUALITY (more comprehensive):
|
# 🎯 FOR FULL DATABASE ACCESS (risk of truncation):
|
||||||
# AI_EMBEDDING_CANDIDATES=60
|
# AI_NO_EMBEDDINGS_TOOL_LIMIT=0
|
||||||
# AI_MAX_SELECTED_ITEMS=40
|
# AI_NO_EMBEDDINGS_CONCEPT_LIMIT=0
|
||||||
# AI_MAX_TOOLS_TO_ANALYZE=30
|
|
||||||
# AI_MICRO_TASK_DELAY_MS=800
|
|
||||||
# AI_MAX_CONTEXT_TOKENS=4000
|
|
||||||
|
|
||||||
# 🔋 FOR LOW-POWER SYSTEMS (minimal resources):
|
# 🔋 FOR LOW-POWER SYSTEMS:
|
||||||
# AI_EMBEDDING_CANDIDATES=15
|
# AI_NO_EMBEDDINGS_TOOL_LIMIT=15
|
||||||
# AI_MAX_SELECTED_ITEMS=10
|
|
||||||
# AI_MAX_TOOLS_TO_ANALYZE=8
|
|
||||||
# AI_RATE_LIMIT_MAX_REQUESTS=2
|
|
||||||
# AI_MICRO_TASK_DELAY_MS=1000
|
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
# FEATURE COMBINATIONS GUIDE
|
# FEATURE COMBINATIONS GUIDE
|
||||||
@ -285,7 +241,6 @@ AI_QUEUE_CLEANUP_INTERVAL_MS=300000
|
|||||||
|
|
||||||
# 🔍 WITH FULL MONITORING:
|
# 🔍 WITH FULL MONITORING:
|
||||||
# - Enable FORENSIC_AUDIT_ENABLED=true
|
# - Enable FORENSIC_AUDIT_ENABLED=true
|
||||||
# - Enable AI_PIPELINE_DEBUG=true
|
|
||||||
# - Configure audit retention and detail level
|
# - Configure audit retention and detail level
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
@ -15,7 +15,7 @@ const domainAgnosticSoftware = data['domain-agnostic-software'] || [];
|
|||||||
<path d="M9 11H5a2 2 0 0 0-2 2v7a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7a2 2 0 0 0-2-2h-4"/>
|
<path d="M9 11H5a2 2 0 0 0-2 2v7a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7a2 2 0 0 0-2-2h-4"/>
|
||||||
<path d="M9 11V7a3 3 0 0 1 6 0v4"/>
|
<path d="M9 11V7a3 3 0 0 1 6 0v4"/>
|
||||||
</svg>
|
</svg>
|
||||||
KI-gestützte Workflow-Empfehlungen
|
Forensic AI
|
||||||
</h2>
|
</h2>
|
||||||
<p id="ai-description" class="text-muted" style="max-width: 700px; margin: 0 auto; line-height: 1.6;">
|
<p id="ai-description" class="text-muted" style="max-width: 700px; margin: 0 auto; line-height: 1.6;">
|
||||||
Beschreiben Sie Ihr forensisches Szenario und erhalten Sie maßgeschneiderte Workflow-Empfehlungen
|
Beschreiben Sie Ihr forensisches Szenario und erhalten Sie maßgeschneiderte Workflow-Empfehlungen
|
||||||
@ -169,16 +169,16 @@ const domainAgnosticSoftware = data['domain-agnostic-software'] || [];
|
|||||||
<!-- Micro-task Progress -->
|
<!-- Micro-task Progress -->
|
||||||
<div id="micro-task-progress" class="micro-task-progress hidden">
|
<div id="micro-task-progress" class="micro-task-progress hidden">
|
||||||
<div class="micro-task-header">
|
<div class="micro-task-header">
|
||||||
<span class="micro-task-label">🔬 Micro-Task Analyse</span>
|
<span class="micro-task-label">🔬 micro-Agent-Analysis</span>
|
||||||
<span id="micro-task-counter" class="micro-task-counter">1/6</span>
|
<span id="micro-task-counter" class="micro-task-counter">1/6</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="micro-task-steps">
|
<div class="micro-task-steps">
|
||||||
<div class="micro-step" data-step="scenario">📋 Szenario</div>
|
<div class="micro-step" data-step="scenario">📋 Problemanalyse</div>
|
||||||
<div class="micro-step" data-step="approach">🎯 Ansatz</div>
|
<div class="micro-step" data-step="approach">🎯 Ermittlungsansatz</div>
|
||||||
<div class="micro-step" data-step="considerations">⚠️ Kritisches</div>
|
<div class="micro-step" data-step="considerations">⚠️ Herausforderungen</div>
|
||||||
<div class="micro-step" data-step="tools">🔧 Tools</div>
|
<div class="micro-step" data-step="tools">🔧 Methoden</div>
|
||||||
<div class="micro-step" data-step="knowledge">📚 Wissen</div>
|
<div class="micro-step" data-step="knowledge">📚 Evaluation</div>
|
||||||
<div class="micro-step" data-step="final">✅ Final</div>
|
<div class="micro-step" data-step="final">✅ Audit-Trail</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@ -292,13 +292,13 @@ class AIQueryInterface {
|
|||||||
return {
|
return {
|
||||||
workflow: {
|
workflow: {
|
||||||
placeholder: "Beschreiben Sie Ihr forensisches Szenario... z.B. 'Verdacht auf Ransomware-Angriff auf Windows-Domänencontroller'",
|
placeholder: "Beschreiben Sie Ihr forensisches Szenario... z.B. 'Verdacht auf Ransomware-Angriff auf Windows-Domänencontroller'",
|
||||||
description: "Beschreiben Sie Ihr forensisches Szenario und erhalten Sie maßgeschneiderte Workflow-Empfehlungen.",
|
description: "Beschreiben Sie Ihre Untersuchungssituation und erhalten Empfehlungen für alle Phasen der Untersuchung.",
|
||||||
submitText: "Empfehlungen generieren",
|
submitText: "Empfehlungen generieren",
|
||||||
loadingText: "Analysiere Szenario und generiere Empfehlungen..."
|
loadingText: "Analysiere Szenario und generiere Empfehlungen..."
|
||||||
},
|
},
|
||||||
tool: {
|
tool: {
|
||||||
placeholder: "Beschreiben Sie Ihr Problem... z.B. 'Analyse von Android-Backups mit WhatsApp-Nachrichten'",
|
placeholder: "Beschreiben Sie Ihr Problem... z.B. 'Analyse von Android-Backups mit WhatsApp-Nachrichten'",
|
||||||
description: "Beschreiben Sie Ihr Problem und erhalten Sie 1-3 gezielt passende Empfehlungen.",
|
description: "Beschreiben Sie Ihre Untersuchungssituation und erhalten Empfehlungen für eine spezifische Aufgabenstellung.",
|
||||||
submitText: "Empfehlungen finden",
|
submitText: "Empfehlungen finden",
|
||||||
loadingText: "Analysiere Anforderungen und suche passende Methode..."
|
loadingText: "Analysiere Anforderungen und suche passende Methode..."
|
||||||
}
|
}
|
||||||
@ -706,7 +706,7 @@ class AIQueryInterface {
|
|||||||
|
|
||||||
const html = `
|
const html = `
|
||||||
<div class="workflow-container">
|
<div class="workflow-container">
|
||||||
${this.renderHeader('Empfohlener DFIR-Workflow', originalQuery)}
|
${this.renderHeader('Untersuchungsansatz', originalQuery)}
|
||||||
${this.renderContextualAnalysis(recommendation, 'workflow')}
|
${this.renderContextualAnalysis(recommendation, 'workflow')}
|
||||||
${this.renderBackgroundKnowledge(recommendation.background_knowledge)}
|
${this.renderBackgroundKnowledge(recommendation.background_knowledge)}
|
||||||
${this.renderWorkflowPhases(toolsByPhase, phaseOrder, phaseNames)}
|
${this.renderWorkflowPhases(toolsByPhase, phaseOrder, phaseNames)}
|
||||||
@ -721,7 +721,7 @@ class AIQueryInterface {
|
|||||||
displayToolResults(recommendation, originalQuery) {
|
displayToolResults(recommendation, originalQuery) {
|
||||||
const html = `
|
const html = `
|
||||||
<div class="tool-results-container">
|
<div class="tool-results-container">
|
||||||
${this.renderHeader('Passende Empfehlungen', originalQuery)}
|
${this.renderHeader('Handlungsempfehlung', originalQuery)}
|
||||||
${this.renderContextualAnalysis(recommendation, 'tool')}
|
${this.renderContextualAnalysis(recommendation, 'tool')}
|
||||||
${this.renderBackgroundKnowledge(recommendation.background_knowledge)}
|
${this.renderBackgroundKnowledge(recommendation.background_knowledge)}
|
||||||
${this.renderToolRecommendations(recommendation.recommended_tools)}
|
${this.renderToolRecommendations(recommendation.recommended_tools)}
|
||||||
|
@ -1,126 +0,0 @@
|
|||||||
// src/config/forensic.config.ts
|
|
||||||
// Centralized configuration for forensic RAG enhancements
|
|
||||||
|
|
||||||
export const FORENSIC_CONFIG = {
|
|
||||||
audit: {
|
|
||||||
enabled: process.env.FORENSIC_AUDIT_ENABLED === 'true',
|
|
||||||
detailLevel: (process.env.FORENSIC_AUDIT_DETAIL_LEVEL as 'minimal' | 'standard' | 'verbose') || 'standard',
|
|
||||||
retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10),
|
|
||||||
maxEntriesPerRequest: parseInt(process.env.FORENSIC_AUDIT_MAX_ENTRIES || '50', 10)
|
|
||||||
},
|
|
||||||
confidence: {
|
|
||||||
embeddingsWeight: parseFloat(process.env.CONFIDENCE_EMBEDDINGS_WEIGHT || '0.3'),
|
|
||||||
consensusWeight: parseFloat(process.env.CONFIDENCE_CONSENSUS_WEIGHT || '0.25'),
|
|
||||||
domainMatchWeight: parseFloat(process.env.CONFIDENCE_DOMAIN_MATCH_WEIGHT || '0.25'),
|
|
||||||
freshnessWeight: parseFloat(process.env.CONFIDENCE_FRESHNESS_WEIGHT || '0.2'),
|
|
||||||
minimumThreshold: parseInt(process.env.CONFIDENCE_MINIMUM_THRESHOLD || '40', 10),
|
|
||||||
highThreshold: parseInt(process.env.CONFIDENCE_HIGH_THRESHOLD || '80', 10),
|
|
||||||
mediumThreshold: parseInt(process.env.CONFIDENCE_MEDIUM_THRESHOLD || '60', 10)
|
|
||||||
},
|
|
||||||
bias: {
|
|
||||||
enabled: process.env.BIAS_DETECTION_ENABLED === 'true',
|
|
||||||
popularityThreshold: parseFloat(process.env.BIAS_POPULARITY_THRESHOLD || '0.7'),
|
|
||||||
diversityMinimum: parseFloat(process.env.BIAS_DIVERSITY_MINIMUM || '0.6'),
|
|
||||||
domainMismatchThreshold: parseFloat(process.env.BIAS_DOMAIN_MISMATCH_THRESHOLD || '0.3'),
|
|
||||||
warningThreshold: parseInt(process.env.BIAS_WARNING_THRESHOLD || '3', 10),
|
|
||||||
celebrityTools: (process.env.BIAS_CELEBRITY_TOOLS || 'Volatility 3,Wireshark,Autopsy,Maltego').split(',').map(t => t.trim())
|
|
||||||
},
|
|
||||||
// Quality thresholds for various metrics
|
|
||||||
quality: {
|
|
||||||
minResponseLength: parseInt(process.env.QUALITY_MIN_RESPONSE_LENGTH || '50', 10),
|
|
||||||
minSelectionCount: parseInt(process.env.QUALITY_MIN_SELECTION_COUNT || '1', 10),
|
|
||||||
maxProcessingTime: parseInt(process.env.QUALITY_MAX_PROCESSING_TIME_MS || '30000', 10)
|
|
||||||
},
|
|
||||||
// Display preferences
|
|
||||||
ui: {
|
|
||||||
showAuditTrailByDefault: process.env.UI_SHOW_AUDIT_TRAIL_DEFAULT === 'true',
|
|
||||||
showConfidenceScores: process.env.UI_SHOW_CONFIDENCE_SCORES !== 'false',
|
|
||||||
showBiasWarnings: process.env.UI_SHOW_BIAS_WARNINGS !== 'false',
|
|
||||||
auditTrailCollapsible: process.env.UI_AUDIT_TRAIL_COLLAPSIBLE !== 'false'
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// Validation function to ensure configuration is valid
|
|
||||||
export function validateForensicConfig(): { valid: boolean; errors: string[] } {
|
|
||||||
const errors: string[] = [];
|
|
||||||
|
|
||||||
// Validate audit configuration
|
|
||||||
if (FORENSIC_CONFIG.audit.retentionHours < 1 || FORENSIC_CONFIG.audit.retentionHours > 168) {
|
|
||||||
errors.push('FORENSIC_AUDIT_RETENTION_HOURS must be between 1 and 168 (1 week)');
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!['minimal', 'standard', 'verbose'].includes(FORENSIC_CONFIG.audit.detailLevel)) {
|
|
||||||
errors.push('FORENSIC_AUDIT_DETAIL_LEVEL must be one of: minimal, standard, verbose');
|
|
||||||
}
|
|
||||||
|
|
||||||
// Validate confidence weights sum to approximately 1.0
|
|
||||||
const weightSum = FORENSIC_CONFIG.confidence.embeddingsWeight +
|
|
||||||
FORENSIC_CONFIG.confidence.consensusWeight +
|
|
||||||
FORENSIC_CONFIG.confidence.domainMatchWeight +
|
|
||||||
FORENSIC_CONFIG.confidence.freshnessWeight;
|
|
||||||
|
|
||||||
if (Math.abs(weightSum - 1.0) > 0.05) {
|
|
||||||
errors.push(`Confidence weights must sum to 1.0 (currently ${weightSum.toFixed(3)})`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Validate threshold ranges
|
|
||||||
if (FORENSIC_CONFIG.confidence.minimumThreshold < 0 || FORENSIC_CONFIG.confidence.minimumThreshold > 100) {
|
|
||||||
errors.push('CONFIDENCE_MINIMUM_THRESHOLD must be between 0 and 100');
|
|
||||||
}
|
|
||||||
|
|
||||||
if (FORENSIC_CONFIG.confidence.highThreshold <= FORENSIC_CONFIG.confidence.mediumThreshold) {
|
|
||||||
errors.push('CONFIDENCE_HIGH_THRESHOLD must be greater than CONFIDENCE_MEDIUM_THRESHOLD');
|
|
||||||
}
|
|
||||||
|
|
||||||
// Validate bias thresholds
|
|
||||||
if (FORENSIC_CONFIG.bias.popularityThreshold < 0 || FORENSIC_CONFIG.bias.popularityThreshold > 1) {
|
|
||||||
errors.push('BIAS_POPULARITY_THRESHOLD must be between 0 and 1');
|
|
||||||
}
|
|
||||||
|
|
||||||
if (FORENSIC_CONFIG.bias.diversityMinimum < 0 || FORENSIC_CONFIG.bias.diversityMinimum > 1) {
|
|
||||||
errors.push('BIAS_DIVERSITY_MINIMUM must be between 0 and 1');
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
valid: errors.length === 0,
|
|
||||||
errors
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper functions for configuration access
|
|
||||||
export function isAuditEnabled(): boolean {
|
|
||||||
return FORENSIC_CONFIG.audit.enabled;
|
|
||||||
}
|
|
||||||
|
|
||||||
export function getAuditDetailLevel(): 'minimal' | 'standard' | 'verbose' {
|
|
||||||
return FORENSIC_CONFIG.audit.detailLevel;
|
|
||||||
}
|
|
||||||
|
|
||||||
export function getConfidenceThresholds() {
|
|
||||||
return {
|
|
||||||
minimum: FORENSIC_CONFIG.confidence.minimumThreshold,
|
|
||||||
medium: FORENSIC_CONFIG.confidence.mediumThreshold,
|
|
||||||
high: FORENSIC_CONFIG.confidence.highThreshold
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
export function isBiasDetectionEnabled(): boolean {
|
|
||||||
return FORENSIC_CONFIG.bias.enabled;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize and validate configuration on module load
|
|
||||||
const configValidation = validateForensicConfig();
|
|
||||||
if (!configValidation.valid) {
|
|
||||||
console.warn('[FORENSIC CONFIG] Configuration validation failed:', configValidation.errors);
|
|
||||||
// In development, we might want to throw an error
|
|
||||||
if (process.env.NODE_ENV === 'development') {
|
|
||||||
throw new Error(`Forensic configuration invalid: ${configValidation.errors.join(', ')}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log('[FORENSIC CONFIG] Configuration loaded:', {
|
|
||||||
auditEnabled: FORENSIC_CONFIG.audit.enabled,
|
|
||||||
confidenceEnabled: true, // Always enabled
|
|
||||||
biasDetectionEnabled: FORENSIC_CONFIG.bias.enabled,
|
|
||||||
detailLevel: FORENSIC_CONFIG.audit.detailLevel
|
|
||||||
});
|
|
@ -113,64 +113,6 @@ tools:
|
|||||||
accessType: download
|
accessType: download
|
||||||
license: VSL
|
license: VSL
|
||||||
knowledgebase: false
|
knowledgebase: false
|
||||||
- name: TheHive 5
|
|
||||||
icon: 🐝
|
|
||||||
type: software
|
|
||||||
description: >-
|
|
||||||
Die zentrale Incident-Response-Plattform orchestriert komplexe
|
|
||||||
Sicherheitsvorfälle vom ersten Alert bis zum Abschlussbericht. Jeder Case
|
|
||||||
wird strukturiert durch Observables (IOCs), Tasks und Zeitleisten
|
|
||||||
abgebildet. Die Cortex-Integration automatisiert Analysen durch Dutzende
|
|
||||||
Analyzer - von VirusTotal-Checks bis Sandbox-Detonation.
|
|
||||||
MISP-Synchronisation reichert Cases mit Threat-Intelligence an. Das
|
|
||||||
ausgeklügelte Rollen- und Rechtesystem ermöglicht sichere Zusammenarbeit
|
|
||||||
zwischen SOC-Analysten, Forensikern und Management. Templates
|
|
||||||
standardisieren Response-Prozesse nach Incident-Typ. Die RESTful API
|
|
||||||
integriert nahtlos mit SIEM, SOAR und Ticketing-Systemen. Metrics und
|
|
||||||
KPIs messen die Team-Performance. Die Community Edition bleibt kostenlos
|
|
||||||
für kleinere Teams, während Gold/Platinum-Lizenzen Enterprise-Features
|
|
||||||
bieten.
|
|
||||||
domains:
|
|
||||||
- incident-response
|
|
||||||
- static-investigations
|
|
||||||
- malware-analysis
|
|
||||||
- network-forensics
|
|
||||||
- fraud-investigation
|
|
||||||
phases:
|
|
||||||
- data-collection
|
|
||||||
- examination
|
|
||||||
- analysis
|
|
||||||
- reporting
|
|
||||||
platforms:
|
|
||||||
- Web
|
|
||||||
related_software:
|
|
||||||
- MISP
|
|
||||||
- Cortex
|
|
||||||
- Elasticsearch
|
|
||||||
domain-agnostic-software:
|
|
||||||
- collaboration-general
|
|
||||||
skillLevel: intermediate
|
|
||||||
accessType: server-based
|
|
||||||
url: https://strangebee.com/thehive/
|
|
||||||
projectUrl: ''
|
|
||||||
license: Community Edition (Discontinued) / Commercial
|
|
||||||
knowledgebase: false
|
|
||||||
statusUrl: https://uptime.example.lab/api/badge/1/status
|
|
||||||
tags:
|
|
||||||
- web-interface
|
|
||||||
- case-management
|
|
||||||
- collaboration
|
|
||||||
- api
|
|
||||||
- workflow
|
|
||||||
- multi-user-support
|
|
||||||
- cortex-analyzer
|
|
||||||
- misp-integration
|
|
||||||
- playbooks
|
|
||||||
- metrics
|
|
||||||
- rbac
|
|
||||||
- template-driven
|
|
||||||
related_concepts:
|
|
||||||
- Digital Evidence Chain of Custody
|
|
||||||
- name: MISP
|
- name: MISP
|
||||||
icon: 🌐
|
icon: 🌐
|
||||||
type: software
|
type: software
|
||||||
@ -223,7 +165,6 @@ tools:
|
|||||||
related_concepts:
|
related_concepts:
|
||||||
- Hash Functions & Digital Signatures
|
- Hash Functions & Digital Signatures
|
||||||
related_software:
|
related_software:
|
||||||
- TheHive 5
|
|
||||||
- Cortex
|
- Cortex
|
||||||
- OpenCTI
|
- OpenCTI
|
||||||
- name: DFIR-IRIS
|
- name: DFIR-IRIS
|
||||||
@ -260,7 +201,6 @@ tools:
|
|||||||
platforms:
|
platforms:
|
||||||
- Web
|
- Web
|
||||||
related_software:
|
related_software:
|
||||||
- TheHive 5
|
|
||||||
- MISP
|
- MISP
|
||||||
- OpenCTI
|
- OpenCTI
|
||||||
domain-agnostic-software:
|
domain-agnostic-software:
|
||||||
@ -3427,6 +3367,244 @@ tools:
|
|||||||
accessType: download
|
accessType: download
|
||||||
license: "MPL\_/ AGPL"
|
license: "MPL\_/ AGPL"
|
||||||
knowledgebase: false
|
knowledgebase: false
|
||||||
|
- name: ShadowExplorer
|
||||||
|
icon: 🗂️
|
||||||
|
type: software
|
||||||
|
description: >-
|
||||||
|
Das schlanke Windows-Tool macht Volume-Shadow-Copy-Snapshots auch in Home-Editionen sichtbar und erlaubt das komfortable Durchstöbern sowie Wiederherstellen früherer Datei-Versionen. Damit lassen sich versehentlich gelöschte oder überschriebene Dateien in Sekunden zurückholen – geeignet für schnelle Triage und klassische Datenträgerforensik.
|
||||||
|
domains:
|
||||||
|
- static-investigations
|
||||||
|
- incident-response
|
||||||
|
phases:
|
||||||
|
- examination
|
||||||
|
- analysis
|
||||||
|
platforms:
|
||||||
|
- Windows
|
||||||
|
related_software:
|
||||||
|
- OSFMount
|
||||||
|
- PhotoRec
|
||||||
|
domain-agnostic-software: null
|
||||||
|
skillLevel: novice
|
||||||
|
accessType: download
|
||||||
|
url: https://www.shadowexplorer.com/
|
||||||
|
license: Freeware
|
||||||
|
knowledgebase: false
|
||||||
|
tags:
|
||||||
|
- gui
|
||||||
|
- shadow-copy
|
||||||
|
- snapshot-browsing
|
||||||
|
- file-recovery
|
||||||
|
- previous-versions
|
||||||
|
- scenario:file_recovery
|
||||||
|
- point-in-time-restore
|
||||||
|
related_concepts:
|
||||||
|
- Digital Evidence Chain of Custody
|
||||||
|
|
||||||
|
|
||||||
|
- name: Sonic Visualiser
|
||||||
|
icon: 🎵
|
||||||
|
type: software
|
||||||
|
description: >-
|
||||||
|
Die Open-Source-Audio-Analyse-Suite wird in der Forensik eingesetzt,
|
||||||
|
um Wave- und Kompressionsformate bis auf Sample-Ebene zu untersuchen.
|
||||||
|
Spektrogramm-Visualisierung, Zeit-/Frequenz-Annotationen und
|
||||||
|
Transkriptions-Plugins (Vamp) helfen, Manipulationen wie
|
||||||
|
Bandpass-Filter, Time-Stretching oder Insert-Edits nachzuweisen.
|
||||||
|
FFT- und Mel-Spectral-Views decken versteckte Audio-Watermarks oder
|
||||||
|
Steganografie auf. Export-Funktionen in CSV/JSON erlauben die
|
||||||
|
Weiterverarbeitung in Python-Notebooks oder SIEM-Pipelines.
|
||||||
|
Ideal für Voice-Authentication-Checks, Deep-Fake-Erkennung
|
||||||
|
und Beweisaufbereitung vor Gericht.
|
||||||
|
skillLevel: intermediate
|
||||||
|
url: https://www.sonicvisualiser.org/
|
||||||
|
domains:
|
||||||
|
- static-investigations
|
||||||
|
- fraud-investigation
|
||||||
|
phases:
|
||||||
|
- examination
|
||||||
|
- analysis
|
||||||
|
- reporting
|
||||||
|
platforms:
|
||||||
|
- Windows
|
||||||
|
- Linux
|
||||||
|
- macOS
|
||||||
|
accessType: download
|
||||||
|
license: GPL-2.0
|
||||||
|
knowledgebase: false
|
||||||
|
tags:
|
||||||
|
- gui
|
||||||
|
- audio-forensics
|
||||||
|
- spectrogram
|
||||||
|
- plugin-support
|
||||||
|
- annotation
|
||||||
|
- csv-export
|
||||||
|
related_concepts: []
|
||||||
|
related_software:
|
||||||
|
- Audacity
|
||||||
|
|
||||||
|
- name: Dissect
|
||||||
|
icon: 🧩
|
||||||
|
type: software
|
||||||
|
description: >-
|
||||||
|
Fox-ITs Python-Framework abstrahiert Windows- und Linux-Speicherabbilder
|
||||||
|
in virtuelle Objekte (Prozesse, Dateien, Registry, Kernel-Strukturen),
|
||||||
|
ohne zuvor ein Profil definieren zu müssen. Modularer
|
||||||
|
Hypervisor-Layer erlaubt das Mounten und gleichzeitige Analysieren
|
||||||
|
mehrerer Memory-Dumps – perfekt für großflächige Incident-Response.
|
||||||
|
Plugins dekodieren PTEs, handle tables, APC-Queues und liefern
|
||||||
|
YARA-kompatible Scans. Die Zero-Copy-Architektur beschleunigt Queries auf
|
||||||
|
Multi-GB-Images signifikant. Unterstützt Windows 11 24H2-Kernel sowie
|
||||||
|
Linux 6.x-schichten ab Juli 2025.
|
||||||
|
skillLevel: advanced
|
||||||
|
url: https://github.com/fox-it/dissect
|
||||||
|
domains:
|
||||||
|
- incident-response
|
||||||
|
- malware-analysis
|
||||||
|
- static-investigations
|
||||||
|
phases:
|
||||||
|
- examination
|
||||||
|
- analysis
|
||||||
|
platforms:
|
||||||
|
- Windows
|
||||||
|
- Linux
|
||||||
|
- macOS
|
||||||
|
accessType: download
|
||||||
|
license: Apache 2.0
|
||||||
|
knowledgebase: false
|
||||||
|
tags:
|
||||||
|
- command-line
|
||||||
|
- memory-analysis
|
||||||
|
- plugin-support
|
||||||
|
- python-library
|
||||||
|
- zero-copy
|
||||||
|
- profile-less
|
||||||
|
related_concepts:
|
||||||
|
- Regular Expressions (Regex)
|
||||||
|
related_software:
|
||||||
|
- Volatility 3
|
||||||
|
- Rekall
|
||||||
|
|
||||||
|
- name: Docker Explorer
|
||||||
|
icon: 🐳
|
||||||
|
type: software
|
||||||
|
description: >-
|
||||||
|
Googles Forensik-Toolkit zerlegt Offline-Docker-Volumes und
|
||||||
|
Overlay-Dateisysteme ohne laufenden Daemon. Es extrahiert
|
||||||
|
Container-Config, Image-Layer, ENV-Variablen, Mounted-Secrets
|
||||||
|
und schreibt Timeline-fähige Metadata-JSONs. Unterstützt btrfs,
|
||||||
|
overlay2 und zfs Storage-Driver sowie Docker Desktop (macOS/Windows).
|
||||||
|
Perfekt, um bösartige Images nach Supply-Chain-Attacken zu enttarnen
|
||||||
|
oder flüchtige Container nach einem Incident nachträglich zu analysieren.
|
||||||
|
skillLevel: intermediate
|
||||||
|
url: https://github.com/google/docker-explorer
|
||||||
|
domains:
|
||||||
|
- cloud-forensics
|
||||||
|
- incident-response
|
||||||
|
- static-investigations
|
||||||
|
phases:
|
||||||
|
- data-collection
|
||||||
|
- examination
|
||||||
|
- analysis
|
||||||
|
platforms:
|
||||||
|
- Linux
|
||||||
|
- macOS
|
||||||
|
- Windows
|
||||||
|
accessType: download
|
||||||
|
license: Apache 2.0
|
||||||
|
knowledgebase: false
|
||||||
|
tags:
|
||||||
|
- command-line
|
||||||
|
- container-forensics
|
||||||
|
- docker
|
||||||
|
- timeline
|
||||||
|
- json-export
|
||||||
|
- supply-chain
|
||||||
|
related_concepts: []
|
||||||
|
related_software:
|
||||||
|
- Velociraptor
|
||||||
|
- osquery
|
||||||
|
|
||||||
|
- name: Ghiro
|
||||||
|
icon: 🖼️
|
||||||
|
type: software
|
||||||
|
description: >-
|
||||||
|
Die Web-basierte Bildforensik-Plattform automatisiert EXIF-Analyse,
|
||||||
|
Hash-Matching, Error-Level-Evaluation (ELA) und
|
||||||
|
Steganografie-Erkennung für große Dateibatches. Unterstützt
|
||||||
|
Gesichts- und NSFW-Detection sowie GPS-Reverse-Geocoding für
|
||||||
|
Bewegungsprofile. Reports sind gerichtsfest
|
||||||
|
versioniert, REST-API und Celery-Worker skalieren auf
|
||||||
|
Millionen Bilder – ideal für CSAM-Ermittlungen oder Fake-News-Prüfung.
|
||||||
|
skillLevel: intermediate
|
||||||
|
url: https://getghiro.org/
|
||||||
|
domains:
|
||||||
|
- static-investigations
|
||||||
|
- fraud-investigation
|
||||||
|
- mobile-forensics
|
||||||
|
phases:
|
||||||
|
- examination
|
||||||
|
- analysis
|
||||||
|
- reporting
|
||||||
|
platforms:
|
||||||
|
- Web
|
||||||
|
- Linux
|
||||||
|
accessType: server-based
|
||||||
|
license: GPL-2.0
|
||||||
|
knowledgebase: false
|
||||||
|
tags:
|
||||||
|
- web-interface
|
||||||
|
- image-forensics
|
||||||
|
- exif-analysis
|
||||||
|
- steganography
|
||||||
|
- nsfw-detection
|
||||||
|
- batch-processing
|
||||||
|
related_concepts:
|
||||||
|
- Hash Functions & Digital Signatures
|
||||||
|
related_software:
|
||||||
|
- ExifTool
|
||||||
|
- PhotoRec
|
||||||
|
|
||||||
|
- name: Sherloq
|
||||||
|
icon: 🔍
|
||||||
|
type: software
|
||||||
|
description: >-
|
||||||
|
Das Python-GUI-Toolkit für visuelle Datei-Analyse kombiniert
|
||||||
|
klassische Reverse-Steganografie-Techniken (LSB, Palette-Tweaking,
|
||||||
|
DCT-Coefficient-Scanning) mit modernen CV-Algorithmen.
|
||||||
|
Heatmaps und Histogramm-Diffs zeigen Manipulations-Hotspots,
|
||||||
|
während eine „Carve-All-Layers“-Funktion versteckte Daten in PNG,
|
||||||
|
JPEG, BMP, GIF und Audio-Spectra aufspürt. Plugins für zsteg,
|
||||||
|
binwalk und exiftool erweitern die Pipeline.
|
||||||
|
Eine Must-have-Ergänzung zu Ghidra & friends, wenn
|
||||||
|
Malware Dateien als Dead-Drop nutzt.
|
||||||
|
skillLevel: intermediate
|
||||||
|
url: https://github.com/GuidoBartoli/sherloq
|
||||||
|
domains:
|
||||||
|
- malware-analysis
|
||||||
|
- static-investigations
|
||||||
|
phases:
|
||||||
|
- examination
|
||||||
|
- analysis
|
||||||
|
platforms:
|
||||||
|
- Windows
|
||||||
|
- Linux
|
||||||
|
- macOS
|
||||||
|
accessType: download
|
||||||
|
license: MIT
|
||||||
|
knowledgebase: false
|
||||||
|
tags:
|
||||||
|
- gui
|
||||||
|
- image-forensics
|
||||||
|
- steganography
|
||||||
|
- lsb-extraction
|
||||||
|
- histogram-analysis
|
||||||
|
- plugin-support
|
||||||
|
related_concepts:
|
||||||
|
- Regular Expressions (Regex)
|
||||||
|
related_software:
|
||||||
|
- Ghiro
|
||||||
|
- CyberChef
|
||||||
|
|
||||||
- name: Cortex
|
- name: Cortex
|
||||||
type: software
|
type: software
|
||||||
description: >-
|
description: >-
|
||||||
|
@ -94,18 +94,15 @@ ${input}
|
|||||||
`.trim();
|
`.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enhanced AI service call function
|
|
||||||
async function callAIService(prompt: string): Promise<Response> {
|
async function callAIService(prompt: string): Promise<Response> {
|
||||||
const endpoint = AI_ENDPOINT;
|
const endpoint = AI_ENDPOINT;
|
||||||
const apiKey = AI_ANALYZER_API_KEY;
|
const apiKey = AI_ANALYZER_API_KEY;
|
||||||
const model = AI_ANALYZER_MODEL;
|
const model = AI_ANALYZER_MODEL;
|
||||||
|
|
||||||
// Simple headers - add auth only if API key exists
|
|
||||||
let headers: Record<string, string> = {
|
let headers: Record<string, string> = {
|
||||||
'Content-Type': 'application/json'
|
'Content-Type': 'application/json'
|
||||||
};
|
};
|
||||||
|
|
||||||
// Add authentication if API key is provided
|
|
||||||
if (apiKey) {
|
if (apiKey) {
|
||||||
headers['Authorization'] = `Bearer ${apiKey}`;
|
headers['Authorization'] = `Bearer ${apiKey}`;
|
||||||
console.log('[ENHANCE API] Using API key authentication');
|
console.log('[ENHANCE API] Using API key authentication');
|
||||||
@ -113,7 +110,6 @@ async function callAIService(prompt: string): Promise<Response> {
|
|||||||
console.log('[ENHANCE API] No API key - making request without authentication');
|
console.log('[ENHANCE API] No API key - making request without authentication');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Simple request body
|
|
||||||
const requestBody = {
|
const requestBody = {
|
||||||
model,
|
model,
|
||||||
messages: [{ role: 'user', content: prompt }],
|
messages: [{ role: 'user', content: prompt }],
|
||||||
@ -124,8 +120,6 @@ async function callAIService(prompt: string): Promise<Response> {
|
|||||||
presence_penalty: 0.1
|
presence_penalty: 0.1
|
||||||
};
|
};
|
||||||
|
|
||||||
// FIXED: This function is already being called through enqueueApiCall in the main handler
|
|
||||||
// So we can use direct fetch here since the queuing happens at the caller level
|
|
||||||
return fetch(`${endpoint}/v1/chat/completions`, {
|
return fetch(`${endpoint}/v1/chat/completions`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers,
|
headers,
|
||||||
@ -214,7 +208,7 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
success: true,
|
success: true,
|
||||||
questions,
|
questions,
|
||||||
taskId,
|
taskId,
|
||||||
inputComplete: questions.length === 0 // Flag to indicate if input seems complete
|
inputComplete: questions.length === 0
|
||||||
}), {
|
}), {
|
||||||
status: 200,
|
status: 200,
|
||||||
headers: { 'Content-Type': 'application/json' }
|
headers: { 'Content-Type': 'application/json' }
|
||||||
|
@ -2087,6 +2087,7 @@ input[type="checkbox"] {
|
|||||||
gap: 1rem;
|
gap: 1rem;
|
||||||
max-width: 1200px;
|
max-width: 1200px;
|
||||||
margin: 0 auto;
|
margin: 0 auto;
|
||||||
|
margin-top: 1rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
.phase-header {
|
.phase-header {
|
||||||
|
@ -31,7 +31,6 @@ interface AnalysisResult {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// NEW: Audit Trail Types
|
|
||||||
interface AuditEntry {
|
interface AuditEntry {
|
||||||
timestamp: number;
|
timestamp: number;
|
||||||
phase: string; // 'retrieval', 'selection', 'micro-task-N'
|
phase: string; // 'retrieval', 'selection', 'micro-task-N'
|
||||||
@ -40,10 +39,9 @@ interface AuditEntry {
|
|||||||
output: any; // What came out of this step
|
output: any; // What came out of this step
|
||||||
confidence: number; // 0-100: How confident we are in this step
|
confidence: number; // 0-100: How confident we are in this step
|
||||||
processingTimeMs: number;
|
processingTimeMs: number;
|
||||||
metadata: Record<string, any>; // Additional context
|
metadata: Record<string, any>;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enhanced AnalysisContext with Audit Trail
|
|
||||||
interface AnalysisContext {
|
interface AnalysisContext {
|
||||||
userQuery: string;
|
userQuery: string;
|
||||||
mode: string;
|
mode: string;
|
||||||
@ -62,7 +60,6 @@ interface AnalysisContext {
|
|||||||
|
|
||||||
seenToolNames: Set<string>;
|
seenToolNames: Set<string>;
|
||||||
|
|
||||||
// NEW: Audit Trail
|
|
||||||
auditTrail: AuditEntry[];
|
auditTrail: AuditEntry[];
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -78,17 +75,24 @@ class ImprovedMicroTaskAIPipeline {
|
|||||||
private similarityThreshold: number;
|
private similarityThreshold: number;
|
||||||
private microTaskDelay: number;
|
private microTaskDelay: number;
|
||||||
|
|
||||||
|
private embeddingSelectionLimit: number;
|
||||||
|
private embeddingConceptsLimit: number;
|
||||||
|
|
||||||
|
private noEmbeddingsToolLimit: number;
|
||||||
|
private noEmbeddingsConceptLimit: number;
|
||||||
|
|
||||||
|
private embeddingsMinTools: number;
|
||||||
|
private embeddingsMaxReductionRatio: number;
|
||||||
|
|
||||||
private maxContextTokens: number;
|
private maxContextTokens: number;
|
||||||
private maxPromptTokens: number;
|
private maxPromptTokens: number;
|
||||||
|
|
||||||
// NEW: Audit Configuration
|
|
||||||
private auditConfig: {
|
private auditConfig: {
|
||||||
enabled: boolean;
|
enabled: boolean;
|
||||||
detailLevel: 'minimal' | 'standard' | 'verbose';
|
detailLevel: 'minimal' | 'standard' | 'verbose';
|
||||||
retentionHours: number;
|
retentionHours: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
// NEW: Temporary audit storage for pre-context operations
|
|
||||||
private tempAuditEntries: AuditEntry[] = [];
|
private tempAuditEntries: AuditEntry[] = [];
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
@ -98,20 +102,35 @@ class ImprovedMicroTaskAIPipeline {
|
|||||||
model: this.getEnv('AI_ANALYZER_MODEL')
|
model: this.getEnv('AI_ANALYZER_MODEL')
|
||||||
};
|
};
|
||||||
|
|
||||||
this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '60', 10);
|
this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '25', 10);
|
||||||
this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '60', 10);
|
this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '50', 10);
|
||||||
this.similarityThreshold = 0.3;
|
this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.3');
|
||||||
this.microTaskDelay = parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10);
|
this.microTaskDelay = parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10);
|
||||||
|
|
||||||
|
this.embeddingSelectionLimit = parseInt(process.env.AI_EMBEDDING_SELECTION_LIMIT || '30', 10);
|
||||||
|
this.embeddingConceptsLimit = parseInt(process.env.AI_EMBEDDING_CONCEPTS_LIMIT || '15', 10);
|
||||||
|
|
||||||
|
this.noEmbeddingsToolLimit = parseInt(process.env.AI_NO_EMBEDDINGS_TOOL_LIMIT || '0', 10);
|
||||||
|
this.noEmbeddingsConceptLimit = parseInt(process.env.AI_NO_EMBEDDINGS_CONCEPT_LIMIT || '0', 10);
|
||||||
|
|
||||||
|
this.embeddingsMinTools = parseInt(process.env.AI_EMBEDDINGS_MIN_TOOLS || '8', 10);
|
||||||
|
this.embeddingsMaxReductionRatio = parseFloat(process.env.AI_EMBEDDINGS_MAX_REDUCTION_RATIO || '0.75');
|
||||||
|
|
||||||
this.maxContextTokens = parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10);
|
this.maxContextTokens = parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10);
|
||||||
this.maxPromptTokens = parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10);
|
this.maxPromptTokens = parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10);
|
||||||
|
|
||||||
// NEW: Initialize Audit Configuration
|
|
||||||
this.auditConfig = {
|
this.auditConfig = {
|
||||||
enabled: process.env.FORENSIC_AUDIT_ENABLED === 'true',
|
enabled: process.env.FORENSIC_AUDIT_ENABLED === 'true',
|
||||||
detailLevel: (process.env.FORENSIC_AUDIT_DETAIL_LEVEL as any) || 'standard',
|
detailLevel: (process.env.FORENSIC_AUDIT_DETAIL_LEVEL as any) || 'standard',
|
||||||
retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10)
|
retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
console.log('[AI PIPELINE] Configuration loaded:', {
|
||||||
|
embeddingCandidates: this.embeddingCandidates,
|
||||||
|
embeddingSelection: `${this.embeddingSelectionLimit} tools, ${this.embeddingConceptsLimit} concepts`,
|
||||||
|
noEmbeddingsLimits: `${this.noEmbeddingsToolLimit || 'unlimited'} tools, ${this.noEmbeddingsConceptLimit || 'unlimited'} concepts`,
|
||||||
|
auditEnabled: this.auditConfig.enabled
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
private getEnv(key: string): string {
|
private getEnv(key: string): string {
|
||||||
@ -122,7 +141,6 @@ class ImprovedMicroTaskAIPipeline {
|
|||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
// NEW: Audit Trail Utility Functions
|
|
||||||
private addAuditEntry(
|
private addAuditEntry(
|
||||||
context: AnalysisContext | null,
|
context: AnalysisContext | null,
|
||||||
phase: string,
|
phase: string,
|
||||||
@ -149,22 +167,18 @@ class ImprovedMicroTaskAIPipeline {
|
|||||||
if (context) {
|
if (context) {
|
||||||
context.auditTrail.push(auditEntry);
|
context.auditTrail.push(auditEntry);
|
||||||
} else {
|
} else {
|
||||||
// Store in temporary array for later merging
|
|
||||||
this.tempAuditEntries.push(auditEntry);
|
this.tempAuditEntries.push(auditEntry);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Log for debugging when audit is enabled
|
|
||||||
console.log(`[AUDIT] ${phase}/${action}: ${confidence}% confidence, ${Date.now() - startTime}ms`);
|
console.log(`[AUDIT] ${phase}/${action}: ${confidence}% confidence, ${Date.now() - startTime}ms`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// NEW: Merge temporary audit entries into context
|
|
||||||
private mergeTemporaryAuditEntries(context: AnalysisContext): void {
|
private mergeTemporaryAuditEntries(context: AnalysisContext): void {
|
||||||
if (!this.auditConfig.enabled || this.tempAuditEntries.length === 0) return;
|
if (!this.auditConfig.enabled || this.tempAuditEntries.length === 0) return;
|
||||||
|
|
||||||
const entryCount = this.tempAuditEntries.length;
|
const entryCount = this.tempAuditEntries.length;
|
||||||
// Add temp entries to the beginning of the context audit trail
|
|
||||||
context.auditTrail.unshift(...this.tempAuditEntries);
|
context.auditTrail.unshift(...this.tempAuditEntries);
|
||||||
this.tempAuditEntries = []; // Clear temp storage
|
this.tempAuditEntries = [];
|
||||||
|
|
||||||
console.log(`[AUDIT] Merged ${entryCount} temporary audit entries into context`);
|
console.log(`[AUDIT] Merged ${entryCount} temporary audit entries into context`);
|
||||||
}
|
}
|
||||||
@ -196,15 +210,12 @@ class ImprovedMicroTaskAIPipeline {
|
|||||||
|
|
||||||
let confidence = 60; // Base confidence
|
let confidence = 60; // Base confidence
|
||||||
|
|
||||||
// Good selection ratio (not too many, not too few)
|
|
||||||
if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
|
if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
|
||||||
else if (selectionRatio <= 0.05) confidence -= 10; // Too few
|
else if (selectionRatio <= 0.05) confidence -= 10; // Too few
|
||||||
else confidence -= 15; // Too many
|
else confidence -= 15; // Too many
|
||||||
|
|
||||||
// Has detailed reasoning
|
|
||||||
if (hasReasoning) confidence += 15;
|
if (hasReasoning) confidence += 15;
|
||||||
|
|
||||||
// Selected tools have good distribution
|
|
||||||
if (result.selectedConcepts && result.selectedConcepts.length > 0) confidence += 5;
|
if (result.selectedConcepts && result.selectedConcepts.length > 0) confidence += 5;
|
||||||
|
|
||||||
return Math.min(95, Math.max(25, confidence));
|
return Math.min(95, Math.max(25, confidence));
|
||||||
@ -228,26 +239,106 @@ class ImprovedMicroTaskAIPipeline {
|
|||||||
|
|
||||||
private safeParseJSON(jsonString: string, fallback: any = null): any {
|
private safeParseJSON(jsonString: string, fallback: any = null): any {
|
||||||
try {
|
try {
|
||||||
const cleaned = jsonString
|
let cleaned = jsonString
|
||||||
.replace(/^```json\s*/i, '')
|
.replace(/^```json\s*/i, '')
|
||||||
.replace(/\s*```\s*$/g, '')
|
.replace(/\s*```\s*$/g, '')
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
|
if (!cleaned.endsWith('}') && !cleaned.endsWith(']')) {
|
||||||
|
console.warn('[AI PIPELINE] JSON appears truncated, attempting recovery...');
|
||||||
|
|
||||||
|
let lastCompleteStructure = '';
|
||||||
|
let braceCount = 0;
|
||||||
|
let bracketCount = 0;
|
||||||
|
let inString = false;
|
||||||
|
let escaped = false;
|
||||||
|
|
||||||
|
for (let i = 0; i < cleaned.length; i++) {
|
||||||
|
const char = cleaned[i];
|
||||||
|
|
||||||
|
if (escaped) {
|
||||||
|
escaped = false;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (char === '\\') {
|
||||||
|
escaped = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (char === '"' && !escaped) {
|
||||||
|
inString = !inString;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!inString) {
|
||||||
|
if (char === '{') braceCount++;
|
||||||
|
if (char === '}') braceCount--;
|
||||||
|
if (char === '[') bracketCount++;
|
||||||
|
if (char === ']') bracketCount--;
|
||||||
|
|
||||||
|
if (braceCount === 0 && bracketCount === 0 && (char === '}' || char === ']')) {
|
||||||
|
lastCompleteStructure = cleaned.substring(0, i + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (lastCompleteStructure) {
|
||||||
|
console.log('[AI PIPELINE] Attempting to parse recovered JSON structure...');
|
||||||
|
cleaned = lastCompleteStructure;
|
||||||
|
} else {
|
||||||
|
if (braceCount > 0) {
|
||||||
|
cleaned += '}';
|
||||||
|
console.log('[AI PIPELINE] Added closing brace to truncated JSON');
|
||||||
|
}
|
||||||
|
if (bracketCount > 0) {
|
||||||
|
cleaned += ']';
|
||||||
|
console.log('[AI PIPELINE] Added closing bracket to truncated JSON');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const parsed = JSON.parse(cleaned);
|
const parsed = JSON.parse(cleaned);
|
||||||
|
|
||||||
|
if (parsed && typeof parsed === 'object') {
|
||||||
|
if (parsed.selectedTools === undefined) parsed.selectedTools = [];
|
||||||
|
if (parsed.selectedConcepts === undefined) parsed.selectedConcepts = [];
|
||||||
|
|
||||||
|
if (!Array.isArray(parsed.selectedTools)) parsed.selectedTools = [];
|
||||||
|
if (!Array.isArray(parsed.selectedConcepts)) parsed.selectedConcepts = [];
|
||||||
|
}
|
||||||
|
|
||||||
return parsed;
|
return parsed;
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.warn('[AI PIPELINE] JSON parsing failed:', error.message);
|
console.warn('[AI PIPELINE] JSON parsing failed:', error.message);
|
||||||
console.warn('[AI PIPELINE] Raw content:', jsonString.slice(0, 200));
|
console.warn('[AI PIPELINE] Raw content (first 300 chars):', jsonString.slice(0, 300));
|
||||||
|
console.warn('[AI PIPELINE] Raw content (last 300 chars):', jsonString.slice(-300));
|
||||||
|
|
||||||
|
if (jsonString.includes('selectedTools')) {
|
||||||
|
const toolMatches = jsonString.match(/"([^"]+)"/g);
|
||||||
|
if (toolMatches && toolMatches.length > 0) {
|
||||||
|
console.log('[AI PIPELINE] Attempting partial recovery from broken JSON...');
|
||||||
|
const possibleTools = toolMatches
|
||||||
|
.map(match => match.replace(/"/g, ''))
|
||||||
|
.filter(name => name.length > 2 && !['selectedTools', 'selectedConcepts', 'reasoning'].includes(name))
|
||||||
|
.slice(0, 15); // Reasonable limit
|
||||||
|
|
||||||
|
if (possibleTools.length > 0) {
|
||||||
|
console.log(`[AI PIPELINE] Recovered ${possibleTools.length} possible tool names from broken JSON`);
|
||||||
|
return {
|
||||||
|
selectedTools: possibleTools,
|
||||||
|
selectedConcepts: [],
|
||||||
|
reasoning: 'Recovered from truncated response'
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return fallback;
|
return fallback;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string): boolean {
|
private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string): boolean {
|
||||||
if (context.seenToolNames.has(tool.name)) {
|
|
||||||
console.log(`[AI PIPELINE] Skipping duplicate tool: ${tool.name}`);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
context.seenToolNames.add(tool.name);
|
context.seenToolNames.add(tool.name);
|
||||||
if (!context.selectedTools) context.selectedTools = [];
|
if (!context.selectedTools) context.selectedTools = [];
|
||||||
|
|
||||||
@ -266,56 +357,62 @@ class ImprovedMicroTaskAIPipeline {
|
|||||||
let candidateConcepts: any[] = [];
|
let candidateConcepts: any[] = [];
|
||||||
let selectionMethod = 'unknown';
|
let selectionMethod = 'unknown';
|
||||||
|
|
||||||
|
// WAIT for embeddings initialization if embeddings are enabled
|
||||||
|
if (process.env.AI_EMBEDDINGS_ENABLED === 'true') {
|
||||||
|
try {
|
||||||
|
console.log('[AI PIPELINE] Waiting for embeddings initialization...');
|
||||||
|
await embeddingsService.waitForInitialization();
|
||||||
|
console.log('[AI PIPELINE] Embeddings ready, proceeding with similarity search');
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[AI PIPELINE] Embeddings initialization failed, falling back to full dataset:', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (embeddingsService.isEnabled()) {
|
if (embeddingsService.isEnabled()) {
|
||||||
const embeddingsStart = Date.now();
|
const embeddingsStart = Date.now();
|
||||||
const similarItems = await embeddingsService.findSimilar(
|
const similarItems = await embeddingsService.findSimilar(
|
||||||
userQuery,
|
userQuery,
|
||||||
this.embeddingCandidates,
|
this.embeddingCandidates,
|
||||||
this.similarityThreshold
|
this.similarityThreshold
|
||||||
) as SimilarityResult[]; // Type assertion for similarity property
|
) as SimilarityResult[];
|
||||||
|
|
||||||
console.log(`[IMPROVED PIPELINE] Embeddings found ${similarItems.length} similar items`);
|
console.log(`[AI PIPELINE] Embeddings found ${similarItems.length} similar items`);
|
||||||
|
|
||||||
// FIXED: Create lookup maps for O(1) access while preserving original data
|
|
||||||
const toolsMap = new Map<string, any>(toolsData.tools.map((tool: any) => [tool.name, tool]));
|
const toolsMap = new Map<string, any>(toolsData.tools.map((tool: any) => [tool.name, tool]));
|
||||||
const conceptsMap = new Map<string, any>(toolsData.concepts.map((concept: any) => [concept.name, concept]));
|
const conceptsMap = new Map<string, any>(toolsData.concepts.map((concept: any) => [concept.name, concept]));
|
||||||
|
|
||||||
// FIXED: Process in similarity order, preserving the ranking
|
|
||||||
const similarTools = similarItems
|
const similarTools = similarItems
|
||||||
.filter((item): item is SimilarityResult => item.type === 'tool')
|
.filter((item): item is SimilarityResult => item.type === 'tool')
|
||||||
.map(item => toolsMap.get(item.name))
|
.map(item => toolsMap.get(item.name))
|
||||||
.filter((tool): tool is any => tool !== undefined); // Proper type guard
|
.filter((tool): tool is any => tool !== undefined);
|
||||||
|
|
||||||
const similarConcepts = similarItems
|
const similarConcepts = similarItems
|
||||||
.filter((item): item is SimilarityResult => item.type === 'concept')
|
.filter((item): item is SimilarityResult => item.type === 'concept')
|
||||||
.map(item => conceptsMap.get(item.name))
|
.map(item => conceptsMap.get(item.name))
|
||||||
.filter((concept): concept is any => concept !== undefined); // Proper type guard
|
.filter((concept): concept is any => concept !== undefined);
|
||||||
|
|
||||||
console.log(`[IMPROVED PIPELINE] Similarity-ordered results: ${similarTools.length} tools, ${similarConcepts.length} concepts`);
|
console.log(`[AI PIPELINE] Similarity-ordered results: ${similarTools.length} tools, ${similarConcepts.length} concepts`);
|
||||||
|
|
||||||
// Log the first few tools to verify ordering is preserved
|
const totalAvailableTools = toolsData.tools.length;
|
||||||
if (similarTools.length > 0) {
|
const reductionRatio = similarTools.length / totalAvailableTools;
|
||||||
console.log(`[IMPROVED PIPELINE] Top similar tools (in similarity order):`);
|
|
||||||
similarTools.slice(0, 5).forEach((tool, idx) => {
|
|
||||||
const originalSimilarItem = similarItems.find(item => item.name === tool.name);
|
|
||||||
console.log(` ${idx + 1}. ${tool.name} (similarity: ${originalSimilarItem?.similarity?.toFixed(4) || 'N/A'})`);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
if (similarTools.length >= 15) {
|
if (similarTools.length >= this.embeddingsMinTools && reductionRatio <= this.embeddingsMaxReductionRatio) {
|
||||||
candidateTools = similarTools;
|
candidateTools = similarTools;
|
||||||
candidateConcepts = similarConcepts;
|
candidateConcepts = similarConcepts;
|
||||||
selectionMethod = 'embeddings_candidates';
|
selectionMethod = 'embeddings_candidates';
|
||||||
|
|
||||||
console.log(`[IMPROVED PIPELINE] Using embeddings candidates in similarity order: ${candidateTools.length} tools`);
|
console.log(`[AI PIPELINE] Using embeddings filtering: ${totalAvailableTools} → ${similarTools.length} tools (${(reductionRatio * 100).toFixed(1)}% reduction)`);
|
||||||
} else {
|
} else {
|
||||||
console.log(`[IMPROVED PIPELINE] Embeddings insufficient (${similarTools.length} < 15), using full dataset`);
|
if (similarTools.length < this.embeddingsMinTools) {
|
||||||
|
console.log(`[AI PIPELINE] Embeddings found too few tools (${similarTools.length} < ${this.embeddingsMinTools}), using full dataset`);
|
||||||
|
} else {
|
||||||
|
console.log(`[AI PIPELINE] Embeddings didn't filter enough (${(reductionRatio * 100).toFixed(1)}% > ${(this.embeddingsMaxReductionRatio * 100).toFixed(1)}%), using full dataset`);
|
||||||
|
}
|
||||||
candidateTools = toolsData.tools;
|
candidateTools = toolsData.tools;
|
||||||
candidateConcepts = toolsData.concepts;
|
candidateConcepts = toolsData.concepts;
|
||||||
selectionMethod = 'full_dataset';
|
selectionMethod = 'full_dataset';
|
||||||
}
|
}
|
||||||
|
|
||||||
// NEW: Add Audit Entry for Embeddings Search with ordering verification
|
|
||||||
if (this.auditConfig.enabled) {
|
if (this.auditConfig.enabled) {
|
||||||
this.addAuditEntry(null, 'retrieval', 'embeddings-search',
|
this.addAuditEntry(null, 'retrieval', 'embeddings-search',
|
||||||
{ query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates },
|
{ query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates },
|
||||||
@ -323,21 +420,29 @@ class ImprovedMicroTaskAIPipeline {
|
|||||||
candidatesFound: similarItems.length,
|
candidatesFound: similarItems.length,
|
||||||
toolsInOrder: similarTools.slice(0, 3).map((t: any) => t.name),
|
toolsInOrder: similarTools.slice(0, 3).map((t: any) => t.name),
|
||||||
conceptsInOrder: similarConcepts.slice(0, 3).map((c: any) => c.name),
|
conceptsInOrder: similarConcepts.slice(0, 3).map((c: any) => c.name),
|
||||||
orderingPreserved: true
|
reductionRatio: reductionRatio,
|
||||||
|
usingEmbeddings: selectionMethod === 'embeddings_candidates',
|
||||||
|
totalAvailable: totalAvailableTools,
|
||||||
|
filtered: similarTools.length
|
||||||
},
|
},
|
||||||
similarTools.length >= 15 ? 85 : 60,
|
selectionMethod === 'embeddings_candidates' ? 85 : 60,
|
||||||
embeddingsStart,
|
embeddingsStart,
|
||||||
{ selectionMethod, embeddingsEnabled: true, orderingFixed: true }
|
{
|
||||||
|
selectionMethod,
|
||||||
|
embeddingsEnabled: true,
|
||||||
|
reductionAchieved: selectionMethod === 'embeddings_candidates',
|
||||||
|
tokenSavingsExpected: selectionMethod === 'embeddings_candidates'
|
||||||
|
}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
console.log(`[IMPROVED PIPELINE] Embeddings disabled, using full dataset`);
|
console.log(`[AI PIPELINE] Embeddings disabled or not ready, using full dataset`);
|
||||||
candidateTools = toolsData.tools;
|
candidateTools = toolsData.tools;
|
||||||
candidateConcepts = toolsData.concepts;
|
candidateConcepts = toolsData.concepts;
|
||||||
selectionMethod = 'full_dataset';
|
selectionMethod = 'full_dataset';
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`[IMPROVED PIPELINE] AI will analyze ${candidateTools.length} candidate tools (ordering preserved: ${selectionMethod === 'embeddings_candidates'})`);
|
console.log(`[AI PIPELINE] AI will analyze ${candidateTools.length} candidate tools (method: ${selectionMethod})`);
|
||||||
const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod);
|
const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@ -387,15 +492,44 @@ class ImprovedMicroTaskAIPipeline {
|
|||||||
related_software: concept.related_software || []
|
related_software: concept.related_software || []
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// Generate the German prompt with tool data
|
let toolsToSend: any[];
|
||||||
|
let conceptsToSend: any[];
|
||||||
|
|
||||||
|
if (selectionMethod === 'embeddings_candidates') {
|
||||||
|
toolsToSend = toolsWithFullData.slice(0, this.embeddingSelectionLimit);
|
||||||
|
conceptsToSend = conceptsWithFullData.slice(0, this.embeddingConceptsLimit);
|
||||||
|
|
||||||
|
console.log(`[AI PIPELINE] Embeddings enabled: sending top ${toolsToSend.length} similarity-ordered tools`);
|
||||||
|
} else {
|
||||||
|
const maxTools = this.noEmbeddingsToolLimit > 0 ?
|
||||||
|
Math.min(this.noEmbeddingsToolLimit, candidateTools.length) :
|
||||||
|
candidateTools.length;
|
||||||
|
|
||||||
|
const maxConcepts = this.noEmbeddingsConceptLimit > 0 ?
|
||||||
|
Math.min(this.noEmbeddingsConceptLimit, candidateConcepts.length) :
|
||||||
|
candidateConcepts.length;
|
||||||
|
|
||||||
|
toolsToSend = toolsWithFullData.slice(0, maxTools);
|
||||||
|
conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);
|
||||||
|
|
||||||
|
console.log(`[AI PIPELINE] Embeddings disabled: sending ${toolsToSend.length}/${candidateTools.length} tools (limit: ${this.noEmbeddingsToolLimit || 'none'})`);
|
||||||
|
}
|
||||||
|
|
||||||
const basePrompt = getPrompt('toolSelection', mode, userQuery, selectionMethod, this.maxSelectedItems);
|
const basePrompt = getPrompt('toolSelection', mode, userQuery, selectionMethod, this.maxSelectedItems);
|
||||||
const prompt = `${basePrompt}
|
const prompt = `${basePrompt}
|
||||||
|
|
||||||
VERFÜGBARE TOOLS (mit vollständigen Daten):
|
VERFÜGBARE TOOLS (mit vollständigen Daten):
|
||||||
${JSON.stringify(toolsWithFullData.slice(0, 30), null, 2)}
|
${JSON.stringify(toolsToSend, null, 2)}
|
||||||
|
|
||||||
VERFÜGBARE KONZEPTE (mit vollständigen Daten):
|
VERFÜGBARE KONZEPTE (mit vollständigen Daten):
|
||||||
${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
${JSON.stringify(conceptsToSend, null, 2)}`;
|
||||||
|
|
||||||
|
const estimatedTokens = this.estimateTokens(prompt);
|
||||||
|
console.log(`[AI PIPELINE] Method: ${selectionMethod}, Tools: ${toolsToSend.length}, Estimated tokens: ~${estimatedTokens}`);
|
||||||
|
|
||||||
|
if (estimatedTokens > 35000) {
|
||||||
|
console.warn(`[AI PIPELINE] WARNING: Prompt tokens (${estimatedTokens}) may exceed model limits`);
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const response = await this.callAI(prompt, 2500);
|
const response = await this.callAI(prompt, 2500);
|
||||||
@ -403,16 +537,15 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
const result = this.safeParseJSON(response, null);
|
const result = this.safeParseJSON(response, null);
|
||||||
|
|
||||||
if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
|
if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
|
||||||
console.error('[IMPROVED PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
|
console.error('[AI PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
|
||||||
|
|
||||||
// NEW: Add Audit Entry for Failed Selection
|
|
||||||
if (this.auditConfig.enabled) {
|
if (this.auditConfig.enabled) {
|
||||||
this.addAuditEntry(null, 'selection', 'ai-tool-selection-failed',
|
this.addAuditEntry(null, 'selection', 'ai-tool-selection-failed',
|
||||||
{ candidateCount: candidateTools.length, mode, prompt: prompt.slice(0, 200) },
|
{ candidateCount: candidateTools.length, mode, prompt: prompt.slice(0, 200) },
|
||||||
{ error: 'Invalid JSON structure', response: response.slice(0, 200) },
|
{ error: 'Invalid JSON structure', response: response.slice(0, 200) },
|
||||||
10, // Very low confidence
|
10,
|
||||||
selectionStart,
|
selectionStart,
|
||||||
{ aiModel: this.config.model, selectionMethod }
|
{ aiModel: this.config.model, selectionMethod, tokensSent: estimatedTokens, toolsSent: toolsToSend.length }
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -421,19 +554,15 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
|
|
||||||
const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
|
const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
|
||||||
if (totalSelected === 0) {
|
if (totalSelected === 0) {
|
||||||
console.error('[IMPROVED PIPELINE] AI selection returned no tools');
|
console.error('[AI PIPELINE] AI selection returned no tools');
|
||||||
throw new Error('AI selection returned empty selection');
|
throw new Error('AI selection returned empty selection');
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`[IMPROVED PIPELINE] AI selected: ${result.selectedTools.length} tools, ${result.selectedConcepts.length} concepts`);
|
console.log(`[AI PIPELINE] AI selected: ${result.selectedTools.length} tools, ${result.selectedConcepts.length} concepts from ${toolsToSend.length} candidates`);
|
||||||
console.log(`[IMPROVED PIPELINE] AI reasoning: ${result.reasoning}`);
|
|
||||||
|
|
||||||
const selectedTools = candidateTools.filter(tool => result.selectedTools.includes(tool.name));
|
const selectedTools = candidateTools.filter(tool => result.selectedTools.includes(tool.name));
|
||||||
const selectedConcepts = candidateConcepts.filter(concept => result.selectedConcepts.includes(concept.name));
|
const selectedConcepts = candidateConcepts.filter(concept => result.selectedConcepts.includes(concept.name));
|
||||||
|
|
||||||
console.log(`[IMPROVED PIPELINE] Final selection: ${selectedTools.length} tools with bias prevention applied`);
|
|
||||||
|
|
||||||
// NEW: Add Audit Entry for Successful Selection
|
|
||||||
if (this.auditConfig.enabled) {
|
if (this.auditConfig.enabled) {
|
||||||
const confidence = this.calculateSelectionConfidence(result, candidateTools.length);
|
const confidence = this.calculateSelectionConfidence(result, candidateTools.length);
|
||||||
|
|
||||||
@ -443,11 +572,12 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
selectedToolCount: result.selectedTools.length,
|
selectedToolCount: result.selectedTools.length,
|
||||||
selectedConceptCount: result.selectedConcepts.length,
|
selectedConceptCount: result.selectedConcepts.length,
|
||||||
reasoning: result.reasoning?.slice(0, 200) + '...',
|
reasoning: result.reasoning?.slice(0, 200) + '...',
|
||||||
finalToolNames: selectedTools.map(t => t.name)
|
finalToolNames: selectedTools.map(t => t.name),
|
||||||
|
selectionEfficiency: `${toolsToSend.length} → ${result.selectedTools.length}`
|
||||||
},
|
},
|
||||||
confidence,
|
confidence,
|
||||||
selectionStart,
|
selectionStart,
|
||||||
{ aiModel: this.config.model, selectionMethod, promptTokens: this.estimateTokens(prompt) }
|
{ aiModel: this.config.model, selectionMethod, promptTokens: estimatedTokens, toolsSent: toolsToSend.length }
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -457,74 +587,26 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
};
|
};
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('[IMPROVED PIPELINE] AI selection failed:', error);
|
console.error('[AI PIPELINE] AI selection failed:', error);
|
||||||
|
|
||||||
// NEW: Add Audit Entry for Selection Error
|
|
||||||
if (this.auditConfig.enabled) {
|
if (this.auditConfig.enabled) {
|
||||||
this.addAuditEntry(null, 'selection', 'ai-tool-selection-error',
|
this.addAuditEntry(null, 'selection', 'ai-tool-selection-error',
|
||||||
{ candidateCount: candidateTools.length, mode },
|
{ candidateCount: candidateTools.length, mode },
|
||||||
{ error: error.message },
|
{ error: error.message },
|
||||||
5, // Very low confidence
|
5,
|
||||||
selectionStart,
|
selectionStart,
|
||||||
{ aiModel: this.config.model, selectionMethod }
|
{ aiModel: this.config.model, selectionMethod, tokensSent: estimatedTokens }
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
throw error;
|
||||||
console.log('[IMPROVED PIPELINE] Using emergency keyword-based selection');
|
|
||||||
return this.emergencyKeywordSelection(userQuery, candidateTools, candidateConcepts, mode);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private emergencyKeywordSelection(userQuery: string, candidateTools: any[], candidateConcepts: any[], mode: string) {
|
|
||||||
const emergencyStart = Date.now();
|
|
||||||
|
|
||||||
const queryLower = userQuery.toLowerCase();
|
|
||||||
const keywords = queryLower.split(/\s+/).filter(word => word.length > 3);
|
|
||||||
|
|
||||||
const scoredTools = candidateTools.map(tool => {
|
|
||||||
const toolText = (
|
|
||||||
tool.name + ' ' +
|
|
||||||
tool.description + ' ' +
|
|
||||||
(tool.tags || []).join(' ') + ' ' +
|
|
||||||
(tool.platforms || []).join(' ') + ' ' +
|
|
||||||
(tool.domains || []).join(' ')
|
|
||||||
).toLowerCase();
|
|
||||||
|
|
||||||
const score = keywords.reduce((acc, keyword) => {
|
|
||||||
return acc + (toolText.includes(keyword) ? 1 : 0);
|
|
||||||
}, 0);
|
|
||||||
|
|
||||||
return { tool, score };
|
|
||||||
}).filter(item => item.score > 0)
|
|
||||||
.sort((a, b) => b.score - a.score);
|
|
||||||
|
|
||||||
const maxTools = mode === 'workflow' ? 20 : 8;
|
|
||||||
const selectedTools = scoredTools.slice(0, maxTools).map(item => item.tool);
|
|
||||||
|
|
||||||
console.log(`[IMPROVED PIPELINE] Emergency selection: ${selectedTools.length} tools, keywords: ${keywords.slice(0, 5).join(', ')}`);
|
|
||||||
|
|
||||||
// NEW: Add Audit Entry for Emergency Selection
|
|
||||||
if (this.auditConfig.enabled) {
|
|
||||||
this.addAuditEntry(null, 'selection', 'emergency-keyword-selection',
|
|
||||||
{ keywords: keywords.slice(0, 10), candidateCount: candidateTools.length },
|
|
||||||
{ selectedCount: selectedTools.length, topScores: scoredTools.slice(0, 5).map(s => ({ name: s.tool.name, score: s.score })) },
|
|
||||||
40, // Moderate confidence for emergency selection
|
|
||||||
emergencyStart,
|
|
||||||
{ selectionMethod: 'emergency_keyword' }
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
return {
|
|
||||||
selectedTools,
|
|
||||||
selectedConcepts: candidateConcepts.slice(0, 3)
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
private async delay(ms: number): Promise<void> {
|
private async delay(ms: number): Promise<void> {
|
||||||
return new Promise(resolve => setTimeout(resolve, ms));
|
return new Promise(resolve => setTimeout(resolve, ms));
|
||||||
}
|
}
|
||||||
|
|
||||||
private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens: number = 300): Promise<MicroTaskResult> {
|
private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens: number = 500): Promise<MicroTaskResult> {
|
||||||
const startTime = Date.now();
|
const startTime = Date.now();
|
||||||
|
|
||||||
let contextPrompt = prompt;
|
let contextPrompt = prompt;
|
||||||
@ -549,11 +631,10 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
success: true
|
success: true
|
||||||
};
|
};
|
||||||
|
|
||||||
// NEW: Add Audit Entry for Successful Micro-Task
|
|
||||||
this.addAuditEntry(context, 'micro-task', 'ai-analysis',
|
this.addAuditEntry(context, 'micro-task', 'ai-analysis',
|
||||||
{ promptLength: contextPrompt.length, maxTokens },
|
{ promptLength: contextPrompt.length, maxTokens },
|
||||||
{ responseLength: response.length, contentPreview: response.slice(0, 100) },
|
{ responseLength: response.length, contentPreview: response.slice(0, 100) },
|
||||||
response.length > 50 ? 80 : 60, // Confidence based on response quality
|
response.length > 50 ? 80 : 60,
|
||||||
startTime,
|
startTime,
|
||||||
{ aiModel: this.config.model, contextUsed: context.contextHistory.length > 0 }
|
{ aiModel: this.config.model, contextUsed: context.contextHistory.length > 0 }
|
||||||
);
|
);
|
||||||
@ -569,11 +650,10 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
error: error.message
|
error: error.message
|
||||||
};
|
};
|
||||||
|
|
||||||
// NEW: Add Audit Entry for Failed Micro-Task
|
|
||||||
this.addAuditEntry(context, 'micro-task', 'ai-analysis-failed',
|
this.addAuditEntry(context, 'micro-task', 'ai-analysis-failed',
|
||||||
{ promptLength: contextPrompt.length, maxTokens },
|
{ promptLength: contextPrompt.length, maxTokens },
|
||||||
{ error: error.message },
|
{ error: error.message },
|
||||||
5, // Very low confidence
|
5,
|
||||||
startTime,
|
startTime,
|
||||||
{ aiModel: this.config.model, contextUsed: context.contextHistory.length > 0 }
|
{ aiModel: this.config.model, contextUsed: context.contextHistory.length > 0 }
|
||||||
);
|
);
|
||||||
@ -586,7 +666,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
const isWorkflow = context.mode === 'workflow';
|
const isWorkflow = context.mode === 'workflow';
|
||||||
const prompt = getPrompt('scenarioAnalysis', isWorkflow, context.userQuery);
|
const prompt = getPrompt('scenarioAnalysis', isWorkflow, context.userQuery);
|
||||||
|
|
||||||
const result = await this.callMicroTaskAI(prompt, context, 220);
|
const result = await this.callMicroTaskAI(prompt, context, 400);
|
||||||
|
|
||||||
if (result.success) {
|
if (result.success) {
|
||||||
if (isWorkflow) {
|
if (isWorkflow) {
|
||||||
@ -605,7 +685,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
const isWorkflow = context.mode === 'workflow';
|
const isWorkflow = context.mode === 'workflow';
|
||||||
const prompt = getPrompt('investigationApproach', isWorkflow, context.userQuery);
|
const prompt = getPrompt('investigationApproach', isWorkflow, context.userQuery);
|
||||||
|
|
||||||
const result = await this.callMicroTaskAI(prompt, context, 220);
|
const result = await this.callMicroTaskAI(prompt, context, 400);
|
||||||
|
|
||||||
if (result.success) {
|
if (result.success) {
|
||||||
context.investigationApproach = result.content;
|
context.investigationApproach = result.content;
|
||||||
@ -619,7 +699,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
const isWorkflow = context.mode === 'workflow';
|
const isWorkflow = context.mode === 'workflow';
|
||||||
const prompt = getPrompt('criticalConsiderations', isWorkflow, context.userQuery);
|
const prompt = getPrompt('criticalConsiderations', isWorkflow, context.userQuery);
|
||||||
|
|
||||||
const result = await this.callMicroTaskAI(prompt, context, 180);
|
const result = await this.callMicroTaskAI(prompt, context, 350);
|
||||||
|
|
||||||
if (result.success) {
|
if (result.success) {
|
||||||
context.criticalConsiderations = result.content;
|
context.criticalConsiderations = result.content;
|
||||||
@ -645,7 +725,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
|
|
||||||
const prompt = getPrompt('phaseToolSelection', context.userQuery, phase, phaseTools);
|
const prompt = getPrompt('phaseToolSelection', context.userQuery, phase, phaseTools);
|
||||||
|
|
||||||
const result = await this.callMicroTaskAI(prompt, context, 450);
|
const result = await this.callMicroTaskAI(prompt, context, 800);
|
||||||
|
|
||||||
if (result.success) {
|
if (result.success) {
|
||||||
const selections = this.safeParseJSON(result.content, []);
|
const selections = this.safeParseJSON(result.content, []);
|
||||||
@ -662,7 +742,6 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// NEW: Add audit entry for tool selection
|
|
||||||
this.addAuditEntry(context, 'micro-task', 'phase-tool-selection',
|
this.addAuditEntry(context, 'micro-task', 'phase-tool-selection',
|
||||||
{ phase: phase.id, availableTools: phaseTools.length },
|
{ phase: phase.id, availableTools: phaseTools.length },
|
||||||
{ validSelections: validSelections.length, selectedTools: validSelections.map(s => s.toolName) },
|
{ validSelections: validSelections.length, selectedTools: validSelections.map(s => s.toolName) },
|
||||||
@ -679,7 +758,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
|
private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
|
||||||
const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank);
|
const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank);
|
||||||
|
|
||||||
const result = await this.callMicroTaskAI(prompt, context, 650);
|
const result = await this.callMicroTaskAI(prompt, context, 1200);
|
||||||
|
|
||||||
if (result.success) {
|
if (result.success) {
|
||||||
const evaluation = this.safeParseJSON(result.content, {
|
const evaluation = this.safeParseJSON(result.content, {
|
||||||
@ -699,7 +778,6 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
}
|
}
|
||||||
}, 'evaluation', evaluation.suitability_score);
|
}, 'evaluation', evaluation.suitability_score);
|
||||||
|
|
||||||
// NEW: Add audit entry for tool evaluation
|
|
||||||
this.addAuditEntry(context, 'micro-task', 'tool-evaluation',
|
this.addAuditEntry(context, 'micro-task', 'tool-evaluation',
|
||||||
{ toolName: tool.name, rank },
|
{ toolName: tool.name, rank },
|
||||||
{ suitabilityScore: evaluation.suitability_score, hasExplanation: !!evaluation.detailed_explanation },
|
{ suitabilityScore: evaluation.suitability_score, hasExplanation: !!evaluation.detailed_explanation },
|
||||||
@ -727,7 +805,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
const selectedToolNames = context.selectedTools?.map(st => st.tool.name) || [];
|
const selectedToolNames = context.selectedTools?.map(st => st.tool.name) || [];
|
||||||
const prompt = getPrompt('backgroundKnowledgeSelection', context.userQuery, context.mode, selectedToolNames, availableConcepts);
|
const prompt = getPrompt('backgroundKnowledgeSelection', context.userQuery, context.mode, selectedToolNames, availableConcepts);
|
||||||
|
|
||||||
const result = await this.callMicroTaskAI(prompt, context, 400);
|
const result = await this.callMicroTaskAI(prompt, context, 700);
|
||||||
|
|
||||||
if (result.success) {
|
if (result.success) {
|
||||||
const selections = this.safeParseJSON(result.content, []);
|
const selections = this.safeParseJSON(result.content, []);
|
||||||
@ -740,7 +818,6 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
relevance: sel.relevance
|
relevance: sel.relevance
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// NEW: Add audit entry for background knowledge selection
|
|
||||||
this.addAuditEntry(context, 'micro-task', 'background-knowledge-selection',
|
this.addAuditEntry(context, 'micro-task', 'background-knowledge-selection',
|
||||||
{ availableConcepts: availableConcepts.length },
|
{ availableConcepts: availableConcepts.length },
|
||||||
{ selectedConcepts: context.backgroundKnowledge?.length || 0 },
|
{ selectedConcepts: context.backgroundKnowledge?.length || 0 },
|
||||||
@ -758,21 +835,19 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
const selectedToolNames = context.selectedTools?.map(st => st.tool.name) || [];
|
const selectedToolNames = context.selectedTools?.map(st => st.tool.name) || [];
|
||||||
const prompt = getPrompt('finalRecommendations', context.mode === 'workflow', context.userQuery, selectedToolNames);
|
const prompt = getPrompt('finalRecommendations', context.mode === 'workflow', context.userQuery, selectedToolNames);
|
||||||
|
|
||||||
const result = await this.callMicroTaskAI(prompt, context, 180);
|
const result = await this.callMicroTaskAI(prompt, context, 350);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
private async callAI(prompt: string, maxTokens: number = 1000): Promise<string> {
|
private async callAI(prompt: string, maxTokens: number = 1500): Promise<string> {
|
||||||
const endpoint = this.config.endpoint;
|
const endpoint = this.config.endpoint;
|
||||||
const apiKey = this.config.apiKey;
|
const apiKey = this.config.apiKey;
|
||||||
const model = this.config.model;
|
const model = this.config.model;
|
||||||
|
|
||||||
// Simple headers - add auth only if API key exists
|
|
||||||
let headers: Record<string, string> = {
|
let headers: Record<string, string> = {
|
||||||
'Content-Type': 'application/json'
|
'Content-Type': 'application/json'
|
||||||
};
|
};
|
||||||
|
|
||||||
// Add authentication if API key is provided
|
|
||||||
if (apiKey) {
|
if (apiKey) {
|
||||||
headers['Authorization'] = `Bearer ${apiKey}`;
|
headers['Authorization'] = `Bearer ${apiKey}`;
|
||||||
console.log('[AI PIPELINE] Using API key authentication');
|
console.log('[AI PIPELINE] Using API key authentication');
|
||||||
@ -780,7 +855,6 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
console.log('[AI PIPELINE] No API key - making request without authentication');
|
console.log('[AI PIPELINE] No API key - making request without authentication');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Simple request body
|
|
||||||
const requestBody = {
|
const requestBody = {
|
||||||
model,
|
model,
|
||||||
messages: [{ role: 'user', content: prompt }],
|
messages: [{ role: 'user', content: prompt }],
|
||||||
@ -789,7 +863,6 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
};
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// FIXED: Use direct fetch since entire pipeline is already queued at query.ts level
|
|
||||||
const response = await fetch(`${endpoint}/v1/chat/completions`, {
|
const response = await fetch(`${endpoint}/v1/chat/completions`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers,
|
headers,
|
||||||
@ -823,13 +896,11 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
let completedTasks = 0;
|
let completedTasks = 0;
|
||||||
let failedTasks = 0;
|
let failedTasks = 0;
|
||||||
|
|
||||||
// NEW: Clear any previous temporary audit entries
|
|
||||||
this.tempAuditEntries = [];
|
this.tempAuditEntries = [];
|
||||||
|
|
||||||
console.log(`[IMPROVED PIPELINE] Starting ${mode} query processing with context continuity and audit trail`);
|
console.log(`[AI PIPELINE] Starting ${mode} query processing with context continuity and audit trail`);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Stage 1: Get intelligent candidates (embeddings + AI selection)
|
|
||||||
const toolsData = await getCompressedToolsDataForAI();
|
const toolsData = await getCompressedToolsDataForAI();
|
||||||
const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode);
|
const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode);
|
||||||
|
|
||||||
@ -841,20 +912,17 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
maxContextLength: this.maxContextTokens,
|
maxContextLength: this.maxContextTokens,
|
||||||
currentContextLength: 0,
|
currentContextLength: 0,
|
||||||
seenToolNames: new Set<string>(),
|
seenToolNames: new Set<string>(),
|
||||||
// NEW: Initialize audit trail
|
|
||||||
auditTrail: []
|
auditTrail: []
|
||||||
};
|
};
|
||||||
|
|
||||||
// NEW: Merge any temporary audit entries from pre-context operations
|
|
||||||
this.mergeTemporaryAuditEntries(context);
|
this.mergeTemporaryAuditEntries(context);
|
||||||
|
|
||||||
console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
|
console.log(`[AI PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
|
||||||
|
|
||||||
// NEW: Add initial audit entry
|
|
||||||
this.addAuditEntry(context, 'initialization', 'pipeline-start',
|
this.addAuditEntry(context, 'initialization', 'pipeline-start',
|
||||||
{ userQuery, mode, toolsDataLoaded: !!toolsData },
|
{ userQuery, mode, toolsDataLoaded: !!toolsData },
|
||||||
{ candidateTools: filteredData.tools.length, candidateConcepts: filteredData.concepts.length },
|
{ candidateTools: filteredData.tools.length, candidateConcepts: filteredData.concepts.length },
|
||||||
90, // High confidence for initialization
|
90,
|
||||||
startTime,
|
startTime,
|
||||||
{ auditEnabled: this.auditConfig.enabled }
|
{ auditEnabled: this.auditConfig.enabled }
|
||||||
);
|
);
|
||||||
@ -893,19 +961,15 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Task 5: Background Knowledge Selection
|
|
||||||
const knowledgeResult = await this.selectBackgroundKnowledge(context);
|
const knowledgeResult = await this.selectBackgroundKnowledge(context);
|
||||||
if (knowledgeResult.success) completedTasks++; else failedTasks++;
|
if (knowledgeResult.success) completedTasks++; else failedTasks++;
|
||||||
await this.delay(this.microTaskDelay);
|
await this.delay(this.microTaskDelay);
|
||||||
|
|
||||||
// Task 6: Final Recommendations
|
|
||||||
const finalResult = await this.generateFinalRecommendations(context);
|
const finalResult = await this.generateFinalRecommendations(context);
|
||||||
if (finalResult.success) completedTasks++; else failedTasks++;
|
if (finalResult.success) completedTasks++; else failedTasks++;
|
||||||
|
|
||||||
// Build final recommendation
|
|
||||||
const recommendation = this.buildRecommendation(context, mode, finalResult.content);
|
const recommendation = this.buildRecommendation(context, mode, finalResult.content);
|
||||||
|
|
||||||
// NEW: Add final audit entry
|
|
||||||
this.addAuditEntry(context, 'completion', 'pipeline-end',
|
this.addAuditEntry(context, 'completion', 'pipeline-end',
|
||||||
{ completedTasks, failedTasks },
|
{ completedTasks, failedTasks },
|
||||||
{ finalRecommendation: !!recommendation, auditEntriesGenerated: context.auditTrail.length },
|
{ finalRecommendation: !!recommendation, auditEntriesGenerated: context.auditTrail.length },
|
||||||
@ -925,23 +989,21 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
|
|||||||
contextContinuityUsed: true
|
contextContinuityUsed: true
|
||||||
};
|
};
|
||||||
|
|
||||||
console.log(`[IMPROVED PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
|
console.log(`[AI PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
|
||||||
console.log(`[IMPROVED PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);
|
console.log(`[AI PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);
|
||||||
console.log(`[IMPROVED PIPELINE] Audit trail entries: ${context.auditTrail.length}`);
|
console.log(`[AI PIPELINE] Audit trail entries: ${context.auditTrail.length}`);
|
||||||
|
|
||||||
return {
|
return {
|
||||||
recommendation: {
|
recommendation: {
|
||||||
...recommendation,
|
...recommendation,
|
||||||
// NEW: Include audit trail in response
|
|
||||||
auditTrail: this.auditConfig.enabled ? context.auditTrail : undefined
|
auditTrail: this.auditConfig.enabled ? context.auditTrail : undefined
|
||||||
},
|
},
|
||||||
processingStats
|
processingStats
|
||||||
};
|
};
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('[IMPROVED PIPELINE] Processing failed:', error);
|
console.error('[AI PIPELINE] Processing failed:', error);
|
||||||
|
|
||||||
// NEW: Ensure temp audit entries are cleared even on error
|
|
||||||
this.tempAuditEntries = [];
|
this.tempAuditEntries = [];
|
||||||
|
|
||||||
throw error;
|
throw error;
|
||||||
|
@ -77,33 +77,8 @@ interface EnhancedCompressedToolsData {
|
|||||||
domains: any[];
|
domains: any[];
|
||||||
phases: any[];
|
phases: any[];
|
||||||
'domain-agnostic-software': any[];
|
'domain-agnostic-software': any[];
|
||||||
scenarios?: any[]; // Optional for AI processing
|
scenarios?: any[];
|
||||||
skill_levels: any;
|
skill_levels: any;
|
||||||
// Enhanced context for micro-tasks
|
|
||||||
domain_relationships: DomainRelationship[];
|
|
||||||
phase_dependencies: PhaseDependency[];
|
|
||||||
tool_compatibility_matrix: CompatibilityMatrix[];
|
|
||||||
}
|
|
||||||
|
|
||||||
interface DomainRelationship {
|
|
||||||
domain_id: string;
|
|
||||||
tool_count: number;
|
|
||||||
common_tags: string[];
|
|
||||||
skill_distribution: Record<string, number>;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface PhaseDependency {
|
|
||||||
phase_id: string;
|
|
||||||
order: number;
|
|
||||||
depends_on: string | null;
|
|
||||||
enables: string | null;
|
|
||||||
is_parallel_capable: boolean;
|
|
||||||
typical_duration: string;
|
|
||||||
}
|
|
||||||
|
|
||||||
interface CompatibilityMatrix {
|
|
||||||
type: string;
|
|
||||||
groups: Record<string, string[]>;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let cachedData: ToolsData | null = null;
|
let cachedData: ToolsData | null = null;
|
||||||
@ -146,104 +121,6 @@ function generateDataVersion(data: any): string {
|
|||||||
return Math.abs(hash).toString(36);
|
return Math.abs(hash).toString(36);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enhanced: Generate domain relationships for better AI understanding
|
|
||||||
function generateDomainRelationships(domains: any[], tools: any[]): DomainRelationship[] {
|
|
||||||
const relationships: DomainRelationship[] = [];
|
|
||||||
|
|
||||||
for (const domain of domains) {
|
|
||||||
const domainTools = tools.filter(tool =>
|
|
||||||
tool.domains && tool.domains.includes(domain.id)
|
|
||||||
);
|
|
||||||
|
|
||||||
const commonTags = domainTools
|
|
||||||
.flatMap(tool => tool.tags || [])
|
|
||||||
.reduce((acc: any, tag: string) => {
|
|
||||||
acc[tag] = (acc[tag] || 0) + 1;
|
|
||||||
return acc;
|
|
||||||
}, {});
|
|
||||||
|
|
||||||
const topTags = Object.entries(commonTags)
|
|
||||||
.sort(([,a], [,b]) => (b as number) - (a as number))
|
|
||||||
.slice(0, 5)
|
|
||||||
.map(([tag]) => tag);
|
|
||||||
|
|
||||||
relationships.push({
|
|
||||||
domain_id: domain.id,
|
|
||||||
tool_count: domainTools.length,
|
|
||||||
common_tags: topTags,
|
|
||||||
skill_distribution: domainTools.reduce((acc: any, tool: any) => {
|
|
||||||
acc[tool.skillLevel] = (acc[tool.skillLevel] || 0) + 1;
|
|
||||||
return acc;
|
|
||||||
}, {})
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
return relationships;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Enhanced: Generate phase dependencies
|
|
||||||
function generatePhaseDependencies(phases: any[]): PhaseDependency[] {
|
|
||||||
const dependencies: PhaseDependency[] = [];
|
|
||||||
|
|
||||||
for (let i = 0; i < phases.length; i++) {
|
|
||||||
const phase = phases[i];
|
|
||||||
const nextPhase = phases[i + 1];
|
|
||||||
const prevPhase = phases[i - 1];
|
|
||||||
|
|
||||||
dependencies.push({
|
|
||||||
phase_id: phase.id,
|
|
||||||
order: i + 1,
|
|
||||||
depends_on: prevPhase?.id || null,
|
|
||||||
enables: nextPhase?.id || null,
|
|
||||||
is_parallel_capable: ['examination', 'analysis'].includes(phase.id), // Some phases can run in parallel
|
|
||||||
typical_duration: phase.id === 'data-collection' ? 'hours-days' :
|
|
||||||
phase.id === 'examination' ? 'hours-weeks' :
|
|
||||||
phase.id === 'analysis' ? 'days-weeks' :
|
|
||||||
'hours-days'
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
return dependencies;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Enhanced: Generate tool compatibility matrix
|
|
||||||
function generateToolCompatibilityMatrix(tools: any[]): CompatibilityMatrix[] {
|
|
||||||
const matrix: CompatibilityMatrix[] = [];
|
|
||||||
|
|
||||||
// Group tools by common characteristics
|
|
||||||
const platformGroups = tools.reduce((acc: any, tool: any) => {
|
|
||||||
if (tool.platforms) {
|
|
||||||
tool.platforms.forEach((platform: string) => {
|
|
||||||
if (!acc[platform]) acc[platform] = [];
|
|
||||||
acc[platform].push(tool.name);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
return acc;
|
|
||||||
}, {});
|
|
||||||
|
|
||||||
const phaseGroups = tools.reduce((acc: any, tool: any) => {
|
|
||||||
if (tool.phases) {
|
|
||||||
tool.phases.forEach((phase: string) => {
|
|
||||||
if (!acc[phase]) acc[phase] = [];
|
|
||||||
acc[phase].push(tool.name);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
return acc;
|
|
||||||
}, {});
|
|
||||||
|
|
||||||
matrix.push({
|
|
||||||
type: 'platform_compatibility',
|
|
||||||
groups: platformGroups
|
|
||||||
});
|
|
||||||
|
|
||||||
matrix.push({
|
|
||||||
type: 'phase_synergy',
|
|
||||||
groups: phaseGroups
|
|
||||||
});
|
|
||||||
|
|
||||||
return matrix;
|
|
||||||
}
|
|
||||||
|
|
||||||
async function loadRawData(): Promise<ToolsData> {
|
async function loadRawData(): Promise<ToolsData> {
|
||||||
if (!cachedData) {
|
if (!cachedData) {
|
||||||
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
|
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
|
||||||
@ -253,7 +130,6 @@ async function loadRawData(): Promise<ToolsData> {
|
|||||||
try {
|
try {
|
||||||
cachedData = ToolsDataSchema.parse(rawData);
|
cachedData = ToolsDataSchema.parse(rawData);
|
||||||
|
|
||||||
// Enhanced: Add default skill level descriptions if not provided
|
|
||||||
if (!cachedData.skill_levels || Object.keys(cachedData.skill_levels).length === 0) {
|
if (!cachedData.skill_levels || Object.keys(cachedData.skill_levels).length === 0) {
|
||||||
cachedData.skill_levels = {
|
cachedData.skill_levels = {
|
||||||
novice: "Minimal technical background required, guided interfaces",
|
novice: "Minimal technical background required, guided interfaces",
|
||||||
@ -301,21 +177,18 @@ export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedT
|
|||||||
if (!cachedCompressedData) {
|
if (!cachedCompressedData) {
|
||||||
const data = await getToolsData();
|
const data = await getToolsData();
|
||||||
|
|
||||||
// Enhanced: More detailed tool information for micro-tasks
|
|
||||||
const compressedTools = data.tools
|
const compressedTools = data.tools
|
||||||
.filter(tool => tool.type !== 'concept')
|
.filter(tool => tool.type !== 'concept')
|
||||||
.map(tool => {
|
.map(tool => {
|
||||||
const { projectUrl, statusUrl, ...compressedTool } = tool;
|
const { projectUrl, statusUrl, ...compressedTool } = tool;
|
||||||
return {
|
return {
|
||||||
...compressedTool,
|
...compressedTool,
|
||||||
// Enhanced: Add computed fields for AI
|
|
||||||
is_hosted: projectUrl !== undefined && projectUrl !== null && projectUrl !== "" && projectUrl.trim() !== "",
|
is_hosted: projectUrl !== undefined && projectUrl !== null && projectUrl !== "" && projectUrl.trim() !== "",
|
||||||
is_open_source: tool.license && tool.license !== 'Proprietary',
|
is_open_source: tool.license && tool.license !== 'Proprietary',
|
||||||
complexity_score: tool.skillLevel === 'expert' ? 5 :
|
complexity_score: tool.skillLevel === 'expert' ? 5 :
|
||||||
tool.skillLevel === 'advanced' ? 4 :
|
tool.skillLevel === 'advanced' ? 4 :
|
||||||
tool.skillLevel === 'intermediate' ? 3 :
|
tool.skillLevel === 'intermediate' ? 3 :
|
||||||
tool.skillLevel === 'beginner' ? 2 : 1,
|
tool.skillLevel === 'beginner' ? 2 : 1,
|
||||||
// Enhanced: Phase-specific suitability hints
|
|
||||||
phase_suitability: tool.phases?.map(phase => ({
|
phase_suitability: tool.phases?.map(phase => ({
|
||||||
phase,
|
phase,
|
||||||
primary_use: tool.tags?.find(tag => tag.includes(phase)) ? 'primary' : 'secondary'
|
primary_use: tool.tags?.find(tag => tag.includes(phase)) ? 'primary' : 'secondary'
|
||||||
@ -329,7 +202,6 @@ export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedT
|
|||||||
const { projectUrl, statusUrl, platforms, accessType, license, ...compressedConcept } = concept;
|
const { projectUrl, statusUrl, platforms, accessType, license, ...compressedConcept } = concept;
|
||||||
return {
|
return {
|
||||||
...compressedConcept,
|
...compressedConcept,
|
||||||
// Enhanced: Learning difficulty indicator
|
|
||||||
learning_complexity: concept.skillLevel === 'expert' ? 'very_high' :
|
learning_complexity: concept.skillLevel === 'expert' ? 'very_high' :
|
||||||
concept.skillLevel === 'advanced' ? 'high' :
|
concept.skillLevel === 'advanced' ? 'high' :
|
||||||
concept.skillLevel === 'intermediate' ? 'medium' :
|
concept.skillLevel === 'intermediate' ? 'medium' :
|
||||||
@ -337,27 +209,16 @@ export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedT
|
|||||||
};
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
// Enhanced: Add rich context data
|
|
||||||
const domainRelationships = generateDomainRelationships(data.domains, compressedTools);
|
|
||||||
const phaseDependencies = generatePhaseDependencies(data.phases);
|
|
||||||
const toolCompatibilityMatrix = generateToolCompatibilityMatrix(compressedTools);
|
|
||||||
|
|
||||||
cachedCompressedData = {
|
cachedCompressedData = {
|
||||||
tools: compressedTools,
|
tools: compressedTools,
|
||||||
concepts: concepts,
|
concepts: concepts,
|
||||||
domains: data.domains,
|
domains: data.domains,
|
||||||
phases: data.phases,
|
phases: data.phases,
|
||||||
'domain-agnostic-software': data['domain-agnostic-software'],
|
'domain-agnostic-software': data['domain-agnostic-software'],
|
||||||
scenarios: data.scenarios, // Include scenarios for context
|
scenarios: data.scenarios,
|
||||||
skill_levels: data.skill_levels || {},
|
skill_levels: data.skill_levels || {},
|
||||||
// Enhanced context for micro-tasks
|
|
||||||
domain_relationships: domainRelationships,
|
|
||||||
phase_dependencies: phaseDependencies,
|
|
||||||
tool_compatibility_matrix: toolCompatibilityMatrix
|
|
||||||
};
|
};
|
||||||
|
|
||||||
console.log(`[DATA SERVICE] Generated enhanced compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
|
|
||||||
console.log(`[DATA SERVICE] Added context: ${domainRelationships.length} domain relationships, ${phaseDependencies.length} phase dependencies`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return cachedCompressedData;
|
return cachedCompressedData;
|
||||||
|
@ -31,6 +31,7 @@ interface SimilarityResult extends EmbeddingData {
|
|||||||
class EmbeddingsService {
|
class EmbeddingsService {
|
||||||
private embeddings: EmbeddingData[] = [];
|
private embeddings: EmbeddingData[] = [];
|
||||||
private isInitialized = false;
|
private isInitialized = false;
|
||||||
|
private initializationPromise: Promise<void> | null = null; // ADD THIS LINE
|
||||||
private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
|
private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
|
||||||
private readonly batchSize: number;
|
private readonly batchSize: number;
|
||||||
private readonly batchDelay: number;
|
private readonly batchDelay: number;
|
||||||
@ -42,7 +43,25 @@ class EmbeddingsService {
|
|||||||
this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
|
this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// REPLACE the existing initialize method with this:
|
||||||
async initialize(): Promise<void> {
|
async initialize(): Promise<void> {
|
||||||
|
// If initialization is already in progress, wait for it
|
||||||
|
if (this.initializationPromise) {
|
||||||
|
return this.initializationPromise;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If already initialized, return immediately
|
||||||
|
if (this.isInitialized) {
|
||||||
|
return Promise.resolve();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start initialization and store the promise
|
||||||
|
this.initializationPromise = this.performInitialization();
|
||||||
|
return this.initializationPromise;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ADD THIS NEW METHOD:
|
||||||
|
private async performInitialization(): Promise<void> {
|
||||||
if (!this.enabled) {
|
if (!this.enabled) {
|
||||||
console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
|
console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
|
||||||
return;
|
return;
|
||||||
@ -74,9 +93,29 @@ class EmbeddingsService {
|
|||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('[EMBEDDINGS] Failed to initialize:', error);
|
console.error('[EMBEDDINGS] Failed to initialize:', error);
|
||||||
this.isInitialized = false;
|
this.isInitialized = false;
|
||||||
|
throw error;
|
||||||
|
} finally {
|
||||||
|
this.initializationPromise = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async waitForInitialization(): Promise<void> {
|
||||||
|
if (!this.enabled) {
|
||||||
|
return Promise.resolve();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.isInitialized) {
|
||||||
|
return Promise.resolve();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.initializationPromise) {
|
||||||
|
await this.initializationPromise;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
return this.initialize();
|
||||||
|
}
|
||||||
|
|
||||||
private hashData(data: any): string {
|
private hashData(data: any): string {
|
||||||
return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32);
|
return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32);
|
||||||
}
|
}
|
||||||
@ -127,7 +166,6 @@ class EmbeddingsService {
|
|||||||
'Content-Type': 'application/json'
|
'Content-Type': 'application/json'
|
||||||
};
|
};
|
||||||
|
|
||||||
// API key is optional for Ollama but required for Mistral/OpenAI
|
|
||||||
if (apiKey) {
|
if (apiKey) {
|
||||||
headers['Authorization'] = `Bearer ${apiKey}`;
|
headers['Authorization'] = `Bearer ${apiKey}`;
|
||||||
}
|
}
|
||||||
@ -148,12 +186,10 @@ class EmbeddingsService {
|
|||||||
|
|
||||||
const data = await response.json();
|
const data = await response.json();
|
||||||
|
|
||||||
// Detect Ollama format
|
|
||||||
if (Array.isArray(data.embeddings)) {
|
if (Array.isArray(data.embeddings)) {
|
||||||
return data.embeddings;
|
return data.embeddings;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Detect OpenAI/Mistral format
|
|
||||||
if (Array.isArray(data.data)) {
|
if (Array.isArray(data.data)) {
|
||||||
return data.data.map((item: any) => item.embedding);
|
return data.data.map((item: any) => item.embedding);
|
||||||
}
|
}
|
||||||
@ -170,7 +206,6 @@ class EmbeddingsService {
|
|||||||
const contents = allItems.map(item => this.createContentString(item));
|
const contents = allItems.map(item => this.createContentString(item));
|
||||||
this.embeddings = [];
|
this.embeddings = [];
|
||||||
|
|
||||||
// Process in batches to respect rate limits
|
|
||||||
for (let i = 0; i < contents.length; i += this.batchSize) {
|
for (let i = 0; i < contents.length; i += this.batchSize) {
|
||||||
const batch = contents.slice(i, i + this.batchSize);
|
const batch = contents.slice(i, i + this.batchSize);
|
||||||
const batchItems = allItems.slice(i, i + this.batchSize);
|
const batchItems = allItems.slice(i, i + this.batchSize);
|
||||||
@ -198,7 +233,6 @@ class EmbeddingsService {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
// Rate limiting delay between batches
|
|
||||||
if (i + this.batchSize < contents.length) {
|
if (i + this.batchSize < contents.length) {
|
||||||
await new Promise(resolve => setTimeout(resolve, this.batchDelay));
|
await new Promise(resolve => setTimeout(resolve, this.batchDelay));
|
||||||
}
|
}
|
||||||
@ -213,7 +247,6 @@ class EmbeddingsService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public async embedText(text: string): Promise<number[]> {
|
public async embedText(text: string): Promise<number[]> {
|
||||||
// Re‑use the private batch helper to avoid auth duplication
|
|
||||||
const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
|
const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
|
||||||
return embedding;
|
return embedding;
|
||||||
}
|
}
|
||||||
@ -239,25 +272,21 @@ class EmbeddingsService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Generate embedding for query
|
|
||||||
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
|
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
|
||||||
const queryEmbedding = queryEmbeddings[0];
|
const queryEmbedding = queryEmbeddings[0];
|
||||||
|
|
||||||
console.log(`[EMBEDDINGS] Computing similarities for ${this.embeddings.length} items`);
|
console.log(`[EMBEDDINGS] Computing similarities for ${this.embeddings.length} items`);
|
||||||
|
|
||||||
// Calculate similarities - properly typed
|
|
||||||
const similarities: SimilarityResult[] = this.embeddings.map(item => ({
|
const similarities: SimilarityResult[] = this.embeddings.map(item => ({
|
||||||
...item,
|
...item,
|
||||||
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
|
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
|
||||||
}));
|
}));
|
||||||
|
|
||||||
// Filter by threshold and sort by similarity (descending - highest first)
|
|
||||||
const results = similarities
|
const results = similarities
|
||||||
.filter(item => item.similarity >= threshold)
|
.filter(item => item.similarity >= threshold)
|
||||||
.sort((a, b) => b.similarity - a.similarity) // CRITICAL: Ensure descending order
|
.sort((a, b) => b.similarity - a.similarity)
|
||||||
.slice(0, maxResults);
|
.slice(0, maxResults);
|
||||||
|
|
||||||
// ENHANCED: Verify ordering is correct
|
|
||||||
const orderingValid = results.every((item, index) => {
|
const orderingValid = results.every((item, index) => {
|
||||||
if (index === 0) return true;
|
if (index === 0) return true;
|
||||||
return item.similarity <= results[index - 1].similarity;
|
return item.similarity <= results[index - 1].similarity;
|
||||||
@ -270,15 +299,13 @@ class EmbeddingsService {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// ENHANCED: Log top results for debugging
|
|
||||||
console.log(`[EMBEDDINGS] Found ${results.length} similar items (threshold: ${threshold})`);
|
console.log(`[EMBEDDINGS] Found ${results.length} similar items (threshold: ${threshold})`);
|
||||||
if (results.length > 0) {
|
if (results.length > 0) {
|
||||||
console.log('[EMBEDDINGS] Top 5 similarity matches:');
|
console.log('[EMBEDDINGS] Top 10 similarity matches:');
|
||||||
results.slice(0, 5).forEach((item, idx) => {
|
results.slice(0, 10).forEach((item, idx) => {
|
||||||
console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`);
|
console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Verify first result is indeed the highest
|
|
||||||
const topSimilarity = results[0].similarity;
|
const topSimilarity = results[0].similarity;
|
||||||
const hasHigherSimilarity = results.some(item => item.similarity > topSimilarity);
|
const hasHigherSimilarity = results.some(item => item.similarity > topSimilarity);
|
||||||
if (hasHigherSimilarity) {
|
if (hasHigherSimilarity) {
|
||||||
|
@ -157,15 +157,6 @@ class RateLimitedQueue {
|
|||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
setDelay(ms: number): void {
|
|
||||||
if (!Number.isFinite(ms) || ms < 0) return;
|
|
||||||
this.delayMs = ms;
|
|
||||||
}
|
|
||||||
|
|
||||||
getDelay(): number {
|
|
||||||
return this.delayMs;
|
|
||||||
}
|
|
||||||
|
|
||||||
private async processQueue(): Promise<void> {
|
private async processQueue(): Promise<void> {
|
||||||
if (this.isProcessing) {
|
if (this.isProcessing) {
|
||||||
return;
|
return;
|
||||||
|
@ -1,8 +1,3 @@
|
|||||||
/**
|
|
||||||
* CONSOLIDATED Tool utility functions for consistent tool operations across the app
|
|
||||||
* Works in both server (Node.js) and client (browser) environments
|
|
||||||
*/
|
|
||||||
|
|
||||||
export interface Tool {
|
export interface Tool {
|
||||||
name: string;
|
name: string;
|
||||||
type?: 'software' | 'method' | 'concept';
|
type?: 'software' | 'method' | 'concept';
|
||||||
@ -18,10 +13,6 @@ export interface Tool {
|
|||||||
related_concepts?: string[];
|
related_concepts?: string[];
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates a URL-safe slug from a tool name
|
|
||||||
* Used for URLs, IDs, and file names consistently across the app
|
|
||||||
*/
|
|
||||||
export function createToolSlug(toolName: string): string {
|
export function createToolSlug(toolName: string): string {
|
||||||
if (!toolName || typeof toolName !== 'string') {
|
if (!toolName || typeof toolName !== 'string') {
|
||||||
console.warn('[toolHelpers] Invalid toolName provided to createToolSlug:', toolName);
|
console.warn('[toolHelpers] Invalid toolName provided to createToolSlug:', toolName);
|
||||||
@ -35,9 +26,6 @@ export function createToolSlug(toolName: string): string {
|
|||||||
.replace(/^-|-$/g, ''); // Remove leading/trailing hyphens
|
.replace(/^-|-$/g, ''); // Remove leading/trailing hyphens
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Finds a tool by name or slug from tools array
|
|
||||||
*/
|
|
||||||
export function findToolByIdentifier(tools: Tool[], identifier: string): Tool | undefined {
|
export function findToolByIdentifier(tools: Tool[], identifier: string): Tool | undefined {
|
||||||
if (!identifier || !Array.isArray(tools)) return undefined;
|
if (!identifier || !Array.isArray(tools)) return undefined;
|
||||||
|
|
||||||
@ -47,23 +35,9 @@ export function findToolByIdentifier(tools: Tool[], identifier: string): Tool |
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks if tool has a valid project URL (hosted on CC24 server)
|
|
||||||
*/
|
|
||||||
export function isToolHosted(tool: Tool): boolean {
|
export function isToolHosted(tool: Tool): boolean {
|
||||||
return tool.projectUrl !== undefined &&
|
return tool.projectUrl !== undefined &&
|
||||||
tool.projectUrl !== null &&
|
tool.projectUrl !== null &&
|
||||||
tool.projectUrl !== "" &&
|
tool.projectUrl !== "" &&
|
||||||
tool.projectUrl.trim() !== "";
|
tool.projectUrl.trim() !== "";
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Determines tool category for styling/logic
|
|
||||||
*/
|
|
||||||
export function getToolCategory(tool: Tool): 'concept' | 'method' | 'hosted' | 'oss' | 'proprietary' {
|
|
||||||
if (tool.type === 'concept') return 'concept';
|
|
||||||
if (tool.type === 'method') return 'method';
|
|
||||||
if (isToolHosted(tool)) return 'hosted';
|
|
||||||
if (tool.license && tool.license !== 'Proprietary') return 'oss';
|
|
||||||
return 'proprietary';
|
|
||||||
}
|
}
|
Loading…
x
Reference in New Issue
Block a user