forensic-ai #4

Merged
mstoeck3 merged 20 commits from forensic-ai into main 2025-08-05 20:56:02 +00:00
5 changed files with 147 additions and 168 deletions
Showing only changes of commit 4b0d208ef5

View File

@ -54,6 +54,11 @@ AI_SIMILARITY_THRESHOLD=0.3
AI_EMBEDDING_SELECTION_LIMIT=30
AI_EMBEDDING_CONCEPTS_LIMIT=15
# Maximum tools/concepts sent to AI when embeddings are DISABLED
# Set to 0 for no limit (WARNING: may cause token overflow with large datasets)
AI_NO_EMBEDDINGS_TOOL_LIMIT=0
AI_NO_EMBEDDINGS_CONCEPT_LIMIT=0
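
A value of 0 is treated as "no cap" by the selection code added later in this PR; a minimal TypeScript sketch of that interpretation (capList is a hypothetical helper, not part of the codebase):

// Hypothetical helper illustrating the "0 = unlimited" convention used by
// AI_NO_EMBEDDINGS_TOOL_LIMIT / AI_NO_EMBEDDINGS_CONCEPT_LIMIT.
function capList<T>(items: T[], limit: number): T[] {
  return limit > 0 ? items.slice(0, Math.min(limit, items.length)) : items;
}
// capList(allTools, 0)  -> every tool is sent to the AI (token overflow risk)
// capList(allTools, 25) -> only the first 25 candidates are sent
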
# === AI SELECTION STAGE ===
# Maximum tools the AI can select from embedding candidates
# 🤖 This is the SECOND filter - AI intelligent selection
@ -98,17 +103,21 @@ AI_EMBEDDINGS_BATCH_SIZE=10
# Delay between embedding batches (milliseconds)
AI_EMBEDDINGS_BATCH_DELAY_MS=1000
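
The code consuming these two batch settings is not part of this commit; the following is only a sketch, under the assumption that embedding generation iterates in fixed-size batches with a pause between them:

// Sketch of an assumed batching loop (not taken from this diff).
async function embedInBatches<T>(items: T[], embedBatch: (batch: T[]) => Promise<void>): Promise<void> {
  const size = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '10', 10);
  const delayMs = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
  for (let i = 0; i < items.length; i += size) {
    await embedBatch(items.slice(i, i + size)); // process one batch
    if (i + size < items.length) {
      await new Promise(resolve => setTimeout(resolve, delayMs)); // throttle between batches
    }
  }
}
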
# Maximum tools sent to AI for detailed analysis (micro-tasks)
AI_MAX_TOOLS_TO_ANALYZE=20
AI_MAX_CONCEPTS_TO_ANALYZE=10
# ============================================================================
# 5. AI CONTEXT & TOKEN MANAGEMENT
# ============================================================================
# Maximum context tokens to maintain across micro-tasks
# Controls how much conversation history is preserved between AI calls
AI_MAX_CONTEXT_TOKENS=3000
AI_MAX_CONTEXT_TOKENS=4000
# Maximum tokens per individual AI prompt
# Larger = more context per call | Smaller = faster responses
AI_MAX_PROMPT_TOKENS=1200
AI_MAX_PROMPT_TOKENS=1500
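
The pipeline later in this diff calls this.estimateTokens(prompt) before dispatching a prompt; the estimator itself is not shown in this commit, so the check below uses a common rough heuristic (about 4 characters per token) purely as an assumption:

// Rough sizing heuristic (assumption, not the project's actual estimateTokens()).
function estimateTokensRough(text: string): number {
  return Math.ceil(text.length / 4); // ~4 characters per token for prose and JSON
}
function fitsPromptBudget(prompt: string): boolean {
  const maxPromptTokens = parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10);
  return estimateTokensRough(prompt) <= maxPromptTokens;
}
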
# ============================================================================
# 6. AUTHENTICATION & AUTHORIZATION (OPTIONAL)
@ -169,7 +178,7 @@ GIT_API_TOKEN=your-git-api-token
# ============================================================================
# Enable detailed audit trail of AI decision-making
FORENSIC_AUDIT_ENABLED=false
FORENSIC_AUDIT_ENABLED=true
# Audit detail level: minimal, standard, verbose
FORENSIC_AUDIT_DETAIL_LEVEL=standard
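
When auditing is enabled, the pipeline attaches an auditTrail array to the recommendation it returns (see the AnalysisContext and response changes further down in this diff). A sketch of how a caller might read it, assuming the field names visible in this PR's AuditEntry interface:

// Client-side sketch; AuditEntryLike is only a local approximation of the
// AuditEntry shape defined in this PR.
interface AuditEntryLike {
  timestamp: number;
  phase: string;
  action: string;
  confidence: number;
  processingTimeMs: number;
}
function summarizeAuditTrail(recommendation: { auditTrail?: AuditEntryLike[] }): void {
  for (const entry of recommendation.auditTrail ?? []) {
    console.log(`${entry.phase}/${entry.action}: ${entry.confidence}% confidence, ${entry.processingTimeMs}ms`);
  }
}
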
@ -199,23 +208,16 @@ CONFIDENCE_HIGH_THRESHOLD=80
# PERFORMANCE TUNING PRESETS
# ============================================================================
# 🚀 FOR FASTER RESPONSES (less comprehensive):
# AI_EMBEDDING_CANDIDATES=20
# AI_MAX_SELECTED_ITEMS=15
# AI_MICRO_TASK_DELAY_MS=200
# AI_MAX_CONTEXT_TOKENS=2000
# 🚀 FOR FASTER RESPONSES (prevent token overflow):
# AI_NO_EMBEDDINGS_TOOL_LIMIT=25
# AI_NO_EMBEDDINGS_CONCEPT_LIMIT=10
# 🎯 FOR BETTER QUALITY (more comprehensive):
# AI_EMBEDDING_CANDIDATES=60
# AI_MAX_SELECTED_ITEMS=40
# AI_MICRO_TASK_DELAY_MS=800
# AI_MAX_CONTEXT_TOKENS=4000
# 🎯 FOR FULL DATABASE ACCESS (risk of truncation):
# AI_NO_EMBEDDINGS_TOOL_LIMIT=0
# AI_NO_EMBEDDINGS_CONCEPT_LIMIT=0
# 🔋 FOR LOW-POWER SYSTEMS (minimal resources):
# AI_EMBEDDING_CANDIDATES=15
# AI_MAX_SELECTED_ITEMS=10
# AI_RATE_LIMIT_MAX_REQUESTS=2
# AI_MICRO_TASK_DELAY_MS=1000
# 🔋 FOR LOW-POWER SYSTEMS:
# AI_NO_EMBEDDINGS_TOOL_LIMIT=15
# ============================================================================
# FEATURE COMBINATIONS GUIDE

View File

@ -113,64 +113,6 @@ tools:
accessType: download
license: VSL
knowledgebase: false
- name: TheHive 5
icon: 🐝
type: software
description: >-
The central incident-response platform orchestrates complex security
incidents from the first alert to the final report. Each case is
structured through observables (IOCs), tasks, and timelines. The Cortex
integration automates analysis with dozens of analyzers, from VirusTotal
checks to sandbox detonation. MISP synchronization enriches cases with
threat intelligence. The sophisticated role and permission system enables
secure collaboration between SOC analysts, forensic examiners, and
management. Templates standardize response processes by incident type.
The RESTful API integrates seamlessly with SIEM, SOAR, and ticketing
systems. Metrics and KPIs measure team performance. The Community
Edition remains free for smaller teams, while Gold/Platinum licenses
offer enterprise features.
domains:
- incident-response
- static-investigations
- malware-analysis
- network-forensics
- fraud-investigation
phases:
- data-collection
- examination
- analysis
- reporting
platforms:
- Web
related_software:
- MISP
- Cortex
- Elasticsearch
domain-agnostic-software:
- collaboration-general
skillLevel: intermediate
accessType: server-based
url: https://strangebee.com/thehive/
projectUrl: ''
license: Community Edition (Discontinued) / Commercial
knowledgebase: false
statusUrl: https://uptime.example.lab/api/badge/1/status
tags:
- web-interface
- case-management
- collaboration
- api
- workflow
- multi-user-support
- cortex-analyzer
- misp-integration
- playbooks
- metrics
- rbac
- template-driven
related_concepts:
- Digital Evidence Chain of Custody
- name: MISP
icon: 🌐
type: software
@ -223,7 +165,6 @@ tools:
related_concepts:
- Hash Functions & Digital Signatures
related_software:
- TheHive 5
- Cortex
- OpenCTI
- name: DFIR-IRIS
@ -260,7 +201,6 @@ tools:
platforms:
- Web
related_software:
- TheHive 5
- MISP
- OpenCTI
domain-agnostic-software:

View File

@ -94,18 +94,15 @@ ${input}
`.trim();
}
// Enhanced AI service call function
async function callAIService(prompt: string): Promise<Response> {
const endpoint = AI_ENDPOINT;
const apiKey = AI_ANALYZER_API_KEY;
const model = AI_ANALYZER_MODEL;
// Simple headers - add auth only if API key exists
let headers: Record<string, string> = {
'Content-Type': 'application/json'
};
// Add authentication if API key is provided
if (apiKey) {
headers['Authorization'] = `Bearer ${apiKey}`;
console.log('[ENHANCE API] Using API key authentication');
@ -113,7 +110,6 @@ async function callAIService(prompt: string): Promise<Response> {
console.log('[ENHANCE API] No API key - making request without authentication');
}
// Simple request body
const requestBody = {
model,
messages: [{ role: 'user', content: prompt }],
@ -124,8 +120,6 @@ async function callAIService(prompt: string): Promise<Response> {
presence_penalty: 0.1
};
// FIXED: This function is already being called through enqueueApiCall in the main handler
// So we can use direct fetch here since the queuing happens at the caller level
return fetch(`${endpoint}/v1/chat/completions`, {
method: 'POST',
headers,
@ -214,7 +208,7 @@ export const POST: APIRoute = async ({ request }) => {
success: true,
questions,
taskId,
inputComplete: questions.length === 0 // Flag to indicate if input seems complete
inputComplete: questions.length === 0
}), {
status: 200,
headers: { 'Content-Type': 'application/json' }

View File

@ -31,7 +31,6 @@ interface AnalysisResult {
};
}
// NEW: Audit Trail Types
interface AuditEntry {
timestamp: number;
phase: string; // 'retrieval', 'selection', 'micro-task-N'
@ -40,10 +39,9 @@ interface AuditEntry {
output: any; // What came out of this step
confidence: number; // 0-100: How confident we are in this step
processingTimeMs: number;
metadata: Record<string, any>; // Additional context
metadata: Record<string, any>;
}
// Enhanced AnalysisContext with Audit Trail
interface AnalysisContext {
userQuery: string;
mode: string;
@ -62,7 +60,6 @@ interface AnalysisContext {
seenToolNames: Set<string>;
// NEW: Audit Trail
auditTrail: AuditEntry[];
}
@ -78,25 +75,24 @@ class ImprovedMicroTaskAIPipeline {
private similarityThreshold: number;
private microTaskDelay: number;
// NEW: Embedding selection limits (top N from pre-filtered candidates)
private embeddingSelectionLimit: number;
private embeddingConceptsLimit: number;
private noEmbeddingsToolLimit: number;
private noEmbeddingsConceptLimit: number;
// NEW: Embeddings efficiency thresholds
private embeddingsMinTools: number;
private embeddingsMaxReductionRatio: number;
private maxContextTokens: number;
private maxPromptTokens: number;
// Audit Configuration
private auditConfig: {
enabled: boolean;
detailLevel: 'minimal' | 'standard' | 'verbose';
retentionHours: number;
};
// Temporary audit storage for pre-context operations
private tempAuditEntries: AuditEntry[] = [];
constructor() {
@ -106,36 +102,33 @@ class ImprovedMicroTaskAIPipeline {
model: this.getEnv('AI_ANALYZER_MODEL')
};
// Core pipeline configuration
this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '25', 10);
this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '50', 10);
this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.3');
this.microTaskDelay = parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10);
// NEW: Embedding selection limits (top N from pre-filtered candidates)
this.embeddingSelectionLimit = parseInt(process.env.AI_EMBEDDING_SELECTION_LIMIT || '30', 10);
this.embeddingConceptsLimit = parseInt(process.env.AI_EMBEDDING_CONCEPTS_LIMIT || '15', 10);
this.noEmbeddingsToolLimit = parseInt(process.env.AI_NO_EMBEDDINGS_TOOL_LIMIT || '0', 10);
this.noEmbeddingsConceptLimit = parseInt(process.env.AI_NO_EMBEDDINGS_CONCEPT_LIMIT || '0', 10);
// NEW: Embeddings efficiency thresholds
this.embeddingsMinTools = parseInt(process.env.AI_EMBEDDINGS_MIN_TOOLS || '8', 10);
this.embeddingsMaxReductionRatio = parseFloat(process.env.AI_EMBEDDINGS_MAX_REDUCTION_RATIO || '0.75');
// Context management
this.maxContextTokens = parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10);
this.maxPromptTokens = parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10);
// Audit configuration
this.auditConfig = {
enabled: process.env.FORENSIC_AUDIT_ENABLED === 'true',
detailLevel: (process.env.FORENSIC_AUDIT_DETAIL_LEVEL as any) || 'standard',
retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10)
};
// Log configuration for debugging
console.log('[AI PIPELINE] Configuration loaded:', {
embeddingCandidates: this.embeddingCandidates,
embeddingSelection: `${this.embeddingSelectionLimit} tools, ${this.embeddingConceptsLimit} concepts`,
embeddingsThresholds: `min ${this.embeddingsMinTools} tools, max ${this.embeddingsMaxReductionRatio * 100}% of total`,
noEmbeddingsLimits: `${this.noEmbeddingsToolLimit || 'unlimited'} tools, ${this.noEmbeddingsConceptLimit || 'unlimited'} concepts`,
auditEnabled: this.auditConfig.enabled
});
}
@ -148,7 +141,6 @@ class ImprovedMicroTaskAIPipeline {
return value;
}
// NEW: Audit Trail Utility Functions
private addAuditEntry(
context: AnalysisContext | null,
phase: string,
@ -175,22 +167,18 @@ class ImprovedMicroTaskAIPipeline {
if (context) {
context.auditTrail.push(auditEntry);
} else {
// Store in temporary array for later merging
this.tempAuditEntries.push(auditEntry);
}
// Log for debugging when audit is enabled
console.log(`[AUDIT] ${phase}/${action}: ${confidence}% confidence, ${Date.now() - startTime}ms`);
}
// NEW: Merge temporary audit entries into context
private mergeTemporaryAuditEntries(context: AnalysisContext): void {
if (!this.auditConfig.enabled || this.tempAuditEntries.length === 0) return;
const entryCount = this.tempAuditEntries.length;
// Add temp entries to the beginning of the context audit trail
context.auditTrail.unshift(...this.tempAuditEntries);
this.tempAuditEntries = []; // Clear temp storage
this.tempAuditEntries = [];
console.log(`[AUDIT] Merged ${entryCount} temporary audit entries into context`);
}
@ -222,15 +210,12 @@ class ImprovedMicroTaskAIPipeline {
let confidence = 60; // Base confidence
// Good selection ratio (not too many, not too few)
if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
else if (selectionRatio <= 0.05) confidence -= 10; // Too few
else confidence -= 15; // Too many
// Has detailed reasoning
if (hasReasoning) confidence += 15;
// Selected tools have good distribution
if (result.selectedConcepts && result.selectedConcepts.length > 0) confidence += 5;
return Math.min(95, Math.max(25, confidence));
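
// Worked example (hypothetical numbers, not from this diff): 12 tools selected out of
// 80 candidates gives selectionRatio = 0.15, inside the 0.05-0.3 band (+20); reasoning
// present (+15); at least one concept selected (+5). 60 + 20 + 15 + 5 = 100, clamped to 95.
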
@ -254,26 +239,106 @@ class ImprovedMicroTaskAIPipeline {
private safeParseJSON(jsonString: string, fallback: any = null): any {
try {
const cleaned = jsonString
let cleaned = jsonString
.replace(/^```json\s*/i, '')
.replace(/\s*```\s*$/g, '')
.trim();
if (!cleaned.endsWith('}') && !cleaned.endsWith(']')) {
console.warn('[AI PIPELINE] JSON appears truncated, attempting recovery...');
let lastCompleteStructure = '';
let braceCount = 0;
let bracketCount = 0;
let inString = false;
let escaped = false;
for (let i = 0; i < cleaned.length; i++) {
const char = cleaned[i];
if (escaped) {
escaped = false;
continue;
}
if (char === '\\') {
escaped = true;
continue;
}
if (char === '"' && !escaped) {
inString = !inString;
continue;
}
if (!inString) {
if (char === '{') braceCount++;
if (char === '}') braceCount--;
if (char === '[') bracketCount++;
if (char === ']') bracketCount--;
if (braceCount === 0 && bracketCount === 0 && (char === '}' || char === ']')) {
lastCompleteStructure = cleaned.substring(0, i + 1);
}
}
}
if (lastCompleteStructure) {
console.log('[AI PIPELINE] Attempting to parse recovered JSON structure...');
cleaned = lastCompleteStructure;
} else {
if (braceCount > 0) {
cleaned += '}';
console.log('[AI PIPELINE] Added closing brace to truncated JSON');
}
if (bracketCount > 0) {
cleaned += ']';
console.log('[AI PIPELINE] Added closing bracket to truncated JSON');
}
}
}
const parsed = JSON.parse(cleaned);
if (parsed && typeof parsed === 'object') {
if (parsed.selectedTools === undefined) parsed.selectedTools = [];
if (parsed.selectedConcepts === undefined) parsed.selectedConcepts = [];
if (!Array.isArray(parsed.selectedTools)) parsed.selectedTools = [];
if (!Array.isArray(parsed.selectedConcepts)) parsed.selectedConcepts = [];
}
return parsed;
} catch (error) {
console.warn('[AI PIPELINE] JSON parsing failed:', error.message);
console.warn('[AI PIPELINE] Raw content:', jsonString.slice(0, 200));
console.warn('[AI PIPELINE] Raw content (first 300 chars):', jsonString.slice(0, 300));
console.warn('[AI PIPELINE] Raw content (last 300 chars):', jsonString.slice(-300));
if (jsonString.includes('selectedTools')) {
const toolMatches = jsonString.match(/"([^"]+)"/g);
if (toolMatches && toolMatches.length > 0) {
console.log('[AI PIPELINE] Attempting partial recovery from broken JSON...');
const possibleTools = toolMatches
.map(match => match.replace(/"/g, ''))
.filter(name => name.length > 2 && !['selectedTools', 'selectedConcepts', 'reasoning'].includes(name))
.slice(0, 15); // Reasonable limit
if (possibleTools.length > 0) {
console.log(`[AI PIPELINE] Recovered ${possibleTools.length} possible tool names from broken JSON`);
return {
selectedTools: possibleTools,
selectedConcepts: [],
reasoning: 'Recovered from truncated response'
};
}
}
}
return fallback;
}
}
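
// To make the recovery paths concrete, a standalone illustration (hypothetical input,
// not part of the pipeline): a selection response cut off mid-array falls through the
// brace-balancing repair and is salvaged by the quoted-name fallback in the catch block.
const truncatedExample = '{"selectedTools": ["Autopsy", "Volatility", "Wireshar';
const salvagedNames = (truncatedExample.match(/"([^"]+)"/g) ?? [])
  .map(m => m.replace(/"/g, ''))
  .filter(n => n.length > 2 && !['selectedTools', 'selectedConcepts', 'reasoning'].includes(n));
console.log(salvagedNames); // ["Autopsy", "Volatility"] - "Wireshar" never got its closing quote
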
private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string): boolean {
if (context.seenToolNames.has(tool.name)) {
console.log(`[AI PIPELINE] Skipping duplicate tool: ${tool.name}`);
return false;
}
private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string): boolean {
context.seenToolNames.add(tool.name);
if (!context.selectedTools) context.selectedTools = [];
@ -302,11 +367,9 @@ class ImprovedMicroTaskAIPipeline {
console.log(`[AI PIPELINE] Embeddings found ${similarItems.length} similar items`);
// Create lookup maps for O(1) access while preserving original data
const toolsMap = new Map<string, any>(toolsData.tools.map((tool: any) => [tool.name, tool]));
const conceptsMap = new Map<string, any>(toolsData.concepts.map((concept: any) => [concept.name, concept]));
// Process in similarity order, preserving the ranking
const similarTools = similarItems
.filter((item): item is SimilarityResult => item.type === 'tool')
.map(item => toolsMap.get(item.name))
@ -319,7 +382,6 @@ class ImprovedMicroTaskAIPipeline {
console.log(`[AI PIPELINE] Similarity-ordered results: ${similarTools.length} tools, ${similarConcepts.length} concepts`);
// FIXED: Better threshold logic - only use embeddings if we get meaningful filtering
const totalAvailableTools = toolsData.tools.length;
const reductionRatio = similarTools.length / totalAvailableTools;
@ -340,7 +402,6 @@ class ImprovedMicroTaskAIPipeline {
selectionMethod = 'full_dataset';
}
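
// Worked example (assumed reading of AI_EMBEDDINGS_MIN_TOOLS / AI_EMBEDDINGS_MAX_REDUCTION_RATIO):
// with 120 tools in total, 90 embedding hits give reductionRatio = 0.75, i.e. barely any
// filtering, so the method falls back to 'full_dataset'; roughly 40 hits (ratio ~0.33,
// and above the 8-tool minimum) would keep the 'embeddings_candidates' path.
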
// Enhanced audit entry with reduction statistics
if (this.auditConfig.enabled) {
this.addAuditEntry(null, 'retrieval', 'embeddings-search',
{ query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates },
@ -420,25 +481,29 @@ class ImprovedMicroTaskAIPipeline {
related_software: concept.related_software || []
}));
// CORRECTED LOGIC:
let toolsToSend: any[];
let conceptsToSend: any[];
if (selectionMethod === 'embeddings_candidates') {
// WITH EMBEDDINGS: Take top N from pre-filtered candidates
toolsToSend = toolsWithFullData.slice(0, this.embeddingSelectionLimit);
conceptsToSend = conceptsWithFullData.slice(0, this.embeddingConceptsLimit);
console.log(`[AI PIPELINE] Embeddings enabled: sending top ${toolsToSend.length} pre-filtered tools`);
console.log(`[AI PIPELINE] Embeddings enabled: sending top ${toolsToSend.length} similarity-ordered tools`);
} else {
// WITHOUT EMBEDDINGS: Send entire compressed database (original behavior)
toolsToSend = toolsWithFullData; // ALL tools from database
conceptsToSend = conceptsWithFullData; // ALL concepts from database
const maxTools = this.noEmbeddingsToolLimit > 0 ?
Math.min(this.noEmbeddingsToolLimit, candidateTools.length) :
candidateTools.length;
console.log(`[AI PIPELINE] Embeddings disabled: sending entire database (${toolsToSend.length} tools, ${conceptsToSend.length} concepts)`);
const maxConcepts = this.noEmbeddingsConceptLimit > 0 ?
Math.min(this.noEmbeddingsConceptLimit, candidateConcepts.length) :
candidateConcepts.length;
toolsToSend = toolsWithFullData.slice(0, maxTools);
conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);
console.log(`[AI PIPELINE] Embeddings disabled: sending ${toolsToSend.length}/${candidateTools.length} tools (limit: ${this.noEmbeddingsToolLimit || 'none'})`);
}
// Generate the German prompt with appropriately selected tool data
const basePrompt = getPrompt('toolSelection', mode, userQuery, selectionMethod, this.maxSelectedItems);
const prompt = `${basePrompt}
@ -448,9 +513,12 @@ ${JSON.stringify(toolsToSend, null, 2)}
VERFÜGBARE KONZEPTE (mit vollständigen Daten):
${JSON.stringify(conceptsToSend, null, 2)}`;
// Log token usage for monitoring
const estimatedTokens = this.estimateTokens(prompt);
console.log(`[AI PIPELINE] Method: ${selectionMethod}, Tools: ${toolsToSend.length}, Tokens: ~${estimatedTokens}`);
console.log(`[AI PIPELINE] Method: ${selectionMethod}, Tools: ${toolsToSend.length}, Estimated tokens: ~${estimatedTokens}`);
if (estimatedTokens > 35000) {
console.warn(`[AI PIPELINE] WARNING: Prompt tokens (${estimatedTokens}) may exceed model limits`);
}
try {
const response = await this.callAI(prompt, 2500);
@ -527,7 +595,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
return new Promise(resolve => setTimeout(resolve, ms));
}
private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens: number = 300): Promise<MicroTaskResult> {
private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens: number = 500): Promise<MicroTaskResult> {
const startTime = Date.now();
let contextPrompt = prompt;
@ -552,11 +620,10 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
success: true
};
// NEW: Add Audit Entry for Successful Micro-Task
this.addAuditEntry(context, 'micro-task', 'ai-analysis',
{ promptLength: contextPrompt.length, maxTokens },
{ responseLength: response.length, contentPreview: response.slice(0, 100) },
response.length > 50 ? 80 : 60, // Confidence based on response quality
response.length > 50 ? 80 : 60,
startTime,
{ aiModel: this.config.model, contextUsed: context.contextHistory.length > 0 }
);
@ -572,11 +639,10 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
error: error.message
};
// NEW: Add Audit Entry for Failed Micro-Task
this.addAuditEntry(context, 'micro-task', 'ai-analysis-failed',
{ promptLength: contextPrompt.length, maxTokens },
{ error: error.message },
5, // Very low confidence
5,
startTime,
{ aiModel: this.config.model, contextUsed: context.contextHistory.length > 0 }
);
@ -589,7 +655,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
const isWorkflow = context.mode === 'workflow';
const prompt = getPrompt('scenarioAnalysis', isWorkflow, context.userQuery);
const result = await this.callMicroTaskAI(prompt, context, 220);
const result = await this.callMicroTaskAI(prompt, context, 400);
if (result.success) {
if (isWorkflow) {
@ -608,7 +674,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
const isWorkflow = context.mode === 'workflow';
const prompt = getPrompt('investigationApproach', isWorkflow, context.userQuery);
const result = await this.callMicroTaskAI(prompt, context, 220);
const result = await this.callMicroTaskAI(prompt, context, 400);
if (result.success) {
context.investigationApproach = result.content;
@ -622,7 +688,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
const isWorkflow = context.mode === 'workflow';
const prompt = getPrompt('criticalConsiderations', isWorkflow, context.userQuery);
const result = await this.callMicroTaskAI(prompt, context, 180);
const result = await this.callMicroTaskAI(prompt, context, 350);
if (result.success) {
context.criticalConsiderations = result.content;
@ -648,7 +714,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
const prompt = getPrompt('phaseToolSelection', context.userQuery, phase, phaseTools);
const result = await this.callMicroTaskAI(prompt, context, 450);
const result = await this.callMicroTaskAI(prompt, context, 800);
if (result.success) {
const selections = this.safeParseJSON(result.content, []);
@ -665,7 +731,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
}
});
// NEW: Add audit entry for tool selection
this.addAuditEntry(context, 'micro-task', 'phase-tool-selection',
{ phase: phase.id, availableTools: phaseTools.length },
{ validSelections: validSelections.length, selectedTools: validSelections.map(s => s.toolName) },
@ -682,7 +747,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank);
const result = await this.callMicroTaskAI(prompt, context, 650);
const result = await this.callMicroTaskAI(prompt, context, 1200);
if (result.success) {
const evaluation = this.safeParseJSON(result.content, {
@ -702,7 +767,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
}
}, 'evaluation', evaluation.suitability_score);
// NEW: Add audit entry for tool evaluation
this.addAuditEntry(context, 'micro-task', 'tool-evaluation',
{ toolName: tool.name, rank },
{ suitabilityScore: evaluation.suitability_score, hasExplanation: !!evaluation.detailed_explanation },
@ -730,7 +794,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
const selectedToolNames = context.selectedTools?.map(st => st.tool.name) || [];
const prompt = getPrompt('backgroundKnowledgeSelection', context.userQuery, context.mode, selectedToolNames, availableConcepts);
const result = await this.callMicroTaskAI(prompt, context, 400);
const result = await this.callMicroTaskAI(prompt, context, 700);
if (result.success) {
const selections = this.safeParseJSON(result.content, []);
@ -743,7 +807,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
relevance: sel.relevance
}));
// NEW: Add audit entry for background knowledge selection
this.addAuditEntry(context, 'micro-task', 'background-knowledge-selection',
{ availableConcepts: availableConcepts.length },
{ selectedConcepts: context.backgroundKnowledge?.length || 0 },
@ -761,21 +824,19 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
const selectedToolNames = context.selectedTools?.map(st => st.tool.name) || [];
const prompt = getPrompt('finalRecommendations', context.mode === 'workflow', context.userQuery, selectedToolNames);
const result = await this.callMicroTaskAI(prompt, context, 180);
const result = await this.callMicroTaskAI(prompt, context, 350);
return result;
}
private async callAI(prompt: string, maxTokens: number = 1000): Promise<string> {
private async callAI(prompt: string, maxTokens: number = 1500): Promise<string> {
const endpoint = this.config.endpoint;
const apiKey = this.config.apiKey;
const model = this.config.model;
// Simple headers - add auth only if API key exists
let headers: Record<string, string> = {
'Content-Type': 'application/json'
};
// Add authentication if API key is provided
if (apiKey) {
headers['Authorization'] = `Bearer ${apiKey}`;
console.log('[AI PIPELINE] Using API key authentication');
@ -783,7 +844,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
console.log('[AI PIPELINE] No API key - making request without authentication');
}
// Simple request body
const requestBody = {
model,
messages: [{ role: 'user', content: prompt }],
@ -792,7 +852,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
};
try {
// FIXED: Use direct fetch since entire pipeline is already queued at query.ts level
const response = await fetch(`${endpoint}/v1/chat/completions`, {
method: 'POST',
headers,
@ -826,13 +885,11 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
let completedTasks = 0;
let failedTasks = 0;
// NEW: Clear any previous temporary audit entries
this.tempAuditEntries = [];
console.log(`[AI PIPELINE] Starting ${mode} query processing with context continuity and audit trail`);
try {
// Stage 1: Get intelligent candidates (embeddings + AI selection)
const toolsData = await getCompressedToolsDataForAI();
const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode);
@ -844,20 +901,17 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
maxContextLength: this.maxContextTokens,
currentContextLength: 0,
seenToolNames: new Set<string>(),
// NEW: Initialize audit trail
auditTrail: []
};
// NEW: Merge any temporary audit entries from pre-context operations
this.mergeTemporaryAuditEntries(context);
console.log(`[AI PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
// NEW: Add initial audit entry
this.addAuditEntry(context, 'initialization', 'pipeline-start',
{ userQuery, mode, toolsDataLoaded: !!toolsData },
{ candidateTools: filteredData.tools.length, candidateConcepts: filteredData.concepts.length },
90, // High confidence for initialization
90,
startTime,
{ auditEnabled: this.auditConfig.enabled }
);
@ -896,19 +950,15 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
}
}
// Task 5: Background Knowledge Selection
const knowledgeResult = await this.selectBackgroundKnowledge(context);
if (knowledgeResult.success) completedTasks++; else failedTasks++;
await this.delay(this.microTaskDelay);
// Task 6: Final Recommendations
const finalResult = await this.generateFinalRecommendations(context);
if (finalResult.success) completedTasks++; else failedTasks++;
// Build final recommendation
const recommendation = this.buildRecommendation(context, mode, finalResult.content);
// NEW: Add final audit entry
this.addAuditEntry(context, 'completion', 'pipeline-end',
{ completedTasks, failedTasks },
{ finalRecommendation: !!recommendation, auditEntriesGenerated: context.auditTrail.length },
@ -935,7 +985,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
return {
recommendation: {
...recommendation,
// NEW: Include audit trail in response
auditTrail: this.auditConfig.enabled ? context.auditTrail : undefined
},
processingStats
@ -944,7 +993,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
} catch (error) {
console.error('[AI PIPELINE] Processing failed:', error);
// NEW: Ensure temp audit entries are cleared even on error
this.tempAuditEntries = [];
throw error;

View File

@ -130,7 +130,6 @@ async function loadRawData(): Promise<ToolsData> {
try {
cachedData = ToolsDataSchema.parse(rawData);
// Enhanced: Add default skill level descriptions if not provided
if (!cachedData.skill_levels || Object.keys(cachedData.skill_levels).length === 0) {
cachedData.skill_levels = {
novice: "Minimal technical background required, guided interfaces",
@ -178,21 +177,18 @@ export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedT
if (!cachedCompressedData) {
const data = await getToolsData();
// Enhanced: More detailed tool information for micro-tasks
const compressedTools = data.tools
.filter(tool => tool.type !== 'concept')
.map(tool => {
const { projectUrl, statusUrl, ...compressedTool } = tool;
return {
...compressedTool,
// Enhanced: Add computed fields for AI
is_hosted: projectUrl !== undefined && projectUrl !== null && projectUrl !== "" && projectUrl.trim() !== "",
is_open_source: tool.license && tool.license !== 'Proprietary',
complexity_score: tool.skillLevel === 'expert' ? 5 :
tool.skillLevel === 'advanced' ? 4 :
tool.skillLevel === 'intermediate' ? 3 :
tool.skillLevel === 'beginner' ? 2 : 1,
// Enhanced: Phase-specific suitability hints
phase_suitability: tool.phases?.map(phase => ({
phase,
primary_use: tool.tags?.find(tag => tag.includes(phase)) ? 'primary' : 'secondary'
@ -206,7 +202,6 @@ export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedT
const { projectUrl, statusUrl, platforms, accessType, license, ...compressedConcept } = concept;
return {
...compressedConcept,
// Enhanced: Learning difficulty indicator
learning_complexity: concept.skillLevel === 'expert' ? 'very_high' :
concept.skillLevel === 'advanced' ? 'high' :
concept.skillLevel === 'intermediate' ? 'medium' :