// src/utils/aiPipeline.ts
import { getCompressedToolsDataForAI } from './dataService.js';
import { embeddingsService, type EmbeddingData, type SimilarityResult } from './embeddings.js';
import { AI_PROMPTS, getPrompt } from '../config/prompts.js';
import { isToolHosted } from './toolHelpers.js';
import { auditService, type AuditEntry } from './auditService.js';
import dotenv from 'dotenv';

dotenv.config();

/** Connection settings for the AI endpoint (populated from environment variables). */
interface AIConfig {
  endpoint: string;
  apiKey: string;
  model: string;
}

/** Outcome of a single micro-task AI call; `error` is only set on failure. */
interface MicroTaskResult {
  taskType: string;
  content: string;
  processingTimeMs: number;
  success: boolean;
  error?: string;
}

/** Final pipeline output: the recommendation plus processing statistics. */
interface AnalysisResult {
  recommendation: any;
  processingStats: {
    embeddingsUsed: boolean;
    candidatesFromEmbeddings: number;
    finalSelectedItems: number;
    processingTimeMs: number;
    microTasksCompleted: number;
    microTasksFailed: number;
    contextContinuityUsed: boolean;
  };
}

/** Mutable state threaded through every step of one analysis run. */
interface AnalysisContext {
  userQuery: string;
  mode: string;
  filteredData: any;
  // Rolling list of summaries prepended to later micro-task prompts.
  contextHistory: string[];
  maxContextLength: number;
  // Estimated token count of everything currently in contextHistory.
  currentContextLength: number;
  scenarioAnalysis?: string;
  problemAnalysis?: string;
  investigationApproach?: string;
  criticalConsiderations?: string;
  selectedTools?: Array<{
    tool: any;
    phase: string;
    priority: string;
    justification?: string;
    taskRelevance?: number;
    limitations?: string[];
  }>;
  backgroundKnowledge?: Array<{
    concept: any;
    relevance: string;
  }>;
  // Names of tools already added to selectedTools.
  seenToolNames: Set<string>;
  // Item name -> similarity score reported by the embeddings search.
  embeddingsSimilarities: Map<string, number>;
  aiSelectedTools?: any[];
  aiSelectedConcepts?: any[];
}

/** Confidence breakdown attached to a tool recommendation. */
interface ConfidenceMetrics {
  overall: number;
  semanticRelevance: number;
  taskSuitability: number;
  uncertaintyFactors: string[];
  strengthIndicators: string[];
}

class ImprovedMicroTaskAIPipeline {
  private config: AIConfig;
  private maxSelectedItems: number;
  private embeddingCandidates: number;
  private similarityThreshold: number;
  private microTaskDelay: number;
  private embeddingSelectionLimit: number;
  private embeddingConceptsLimit: number;
  private noEmbeddingsToolLimit: number;
  private noEmbeddingsConceptLimit: number;
private embeddingsMinTools: number;
  private embeddingsMaxReductionRatio: number;
  private methodSelectionRatio: number;
  private softwareSelectionRatio: number;
  private maxContextTokens: number;
  private maxPromptTokens: number;
  private confidenceConfig: {
    semanticWeight: number;
    suitabilityWeight: number;
    minimumThreshold: number;
    mediumThreshold: number;
    highThreshold: number;
  };

  /**
   * Reads the AI endpoint settings and every tuning limit from the environment,
   * falling back to the defaults given below for optional values.
   *
   * @throws Error if AI_ANALYZER_ENDPOINT, AI_ANALYZER_API_KEY or AI_ANALYZER_MODEL is unset.
   */
  constructor() {
    this.config = {
      endpoint: this.getRequiredEnv('AI_ANALYZER_ENDPOINT'),
      apiKey: this.getRequiredEnv('AI_ANALYZER_API_KEY'),
      model: this.getRequiredEnv('AI_ANALYZER_MODEL')
    };

    this.maxSelectedItems = this.getEnvInt('AI_MAX_SELECTED_ITEMS', 25);
    this.embeddingCandidates = this.getEnvInt('AI_EMBEDDING_CANDIDATES', 50);
    this.similarityThreshold = this.getEnvFloat('AI_SIMILARITY_THRESHOLD', 0.3);
    this.microTaskDelay = this.getEnvInt('AI_MICRO_TASK_DELAY_MS', 500);

    this.embeddingSelectionLimit = this.getEnvInt('AI_EMBEDDING_SELECTION_LIMIT', 30);
    this.embeddingConceptsLimit = this.getEnvInt('AI_EMBEDDING_CONCEPTS_LIMIT', 15);
    this.noEmbeddingsToolLimit = this.getEnvInt('AI_NO_EMBEDDINGS_TOOL_LIMIT', 25);
    this.noEmbeddingsConceptLimit = this.getEnvInt('AI_NO_EMBEDDINGS_CONCEPT_LIMIT', 10);

    this.embeddingsMinTools = this.getEnvInt('AI_EMBEDDINGS_MIN_TOOLS', 8);
    this.embeddingsMaxReductionRatio = this.getEnvFloat('AI_EMBEDDINGS_MAX_REDUCTION_RATIO', 0.75);
    this.methodSelectionRatio = this.getEnvFloat('AI_METHOD_SELECTION_RATIO', 0.4);
    this.softwareSelectionRatio = this.getEnvFloat('AI_SOFTWARE_SELECTION_RATIO', 0.5);

    this.maxContextTokens = this.getEnvInt('AI_MAX_CONTEXT_TOKENS', 4000);
    this.maxPromptTokens = this.getEnvInt('AI_MAX_PROMPT_TOKENS', 1500);

    this.confidenceConfig = {
      semanticWeight: this.getEnvFloat('CONFIDENCE_SEMANTIC_WEIGHT', 0.3),
      suitabilityWeight: this.getEnvFloat('CONFIDENCE_SUITABILITY_WEIGHT', 0.7),
      minimumThreshold: this.getEnvInt('CONFIDENCE_MINIMUM_THRESHOLD', 40),
      mediumThreshold: this.getEnvInt('CONFIDENCE_MEDIUM_THRESHOLD', 60),
      highThreshold: this.getEnvInt('CONFIDENCE_HIGH_THRESHOLD', 80)
    };

    console.log('[AI-PIPELINE] Initialized with audit service integration');
  }

  /**
   * Returns the value of a mandatory environment variable.
   *
   * @throws Error if the variable is unset or empty.
   */
  private getRequiredEnv(key: string): string {
    const value = process.env[key];
    if (!value) {
      throw new Error(`Missing required environment variable: ${key}`);
    }
    return value;
  }

  /**
   * Reads an integer environment variable (base 10).
   *
   * @returns the parsed value, or `defaultValue` when the variable is unset,
   *          empty, or not parseable as a number.
   */
  private getEnvInt(key: string, defaultValue: number): number {
    const value = process.env[key];
    if (!value) return defaultValue;
    const parsed = parseInt(value, 10);
    // parseInt yields NaN for non-numeric text; a NaN limit would silently
    // break every slice()/comparison that uses it, so fall back instead.
    return Number.isNaN(parsed) ? defaultValue : parsed;
  }

  /**
   * Reads a floating-point environment variable.
   *
   * @returns the parsed value, or `defaultValue` when the variable is unset,
   *          empty, or not parseable as a number.
   */
  private getEnvFloat(key: string, defaultValue: number): number {
    const value = process.env[key];
    if (!value) return defaultValue;
    const parsed = parseFloat(value);
    return Number.isNaN(parsed) ? defaultValue : parsed;
  }

  // SIMPLIFIED AUDIT INTEGRATION - Use auditService instead of local implementation
  /**
   * Forwards an audit record to the shared auditService.
   * `context` is accepted for call-site symmetry but is not used here.
   */
  private addAuditEntry(
    context: AnalysisContext,
    phase: string,
    action: string,
    input: any,
    output: any,
    confidence: number,
    startTime: number,
    metadata: Record<string, any> = {}
  ): void {
    auditService.addEntry(phase, action, input, output, confidence, startTime, metadata);
  }

  /**
   * Heuristic confidence score (clamped to 25..95) for an AI selection result.
   * Starts at 60, rewards a selection ratio between 5% and 30% of the candidates,
   * a reasoning text longer than 50 characters, and any selected concepts.
   *
   * @returns 30 when the result has no `selectedTools`.
   */
  private calculateSelectionConfidence(result: any, candidateCount: number): number {
    if (!result?.selectedTools) return 30;

    const selectionRatio = result.selectedTools.length / candidateCount;
    const hasReasoning = result.reasoning && result.reasoning.length > 50;

    let confidence = 60;

    if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
    else if (selectionRatio <= 0.05) confidence -= 10;
    else confidence -= 15;

    if (hasReasoning) confidence += 15;
    if (result.selectedConcepts?.length > 0) confidence += 5;

    return Math.min(95, Math.max(25, confidence));
  }

  /** Rough token estimate: one token per four characters, rounded up. */
  private estimateTokens(text: string): number {
    return Math.ceil(text.length / 4);
  }

  /**
   * Appends an entry to the context history and evicts the oldest entries
   * while the estimated total exceeds maxContextTokens (always keeping at least one).
   */
  private addToContextHistory(context: AnalysisContext, newEntry: string): void {
    const entryTokens = this.estimateTokens(newEntry);

    context.contextHistory.push(newEntry);
    context.currentContextLength += entryTokens;

    while (context.currentContextLength > this.maxContextTokens && context.contextHistory.length > 1) {
      const removed = context.contextHistory.shift()!;
      context.currentContextLength -=
this.estimateTokens(removed);
    }
  }

  /**
   * Best-effort parser for JSON embedded in an AI response.
   *
   * Steps:
   *  1. Extract the payload: a fenced ``` block if present, otherwise the
   *     outermost JSON structure in the text. A bare array is kept as an array
   *     (the object-only extraction used to strip its brackets).
   *  2. If the payload does not end in `}` or `]`, try to cut it back to the
   *     last balanced structure, or append a closer.
   *  3. Parse; on objects, guarantee `selectedTools` / `selectedConcepts` arrays.
   *  4. If parsing throws, scrape quoted names out of the raw text with regexes.
   *
   * @param jsonString raw model output
   * @param fallback   value returned when nothing can be recovered
   * @returns the parsed value, a recovered `{ selectedTools, selectedConcepts, reasoning }`
   *          object, or `fallback`
   */
  private safeParseJSON(jsonString: string, fallback: any = null): any {
    // A non-string response cannot be parsed or scraped; avoid throwing from the catch path.
    if (typeof jsonString !== 'string') return fallback;

    try {
      let cleaned = jsonString.trim();

      const fencedMatch =
        cleaned.match(/```json\s*([\s\S]*?)\s*```/i) ??
        cleaned.match(/```\s*([\s\S]*?)\s*```/i);

      if (fencedMatch) {
        cleaned = fencedMatch[1] || fencedMatch[0];
      } else {
        const objectMatch = cleaned.match(/\{[\s\S]*\}/);
        const arrayMatch = cleaned.match(/\[[\s\S]*\]/);
        const arrayOpensFirst =
          arrayMatch !== null &&
          (objectMatch === null || (arrayMatch.index ?? 0) < (objectMatch.index ?? 0));

        // Only prefer the array span when it is itself valid JSON; otherwise the
        // brackets were probably prose and the object extraction is the safer bet.
        if (arrayOpensFirst && arrayMatch !== null && this.isParsableJSON(arrayMatch[0])) {
          cleaned = arrayMatch[0];
        } else if (objectMatch) {
          cleaned = objectMatch[0];
        }
      }

      if (!cleaned.endsWith('}') && !cleaned.endsWith(']')) {
        console.warn('[AI-PIPELINE] JSON appears truncated, attempting recovery');

        let braceCount = 0;
        let bracketCount = 0;
        let inString = false;
        let escaped = false;
        let lastCompleteStructure = '';

        for (let i = 0; i < cleaned.length; i++) {
          const char = cleaned[i];

          if (escaped) {
            escaped = false;
            continue;
          }

          if (char === '\\') {
            escaped = true;
            continue;
          }

          if (char === '"' && !escaped) {
            inString = !inString;
            continue;
          }

          if (!inString) {
            if (char === '{') braceCount++;
            if (char === '}') braceCount--;
            if (char === '[') bracketCount++;
            if (char === ']') bracketCount--;

            // Remember the longest prefix in which every bracket is balanced.
            if (braceCount === 0 && bracketCount === 0 && (char === '}' || char === ']')) {
              lastCompleteStructure = cleaned.substring(0, i + 1);
            }
          }
        }

        if (lastCompleteStructure) {
          cleaned = lastCompleteStructure;
        } else {
          if (braceCount > 0) cleaned += '}';
          if (bracketCount > 0) cleaned += ']';
        }
      }

      const parsed = JSON.parse(cleaned);

      if (parsed && typeof parsed === 'object') {
        if (!parsed.selectedTools) parsed.selectedTools = [];
        if (!parsed.selectedConcepts) parsed.selectedConcepts = [];

        if (!Array.isArray(parsed.selectedTools)) parsed.selectedTools = [];
        if (!Array.isArray(parsed.selectedConcepts)) parsed.selectedConcepts = [];
      }

      return parsed;
    } catch (error) {
      // The catch variable is `unknown` under strict settings; narrow before reading `.message`.
      const message = error instanceof Error ? error.message : String(error);
      console.warn('[AI-PIPELINE] JSON parsing failed:', message);

      if (jsonString.includes('selectedTools') || jsonString.includes('selectedConcepts')) {
        const selectedTools: string[] = [];
        const selectedConcepts: string[] = [];

        const toolsMatch = jsonString.match(/"selectedTools"\s*:\s*\[([\s\S]*?)\]/i);
        if (toolsMatch) {
          const toolMatches = toolsMatch[1].match(/"([^"]+)"/g);
          if (toolMatches) {
            selectedTools.push(...toolMatches.map(match => match.replace(/"/g, '')));
          }
        }

        const conceptsMatch = jsonString.match(/"selectedConcepts"\s*:\s*\[([\s\S]*?)\]/i);
        if (conceptsMatch) {
          const conceptMatches = conceptsMatch[1].match(/"([^"]+)"/g);
          if (conceptMatches) {
            selectedConcepts.push(...conceptMatches.map(match => match.replace(/"/g, '')));
          }
        }

        // Last resort: treat any plausible quoted string as a tool name.
        if (selectedTools.length === 0 && selectedConcepts.length === 0) {
          const allMatches = jsonString.match(/"([^"]+)"/g);
          if (allMatches) {
            const possibleNames = allMatches
              .map(match => match.replace(/"/g, ''))
              .filter(name =>
                name.length > 2 &&
                !['selectedTools', 'selectedConcepts', 'reasoning'].includes(name) &&
                !name.includes(':') &&
                !name.match(/^\d+$/)
              )
              .slice(0, 15);

            selectedTools.push(...possibleNames);
          }
        }

        if (selectedTools.length > 0 || selectedConcepts.length > 0) {
          console.log('[AI-PIPELINE] JSON recovery successful:', selectedTools.length, 'tools,', selectedConcepts.length, 'concepts');
          return {
            selectedTools,
            selectedConcepts,
            reasoning: 'Recovered from malformed JSON response'
          };
        }
      }

      return fallback;
    }
  }

  /** True when `text` is syntactically valid JSON. */
  private isParsableJSON(text: string): boolean {
    try {
      JSON.parse(text);
      return true;
    } catch (_error) {
      return false;
    }
  }

  /**
   * Records a tool in the context's selection list and marks its name as seen.
   * No de-duplication is performed; the method always returns true.
   */
  private addToolToSelection(
    context: AnalysisContext,
    tool: any,
    phase: string,
    priority: string,
    justification?: string,
    taskRelevance?: number,
    limitations?: string[]
  ): boolean {
    context.seenToolNames.add(tool.name);
    if (!context.selectedTools) context.selectedTools = [];

    context.selectedTools.push({
      tool,
      phase,
      priority,
      justification,
      taskRelevance,
      limitations
    });

    return true;
  }

  /**
   * Builds one keyword query string per phase (keyed by phase id) from the
   * phase name, the longer words of its description, its key activities and
   * its typical tools. Phases without both `id` and `name` are skipped.
   */
  private generatePhaseQueryTemplates(phases: any[]): Record<string, string> {
    const templates: Record<string, string> = {};

    phases.forEach((phase: any) => {
      if (phase?.id && phase?.name) {
        const phaseKeywords = [
          'forensic',
          phase.name.toLowerCase(),
          ...(phase.description ?
phase.description.toLowerCase().split(' ').filter((word: string) => word.length > 3) : []),
          ...(phase.key_activities || []).map((activity: string) => activity.toLowerCase()),
          ...(phase.typical_tools || []).map((tool: string) => tool.toLowerCase())
        ].join(' ');

        templates[phase.id] = phaseKeywords;
      }
    });

    return templates;
  }

  /**
   * Narrows the full tool/concept dataset to the candidates handed to the AI
   * selection step.
   *
   * When the embeddings service is enabled, the query is matched semantically
   * and the similarity of every hit is stored on the context. The embeddings
   * result is only used as the candidate set if it contains at least
   * `embeddingsMinTools` tools and does not exceed `embeddingsMaxReductionRatio`
   * of the full dataset; otherwise the complete dataset is used.
   *
   * @returns the AI-selected tools and concepts together with the dataset's
   *          domains, phases and domain-agnostic software
   */
  private async getIntelligentCandidates(
    userQuery: string,
    toolsData: any,
    mode: string,
    context: AnalysisContext
  ) {
    let candidateTools: any[] = [];
    let candidateConcepts: any[] = [];
    let selectionMethod = 'unknown';

    context.embeddingsSimilarities = new Map<string, number>();

    // A failed initialization is logged only; isEnabled() below decides the path.
    try {
      await embeddingsService.waitForInitialization();
    } catch (error) {
      console.error('[AI-PIPELINE] Embeddings initialization failed:', error);
    }

    if (embeddingsService.isEnabled()) {
      const embeddingsStart = Date.now();
      const similarItems = await embeddingsService.findSimilar(
        userQuery,
        this.embeddingCandidates,
        this.similarityThreshold
      ) as SimilarityResult[];

      console.log('[AI-PIPELINE] Embeddings found', similarItems.length, 'similar items');

      similarItems.forEach(item => {
        context.embeddingsSimilarities.set(item.name, item.similarity);
      });

      const toolsMap = new Map(toolsData.tools.map((tool: any) => [tool.name, tool]));
      const conceptsMap = new Map(toolsData.concepts.map((concept: any) => [concept.name, concept]));

      // Hits whose name is not in the dataset are dropped.
      const similarTools = similarItems
        .filter((item: any) => item.type === 'tool')
        .map((item: any) => toolsMap.get(item.name))
        .filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null);

      const similarConcepts = similarItems
        .filter((item: any) => item.type === 'concept')
        .map((item: any) => conceptsMap.get(item.name))
        .filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null);

      const totalAvailableTools = toolsData.tools.length;
      const reductionRatio = similarTools.length / totalAvailableTools;

      if (similarTools.length >= this.embeddingsMinTools && reductionRatio <= this.embeddingsMaxReductionRatio) {
        candidateTools =
similarTools;
        candidateConcepts = similarConcepts;
        selectionMethod = 'embeddings_candidates';

        console.log('[AI-PIPELINE] Using embeddings filtering:', totalAvailableTools, '→', similarTools.length, 'tools');
      } else {
        console.log('[AI-PIPELINE] Embeddings filtering insufficient, using full dataset');
        candidateTools = toolsData.tools;
        candidateConcepts = toolsData.concepts;
        selectionMethod = 'full_dataset';
      }

      this.addAuditEntry(
        context,
        'retrieval',
        'embeddings-search',
        { query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates },
        {
          candidatesFound: similarItems.length,
          reductionRatio: reductionRatio,
          usingEmbeddings: selectionMethod === 'embeddings_candidates',
          totalAvailable: totalAvailableTools,
          filtered: similarTools.length
        },
        selectionMethod === 'embeddings_candidates' ? 85 : 60,
        embeddingsStart,
        { selectionMethod, embeddingsEnabled: true }
      );
    } else {
      console.log('[AI-PIPELINE] Embeddings disabled, using full dataset');
      candidateTools = toolsData.tools;
      candidateConcepts = toolsData.concepts;
      selectionMethod = 'full_dataset';
    }

    const finalSelection = await this.aiSelectionWithFullData(
      userQuery,
      candidateTools,
      candidateConcepts,
      mode,
      selectionMethod,
      context
    );

    return {
      tools: finalSelection.selectedTools,
      concepts: finalSelection.selectedConcepts,
      domains: toolsData.domains,
      phases: toolsData.phases,
      'domain-agnostic-software': toolsData['domain-agnostic-software']
    };
  }

  /**
   * Asks the AI to pick tools and concepts from the candidate lists.
   *
   * The candidates are trimmed to a prompt budget first: the tool limit depends
   * on whether the candidates came from the embeddings search, and is split
   * between methods and software using `methodSelectionRatio` /
   * `softwareSelectionRatio` for both selection methods. Slots left over after
   * that split are filled with further methods, then further software.
   *
   * @returns the candidate objects whose names the AI selected
   * @throws Error when the AI response has no valid `selectedTools` /
   *         `selectedConcepts` arrays, when the selection is empty, or when the
   *         AI call itself fails (the error is audited and rethrown)
   */
  private async aiSelectionWithFullData(
    userQuery: string,
    candidateTools: any[],
    candidateConcepts: any[],
    mode: string,
    selectionMethod: string,
    context: AnalysisContext
  ) {
    const selectionStart = Date.now();

    const candidateMethods = candidateTools.filter((tool: any) => tool && tool.type === 'method');
    const candidateSoftware = candidateTools.filter((tool: any) => tool && tool.type === 'software');

    console.log('[AI-PIPELINE] Tool selection candidates:', candidateMethods.length, 'methods,', candidateSoftware.length, 'software,', candidateConcepts.length, 'concepts');

    const methodsWithFullData = candidateMethods.map(this.createToolData);
    const softwareWithFullData = candidateSoftware.map(this.createToolData);
    const conceptsWithFullData = candidateConcepts.map(this.createConceptData);

    const usingEmbeddings = selectionMethod === 'embeddings_candidates';
    const toolLimit = usingEmbeddings ? this.embeddingSelectionLimit : this.noEmbeddingsToolLimit;
    const conceptLimit = usingEmbeddings ? this.embeddingConceptsLimit : this.noEmbeddingsConceptLimit;

    const methodLimit = Math.ceil(toolLimit * this.methodSelectionRatio);
    const softwareLimit = Math.floor(toolLimit * this.softwareSelectionRatio);

    const toolsToSend: any[] = [
      ...methodsWithFullData.slice(0, methodLimit),
      ...softwareWithFullData.slice(0, softwareLimit)
    ];

    // Use up any unassigned slots: extra methods first, then extra software.
    let remainingCapacity = toolLimit - toolsToSend.length;
    if (remainingCapacity > 0) {
      const extraMethods = methodsWithFullData.slice(methodLimit, methodLimit + remainingCapacity);
      toolsToSend.push(...extraMethods);
      remainingCapacity -= extraMethods.length;
    }
    if (remainingCapacity > 0) {
      toolsToSend.push(...softwareWithFullData.slice(softwareLimit, softwareLimit + remainingCapacity));
    }

    const conceptsToSend: any[] = conceptsWithFullData.slice(0, conceptLimit);

    const basePrompt = getPrompt('toolSelection', mode, userQuery, selectionMethod, this.maxSelectedItems);
    const prompt = getPrompt('toolSelectionWithData', basePrompt, toolsToSend, conceptsToSend);

    const estimatedTokens = this.estimateTokens(prompt);
    console.log('[AI-PIPELINE] Sending to AI:', toolsToSend.filter((t: any) => t.type === 'method').length, 'methods,', toolsToSend.filter((t: any) => t.type === 'software').length, 'software,', conceptsToSend.length, 'concepts');

    if (estimatedTokens > 35000) {
      console.warn('[AI-PIPELINE] WARNING: Prompt tokens may exceed model limits:', estimatedTokens);
    }

    try {
      const response = await this.callAI(prompt, 2500);
      const result = this.safeParseJSON(response, null);

      if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
        console.error('[AI-PIPELINE] AI selection returned invalid structure');

        this.addAuditEntry(
          context,
          'selection',
          'ai-tool-selection-failed',
          { candidateCount: candidateTools.length, mode },
          { error: 'Invalid JSON structure' },
          10,
          selectionStart,
          { aiModel: this.config.model, selectionMethod }
        );

        // Note: this throw is caught by the handler below, which audits it a second time.
        throw new Error('AI selection failed to return valid tool and concept selection');
      }

      const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
      if (totalSelected === 0) {
        throw new Error('AI selection returned empty selection');
      }

      const toolsMap = new Map<string, any>(candidateTools.map((tool: any) => [tool.name, tool]));
      const conceptsMap = new Map<string, any>(candidateConcepts.map((concept: any) => [concept.name, concept]));

      // Names the AI invented (not among the candidates) are silently dropped.
      const selectedTools = result.selectedTools
        .map((name: string) => toolsMap.get(name))
        .filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null);

      const selectedConcepts = result.selectedConcepts
        .map((name: string) => conceptsMap.get(name))
        .filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null);

      const selectedMethods = selectedTools.filter((t: any) => t && t.type === 'method');
      const selectedSoftware = selectedTools.filter((t: any) => t && t.type === 'software');

      console.log('[AI-PIPELINE] AI selected:', selectedMethods.length, 'methods,', selectedSoftware.length, 'software,', selectedConcepts.length, 'concepts');

      const confidence = this.calculateSelectionConfidence(result, candidateTools.length + candidateConcepts.length);

      this.addAuditEntry(
        context,
        'selection',
        'ai-tool-selection',
        { candidateCount: candidateTools.length, mode },
        {
          selectedMethodCount: selectedMethods.length,
          selectedSoftwareCount: selectedSoftware.length,
          selectedConceptCount: selectedConcepts.length,
          reasoning: result.reasoning?.slice(0, 200),
          methodBalance: `${((selectedMethods.length / (selectedTools.length || 1)) * 100).toFixed(0)}%`
        },
        confidence,
        selectionStart,
        { aiModel: this.config.model, selectionMethod }
      );

      return { selectedTools, selectedConcepts };
    } catch (error) {
      console.error('[AI-PIPELINE] AI selection failed:', error);

      this.addAuditEntry(
        context,
        'selection',
        'ai-tool-selection-error',
        { candidateCount: candidateTools.length, mode },
        // The catch variable is `unknown` under strict settings; narrow before reading `.message`.
        { error: error instanceof Error ? error.message : String(error) },
        5,
        selectionStart,
        { aiModel: this.config.model, selectionMethod }
      );
      throw error;
    }
  }

  /** Projects a tool record onto the fields included in the selection prompt. */
  private createToolData = (tool: any) => ({
    name: tool.name,
    type: tool.type,
    description: tool.description,
    domains: tool.domains,
    phases: tool.phases,
    platforms: tool.platforms || [],
    tags: tool.tags || [],
    skillLevel: tool.skillLevel,
    license: tool.license,
    accessType: tool.accessType,
    projectUrl: tool.projectUrl,
    knowledgebase: tool.knowledgebase,
    related_concepts: tool.related_concepts || [],
    related_software: tool.related_software || []
  });

  /** Projects a concept record onto the fields included in the selection prompt. */
  private createConceptData = (concept: any) => ({
    name: concept.name,
    type: 'concept',
    description: concept.description,
    domains: concept.domains,
    phases: concept.phases,
    tags: concept.tags || [],
    skillLevel: concept.skillLevel,
    related_concepts: concept.related_concepts || [],
    related_software: concept.related_software || []
  });

  /** Resolves after `ms` milliseconds. */
  private async delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  private async callMicroTaskAI(
    prompt: string,
    context:
AnalysisContext,
    maxTokens: number = 500
  ): Promise<MicroTaskResult> {
    // Runs one micro-task prompt against the AI and never throws: failures are
    // returned as { success: false, error } and audited.
    const startTime = Date.now();

    // Prepend the accumulated analysis history only if the combined prompt
    // still fits the prompt budget; otherwise send the bare prompt.
    let contextPrompt = prompt;
    let contextUsed = false;
    if (context.contextHistory.length > 0) {
      const contextSection = `BISHERIGE ANALYSE:\n${context.contextHistory.join('\n\n')}\n\nAKTUELLE AUFGABE:\n`;
      const combinedPrompt = contextSection + prompt;

      if (this.estimateTokens(combinedPrompt) <= this.maxPromptTokens) {
        contextPrompt = combinedPrompt;
        contextUsed = true;
      }
    }

    try {
      const response = await this.callAI(contextPrompt, maxTokens);

      const result = {
        taskType: 'micro-task',
        content: response.trim(),
        processingTimeMs: Date.now() - startTime,
        success: true
      };

      this.addAuditEntry(
        context,
        'micro-task',
        'ai-analysis',
        { promptLength: contextPrompt.length, maxTokens },
        { responseLength: response.length },
        response.length > 50 ? 80 : 60,
        startTime,
        // Report whether the history was actually sent, not merely whether it exists.
        { aiModel: this.config.model, contextUsed }
      );

      return result;
    } catch (error) {
      // The catch variable is `unknown` under strict settings; narrow before reading `.message`.
      const message = error instanceof Error ? error.message : String(error);

      const result = {
        taskType: 'micro-task',
        content: '',
        processingTimeMs: Date.now() - startTime,
        success: false,
        error: message
      };

      this.addAuditEntry(
        context,
        'micro-task',
        'ai-analysis-failed',
        { promptLength: contextPrompt.length, maxTokens },
        { error: message },
        5,
        startTime,
        { aiModel: this.config.model }
      );

      return result;
    }
  }

  private calculateRecommendationConfidence(
    tool: any,
    context: AnalysisContext,
    taskRelevance: number = 70,
    limitations: string[] = []
  ): ConfidenceMetrics {
    // Tools without an embeddings score get a neutral semantic relevance of 50.
    const rawSemanticRelevance = context.embeddingsSimilarities.has(tool.name) ?
      context.embeddingsSimilarities.get(tool.name)!
* 100 : 50;

    // In workflow mode a tool that is listed for the phase it was selected in
    // earns a bonus of up to 15 points (capped at 100).
    let enhancedTaskSuitability = taskRelevance;

    if (context.mode === 'workflow') {
      const toolSelection = context.selectedTools?.find((st: any) => st.tool && st.tool.name === tool.name);
      if (toolSelection && tool.phases && Array.isArray(tool.phases) && tool.phases.includes(toolSelection.phase)) {
        const phaseBonus = Math.min(15, 100 - taskRelevance);
        enhancedTaskSuitability = Math.min(100, taskRelevance + phaseBonus);
      }
    }

    // Weighted blend of semantic relevance and task suitability.
    const overall = (
      rawSemanticRelevance * this.confidenceConfig.semanticWeight +
      enhancedTaskSuitability * this.confidenceConfig.suitabilityWeight
    );

    const uncertaintyFactors = this.identifyUncertaintyFactors(tool, context, limitations, overall);
    const strengthIndicators = this.identifyStrengthIndicators(tool, context, overall);

    return {
      overall: Math.round(overall),
      semanticRelevance: Math.round(rawSemanticRelevance),
      taskSuitability: Math.round(enhancedTaskSuitability),
      uncertaintyFactors,
      strengthIndicators
    };
  }

  /**
   * Collects up to four user-facing caveats for a recommended tool: the first
   * two supplied limitations, low semantic similarity, skill-level mismatches
   * against keywords in the query, installation effort, licensing, and a low
   * overall confidence.
   *
   * @param confidence overall confidence score; values below 60 add a caveat
   */
  private identifyUncertaintyFactors(
    tool: any,
    context: AnalysisContext,
    limitations: string[],
    confidence: number
  ): string[] {
    const factors: string[] = [];

    if (limitations?.length > 0) {
      factors.push(...limitations.slice(0, 2));
    }

    // Tools without a recorded similarity are treated as 0.5.
    const similarity = context.embeddingsSimilarities.get(tool.name) || 0.5;
    if (similarity < 0.7) {
      factors.push('Geringe semantische Ähnlichkeit zur Anfrage');
    }

    if (tool.skillLevel === 'expert' && /schnell|rapid|triage|urgent|sofort/i.test(context.userQuery)) {
      factors.push('Experten-Tool für zeitkritisches Szenario');
    }

    if (tool.skillLevel === 'novice' && /komplex|erweitert|tiefgehend|advanced|forensisch/i.test(context.userQuery)) {
      factors.push('Einsteiger-Tool für komplexe Analyse');
    }

    if (tool.type === 'software' && !isToolHosted(tool) && tool.accessType === 'download') {
      factors.push('Installation und Setup erforderlich');
    }

    if (tool.license === 'Proprietary') {
      factors.push('Kommerzielle Software - Lizenzkosten zu beachten');
    }

    if (confidence < 60) {
      factors.push('Moderate Gesamtbewertung - alternative Ansätze empfohlen');
    }

    return factors.slice(0, 4);
  }

  /**
   * Collects up to four user-facing strengths for a recommended tool: high
   * semantic similarity, an available knowledge base, a hosted instance, a
   * mid-range skill level, and a method matching a process-oriented query.
   * The `confidence` argument is currently not used.
   */
  private identifyStrengthIndicators(tool: any, context: AnalysisContext, confidence: number): string[] {
    const indicators: string[] = [];

    const similarity = context.embeddingsSimilarities.get(tool.name) || 0.5;
    if (similarity >= 0.7) {
      indicators.push('Sehr gute semantische Übereinstimmung mit Ihrer Anfrage');
    }

    if (tool.knowledgebase === true) {
      indicators.push('Umfassende Dokumentation und Wissensbasis verfügbar');
    }

    if (isToolHosted(tool)) {
      indicators.push('Sofort verfügbar über gehostete Lösung');
    }

    if (tool.skillLevel === 'intermediate' || tool.skillLevel === 'advanced') {
      indicators.push('Ausgewogenes Verhältnis zwischen Funktionalität und Benutzerfreundlichkeit');
    }

    if (tool.type === 'method' && /methodik|vorgehen|prozess|ansatz/i.test(context.userQuery)) {
      indicators.push('Methodischer Ansatz passt zu Ihrer prozeduralen Anfrage');
    }

    return indicators.slice(0, 4);
  }

  /**
   * Micro-task: analyses the scenario (workflow mode) or the problem (other
   * modes) and stores the result plus a 200-character summary on the context.
   */
  private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
    console.log('[AI-PIPELINE] Starting scenario analysis micro-task');
    const isWorkflow = context.mode === 'workflow';
    const prompt = getPrompt('scenarioAnalysis', isWorkflow, context.userQuery);

    const result = await this.callMicroTaskAI(prompt, context, 400);

    if (result.success) {
      if (isWorkflow) {
        context.scenarioAnalysis = result.content;
      } else {
        context.problemAnalysis = result.content;
      }

      this.addToContextHistory(context, `${isWorkflow ?
'Szenario' : 'Problem'}-Analyse: ${result.content.slice(0, 200)}...`);
    }

    return result;
  }

  /**
   * Micro-task: generates the investigation (workflow) or solution approach and
   * stores it plus a 200-character summary on the context.
   */
  private async generateApproach(context: AnalysisContext): Promise<MicroTaskResult> {
    console.log('[AI-PIPELINE] Starting investigation approach micro-task');
    const isWorkflow = context.mode === 'workflow';
    const prompt = getPrompt('investigationApproach', isWorkflow, context.userQuery);

    const result = await this.callMicroTaskAI(prompt, context, 400);

    if (result.success) {
      context.investigationApproach = result.content;
      this.addToContextHistory(context, `${isWorkflow ? 'Untersuchungs' : 'Lösungs'}ansatz: ${result.content.slice(0, 200)}...`);
    }

    return result;
  }

  /**
   * Micro-task: generates critical considerations and stores them plus a
   * 200-character summary on the context.
   */
  private async generateCriticalConsiderations(context: AnalysisContext): Promise<MicroTaskResult> {
    console.log('[AI-PIPELINE] Starting critical considerations micro-task');
    const isWorkflow = context.mode === 'workflow';
    const prompt = getPrompt('criticalConsiderations', isWorkflow, context.userQuery);

    const result = await this.callMicroTaskAI(prompt, context, 350);

    if (result.success) {
      context.criticalConsiderations = result.content;
      this.addToContextHistory(context, `Kritische Überlegungen: ${result.content.slice(0, 200)}...`);
    }

    return result;
  }

  /**
   * Micro-task: lets the AI choose tools for one workflow phase from the
   * filtered tools that list that phase, and adds every valid choice to the
   * context's selection.
   *
   * Selections naming a tool that is not available for the phase (or that are
   * not objects at all) are logged and ignored. A missing or non-numeric
   * `taskRelevance` defaults to 70.
   *
   * @returns the raw micro-task result; an empty successful result when no
   *          tool is available for the phase
   */
  private async selectToolsForPhase(context: AnalysisContext, phase: any): Promise<MicroTaskResult> {
    console.log('[AI-PIPELINE] Starting phase tool selection micro-task for:', phase.id);

    const phaseTools = context.filteredData.tools.filter((tool: any) =>
      tool && tool.phases && Array.isArray(tool.phases) && tool.phases.includes(phase.id)
    );

    if (phaseTools.length === 0) {
      console.log('[AI-PIPELINE] No tools available for phase:', phase.id);
      return {
        taskType: 'tool-selection',
        content: JSON.stringify([]),
        processingTimeMs: 0,
        success: true
      };
    }

    const phaseMethods = phaseTools.filter((t: any) => t && t.type === 'method');
    const phaseSoftware = phaseTools.filter((t: any) => t && t.type === 'software');

    console.log('[AI-PIPELINE] Phase tools available:', phaseMethods.length, 'methods,', phaseSoftware.length, 'software');

    const prompt = getPrompt('phaseToolSelection', context.userQuery, phase, phaseTools);

    const result = await this.callMicroTaskAI(prompt, context, 1000);

    if (result.success) {
      const selections = this.safeParseJSON(result.content, []);

      if (Array.isArray(selections)) {
        // Resolve each selection once so the stored values and the audited
        // values are guaranteed to be the same.
        const validSelections: any[] = [];
        for (const sel of selections) {
          const matchingTool = sel
            ? phaseTools.find((tool: any) => tool && tool.name === sel.toolName)
            : undefined;
          if (!matchingTool) {
            console.warn('[AI-PIPELINE] Invalid tool selection for phase:', phase.id, sel?.toolName);
            continue;
          }

          const taskRelevance = typeof sel.taskRelevance === 'number'
            ? sel.taskRelevance
            : parseInt(String(sel.taskRelevance), 10) || 70;

          validSelections.push({
            toolName: sel.toolName,
            tool: matchingTool,
            taskRelevance,
            priority: this.derivePriorityFromScore(taskRelevance),
            justification: sel.justification,
            limitations: sel.limitations
          });
        }

        console.log('[AI-PIPELINE] Valid selections for phase:', phase.id, validSelections.length);

        validSelections.forEach((sel: any) => {
          this.addToolToSelection(context, sel.tool, phase.id, sel.priority, sel.justification, sel.taskRelevance, sel.limitations);
        });

        this.addAuditEntry(
          context,
          'micro-task',
          'phase-tool-selection',
          { phase: phase.id, availableTools: phaseTools.length },
          {
            validSelections: validSelections.length,
            selectedTools: validSelections.map((s: any) => ({
              name: s.toolName,
              taskRelevance: s.taskRelevance,
              derivedPriority: s.priority
            }))
          },
          validSelections.length > 0 ?
75 : 30,
          // Reconstructs the micro-task's start time from its measured duration.
          Date.now() - result.processingTimeMs,
          { phaseName: phase.name }
        );
      }
    }

    return result;
  }

  /**
   * Finds phases with at most one selected tool and tries to add more tools to
   * each of them via a semantic search, pausing `microTaskDelay` ms between phases.
   */
  private async completeUnderrepresentedPhases(
    context: AnalysisContext,
    toolsData: any,
    originalQuery: string
  ): Promise<void> {
    const phases = toolsData.phases || [];
    const selectedPhases = new Map<string, number>();

    // Count the selected tools per phase.
    context.selectedTools?.forEach((st: any) => {
      const count = selectedPhases.get(st.phase) || 0;
      selectedPhases.set(st.phase, count + 1);
    });

    console.log('[AI-PIPELINE] Phase coverage analysis complete');

    const phaseQueryTemplates = this.generatePhaseQueryTemplates(phases);

    const underrepresentedPhases = phases.filter((phase: any) => {
      const count = selectedPhases.get(phase.id) || 0;
      return count <= 1;
    });

    if (underrepresentedPhases.length === 0) {
      console.log('[AI-PIPELINE] All phases adequately represented');
      return;
    }

    console.log('[AI-PIPELINE] Completing underrepresented phases:', underrepresentedPhases.map((p: any) => p.id).join(', '));

    for (const phase of underrepresentedPhases) {
      await this.completePhaseWithSemanticSearch(context, phase, phaseQueryTemplates, toolsData, originalQuery);
      await this.delay(this.microTaskDelay);
    }
  }

  /**
   * Adds up to two further tools to one phase.
   *
   * A phase-specific keyword query is run against the embeddings service; hits
   * that belong to the phase and have not been selected yet (at most five) are
   * offered to the AI, and for each tool it picks a justification is generated
   * by a second micro-task. Every failure is logged and audited; nothing is thrown.
   */
  private async completePhaseWithSemanticSearch(
    context: AnalysisContext,
    phase: any,
    phaseQueryTemplates: Record<string, string>,
    toolsData: any,
    originalQuery: string
  ): Promise<void> {
    const phaseStart = Date.now();
    const phaseQuery = phaseQueryTemplates[phase.id] || `forensic ${phase.name.toLowerCase()} tools methods`;

    console.log('[AI-PIPELINE] Starting enhanced phase completion micro-task for:', phase.id);

    try {
      const phaseResults = await embeddingsService.findSimilar(phaseQuery, 20, 0.2);

      if (phaseResults.length === 0) {
        console.log('[AI-PIPELINE] No semantic results for phase:', phase.id);
        return;
      }

      const toolsMap = new Map(toolsData.tools.map((tool: any) => [tool.name, tool]));
      const conceptsMap = new Map(toolsData.concepts.map((concept: any) => [concept.name, concept]));

      const phaseTools = phaseResults
        .filter((result: any) => result.type === 'tool')
        .map((result: any) => toolsMap.get(result.name))
        .filter((tool: any): tool is NonNullable<any> =>
          tool !== undefined &&
          tool !== null &&
          tool.phases &&
          Array.isArray(tool.phases) &&
          tool.phases.includes(phase.id) &&
          !context.seenToolNames.has(tool.name)
        )
        .slice(0, 5);

      const phaseConcepts = phaseResults
        .filter((result: any) => result.type === 'concept')
        .map((result: any) => conceptsMap.get(result.name))
        .filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null)
        .slice(0, 2);

      if (phaseTools.length === 0) {
        console.log('[AI-PIPELINE] No suitable tools for phase completion:', phase.id);
        return;
      }

      const selectionPrompt = AI_PROMPTS.generatePhaseCompletionPrompt(originalQuery, phase, phaseTools, phaseConcepts);
      const selectionResult = await this.callMicroTaskAI(selectionPrompt, context, 800);

      if (!selectionResult.success) {
        console.error('[AI-PIPELINE] Phase completion selection failed for:', phase.id);
        return;
      }

      const selection = this.safeParseJSON(selectionResult.content, {
        selectedTools: [],
        selectedConcepts: [],
        completionReasoning: ''
      });

      // A response that parsed to something other than an object has no tool list.
      const selectedNames: string[] = Array.isArray(selection?.selectedTools) ? selection.selectedTools : [];

      const validTools = selectedNames
        .map((name: string) => phaseTools.find((t: any) => t && t.name === name))
        .filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null)
        .slice(0, 2);

      if (validTools.length === 0) {
        console.log('[AI-PIPELINE] No valid tools selected for phase completion:', phase.id);
        return;
      }

      for (const tool of validTools) {
        console.log('[AI-PIPELINE] Generating reasoning for phase completion tool:', tool.name);

        const reasoningPrompt = getPrompt(
          'phaseCompletionReasoning',
          originalQuery,
          phase,
          tool.name,
          tool,
          selection?.completionReasoning || 'Nachergänzung zur Vervollständigung der Phasenabdeckung'
        );

        const reasoningResult = await this.callMicroTaskAI(reasoningPrompt, context, 400);

        let detailedJustification: string;
        if (reasoningResult.success) {
          detailedJustification = reasoningResult.content.trim();
        } else {
          detailedJustification = `Nachträglich hinzugefügt zur Vervollständigung der ${phase.name}-Phase. Die ursprüngliche KI-Auswahl war zu spezifisch und hat wichtige Tools für diese Phase übersehen.`;
        }

        this.addToolToSelection(
          context,
          tool,
          phase.id,
          'medium',
          detailedJustification,
          75,
          ['Nachträgliche Ergänzung via semantische Phasensuche']
        );

        console.log('[AI-PIPELINE] Added phase completion tool with reasoning:', tool.name);
      }

      this.addAuditEntry(
        context,
        'validation',
        'phase-completion',
        {
          phase: phase.id,
          phaseQuery,
          candidatesFound: phaseTools.length,
          selectionReasoning: selection?.completionReasoning
        },
        {
          toolsAdded: validTools.length,
          addedTools: validTools.map((t: any) => ({
            name: t.name,
            type: t.type,
            reasoning: 'Generated via micro-task'
          }))
        },
        validTools.length > 0 ? 80 : 40,
        phaseStart,
        {
          phaseCompletion: true,
          semanticSearch: true,
          microTaskReasoning: true,
          contextualExplanation: true
        }
      );
    } catch (error) {
      console.error('[AI-PIPELINE] Enhanced phase completion failed for:', phase.id, error);

      this.addAuditEntry(
        context,
        'validation',
        'phase-completion-failed',
        { phase: phase.id, phaseQuery },
        // The catch variable is `unknown` under strict settings; narrow before reading `.message`.
        { error: error instanceof Error ? error.message : String(error) },
        10,
        phaseStart,
        { phaseCompletion: true, failed: true }
      );
    }
  }

  /**
   * Micro-task: asks the AI for a detailed evaluation of one tool and adds the
   * tool, enriched with that evaluation, to the selection under the phase
   * 'evaluation'. The task relevance is taken from an existing selection of the
   * same tool, defaulting to 70.
   */
  private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
    console.log('[AI-PIPELINE] Starting tool evaluation micro-task for:', tool.name);

    const existingSelection = context.selectedTools?.find((st: any) => st.tool && st.tool.name === tool.name);
    const taskRelevance = existingSelection?.taskRelevance || 70;
    const priority = this.derivePriorityFromScore(taskRelevance);

    const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank, taskRelevance);

    const result = await this.callMicroTaskAI(prompt, context, 1000);

    if (result.success) {
      const evaluation = this.safeParseJSON(result.content, {
        detailed_explanation: 'Evaluation failed',
        implementation_approach: '',
        pros: [],
        limitations: [],
        alternatives: ''
      });

      this.addToolToSelection(context, {
        ...tool,
        evaluation: {
...evaluation, rank, task_relevance: taskRelevance } }, 'evaluation', priority, evaluation.detailed_explanation, taskRelevance, evaluation.limitations); this.addAuditEntry( context, 'micro-task', 'tool-evaluation', { toolName: tool.name, rank, existingTaskRelevance: taskRelevance }, { hasExplanation: !!evaluation.detailed_explanation, hasImplementationApproach: !!evaluation.implementation_approach, prosCount: evaluation.pros?.length || 0, limitationsCount: evaluation.limitations?.length || 0 }, 70, Date.now() - result.processingTimeMs, { toolType: tool.type } ); } return result; } private async selectBackgroundKnowledge(context: AnalysisContext): Promise { console.log('[AI-PIPELINE] Starting background knowledge selection micro-task'); const availableConcepts = context.filteredData.concepts; if (availableConcepts.length === 0) { return { taskType: 'background-knowledge', content: JSON.stringify([]), processingTimeMs: 0, success: true }; } const selectedToolNames = context.selectedTools?.map((st: any) => st.tool && st.tool.name).filter(Boolean) || []; const prompt = getPrompt('backgroundKnowledgeSelection', context.userQuery, context.mode, selectedToolNames, availableConcepts); const result = await this.callMicroTaskAI(prompt, context, 700); if (result.success) { const selections = this.safeParseJSON(result.content, []); if (Array.isArray(selections)) { context.backgroundKnowledge = selections.filter((sel: any) => sel.conceptName && availableConcepts.some((concept: any) => concept.name === sel.conceptName) ).map((sel: any) => ({ concept: availableConcepts.find((c: any) => c.name === sel.conceptName), relevance: sel.relevance })); this.addAuditEntry( context, 'micro-task', 'background-knowledge-selection', { availableConcepts: availableConcepts.length }, { selectedConcepts: context.backgroundKnowledge?.length || 0 }, context.backgroundKnowledge && context.backgroundKnowledge.length > 0 ? 
75 : 40, Date.now() - result.processingTimeMs, {} ); } } return result; } private async generateFinalRecommendations(context: AnalysisContext): Promise { console.log('[AI-PIPELINE] Starting final recommendations micro-task'); const selectedToolNames = context.selectedTools?.map((st: any) => st.tool && st.tool.name).filter(Boolean) || []; const prompt = getPrompt('finalRecommendations', context.mode === 'workflow', context.userQuery, selectedToolNames); const result = await this.callMicroTaskAI(prompt, context, 350); return result; } private async callAI(prompt: string, maxTokens: number = 1500): Promise { const endpoint = this.config.endpoint; const apiKey = this.config.apiKey; const model = this.config.model; let headers: Record = { 'Content-Type': 'application/json' }; if (apiKey) { headers['Authorization'] = `Bearer ${apiKey}`; } const requestBody = { model, messages: [{ role: 'user', content: prompt }], max_tokens: maxTokens, temperature: 0.3 }; try { const response = await fetch(`${endpoint}/v1/chat/completions`, { method: 'POST', headers, body: JSON.stringify(requestBody) }); if (!response.ok) { const errorText = await response.text(); console.error('[AI-PIPELINE] AI API Error:', response.status, errorText); throw new Error(`AI API error: ${response.status} - ${errorText}`); } const data = await response.json(); const content = data.choices?.[0]?.message?.content; if (!content) { console.error('[AI-PIPELINE] No response content from AI model'); throw new Error('No response from AI model'); } return content; } catch (error) { console.error('[AI-PIPELINE] AI service call failed:', error.message); throw error; } } private derivePriorityFromScore(taskRelevance: number): string { if (taskRelevance >= 80) return 'high'; if (taskRelevance >= 60) return 'medium'; return 'low'; } private async performAISelection( filteredData: any, userQuery: string, mode: string, context: AnalysisContext ): Promise<{ tools: any[], concepts: any[] }> { const result = await 
this.aiSelectionWithFullData( userQuery, filteredData.tools, filteredData.concepts, mode, embeddingsService.isEnabled() ? 'embeddings_candidates' : 'full_dataset', context ); console.log('[AI-PIPELINE] AI selection complete:', result.selectedTools.length, 'tools,', result.selectedConcepts.length, 'concepts'); return { tools: result.selectedTools, concepts: result.selectedConcepts }; } async processQuery(userQuery: string, mode: string): Promise { const startTime = Date.now(); let completeTasks = 0; let failedTasks = 0; console.log('[AI-PIPELINE] Starting', mode, 'query processing'); // CLEAR AUDIT TRAIL for new analysis auditService.clearAuditTrail(); try { const toolsData = await getCompressedToolsDataForAI(); const context: AnalysisContext = { userQuery, mode, filteredData: {}, contextHistory: [], maxContextLength: this.maxContextTokens, currentContextLength: 0, seenToolNames: new Set(), embeddingsSimilarities: new Map(), aiSelectedTools: [], aiSelectedConcepts: [] }; const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode, context); const aiSelection = await this.performAISelection(filteredData, userQuery, mode, context); context.aiSelectedTools = aiSelection.tools; context.aiSelectedConcepts = aiSelection.concepts; context.filteredData = { tools: aiSelection.tools, concepts: aiSelection.concepts, domains: filteredData.domains, phases: filteredData.phases, 'domain-agnostic-software': filteredData['domain-agnostic-software'] }; this.addAuditEntry( context, 'initialization', 'pipeline-start', { userQuery, mode, toolsDataLoaded: !!toolsData }, { candidateTools: filteredData.tools.length, candidateConcepts: filteredData.concepts.length }, 90, startTime, { auditEnabled: auditService.isEnabled() } ); const analysisResult = await this.analyzeScenario(context); if (analysisResult.success) completeTasks++; else failedTasks++; await this.delay(this.microTaskDelay); const approachResult = await this.generateApproach(context); if 
(approachResult.success) completeTasks++; else failedTasks++; await this.delay(this.microTaskDelay); const considerationsResult = await this.generateCriticalConsiderations(context); if (considerationsResult.success) completeTasks++; else failedTasks++; await this.delay(this.microTaskDelay); if (mode === 'workflow') { const phases = toolsData.phases || []; for (const phase of phases) { const toolSelectionResult = await this.selectToolsForPhase(context, phase); if (toolSelectionResult.success) completeTasks++; else failedTasks++; await this.delay(this.microTaskDelay); } await this.completeUnderrepresentedPhases(context, toolsData, userQuery); } else { const topTools = filteredData.tools.slice(0, 3); for (let i = 0; i < topTools.length; i++) { const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1); if (evaluationResult.success) completeTasks++; else failedTasks++; await this.delay(this.microTaskDelay); } } const knowledgeResult = await this.selectBackgroundKnowledge(context); if (knowledgeResult.success) completeTasks++; else failedTasks++; await this.delay(this.microTaskDelay); const finalResult = await this.generateFinalRecommendations(context); if (finalResult.success) completeTasks++; else failedTasks++; const recommendation = this.buildRecommendation(context, mode, finalResult.content); this.addAuditEntry( context, 'completion', 'pipeline-end', { completedTasks: completeTasks, failedTasks }, { finalRecommendation: !!recommendation, auditEntriesGenerated: auditService.getCurrentAuditTrail().length }, completeTasks > failedTasks ? 
85 : 60, startTime, { totalProcessingTimeMs: Date.now() - startTime } ); const processingStats = { embeddingsUsed: embeddingsService.isEnabled(), candidatesFromEmbeddings: filteredData.tools.length, finalSelectedItems: (context.selectedTools?.length || 0) + (context.backgroundKnowledge?.length || 0), processingTimeMs: Date.now() - startTime, microTasksCompleted: completeTasks, microTasksFailed: failedTasks, contextContinuityUsed: true }; console.log('[AI-PIPELINE] Processing complete. Tasks completed:', completeTasks, 'failed:', failedTasks); // FINALIZE AUDIT TRAIL and get final trail const finalAuditTrail = auditService.finalizeAuditTrail(); return { recommendation: { ...recommendation, auditTrail: auditService.isEnabled() ? finalAuditTrail : undefined }, processingStats }; } catch (error) { console.error('[AI-PIPELINE] Processing failed:', error); throw error; } } private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any { const isWorkflow = mode === 'workflow'; console.log('[AI-PIPELINE] Building recommendation for', mode, 'mode with', context.selectedTools?.length || 0, 'tools'); if (context.selectedTools && context.selectedTools.length > 0) { const methods = context.selectedTools.filter((st: any) => st.tool && st.tool.type === 'method'); const software = context.selectedTools.filter((st: any) => st.tool && st.tool.type === 'software'); console.log('[AI-PIPELINE] Final selection breakdown:', methods.length, 'methods,', software.length, 'software'); console.log('[AI-PIPELINE] Method names:', methods.map((m: any) => m.tool.name).join(', ')); console.log('[AI-PIPELINE] Software names:', software.map((s: any) => s.tool.name).join(', ')); context.selectedTools.forEach((st: any, index: number) => { console.log('[AI-PIPELINE] Selected tool', index + 1, ':', st.tool.name, '(' + st.tool.type + ') - Phase:', st.phase, ', Priority:', st.priority); }); } else { console.warn('[AI-PIPELINE] WARNING: No tools in selectedTools array!'); } 
const base = { [isWorkflow ? 'scenario_analysis' : 'problem_analysis']: isWorkflow ? context.scenarioAnalysis : context.problemAnalysis, investigation_approach: context.investigationApproach, critical_considerations: context.criticalConsiderations, background_knowledge: context.backgroundKnowledge?.map((bk: any) => ({ concept_name: bk.concept.name, relevance: bk.relevance })) || [] }; if (isWorkflow) { const recommendedToolsWithConfidence = context.selectedTools?.map((st: any) => { const confidence = this.calculateRecommendationConfidence( st.tool, context, st.taskRelevance || 70, st.limitations || [] ); this.addAuditEntry( context, 'validation', 'confidence-scoring', { toolName: st.tool.name, toolType: st.tool.type, phase: st.phase }, { overall: confidence.overall, components: { semantic: confidence.semanticRelevance, suitability: confidence.taskSuitability, } }, confidence.overall, Date.now(), { uncertaintyCount: confidence.uncertaintyFactors.length, strengthCount: confidence.strengthIndicators.length } ); return { name: st.tool.name, type: st.tool.type, phase: st.phase, priority: st.priority, justification: st.justification || `Empfohlen für ${st.phase}`, confidence: confidence, recommendationStrength: confidence.overall >= this.confidenceConfig.highThreshold ? 'strong' : confidence.overall >= this.confidenceConfig.mediumThreshold ? 
'moderate' : 'weak' }; }) || []; return { ...base, recommended_tools: recommendedToolsWithConfidence, workflow_suggestion: finalContent }; } else { const recommendedToolsWithConfidence = context.selectedTools?.map((st: any) => { const confidence = this.calculateRecommendationConfidence( st.tool, context, st.taskRelevance || 70, st.limitations || [] ); this.addAuditEntry( context, 'validation', 'confidence-scoring', { toolName: st.tool.name, toolType: st.tool.type, rank: st.tool.evaluation?.rank || 1 }, { overall: confidence.overall, suitabilityAlignment: st.priority === 'high' && confidence.overall >= this.confidenceConfig.highThreshold }, confidence.overall, Date.now(), { strengthCount: confidence.strengthIndicators.length } ); return { name: st.tool.name, type: st.tool.type, rank: st.tool.evaluation?.rank || 1, suitability_score: st.priority, detailed_explanation: st.tool.evaluation?.detailed_explanation || '', implementation_approach: st.tool.evaluation?.implementation_approach || '', pros: st.tool.evaluation?.pros || [], cons: st.tool.evaluation?.limitations || [], alternatives: st.tool.evaluation?.alternatives || '', confidence: confidence, recommendationStrength: confidence.overall >= this.confidenceConfig.highThreshold ? 'strong' : confidence.overall >= this.confidenceConfig.mediumThreshold ? 'moderate' : 'weak' }; }) || []; return { ...base, recommended_tools: recommendedToolsWithConfidence, additional_considerations: finalContent }; } } } const aiPipeline = new ImprovedMicroTaskAIPipeline(); export { aiPipeline, type AnalysisResult };