From 3c6fb568d65da39f08b9422bbcbec95a678e3e4e Mon Sep 17 00:00:00 2001
From: overcuriousity
Date: Fri, 8 Aug 2025 22:54:47 +0200
Subject: [PATCH] fix pipeline

---
 src/config/prompts.ts   |  52 ++++++++++
 src/utils/aiPipeline.ts | 198 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 250 insertions(+)

diff --git a/src/config/prompts.ts b/src/config/prompts.ts
index bc51f4e..a75df21 100644
--- a/src/config/prompts.ts
+++ b/src/config/prompts.ts
@@ -189,6 +189,57 @@ ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB DER JSON-S
 ]`;
   },
 
+  /**
+   * Builds the prompt that asks the model to pick at most two tools (plus
+   * optional concepts) for a phase the current selection covers poorly.
+   *
+   * @param originalQuery - User query, embedded verbatim in the prompt.
+   * @param phase - Phase record; `name` and optional `description` are read.
+   * @param candidateTools - Candidates; `name`, `type`, `description` and `skillLevel` are read.
+   * @param candidateConcepts - Candidates; `name` and `description` are read.
+   * @returns The German prompt text ending with the expected JSON answer shape.
+   */
+  generatePhaseCompletionPrompt(
+    originalQuery: string,
+    phase: any,
+    candidateTools: any[],
+    candidateConcepts: any[]
+  ): string {
+    return `Du bist ein DFIR-Experte. Die Phase "${phase.name}" ist in der aktuellen Analyse unterrepräsentiert.
+
+ORIGINAL ANFRAGE: "${originalQuery}"
+PHASE ZU VERVOLLSTÄNDIGEN: ${phase.name} - ${phase.description || ''}
+
+Wähle 1-2 BESTE Tools aus den gefundenen Kandidaten, die diese Phase optimal ergänzen:
+
+VERFÜGBARE TOOLS (${candidateTools.length}):
+${candidateTools.map((tool: any) => `
+- ${tool.name} (${tool.type})
+  Beschreibung: ${(tool.description || '').slice(0, 120)}...
+  Skill Level: ${tool.skillLevel}
+`).join('')}
+
+${candidateConcepts.length > 0 ? `
+VERFÜGBARE KONZEPTE (${candidateConcepts.length}):
+${candidateConcepts.map((concept: any) => `
+- ${concept.name}
+  Beschreibung: ${(concept.description || '').slice(0, 120)}...
+`).join('')}
+` : ''}
+
+AUSWAHLREGELN:
+1. Wähle Tools, die die ${phase.name}-Phase der ursprünglichen Anfrage optimal ergänzen
+2. Priorisiere Tools, die zur Gesamtlösung beitragen
+3. Maximal 2 Tools für diese Phase
+
+ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
+{
+  "selectedTools": ["ToolName1", "ToolName2"],
+  "selectedConcepts": ["ConceptName1"],
+  "reasoning": "Kurze Begründung der Auswahl für ${phase.name}"
+}`;
+  },
+
   finalRecommendations: (isWorkflow: boolean, userQuery: string, selectedToolNames: string[]) => {
     const focus = isWorkflow ?
       'Workflow-Schritte, Best Practices, Objektivität' :
@@ -213,6 +264,7 @@ export function getPrompt(key: 'phaseToolSelection', userQuery: string, phase: a
 export function getPrompt(key: 'toolEvaluation', userQuery: string, tool: any, rank: number, taskRelevance: number): string;
 export function getPrompt(key: 'backgroundKnowledgeSelection', userQuery: string, mode: string, selectedToolNames: string[], availableConcepts: any[]): string;
 export function getPrompt(key: 'finalRecommendations', isWorkflow: boolean, userQuery: string, selectedToolNames: string[]): string;
+export function getPrompt(key: 'generatePhaseCompletionPrompt', originalQuery: string, phase: any, candidateTools: any[], candidateConcepts: any[]): string;
 export function getPrompt(promptKey: keyof typeof AI_PROMPTS, ...args: any[]): string {
   try {
     const promptFunction = AI_PROMPTS[promptKey];
diff --git a/src/utils/aiPipeline.ts b/src/utils/aiPipeline.ts
index cceec47..2ae0fd4 100644
--- a/src/utils/aiPipeline.ts
+++ b/src/utils/aiPipeline.ts
@@ -1096,6 +1096,200 @@ class ImprovedMicroTaskAIPipeline {
     return result;
   }
 
+  /**
+   * Tops up workflow phases that ended up with at most one selected tool.
+   *
+   * Counts the entries of `context.selectedTools` per phase, logs the coverage and,
+   * for every phase in `toolsData.phases` with a count of 0 or 1, runs
+   * `completePhaseWithSemanticSearch` and then awaits `this.delay(this.microTaskDelay)`.
+   *
+   * @param context - Analysis context; `selectedTools` is read here.
+   * @param toolsData - Tool database; `phases` is read here.
+   * @param originalQuery - The user's query, forwarded to the phase completion step.
+   */
+  private async completeUnderrepresentedPhases(
+    context: AnalysisContext,
+    toolsData: any,
+    originalQuery: string
+  ): Promise<void> {
+    const phases: any[] = toolsData.phases || [];
+    const selectedPhases = new Map<string, number>();
+
+    // Count tools per phase from current selection
+    context.selectedTools?.forEach(st => {
+      const count = selectedPhases.get(st.phase) ?? 0;
+      selectedPhases.set(st.phase, count + 1);
+    });
+
+    console.log(`[AI PIPELINE] Phase coverage analysis:`);
+    phases.forEach(phase => {
+      const count = selectedPhases.get(phase.id) ?? 0;
+      console.log(`[AI PIPELINE] ${phase.id}: ${count} tools`);
+    });
+
+    // Define phase-specific semantic queries
+    const phaseQueryTemplates: Record<string, string> = {
+      'data-collection': 'forensic data acquisition imaging memory disk capture evidence collection',
+      'examination': 'forensic analysis parsing extraction artifact examination file system',
+      'analysis': 'forensic correlation timeline analysis pattern detection investigation',
+      'reporting': 'forensic report documentation case management collaboration presentation findings'
+    };
+
+    // Identify underrepresented phases (0 tools = missing, 1 tool = underrepresented)
+    const underrepresentedPhases = phases.filter(phase => {
+      const count = selectedPhases.get(phase.id) ?? 0;
+      return count <= 1; // Missing (0) or underrepresented (1)
+    });
+
+    if (underrepresentedPhases.length === 0) {
+      console.log(`[AI PIPELINE] All phases adequately represented, no completion needed`);
+      return;
+    }
+
+    console.log(`[AI PIPELINE] Underrepresented phases: ${underrepresentedPhases.map(p => p.id).join(', ')}`);
+
+    // Process each underrepresented phase
+    for (const phase of underrepresentedPhases) {
+      await this.completePhaseWithSemanticSearch(context, phase, phaseQueryTemplates, toolsData, originalQuery);
+      await this.delay(this.microTaskDelay);
+    }
+  }
+
+  /**
+   * Fills a single phase via semantic search followed by an AI pick.
+   *
+   * Queries `embeddingsService.findSimilar`, keeps tools that list `phase.id` in
+   * `phases` and are not in `context.seenToolNames`, asks the model to choose
+   * among them and adds at most two of its picks. Failures inside the try block
+   * are logged and recorded as an audit entry instead of being rethrown.
+   *
+   * @param context - Analysis context that receives the added tools and audit entries.
+   * @param phase - Phase record; `id` and `name` are read.
+   * @param phaseQueryTemplates - Search query per phase id; a generic query is built when absent.
+   * @param toolsData - Tool database; `tools` and `concepts` are read.
+   * @param originalQuery - The user's query, passed to the prompt and the audit entry.
+   */
+  private async completePhaseWithSemanticSearch(
+    context: AnalysisContext,
+    phase: any,
+    phaseQueryTemplates: Record<string, string>,
+    toolsData: any,
+    originalQuery: string
+  ): Promise<void> {
+    const phaseStart = Date.now();
+
+    // Generate phase-specific semantic query; tolerate a phase without a name
+    // because this expression runs outside the try block below.
+    const phaseQuery = phaseQueryTemplates[phase.id] ||
+      `forensic ${String(phase.name ?? phase.id).toLowerCase()} tools methods`;
+
+    console.log(`[AI PIPELINE] Completing phase ${phase.id} with query: "${phaseQuery}"`);
+
+    try {
+      // Run semantic search with phase-specific query
+      const phaseResults = await embeddingsService.findSimilar(
+        phaseQuery,
+        20, // Smaller set for phase completion
+        0.2 // Lower threshold for more results
+      );
+
+      if (phaseResults.length === 0) {
+        console.log(`[AI PIPELINE] No semantic results for phase ${phase.id}`);
+        return;
+      }
+
+      // Filter to tools that actually belong to this phase
+      const toolsMap = new Map<string, any>((toolsData.tools ?? []).map((tool: any) => [tool.name, tool]));
+      const conceptsMap = new Map<string, any>((toolsData.concepts ?? []).map((concept: any) => [concept.name, concept]));
+
+      const phaseTools = phaseResults
+        .filter(result => result.type === 'tool')
+        .map(result => toolsMap.get(result.name))
+        .filter((tool): tool is any =>
+          tool !== undefined &&
+          tool.phases &&
+          tool.phases.includes(phase.id) &&
+          !context.seenToolNames.has(tool.name) // Don't re-select already chosen tools
+        )
+        .slice(0, 5); // Top 5 candidates for this phase
+
+      const phaseConcepts = phaseResults
+        .filter(result => result.type === 'concept')
+        .map(result => conceptsMap.get(result.name))
+        .filter((concept): concept is any => concept !== undefined)
+        .slice(0, 2); // Top 2 concepts
+
+      console.log(`[AI PIPELINE] Phase ${phase.id} semantic search found: ${phaseTools.length} tools, ${phaseConcepts.length} concepts`);
+
+      if (phaseTools.length === 0) {
+        console.log(`[AI PIPELINE] No suitable tools found for phase ${phase.id} after filtering`);
+        return;
+      }
+
+      const prompt = AI_PROMPTS.generatePhaseCompletionPrompt(originalQuery, phase, phaseTools, phaseConcepts);
+
+      const response = await this.callAI(prompt, 800);
+      const selection = this.safeParseJSON(response, { selectedTools: [], selectedConcepts: [] });
+
+      // The model may omit the field or return a non-array; treat that as "nothing selected".
+      // NOTE(review): `selectedConcepts` is requested in the prompt but not applied here.
+      const selectedNames: unknown[] = Array.isArray(selection?.selectedTools) ? selection.selectedTools : [];
+
+      // De-duplicate so a name repeated by the model is added only once
+      const validTools = [...new Set(selectedNames)]
+        .map(name => phaseTools.find(t => t.name === name))
+        .filter((tool): tool is any => tool !== undefined)
+        .slice(0, 2);
+
+      validTools.forEach(tool => {
+        console.log(`[AI PIPELINE] Adding phase completion tool: ${tool.name} for ${phase.id}`);
+
+        this.addToolToSelection(
+          context,
+          tool,
+          phase.id,
+          'medium', // Phase completion tools get medium priority
+          `Hinzugefügt zur Vervollständigung der ${phase.name}-Phase`,
+          75, // Good relevance for phase-specific search
+          ['Via phasenspezifische semantische Suche hinzugefügt']
+        );
+      });
+
+      // Audit the phase completion
+      this.addAuditEntry(context, 'validation', 'phase-completion',
+        {
+          phase: phase.id,
+          phaseQuery,
+          candidatesFound: phaseTools.length,
+          originalQuery: originalQuery.slice(0, 100) + '...'
+        },
+        {
+          toolsAdded: validTools.length,
+          addedTools: validTools.map(t => t.name),
+          semanticResults: phaseResults.length
+        },
+        validTools.length > 0 ? 80 : 40,
+        phaseStart,
+        {
+          phaseCompletion: true,
+          semanticSearch: true,
+          originalQueryBias: true
+        }
+      );
+
+    } catch (error) {
+      console.error(`[AI PIPELINE] Phase completion failed for ${phase.id}:`, error);
+
+      this.addAuditEntry(context, 'validation', 'phase-completion-failed',
+        { phase: phase.id, phaseQuery },
+        { error: error instanceof Error ? error.message : String(error) },
+        10,
+        phaseStart,
+        { phaseCompletion: true, failed: true }
+      );
+    }
+  }
+
   private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise {
     const existingSelection = context.selectedTools?.find(st => st.tool.name === tool.name);
     const taskRelevance = existingSelection?.taskRelevance || 70;
@@ -1373,6 +1567,10 @@ class ImprovedMicroTaskAIPipeline {
           if (toolSelectionResult.success) completeTasks++; else failedTasks++;
           await this.delay(this.microTaskDelay);
         }
+
+        console.log('[AI PIPELINE] Checking for underrepresented phases...');
+        await this.completeUnderrepresentedPhases(context, toolsData, userQuery);
+
       } else {
         const topTools = filteredData.tools.slice(0, 3);
         for (let i = 0; i < topTools.length; i++) {