2025-08-11 12:02:56 +00:00
2 changed files with 208 additions and 0 deletions
--- a/src/config/prompts.ts
+++ b/src/config/prompts.ts
@@ -189,6 +189,47 @@ ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB DER JSON-S
 ]`;
  },
  /**
   * Builds the German-language prompt that asks the model to pick at most two
   * tools (and optionally concepts) to fill an underrepresented phase.
   *
   * The prompt lists every candidate tool with its name, type, skill level and
   * a description preview capped at 120 characters. A concept section is only
   * emitted when `candidateConcepts` is non-empty. The model is instructed to
   * answer with a JSON object containing `selectedTools`, `selectedConcepts`
   * and `reasoning`.
   *
   * @param originalQuery - The user's original request, quoted verbatim in the prompt.
   * @param phase - Phase record; `name` and (optional) `description` are read.
   * @param candidateTools - Tool records; `name`, `type`, `description`, `skillLevel` are read.
   * @param candidateConcepts - Concept records; `name` and `description` are read.
   * @returns The fully rendered prompt text.
   */
  generatePhaseCompletionPrompt(
    originalQuery: string,
    phase: any,
    candidateTools: any[],
    candidateConcepts: any[]
  ): string {
    // Candidate records are loosely typed; a missing description must not
    // throw and abort prompt generation for the whole phase.
    const preview = (text: unknown): string => String(text ?? '').slice(0, 120);

    return `Du bist ein DFIR-Experte. Die Phase "${phase.name}" ist in der aktuellen Analyse unterrepräsentiert.
 ORIGINAL ANFRAGE: "${originalQuery}"
 PHASE ZU VERVOLLSTÄNDIGEN: ${phase.name} - ${phase.description || ''}
 Wähle 1-2 BESTE Tools aus den gefundenen Kandidaten, die diese Phase optimal ergänzen:
 VERFÜGBARE TOOLS (${candidateTools.length}):
 ${candidateTools.map((tool: any) => `
 - ${tool.name} (${tool.type})
  Beschreibung: ${preview(tool.description)}...
  Skill Level: ${tool.skillLevel}
 `).join('')}
 ${candidateConcepts.length > 0 ? `
 VERFÜGBARE KONZEPTE (${candidateConcepts.length}):
 ${candidateConcepts.map((concept: any) => `
 - ${concept.name}
  Beschreibung: ${preview(concept.description)}...
 `).join('')}
 ` : ''}
 AUSWAHLREGELN:
 1. Wähle Tools, die die ${phase.name}-Phase der ursprünglichen Anfrage optimal ergänzen
 2. Priorisiere Tools, die zur Gesamtlösung beitragen
 3. Maximal 2 Tools für diese Phase
 ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
 {
  "selectedTools": ["ToolName1", "ToolName2"],
  "selectedConcepts": ["ConceptName1"],
  "reasoning": "Kurze Begründung der Auswahl für ${phase.name}"
 }`;
  },
  finalRecommendations: (isWorkflow: boolean, userQuery: string, selectedToolNames: string[]) => {
    const focus = isWorkflow ? 
      'Workflow-Schritte, Best Practices, Objektivität' :
@@ -213,6 +254,7 @@ export function getPrompt(key: 'phaseToolSelection', userQuery: string, phase: a
 export function getPrompt(key: 'toolEvaluation', userQuery: string, tool: any, rank: number, taskRelevance: number): string;
 export function getPrompt(key: 'backgroundKnowledgeSelection', userQuery: string, mode: string, selectedToolNames: string[], availableConcepts: any[]): string;
 export function getPrompt(key: 'finalRecommendations', isWorkflow: boolean, userQuery: string, selectedToolNames: string[]): string;
 export function getPrompt(key: 'generatePhaseCompletionPrompt', originalQuery: string, phase: any, candidateTools: any[], candidateConcepts: any[]): string;
 export function getPrompt(promptKey: keyof typeof AI_PROMPTS, ...args: any[]): string {
  try {
    const promptFunction = AI_PROMPTS[promptKey];
--- a/src/utils/aiPipeline.ts
+++ b/src/utils/aiPipeline.ts
@@ -1096,6 +1096,168 @@ class ImprovedMicroTaskAIPipeline {
    return result;
  }
  /**
   * Finds phases that have at most one selected tool and runs a phase-specific
   * semantic search for each of them, one after another.
   *
   * Coverage is measured by counting the entries of `context.selectedTools`
   * per `phase` value and comparing against the ids in `toolsData.phases`.
   * Returns early (after logging) when every phase has two or more tools.
   *
   * @param context - Current analysis context; `selectedTools` is read here.
   * @param toolsData - Tool database; `phases` is read here and the object is
   *   passed on unchanged to the per-phase completion step.
   * @param originalQuery - The user's original query, forwarded to the per-phase step.
   */
  private async completeUnderrepresentedPhases(
    context: AnalysisContext,
    toolsData: any,
    originalQuery: string
  ): Promise<void> {
    const allPhases = toolsData.phases || [];

    // Tally how many currently selected tools are assigned to each phase.
    const toolsPerPhase = new Map<string, number>();
    for (const entry of context.selectedTools ?? []) {
      const soFar = toolsPerPhase.get(entry.phase) || 0;
      toolsPerPhase.set(entry.phase, soFar + 1);
    }
    const coverageOf = (phaseId: string): number => toolsPerPhase.get(phaseId) || 0;

    console.log(`[AI PIPELINE] Phase coverage analysis:`);
    for (const phase of allPhases) {
      console.log(`[AI PIPELINE]   ${phase.id}: ${coverageOf(phase.id)} tools`);
    }

    // A phase with zero tools is missing; a phase with one tool is thin.
    // Both are treated as underrepresented.
    const gaps = allPhases.filter((phase: any) => coverageOf(phase.id) <= 1);
    if (gaps.length === 0) {
      console.log(`[AI PIPELINE] All phases adequately represented, no completion needed`);
      return;
    }

    console.log(`[AI PIPELINE] Underrepresented phases: ${gaps.map((p: any) => p.id).join(', ')}`);

    // Semantic search queries keyed by phase id; phases without an entry fall
    // back to a generic query built in the per-phase step.
    const semanticQueryByPhase = {
      'data-collection': 'forensic data acquisition imaging memory disk capture evidence collection',
      'examination': 'forensic analysis parsing extraction artifact examination file system',
      'analysis': 'forensic correlation timeline analysis pattern detection investigation',
      'reporting': 'forensic report documentation case management collaboration presentation findings'
    };

    // Handle the gaps strictly in sequence, pausing after each one.
    for (const gap of gaps) {
      await this.completePhaseWithSemanticSearch(context, gap, semanticQueryByPhase, toolsData, originalQuery);
      await this.delay(this.microTaskDelay);
    }
  }
  /**
   * Tries to add up to two extra tools for a single underrepresented phase.
   *
   * Steps:
   *  1. Pick the semantic query for `phase.id` from `phaseQueryTemplates`, or
   *     fall back to a generic query built from the phase name.
   *  2. Run `embeddingsService.findSimilar` with that query.
   *  3. Keep tool hits that exist in `toolsData.tools`, list this phase in
   *     their `phases`, and are not already in `context.seenToolNames`
   *     (top 5); keep concept hits that exist in `toolsData.concepts` (top 2).
   *  4. Ask the model to choose among the candidates and add every returned
   *     name that matches a candidate (max. 2) via `addToolToSelection`.
   *  5. Record an audit entry for the outcome.
   *
   * Errors raised inside the search/selection steps are logged and recorded
   * as a `phase-completion-failed` audit entry; they are not rethrown.
   *
   * NOTE(review): `selectedConcepts` from the model response is requested by
   * the prompt but not consumed here.
   *
   * @param context - Analysis context that receives the added tools and audit entries.
   * @param phase - Phase record; `id` and `name` are read.
   * @param phaseQueryTemplates - Semantic queries keyed by phase id.
   * @param toolsData - Tool database; `tools` and `concepts` are read.
   * @param originalQuery - The user's original query, embedded in the selection prompt.
   */
  private async completePhaseWithSemanticSearch(
    context: AnalysisContext,
    phase: any,
    phaseQueryTemplates: Record<string, string>,
    toolsData: any,
    originalQuery: string
  ): Promise<void> {
    const phaseStart = Date.now();

    // Generate phase-specific semantic query. The fallback tolerates a phase
    // without a name so that this line cannot throw outside the try block.
    const phaseQuery = phaseQueryTemplates[phase.id] || `forensic ${String(phase.name ?? phase.id).toLowerCase()} tools methods`;

    console.log(`[AI PIPELINE] Completing phase ${phase.id} with query: "${phaseQuery}"`);

    try {
      // Run semantic search with phase-specific query
      const phaseResults = await embeddingsService.findSimilar(
        phaseQuery,
        20, // Smaller set for phase completion
        0.2  // Lower threshold for more results
      );

      if (phaseResults.length === 0) {
        console.log(`[AI PIPELINE] No semantic results for phase ${phase.id}`);
        return;
      }

      // Name lookups for resolving search hits back to full records. Missing
      // collections are treated as empty, matching how `phases` is handled by
      // the caller.
      const toolsMap = new Map<string, any>((toolsData.tools ?? []).map((tool: any) => [tool.name, tool]));
      const conceptsMap = new Map<string, any>((toolsData.concepts ?? []).map((concept: any) => [concept.name, concept]));

      // Filter to tools that actually belong to this phase
      const phaseTools = phaseResults
        .filter(result => result.type === 'tool')
        .map(result => toolsMap.get(result.name))
        .filter((tool): tool is any =>
          tool !== undefined &&
          tool.phases &&
          tool.phases.includes(phase.id) &&
          !context.seenToolNames.has(tool.name) // Don't re-select already chosen tools
        )
        .slice(0, 5); // Top 5 candidates for this phase

      const phaseConcepts = phaseResults
        .filter(result => result.type === 'concept')
        .map(result => conceptsMap.get(result.name))
        .filter((concept): concept is any => concept !== undefined)
        .slice(0, 2); // Top 2 concepts

      console.log(`[AI PIPELINE] Phase ${phase.id} semantic search found: ${phaseTools.length} tools, ${phaseConcepts.length} concepts`);

      if (phaseTools.length === 0) {
        console.log(`[AI PIPELINE] No suitable tools found for phase ${phase.id} after filtering`);
        return;
      }

      const prompt = AI_PROMPTS.generatePhaseCompletionPrompt(originalQuery, phase, phaseTools, phaseConcepts);
      const response = await this.callAI(prompt, 800);
      const selection = this.safeParseJSON(response, { selectedTools: [], selectedConcepts: [] });

      // The model may return valid JSON without a `selectedTools` array, or
      // repeat a name; treat the former as "nothing selected" and drop repeats
      // so a tool is never added twice.
      const requestedNames: unknown[] = Array.isArray(selection?.selectedTools) ? selection.selectedTools : [];
      const validTools = Array.from(new Set(requestedNames))
        .map(name => phaseTools.find(t => t.name === name))
        .filter((tool): tool is any => tool !== undefined)
        .slice(0, 2);

      for (const tool of validTools) {
        console.log(`[AI PIPELINE] Adding phase completion tool: ${tool.name} for ${phase.id}`);

        this.addToolToSelection(
          context,
          tool,
          phase.id,
          'medium', // Phase completion tools get medium priority
          `Hinzugefügt zur Vervollständigung der ${phase.name}-Phase`,
          75, // Good relevance for phase-specific search
          ['Via phasenspezifische semantische Suche hinzugefügt']
        );
      }

      // Audit the phase completion
      this.addAuditEntry(context, 'validation', 'phase-completion',
        {
          phase: phase.id,
          phaseQuery,
          candidatesFound: phaseTools.length,
          originalQuery: originalQuery.slice(0, 100) + '...'
        },
        {
          toolsAdded: validTools.length,
          addedTools: validTools.map(t => t.name),
          semanticResults: phaseResults.length
        },
        validTools.length > 0 ? 80 : 40,
        phaseStart,
        {
          phaseCompletion: true,
          semanticSearch: true,
          originalQueryBias: true
        }
      );

    } catch (error) {
      // A thrown value is not guaranteed to be an Error instance; narrow it
      // before reading `.message` so the audit entry always carries text.
      const errorMessage = error instanceof Error ? error.message : String(error);

      console.error(`[AI PIPELINE] Phase completion failed for ${phase.id}:`, error);

      this.addAuditEntry(context, 'validation', 'phase-completion-failed',
        { phase: phase.id, phaseQuery },
        { error: errorMessage },
        10,
        phaseStart,
        { phaseCompletion: true, failed: true }
      );
    }
  }
  private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
    const existingSelection = context.selectedTools?.find(st => st.tool.name === tool.name);
    const taskRelevance = existingSelection?.taskRelevance || 70;
@@ -1373,6 +1535,10 @@ class ImprovedMicroTaskAIPipeline {
          if (toolSelectionResult.success) completeTasks++; else failedTasks++;
          await this.delay(this.microTaskDelay);
        }
        console.log('[AI PIPELINE] Checking for underrepresented phases...');
        await this.completeUnderrepresentedPhases(context, toolsData, userQuery);
      } else {
        const topTools = filteredData.tools.slice(0, 3);
        for (let i = 0; i < topTools.length; i++) {