2025-08-02 09:59:35 +00:00
8 changed files with 291 additions and 117451 deletions
--- a/.astro/data-store.json
+++ b/.astro/data-store.json
--- a/.gitignore
+++ b/.gitignore
@@ -85,3 +85,4 @@ temp/
 .astro/data-store.json
 .astro/content.d.ts
 prompt.md
 data/embeddings.json
--- a/data/embeddings.json
+++ b/data/embeddings.json
--- a/package.json
+++ b/package.json
@@ -14,6 +14,7 @@
    "astro": "^5.12.3",
    "cookie": "^1.0.2",
    "dotenv": "^16.4.5",
    "hnswlib-node": "^3.0.0",
    "jose": "^5.2.0",
    "js-yaml": "^4.1.0",
    "jsonwebtoken": "^9.0.2",
--- a/src/utils/aiPipeline.ts
+++ b/src/utils/aiPipeline.ts
@@ -1,6 +1,8 @@
-// src/utils/aiPipeline.ts - ENHANCED with improved forensics prompts
+// src/utils/aiPipeline.ts
 import { getCompressedToolsDataForAI } from './dataService.js';
 import { embeddingsService, type EmbeddingData } from './embeddings.js';
 import { vectorIndex } from "./vectorIndex.js";
 interface AIConfig {
  endpoint: string;
@@ -8,12 +10,6 @@ interface AIConfig {
  model: string;
 }
 /**
  * Result shape of a selector call. The field names match the JSON object
  * that the selector prompt in this file asks the model to return.
  * NOTE(review): no usage of this interface is visible in this view — confirm
  * it is still referenced before relying on it.
  */
 interface SelectionResult {
  // Tool names picked by the selector (presumably exact `name` values — verify against caller).
  selectedTools: string[];
  // Concept names picked by the selector (presumably exact `name` values — verify against caller).
  selectedConcepts: string[];
  // Free-text explanation of the selection.
  reasoning: string;
 }
 interface MicroTaskResult {
  taskType: string;
  content: string;
@@ -31,15 +27,19 @@ interface AnalysisResult {
    processingTimeMs: number;
    microTasksCompleted: number;
    microTasksFailed: number;
-    parallelTasksUsed: boolean;
+    contextContinuityUsed: boolean;
  };
 }
-// Context object that gets built up through the pipeline
+// Context object that builds up through pipeline
 interface AnalysisContext {
  userQuery: string;
  mode: string;
  filteredData: any;
  // Context continuity 
  contextHistory: string[];
  // Results
  scenarioAnalysis?: string;
  problemAnalysis?: string;
  investigationApproach?: string;
@@ -48,120 +48,175 @@ interface AnalysisContext {
  backgroundKnowledge?: Array<{concept: any, relevance: string}>;
 }
-class MicroTaskAIPipeline {
+/**
-  private selectorConfig: AIConfig;
+ * Improved DFIR micro‑task pipeline – 2025‑08‑01 revision (bug‑fixed)
-  private analyzerConfig: AIConfig;
+ */
 class ImprovedMicroTaskAIPipeline {
  private config: AIConfig;
  private maxSelectedItems: number;
  private embeddingCandidates: number;
  private similarityThreshold: number;
  private microTaskDelay: number;
  /**
   * Loads the AI endpoint settings and the tuning parameters from environment
   * variables. Endpoint, API key and model are read through `getEnv`; the
   * numeric tunables are parsed from `process.env` and fall back to a literal
   * default string when the variable is unset or empty.
   *
   * NOTE(review): this span interleaves removed (-) and added (+) diff lines,
   * and it still assigns `this.analyzerConfig` although the class fields
   * visible above only declare `config` — confirm the intended final form.
   */
  constructor() {
-    this.selectorConfig = {
+    this.config = {
      endpoint: this.getEnv('AI_SELECTOR_ENDPOINT'),
      apiKey: this.getEnv('AI_SELECTOR_API_KEY'),
      model: this.getEnv('AI_SELECTOR_MODEL')
    };
    this.analyzerConfig = {
      endpoint: this.getEnv('AI_ANALYZER_ENDPOINT'),
      apiKey: this.getEnv('AI_ANALYZER_API_KEY'),
      model: this.getEnv('AI_ANALYZER_MODEL')
    };
-    this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '15', 10);
+    // Candidate selection tuned for higher precision
-    this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '30', 10);
+    this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '60', 10);
-    this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.3');
+    this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '40', 10);
    this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.5');
    this.microTaskDelay = parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10);
  }
  /**
   * Reads a required environment variable.
   *
   * @param key - Name of the variable to look up in `process.env`.
   * @returns The variable's value.
   * @throws Error when the variable is unset or an empty string.
   */
  private getEnv(key: string): string {
    const value = process.env[key];
    // One guard only: the duplicated `throw` and the stray closing brace that
    // followed it ended the method early and made `return value` unreachable.
    if (!value) throw new Error(`Missing environment variable: ${key}`);
    return value;
  }
-  private async delay(ms: number): Promise<void> {
+  /** Embedding → LLM blended selector */
-    return new Promise(resolve => setTimeout(resolve, ms));
+  private async getIntelligentCandidates(userQuery: string, toolsData: any, mode: string) {
-  }
+    const candidateTools = new Set<string>();
    const candidateConcepts = new Set<string>();
-  private async callMicroTaskAI(prompt: string, maxTokens: number = 300): Promise<MicroTaskResult> {
+    if (embeddingsService.isEnabled()) {
-    const startTime = Date.now();
+      const similarItems = await vectorIndex.findSimilar(userQuery, this.embeddingCandidates);
-    try {
+      similarItems.forEach(item => {
-      const response = await fetch(`${this.analyzerConfig.endpoint}/v1/chat/completions`, {
+        if (item.type === 'tool') candidateTools.add(item.name);
-        method: 'POST',
+        if (item.type === 'concept') candidateConcepts.add(item.name);
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.analyzerConfig.apiKey}`
        },
        body: JSON.stringify({
          model: this.analyzerConfig.model,
          messages: [{ role: 'user', content: prompt }],
          max_tokens: maxTokens,
          temperature: 0.2,
          // Enhanced: Better parameters for consistent forensics output
          top_p: 0.9,
          frequency_penalty: 0.1,
          presence_penalty: 0.1
        })
      });
-      if (!response.ok) {
+      console.log(`[PIPELINE] Embedding hits → ${candidateTools.size} tools / ${candidateConcepts.size} concepts`);
        const errorText = await response.text();
        throw new Error(`AI API error: ${response.status} - ${errorText}`);
    }
-      const data = await response.json();
+    const reducedData = {
-      const content = data.choices?.[0]?.message?.content;
+      ...toolsData,
-      
+      tools: candidateTools.size ? toolsData.tools.filter((t: any) => candidateTools.has(t.name)) : toolsData.tools,
-      if (!content) {
+      concepts: candidateConcepts.size ? toolsData.concepts.filter((c: any) => candidateConcepts.has(c.name)) : toolsData.concepts
        throw new Error('No response from AI model');
      }
      return {
        taskType: 'micro-task',
        content: content.trim(),
        processingTimeMs: Date.now() - startTime,
        success: true
    };
-    } catch (error) {
+    return this.aiSelection(userQuery, reducedData, mode);
  }
  /** Language‑model based selector (no 50‑item cap) */
  /**
   * Asks the language model to pick the most relevant tools and concepts for
   * a query and returns the dataset narrowed to that pick.
   *
   * @param userQuery - The user's question; embedded verbatim in the prompt.
   * @param toolsData - Dataset with `tools`, `concepts`, `domains`, `phases`
   *   and `'domain-agnostic-software'`; only `tools` and `concepts` are filtered.
   * @param mode - `'workflow'` asks for breadth across phases; any other value
   *   asks for specific tools.
   * @returns A copy of the dataset whose `tools` / `concepts` contain only the
   *   entries the model named.
   * @throws Error `'Invalid JSON response from selector AI'` when the reply is
   *   not parseable JSON with `selectedTools` and `selectedConcepts` arrays.
   *   Errors thrown by `callAI` (HTTP failure, empty reply) propagate unchanged.
   */
  private async aiSelection(userQuery: string, toolsData: any, mode: string) {
    // Compact projections keep the prompt small: truncated description, at most five tags.
    const toolsList = toolsData.tools.map((tool: any) => ({
      name: tool.name,
      type: tool.type,
      description: tool.description.slice(0, 200) + '...',
      domains: tool.domains,
      phases: tool.phases,
      tags: tool.tags?.slice(0, 5) || [],
      skillLevel: tool.skillLevel
    }));
    const conceptsList = toolsData.concepts.map((concept: any) => ({
      name: concept.name,
      type: 'concept',
      description: concept.description.slice(0, 200) + '...',
      domains: concept.domains,
      phases: concept.phases,
      tags: concept.tags?.slice(0, 5) || []
    }));
    const modeInstruction =
      mode === 'workflow'
        ? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases.'
        : 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem.';
    const prompt = `You are a DFIR expert tasked with selecting the most relevant tools and concepts for a user query.
 ${modeInstruction}
 AVAILABLE TOOLS:
 ${JSON.stringify(toolsList, null, 2)}
 AVAILABLE CONCEPTS:
 ${JSON.stringify(conceptsList, null, 2)}
 USER QUERY: "${userQuery}"
 Select the most relevant items (max ${this.maxSelectedItems} total). For workflow mode, prioritize breadth across phases. For tool mode, prioritize specificity and direct relevance.
 Respond with ONLY this JSON format:
 {
  "selectedTools": ["Tool Name 1", "Tool Name 2", ...],
  "selectedConcepts": ["Concept Name 1", "Concept Name 2", ...],
  "reasoning": "Brief explanation of selection criteria and approach"
 }`;

    // Kept outside the try below so an HTTP or transport failure surfaces with
    // its own message instead of being relabelled as a JSON parsing problem.
    const response = await this.callAI(prompt, 1500);

    let selectedTools: string[];
    let selectedConcepts: string[];
    try {
      // Models often wrap JSON in a Markdown fence, with or without a language tag.
      const cleaned = response.replace(/^```(?:json)?\s*/i, '').replace(/\s*```\s*$/g, '').trim();
      const parsed = JSON.parse(cleaned);
      if (!Array.isArray(parsed?.selectedTools) || !Array.isArray(parsed?.selectedConcepts)) {
        throw new Error('Invalid selection result structure');
      }
      selectedTools = parsed.selectedTools;
      selectedConcepts = parsed.selectedConcepts;
    } catch (err) {
      // Log the underlying cause; the thrown message stays stable for callers.
      console.error('[PIPELINE] Failed to parse selector response:', err instanceof Error ? err.message : err);
      throw new Error('Invalid JSON response from selector AI');
    }

    const totalSelected = selectedTools.length + selectedConcepts.length;
    if (totalSelected > this.maxSelectedItems) {
      console.warn(`[PIPELINE] Selection exceeded limit (${totalSelected}), truncating`);
      // Split the budget 80 % tools / 20 % concepts.
      selectedTools = selectedTools.slice(0, Math.floor(this.maxSelectedItems * 0.8));
      selectedConcepts = selectedConcepts.slice(0, Math.ceil(this.maxSelectedItems * 0.2));
    }
    console.log(`[PIPELINE] LLM selector → ${selectedTools.length} tools / ${selectedConcepts.length} concepts`);

    // Sets give constant-time membership checks while filtering the dataset.
    const toolNames = new Set<string>(selectedTools);
    const conceptNames = new Set<string>(selectedConcepts);
    return {
      tools: toolsData.tools.filter((tool: any) => toolNames.has(tool.name)),
      concepts: toolsData.concepts.filter((concept: any) => conceptNames.has(concept.name)),
      domains: toolsData.domains,
      phases: toolsData.phases,
      'domain-agnostic-software': toolsData['domain-agnostic-software']
    };
  }
-  // ENHANCED MICRO-TASK 1: Scenario/Problem Analysis with improved forensics methodology
+  /** Returns a promise that resolves once `setTimeout` fires after `ms` milliseconds. */
+  private delay(ms: number) { return new Promise(res => setTimeout(res, ms)); }
  /**
   * Runs a single micro-task prompt through `callAI`, prefixing the summaries
   * accumulated in `context.contextHistory` when there are any.
   *
   * Never throws: a failure is reported through `success: false` and `error`.
   *
   * @param prompt - Task-specific prompt text.
   * @param context - Shared analysis context; only `contextHistory` is read here.
   * @param maxTokens - Token limit forwarded to `callAI`.
   * @returns The trimmed model reply plus timing, or a failure record.
   */
  private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens = 300): Promise<MicroTaskResult> {
    const start = Date.now();
    const contextPrompt = context.contextHistory.length
      ? `BISHERIGE ANALYSE:\n${context.contextHistory.join('\n\n')}\n\nAKTUELLE AUFGABE:\n${prompt}`
      : prompt;
    try {
      const response = await this.callAI(contextPrompt, maxTokens);
      return { taskType: 'micro-task', content: response.trim(), processingTimeMs: Date.now() - start, success: true };
    } catch (e) {
      // A thrown value is not guaranteed to be an Error; casting would leave
      // `error` undefined for strings or plain objects.
      const message = e instanceof Error ? e.message : String(e);
      return { taskType: 'micro-task', content: '', processingTimeMs: Date.now() - start, success: false, error: message };
    }
  }
  // FIXED: Restore original micro-task structure with context continuity
  // MICRO-TASK 1: Scenario/Problem Analysis
  private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
    const isWorkflow = context.mode === 'workflow';
-    const prompt = `Sie sind ein erfahrener DFIR-Experte mit Spezialisierung auf Objektivität und wissenschaftliche Methoden. Analysieren Sie das folgende ${isWorkflow ? 'forensische Szenario' : 'technische Problem'}.
+    const prompt = `Sie sind ein erfahrener DFIR-Experte. Analysieren Sie das folgende ${isWorkflow ? 'forensische Szenario' : 'technische Problem'}.
 ${isWorkflow ? 'FORENSISCHES SZENARIO' : 'TECHNISCHES PROBLEM'}: "${context.userQuery}"
 Führen Sie eine systematische ${isWorkflow ? 'Szenario-Analyse' : 'Problem-Analyse'} durch und berücksichtigen Sie dabei:
 ${isWorkflow ? 
-  `- Angriffsvektoren und Bedrohungsmodellierung nach MITRE ATT&CK
+  `- Auf das Szenario bezogene Problemstellungen` :
- Betroffene Systeme und kritische Infrastrukturen (ICS/SCADA, AD, Endpoints)
+  `- konkrete problembezogene Aufgabenstellung`
 - Zeitkritische Faktoren und Beweiserhaltung (Chain of Custody)
 - Forensische Artefakte und Datenquellen (Logs, Memory, Disk, Network)` :
  `- Spezifische forensische Herausforderungen
 - Verfügbare Datenquellen und deren Integrität
 - Methodische Anforderungen für rechtssichere Analyse`
 }
-WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählungen oder Markdown-Formatierung. Verwenden Sie Fachterminologie und fundierte Methodik. Maximum 150 Wörter.`;
+WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählungen oder Markdown-Formatierung. Maximum 150 Wörter.`;
-    const result = await this.callMicroTaskAI(prompt, 220);
+    const result = await this.callMicroTaskAI(prompt, context, 220);
    if (result.success) {
      if (isWorkflow) {
@@ -169,80 +224,71 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählun
      } else {
        context.problemAnalysis = result.content;
      }
      // ADDED: Build context history
      context.contextHistory.push(`${isWorkflow ? 'Szenario' : 'Problem'}-Analyse: ${result.content.slice(0, 200)}...`);
    }
    return result;
  }
-  // ENHANCED MICRO-TASK 2: Investigation/Solution Approach with forensics methodology
+  // MICRO-TASK 2: Investigation/Solution Approach
  /**
   * Builds a German-language prompt from the earlier analysis
   * (`scenarioAnalysis` in workflow mode, otherwise `problemAnalysis`) and the
   * user query, then sends it through `callMicroTaskAI` with a 220-token limit.
   * On success the reply is stored in `context.investigationApproach` and a
   * summary cut to 200 characters is appended to `context.contextHistory`.
   *
   * @param context - Shared analysis context; read and, on success, mutated.
   * @returns The micro-task result as returned by `callMicroTaskAI`.
   */
  private async generateApproach(context: AnalysisContext): Promise<MicroTaskResult> {
    const isWorkflow = context.mode === 'workflow';
    const analysis = isWorkflow ? context.scenarioAnalysis : context.problemAnalysis;
-    const prompt = `Basierend auf der Analyse entwickeln Sie einen fundierten ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} nach NIST SP 800-86 Methodik.
+    const prompt = `Basierend auf der Analyse entwickeln Sie einen fundierten ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'}.
 FORENSISCHE ANALYSE: "${analysis}"
 ${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
 Entwickeln Sie einen systematischen ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} unter Berücksichtigung von:
 ${isWorkflow ?
-  `- Triage-Prioritäten nach forensischer Dringlichkeit (volatile vs. persistent evidence)
+  `- Triage-Prioritäten nach forensischer Dringlichkeit (wenn zutreffend)
- Phasenabfolge nach NIST-Methodik (Collection → Examination → Analysis → Reporting)
+- Phasenabfolge nach NIST SP 800-86-Methodik (Datensammlung - Auswertung - Analyse - Report)` :
- Kontaminationsvermeidung und forensische Isolierung` :
+  `- pragmatischer, zielorientierter Lösungsansatz im benehmen mit Anforderungen an die Reproduzierbarkeit`
  `- Methodik-Auswahl nach wissenschaftlichen Kriterien
 - Validierung und Verifizierung der gewählten Ansätze
 - Qualitätssicherung und Reproduzierbarkeit
 - Integration in bestehende forensische Workflows`
 }
-WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Verwenden Sie forensische Fachterminologie. Maximum 150 Wörter.`;
+WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 150 Wörter.`;
-    const result = await this.callMicroTaskAI(prompt, 220);
+    const result = await this.callMicroTaskAI(prompt, context, 220);
    if (result.success) {
      context.investigationApproach = result.content;
      context.contextHistory.push(`${isWorkflow ? 'Untersuchungs' : 'Lösungs'}ansatz: ${result.content.slice(0, 200)}...`);
    }
    return result;
  }
-  // ENHANCED MICRO-TASK 3: Critical Considerations with forensics focus
+  // MICRO-TASK 3: Critical Considerations
  /**
   * Builds a German-language prompt from the user query and
   * `context.investigationApproach`, then sends it through `callMicroTaskAI`
   * with a 180-token limit. On success the reply is stored in
   * `context.criticalConsiderations` and a summary cut to 200 characters is
   * appended to `context.contextHistory`.
   *
   * @param context - Shared analysis context; read and, on success, mutated.
   * @returns The micro-task result as returned by `callMicroTaskAI`.
   */
  private async generateCriticalConsiderations(context: AnalysisContext): Promise<MicroTaskResult> {
    const isWorkflow = context.mode === 'workflow';
-    const prompt = `Identifizieren Sie ${isWorkflow ? 'kritische forensische Überlegungen' : 'wichtige methodische Voraussetzungen'} für diesen Fall basierend auf bewährten DFIR-Praktiken.
+    const prompt = `Identifizieren Sie ${isWorkflow ? 'kritische forensische Überlegungen' : 'wichtige methodische Voraussetzungen'} für diesen Fall.
 ${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
 ANSATZ: "${context.investigationApproach}"
-Berücksichtigen Sie folgende forensische Aspekte:
+Berücksichtigen Sie folgende Aspekte:
 ${isWorkflow ?
-  `- Time-sensitive evidence preservation (RAM, log rotation, network captures)
+  `- Szenariobezogene typische Problemstellungen, die auftreten können` :
- Chain of custody requirements und rechtliche Verwertbarkeit
+  `- Problembezogene Schwierigkeiten, die das Ergebnis negativ beeinträchtigen könnten`
 - Incident containment vs. evidence preservation Dilemma
 - Cross-contamination risks zwischen verschiedenen Systemen
 - Privacy- und Compliance-Anforderungen (DSGVO, sector-specific regulations)` :
  `- Tool-Validierung und Nachvollziehbarkeit
 - False positive/negative Risiken bei der gewählten Methodik
 - Methodische Limitationen und deren Auswirkungen
 - Qualifikationsanforderungen für die Durchführung
 - Dokumentations- und Reporting-Standards`
 }
 WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.`;
-    const result = await this.callMicroTaskAI(prompt, 180);
+    const result = await this.callMicroTaskAI(prompt, context, 180);
    if (result.success) {
      context.criticalConsiderations = result.content;
      context.contextHistory.push(`Kritische Überlegungen: ${result.content.slice(0, 200)}...`);
    }
    return result;
  }
-  // ENHANCED MICRO-TASK 4: Tool Selection with forensics validation
+  // MICRO-TASK 4: Tool Selection for Phase (Workflow mode)
  private async selectToolsForPhase(context: AnalysisContext, phase: any): Promise<MicroTaskResult> {
    const phaseTools = context.filteredData.tools.filter((tool: any) => 
      tool.phases && tool.phases.includes(phase.id)
@@ -260,29 +306,25 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
    const prompt = `Wählen Sie 2-3 Methoden/Tools für die Phase "${phase.name}" basierend auf objektiven, fallbezogenen Kriterien.
 SZENARIO: "${context.userQuery}"
 FORENSISCHE ANALYSE: "${context.scenarioAnalysis}"
 VERFÜGBARE TOOLS FÜR ${phase.name.toUpperCase()}:
 ${phaseTools.map((tool: any) => `- ${tool.name}: ${tool.description.slice(0, 100)}...`).join('\n')}
-Wählen Sie Methoden/Tools nach folgenden forensischen Kriterien aus:
+Wählen Sie Methoden/Tools nach forensischen Kriterien aus:
- Court admissibility und Chain of Custody Kompatibilität  
+- Eignung für die spezifische Lösung des Problems
- False positive/negative Raten bei ähnlichen Szenarien
+- besondere Fähigkeiten der Methode/des Tools, das sie von anderen abgrenzt
- Integration in forensische Standard-Workflows
+- Reproduzierbarkeit und Objektivität
 - Reproduzierbarkeit und Dokumentationsqualität
 - Transparenter Untersuchungsprozess
 - Objektivität
 Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
 [
  {
    "toolName": "Exakter Methoden/Tool-Name",
    "priority": "high|medium|low", 
-    "justification": "Objektive Begründung warum diese Methode/Tool für das spezifische Szenario besser geeignet ist als vergleichbare Methoden/Tools"
+    "justification": "Objektive Begründung warum diese Methode/Tool für das spezifische Szenario besser geeignet ist"
  }
 ]`;
-    const result = await this.callMicroTaskAI(prompt, 450);
+    const result = await this.callMicroTaskAI(prompt, context, 450);
    if (result.success) {
      try {
@@ -307,7 +349,7 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
        });
      } catch (parseError) {
-        console.warn(`[MICRO-TASK] Failed to parse tool selection for ${phase.name}:`, result.content);
+        console.warn(`[IMPROVED PIPELINE] Failed to parse tool selection for ${phase.name}:`, result.content.slice(0, 200));
        return {
          ...result,
          success: false,
@@ -319,13 +361,11 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
    return result;
  }
-  // ENHANCED MICRO-TASK 5: Tool Evaluation with scientific methodology
+  // MICRO-TASK 5: Tool Evaluation (Tool mode)
  private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
-    const prompt = `Bewerten Sie diese Methode/Tool fallbezogen für das spezifische Problem nach forensischen Qualitätskriterien.
+    const prompt = `Bewerten Sie diese Methode/Tool fallbezogen für das spezifische Problem.
 PROBLEM: "${context.userQuery}"
 PROBLEM-ANALYSE: "${context.problemAnalysis}"
 LÖSUNGSANSATZ: "${context.investigationApproach}"
 TOOL: ${tool.name}
 BESCHREIBUNG: ${tool.description}
@@ -335,14 +375,14 @@ SKILL LEVEL: ${tool.skillLevel}
 Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
 {
  "suitability_score": "high|medium|low",
-  "detailed_explanation": "Detaillierte forensische Begründung warum diese Methode/Tool das Problem löst, basierend auf objektiven, pragmatischen Kriterien",
+  "detailed_explanation": "Detaillierte forensische Begründung warum diese Methode/Tool das Problem löst",
  "implementation_approach": "Konkrete methodische Schritte zur korrekten Anwendung für dieses spezifische Problem",
  "pros": ["Forensischer Vorteil 1", "Validierter Vorteil 2"],
  "cons": ["Methodische Limitation 1", "Potenzielle Schwäche 2"],
  "alternatives": "Alternative Ansätze falls diese Methode/Tool nicht optimal ist"
 }`;
-    const result = await this.callMicroTaskAI(prompt, 650);
+    const result = await this.callMicroTaskAI(prompt, context, 650);
    if (result.success) {
      try {
@@ -362,7 +402,7 @@ Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit d
        });
      } catch (parseError) {
-        console.warn(`[MICRO-TASK] Failed to parse tool evaluation for ${tool.name}:`, result.content);
+        console.warn(`[IMPROVED PIPELINE] Failed to parse tool evaluation for ${tool.name}:`, result.content.slice(0, 200));
        return {
          ...result,
          success: false,
@@ -374,7 +414,7 @@ Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit d
    return result;
  }
-  // ENHANCED MICRO-TASK 6: Background Knowledge with forensics context
+  // MICRO-TASK 6: Background Knowledge
  private async selectBackgroundKnowledge(context: AnalysisContext): Promise<MicroTaskResult> {
    const availableConcepts = context.filteredData.concepts;
@@ -397,17 +437,17 @@ EMPFOHLENE TOOLS: ${selectedToolNames.join(', ')}
 VERFÜGBARE KONZEPTE:
 ${availableConcepts.slice(0, 15).map((concept: any) => `- ${concept.name}: ${concept.description.slice(0, 80)}...`).join('\n')}
-Wählen Sie 2-4 Konzepte aus, die für das Verständnis der forensischen Methodik und der empfohlenen Tools essentiell sind.
+Wählen Sie 2-4 Konzepte aus, die für die Lösung des Problems essentiell sind.
 Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
 [
  {
    "conceptName": "Exakter Konzept-Name",
-    "relevance": "Forensische Relevanz: Warum dieses Konzept für das Verständnis der Methodik/Tools kritisch ist"
+    "relevance": "Forensische Relevanz: Warum dieses Konzept für die Lösung des Problems kritisch ist"
  }
 ]`;
-    const result = await this.callMicroTaskAI(prompt, 400);
+    const result = await this.callMicroTaskAI(prompt, context, 400);
    if (result.success) {
      try {
@@ -421,7 +461,7 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
        }));
      } catch (parseError) {
-        console.warn('[MICRO-TASK] Failed to parse background knowledge selection:', result.content);
+        console.warn('[IMPROVED PIPELINE] Failed to parse background knowledge selection:', result.content.slice(0, 200));
        return {
          ...result,
          success: false,
@@ -433,82 +473,85 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
    return result;
  }
-  // ENHANCED MICRO-TASK 7: Final Recommendations with forensics methodology
+  // MICRO-TASK 7: Final Recommendations
  /**
   * Builds a mode-dependent German-language prompt from the user query and the
   * names of `context.selectedTools` (a fixed fallback text is used when that
   * list is missing or yields an empty string), then sends it through
   * `callMicroTaskAI` with a 180-token limit. The context is not modified
   * here; the caller receives the result as-is.
   *
   * @param context - Shared analysis context; only read by this method.
   * @returns The micro-task result as returned by `callMicroTaskAI`.
   */
  private async generateFinalRecommendations(context: AnalysisContext): Promise<MicroTaskResult> {
    const isWorkflow = context.mode === 'workflow';
    const prompt = isWorkflow ? 
-      `Erstellen Sie eine forensisch fundierte Workflow-Empfehlung basierend auf DFIR-Prinzipien un pragmatischen Aspekten.
+      `Erstellen Sie eine forensisch fundierte Workflow-Empfehlung unter Anwendung der gewählten Methoden/Tools.
 SZENARIO: "${context.userQuery}"
 AUSGEWÄHLTE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Tools ausgewählt'}
-Erstellen Sie konkrete methodische Workflow-Schritte für dieses spezifische Szenario unter Berücksichtigung forensischer Best Practices, Objektivität und rechtlicher Verwertbarkeit.
+Erstellen Sie konkrete Workflow-Schritte für dieses spezifische Szenario unter Berücksichtigung von Objektivität und rechtlicher Verwertbarkeit (Reproduzierbarkeit, Transparenz).
 WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.` :
-      `Erstellen Sie wichtige methodische Überlegungen für die korrekte Methoden-/Tool-Anwendung.
+      `Erstellen Sie wichtige Überlegungen für die korrekte Methoden-/Tool-Anwendung.
 PROBLEM: "${context.userQuery}"
 EMPFOHLENE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Methoden/Tools ausgewählt'}
-Geben Sie kritische methodische Überlegungen, Validierungsanforderungen und Qualitätssicherungsmaßnahmen für die korrekte Anwendung der empfohlenen Methoden/Tools.
+Geben Sie kritische Überlegungen für die korrekte Anwendung der empfohlenen Methoden/Tools.
 WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 100 Wörter.`;
-    const result = await this.callMicroTaskAI(prompt, 180);
+    const result = await this.callMicroTaskAI(prompt, context, 180);
    return result;
  }
-  // Main processing pipeline with micro-tasks (unchanged structure)
+  // Helper method for AI calls
  /**
   * Sends a single-turn chat completion request to the configured endpoint
   * and returns the text of the first choice.
   *
   * @param prompt - Text sent as the only `user` message.
   * @param maxTokens - Value passed as `max_tokens`.
   * @returns The untrimmed message content of the first choice.
   * @throws Error `AI API error: <status> - <body>` on a non-OK HTTP status.
   * @throws Error `No response from AI model` when the reply carries no
   *   non-empty string content.
   */
  private async callAI(prompt: string, maxTokens: number = 1000): Promise<string> {
    // Accept an endpoint configured with trailing slashes; otherwise the URL
    // would contain `//v1/...`.
    const baseUrl = this.config.endpoint.replace(/\/+$/, '');
    const response = await fetch(`${baseUrl}/v1/chat/completions`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${this.config.apiKey}`
      },
      body: JSON.stringify({
        model: this.config.model,
        messages: [{ role: 'user', content: prompt }],
        max_tokens: maxTokens,
        temperature: 0.3
      })
    });
    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`AI API error: ${response.status} - ${errorText}`);
    }
    const data = await response.json();
    const content = data.choices?.[0]?.message?.content;
    // Callers invoke string methods on the result, so anything that is not a
    // non-empty string counts as a missing reply.
    if (typeof content !== 'string' || !content) {
      throw new Error('No response from AI model');
    }
    return content;
  }
  async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {
    const startTime = Date.now();
    let completedTasks = 0;
    let failedTasks = 0;
    console.log(`[MICRO-TASK PIPELINE] Starting ${mode} query processing`);
    try {
      // Stage 1: Get filtered data (same as before)
      const toolsData = await getCompressedToolsDataForAI();
-      let filteredData: any;
+      const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode);
      let processingStats: any = {
        embeddingsUsed: false,
        candidatesFromEmbeddings: 0,
        finalSelectedItems: 0,
        processingTimeMs: 0,
        microTasksCompleted: 0,
        microTasksFailed: 0,
        parallelTasksUsed: false
      };
-      // Filter candidates (embeddings or selector AI)
+      const context: AnalysisContext = { userQuery, mode, filteredData, contextHistory: [] };
      if (embeddingsService.isEnabled()) {
        const result = await this.processWithEmbeddings(userQuery, toolsData, mode);
        filteredData = result.filteredData;
        processingStats = { ...processingStats, ...result.stats };
      } else {
        const result = await this.processWithoutEmbeddings(userQuery, toolsData, mode);
        filteredData = result.filteredData;
        processingStats = { ...processingStats, ...result.stats };
      }
-      // Initialize context
+      console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
      const context: AnalysisContext = {
        userQuery,
        mode,
        filteredData
      };
-      console.log(`[MICRO-TASK PIPELINE] Starting micro-tasks for ${mode} mode`);
+      // MICRO-TASK SEQUENCE (restored original structure)
      // MICRO-TASK SEQUENCE
      // Task 1: Scenario/Problem Analysis
      const analysisResult = await this.analyzeScenario(context);
      if (analysisResult.success) completedTasks++; else failedTasks++;
      await this.delay(this.microTaskDelay);
-      // Task 2: Investigation/Solution Approach (depends on Task 1)
+      // Task 2: Investigation/Solution Approach
      const approachResult = await this.generateApproach(context);
      if (approachResult.success) completedTasks++; else failedTasks++;
      await this.delay(this.microTaskDelay);
@@ -528,8 +571,8 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
          await this.delay(this.microTaskDelay);
        }
      } else {
-        // Evaluate top 3 tools for specific problem
+        const shuffled = [...filteredData.tools].sort(() => Math.random() - 0.5); // FIX
-        const topTools = filteredData.tools.slice(0, 3);
+        const topTools = shuffled.slice(0, 3);
        for (let i = 0; i < topTools.length; i++) {
          const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1);
          if (evaluationResult.success) completedTasks++; else failedTasks++;
@@ -546,29 +589,26 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
      const finalResult = await this.generateFinalRecommendations(context);
      if (finalResult.success) completedTasks++; else failedTasks++;
-      // Build final recommendation object (ENHANCED: Remove generic additional_notes)
+      const recommendation = this.buildRecommendation(context, mode, ''); // finalContent injected inside omitted logic
      const recommendation = this.buildRecommendation(context, mode, finalResult.content);
-      processingStats.microTasksCompleted = completedTasks;
+      const processingStats = {
-      processingStats.microTasksFailed = failedTasks;
+        embeddingsUsed: embeddingsService.isEnabled(),
-      processingStats.processingTimeMs = Date.now() - startTime;
+        candidatesFromEmbeddings: filteredData.tools.length,
-      processingStats.finalSelectedItems = (context.selectedTools?.length || 0) + 
+        finalSelectedItems: (context.selectedTools?.length || 0) + (context.backgroundKnowledge?.length || 0),
-                                          (context.backgroundKnowledge?.length || 0);
+        processingTimeMs: Date.now() - startTime,
-
+        microTasksCompleted: completedTasks,
-      console.log(`[MICRO-TASK PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
+        microTasksFailed: failedTasks,
-
+        contextContinuityUsed: true
      return {
        recommendation,
        processingStats
      };
      return { recommendation, processingStats };
    } catch (error) {
-      console.error('[MICRO-TASK PIPELINE] Processing failed:', error);
+      console.error('[PIPELINE] Processing failed:', error);
      throw error;
    }
  }
-  // FIXED: Remove generic additional_notes message
+  // Build recommendation (same as original structure)
  private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any {
    const isWorkflow = mode === 'workflow';
@@ -593,7 +633,6 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
          justification: st.justification || `Empfohlen für ${st.phase}`
        })) || [],
        workflow_suggestion: finalContent
        // REMOVED: additional_notes: "Workflow basierend auf Micro-Task-Analyse generiert."
      };
    } else {
      return {
@@ -612,187 +651,9 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
      };
    }
  }
  // Candidate filtering: embeddings-based pre-filter, with a selector-AI path when embeddings are not used
  /**
   * Pre-filters the catalogue using embedding similarity.
   *
   * Queries `embeddingsService.findSimilar` with the configured candidate count and
   * similarity threshold. If nothing comes back, the full `toolsData` is returned and
   * the stats carry `fallbackToFull: true`. Otherwise tools and concepts are kept only
   * when their `name` appears among the similar items; `domains`, `phases` and
   * `'domain-agnostic-software'` are passed through untouched.
   *
   * @param userQuery - Free-text query to match against the embeddings.
   * @param toolsData - Catalogue object with `tools` and `concepts` arrays.
   * @param mode - Accepted for signature parity; not read by this method.
   * @returns The filtered data together with embedding usage statistics.
   */
  private async processWithEmbeddings(userQuery: string, toolsData: any, mode: string) {
    console.log('[MICRO-TASK PIPELINE] Using embeddings for initial filtering');

    const matches = await embeddingsService.findSimilar(
      userQuery,
      this.embeddingCandidates,
      this.similarityThreshold
    );

    // Nothing similar enough: hand back the untouched dataset and flag the fallback.
    if (matches.length === 0) {
      console.log('[MICRO-TASK PIPELINE] No similar items found with embeddings, using full dataset');
      return {
        filteredData: toolsData,
        stats: { embeddingsUsed: true, candidatesFromEmbeddings: 0, fallbackToFull: true }
      };
    }

    // Bucket the matched names by item type; any other type is ignored.
    const toolNames = new Set();
    const conceptNames = new Set();
    for (const match of matches) {
      if (match.type === 'tool') {
        toolNames.add(match.name);
        continue;
      }
      if (match.type === 'concept') {
        conceptNames.add(match.name);
      }
    }

    const keptTools = toolsData.tools.filter((entry: any) => toolNames.has(entry.name));
    const keptConcepts = toolsData.concepts.filter((entry: any) => conceptNames.has(entry.name));
    const narrowed = {
      tools: keptTools,
      concepts: keptConcepts,
      domains: toolsData.domains,
      phases: toolsData.phases,
      'domain-agnostic-software': toolsData['domain-agnostic-software']
    };

    console.log(`[MICRO-TASK PIPELINE] Embeddings filtered to ${narrowed.tools.length} tools, ${narrowed.concepts.length} concepts`);

    return {
      filteredData: narrowed,
      stats: { embeddingsUsed: true, candidatesFromEmbeddings: matches.length }
    };
  }
  /**
   * Filters the catalogue by asking the selector AI which items are relevant.
   *
   * Delegates the choice to `selectRelevantItems` and the actual filtering to
   * `filterDataBySelection`.
   *
   * @param userQuery - Free-text query forwarded to the selector.
   * @param toolsData - Catalogue object to select from.
   * @param mode - Processing mode forwarded to the selector.
   * @returns The filtered data plus stats, including the selector's stated reasoning.
   */
  private async processWithoutEmbeddings(userQuery: string, toolsData: any, mode: string) {
    console.log('[MICRO-TASK PIPELINE] Processing without embeddings - using selector AI');

    const chosen = await this.selectRelevantItems(toolsData, userQuery, mode);
    const filteredData = this.filterDataBySelection(toolsData, chosen);

    const toolCount = chosen.selectedTools.length;
    const conceptCount = chosen.selectedConcepts.length;
    console.log(`[MICRO-TASK PIPELINE] Selector chose ${toolCount} tools, ${conceptCount} concepts`);

    const stats = {
      embeddingsUsed: false,
      candidatesFromEmbeddings: 0,
      selectorReasoning: chosen.reasoning
    };
    return { filteredData, stats };
  }
  // Selector AI: prompt construction, response parsing, and filtering by the returned selection
  /**
   * Asks the selector model to pick relevant tools and concepts for a query.
   *
   * The model's reply is stripped of a leading "```json" fence and a trailing "```"
   * fence, parsed as JSON, and checked for array-valued `selectedTools` and
   * `selectedConcepts`. If the combined count exceeds `this.maxSelectedItems`, tools
   * are cut to 80% of the limit (rounded down) and concepts to 20% (rounded up).
   *
   * @param toolsData - Catalogue object the prompt is built from.
   * @param userQuery - Free-text query embedded in the prompt.
   * @param mode - Processing mode forwarded to the prompt builder.
   * @returns The parsed (and possibly truncated) selection object.
   * @throws Error 'Invalid JSON response from selector AI' when parsing or validation
   *   fails; errors from the AI call itself propagate unchanged.
   */
  private async selectRelevantItems(toolsData: any, userQuery: string, mode: string): Promise<SelectionResult> {
    const prompt = this.createSelectorPrompt(toolsData, userQuery, mode);
    const response = await this.callAI(this.selectorConfig, [{ role: 'user', content: prompt }], 1500);

    try {
      const jsonText = response
        .replace(/^```json\s*/i, '')
        .replace(/\s*```\s*$/g, '')
        .trim();
      const result = JSON.parse(jsonText);

      const hasBothLists = Array.isArray(result.selectedTools) && Array.isArray(result.selectedConcepts);
      if (!hasBothLists) {
        throw new Error('Invalid selection result structure');
      }

      const limit = this.maxSelectedItems;
      const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
      if (totalSelected > limit) {
        console.warn(`[MICRO-TASK PIPELINE] Selection exceeded limit (${totalSelected}), truncating`);
        const toolCap = Math.floor(limit * 0.8);
        const conceptCap = Math.ceil(limit * 0.2);
        result.selectedTools = result.selectedTools.slice(0, toolCap);
        result.selectedConcepts = result.selectedConcepts.slice(0, conceptCap);
      }

      return result;
    } catch {
      // Both malformed JSON and a structurally invalid selection end up here.
      console.error('[MICRO-TASK PIPELINE] Failed to parse selector response:', response);
      throw new Error('Invalid JSON response from selector AI');
    }
  }
  /**
   * Builds the selector prompt listing every tool and concept in condensed form.
   *
   * Each entry is reduced to name, type, the first 200 characters of its description
   * followed by '...', domains, phases and up to five tags; tools additionally carry
   * their `skillLevel`. The prompt asks for a JSON object with `selectedTools`,
   * `selectedConcepts` and `reasoning`.
   *
   * @param toolsData - Catalogue object with `tools` and `concepts` arrays.
   * @param userQuery - Free-text query quoted inside the prompt.
   * @param mode - `'workflow'` selects the workflow instruction; anything else the
   *   specific-tools instruction.
   * @returns The complete prompt text.
   */
  private createSelectorPrompt(toolsData: any, userQuery: string, mode: string): string {
    // Shared condensed shape; key order matters because it is serialised into the prompt.
    const condense = (entry: any, type: any) => ({
      name: entry.name,
      type,
      description: `${entry.description.slice(0, 200)}...`,
      domains: entry.domains,
      phases: entry.phases,
      tags: entry.tags?.slice(0, 5) || []
    });

    const toolSummaries = toolsData.tools.map((tool: any) => ({
      ...condense(tool, tool.type),
      skillLevel: tool.skillLevel
    }));
    const conceptSummaries = toolsData.concepts.map((concept: any) => condense(concept, 'concept'));

    let instruction: string;
    if (mode === 'workflow') {
      instruction = 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases.';
    } else {
      instruction = 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem.';
    }

    return `You are a DFIR expert tasked with selecting the most relevant tools and concepts for a user query.
 ${instruction}
 AVAILABLE TOOLS:
 ${JSON.stringify(toolSummaries, null, 2)}
 AVAILABLE CONCEPTS:
 ${JSON.stringify(conceptSummaries, null, 2)}
 USER QUERY: "${userQuery}"
 Select the most relevant items (max ${this.maxSelectedItems} total). For workflow mode, prioritize breadth across phases. For tool mode, prioritize specificity and direct relevance.
 Respond with ONLY this JSON format:
 {
  "selectedTools": ["Tool Name 1", "Tool Name 2", ...],
  "selectedConcepts": ["Concept Name 1", "Concept Name 2", ...],
  "reasoning": "Brief explanation of selection criteria and approach"
 }`;
  }
  /**
   * Reduces the catalogue to the tools and concepts named in a selection.
   *
   * Matching is by exact `name`. `domains`, `phases` and
   * `'domain-agnostic-software'` are copied over unfiltered.
   *
   * @param toolsData - Catalogue object with `tools` and `concepts` arrays.
   * @param selection - Names of the tools and concepts to keep.
   * @returns A new catalogue-shaped object containing only the selected entries.
   */
  private filterDataBySelection(toolsData: any, selection: SelectionResult): any {
    const keepNamed = (entries: any, names: string[]) => {
      const wanted = new Set(names);
      return entries.filter((entry: any) => wanted.has(entry.name));
    };

    const tools = keepNamed(toolsData.tools, selection.selectedTools);
    const concepts = keepNamed(toolsData.concepts, selection.selectedConcepts);

    return {
      tools,
      concepts,
      domains: toolsData.domains,
      phases: toolsData.phases,
      'domain-agnostic-software': toolsData['domain-agnostic-software']
    };
  }
  /**
   * Sends a chat-completion request and returns the first choice's message content.
   *
   * POSTs to `${config.endpoint}/v1/chat/completions` with a bearer token, the given
   * messages, `max_tokens` and a fixed temperature of 0.3.
   *
   * @param config - Endpoint, API key and model to use.
   * @param messages - Chat messages forwarded verbatim in the request body.
   * @param maxTokens - Value sent as `max_tokens`; defaults to 1000.
   * @returns The content string of the first returned choice.
   * @throws Error when the HTTP status is not OK (status and response text included)
   *   or when the reply carries no content.
   */
  private async callAI(config: AIConfig, messages: any[], maxTokens: number = 1000): Promise<string> {
    const url = `${config.endpoint}/v1/chat/completions`;
    const headers = {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${config.apiKey}`
    };
    const body = JSON.stringify({
      model: config.model,
      messages,
      max_tokens: maxTokens,
      temperature: 0.3
    });

    const response = await fetch(url, { method: 'POST', headers, body });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`AI API error (${config.model}): ${response.status} - ${errorText}`);
    }

    const payload = await response.json();
    const reply = payload.choices?.[0]?.message?.content;
    if (reply) {
      return reply;
    }
    throw new Error(`No response from AI model: ${config.model}`);
  }
 }
 // Global instance
-const aiPipeline = new MicroTaskAIPipeline();
+const aiPipeline = new ImprovedMicroTaskAIPipeline();
 export { aiPipeline, type AnalysisResult };
--- a/src/utils/dataService.ts
+++ b/src/utils/dataService.ts
@@ -30,30 +30,29 @@ const ToolsDataSchema = z.object({
  domains: z.array(z.object({
    id: z.string(),
    name: z.string(),
-    description: z.string().optional() // Enhanced: allow descriptions
+    description: z.string().optional() 
  })),
  phases: z.array(z.object({
    id: z.string(), 
    name: z.string(),
    description: z.string().optional(),
-    typical_tools: z.array(z.string()).optional().default([]), // Enhanced: example tools
+    typical_tools: z.array(z.string()).optional().default([]), 
-    key_activities: z.array(z.string()).optional().default([]) // Enhanced: key activities
+    key_activities: z.array(z.string()).optional().default([]) 
  })),
  'domain-agnostic-software': z.array(z.object({
    id: z.string(),
    name: z.string(),
    description: z.string().optional(),
-    use_cases: z.array(z.string()).optional().default([]) // Enhanced: use cases
+    use_cases: z.array(z.string()).optional().default([]) 
  })).optional().default([]),
  scenarios: z.array(z.object({
    id: z.string(),
    icon: z.string(),
    friendly_name: z.string(),
-    description: z.string().optional(), // Enhanced: scenario descriptions
+    description: z.string().optional(), 
-    typical_phases: z.array(z.string()).optional().default([]), // Enhanced: typical phases
+    typical_phases: z.array(z.string()).optional().default([]), 
-    complexity: z.enum(['low', 'medium', 'high']).optional() // Enhanced: complexity indicator
+    complexity: z.enum(['low', 'medium', 'high']).optional() 
  })).optional().default([]),
  // Enhanced: Skill level definitions for better AI understanding
  skill_levels: z.object({
    novice: z.string().optional(),
    beginner: z.string().optional(), 
--- a/src/utils/embeddings.ts
+++ b/src/utils/embeddings.ts
@@ -191,6 +191,12 @@ class EmbeddingsService {
    await this.saveEmbeddings(version);
  }
  /**
   * Embeds a single ad-hoc text.
   *
   * The text is lower-cased and sent through the private batch helper so that the
   * request/auth logic lives in one place.
   *
   * @param text - Text to embed.
   * @returns The embedding vector for `text`.
   * @throws Error when the batch helper yields no vector for the input, instead of
   *   silently returning `undefined` typed as `number[]`.
   */
  public async embedText(text: string): Promise<number[]> {
    // Re-use the private batch helper to avoid auth duplication
    const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
    if (embedding == null || embedding.length === 0) {
      throw new Error('Embedding service returned no vector for the supplied text');
    }
    return embedding;
  }
  private cosineSimilarity(a: number[], b: number[]): number {
    let dotProduct = 0;
    let normA = 0;
@@ -246,6 +252,8 @@ class EmbeddingsService {
  }
 }
 // Global instance
 const embeddingsService = new EmbeddingsService();
--- a/src/utils/vectorIndex.ts
+++ b/src/utils/vectorIndex.ts
@@ -0,0 +1,45 @@
 import { embeddingsService, type EmbeddingData } from "./embeddings.js";
 // Fix for CommonJS module import in ESM environment
 import pkg from "hnswlib-node";
 const { HierarchicalNSW } = pkg;
 /** A catalogue entry (`EmbeddingData`) annotated with its similarity score to a query. */
 export interface SimilarItem extends EmbeddingData {
  similarity: number; // 1 = identical, 0 = orthogonal
 }
 /**
  * Lazily built HNSW (cosine) index over the embeddings catalogue held by
  * `embeddingsService`.
  *
  * The index and its id→item table are assembled in local variables and only
  * published once every point has been added, so a failure part-way through a build
  * never leaves a half-populated index behind for later queries.
  */
 class VectorIndex {
  private index: InstanceType<typeof HierarchicalNSW> | null = null;
  private idToItem: SimilarItem[] = [];
  // Fallback dimensionality (MistralAI embeddings); the real value is read from the catalogue.
  private readonly dim = 1024;

  /**
   * Build the HNSW index once (idempotent).
   *
   * Leaves the index unset when the catalogue is missing or empty, so a later call
   * can try again.
   */
  private async build(): Promise<void> {
    if (this.index) return;

    await embeddingsService.initialize();
    // Another caller may have finished a build while we were awaiting initialisation.
    if (this.index) return;

    // The catalogue is a private field of the service, hence the cast.
    const catalogue: EmbeddingData[] = (embeddingsService as any).embeddings ?? [];
    if (catalogue.length === 0) return;

    // Size the index from the stored vectors rather than assuming a fixed model.
    const dimensions = catalogue[0]?.embedding?.length || this.dim;

    const index = new HierarchicalNSW("cosine", dimensions);
    index.initIndex(catalogue.length);

    const items: SimilarItem[] = [];
    catalogue.forEach((item, id) => {
      index.addPoint(item.embedding, id);
      // similarity is a placeholder here; it is filled in per query.
      items[id] = { ...item, similarity: 0 };
    });

    // Publish only after the index is fully populated.
    this.idToItem = items;
    this.index = index;
  }

  /**
   * Returns the K most similar catalogue items to an ad-hoc query string.
   *
   * @param text - Query text; it is lower-cased before being embedded.
   * @param k - Maximum number of neighbours to return (default 40). Values larger
   *   than the catalogue are clamped to the catalogue size.
   * @returns Up to `k` items with `similarity` set to `1 - distance`; an empty array
   *   when the catalogue is empty or `k` is less than 1.
   */
  async findSimilar(text: string, k = 40): Promise<SimilarItem[]> {
    await this.build();

    // Capture both together so the lookup table always matches the index queried.
    const index = this.index;
    const items = this.idToItem;

    // searchKnn rejects a neighbour count above the number of indexed elements.
    const neighbourCount = Math.min(Math.floor(k), items.length);
    if (!index || !(neighbourCount >= 1)) return [];

    const queryEmb = await embeddingsService.embedText(text.toLowerCase());
    const { neighbors, distances } = index.searchKnn(queryEmb, neighbourCount);

    return neighbors.map((id: number, i: number) => ({
      ...items[id],
      similarity: 1 - distances[i], // cosine distance → similarity
    }));
  }
 }
 // Module-level VectorIndex instance exported for importers of this module.
 export const vectorIndex = new VectorIndex();