vector index
parent 224f717ba8
commit 8c9bdf0710
.gitignore (vendored, 1 line changed)
@@ -85,3 +85,4 @@ temp/
.astro/data-store.json
.astro/content.d.ts
prompt.md
data/embeddings.json

data/embeddings.json (117075 lines; diff suppressed because it is too large)
package.json
@@ -14,6 +14,7 @@
"astro": "^5.12.3",
"cookie": "^1.0.2",
"dotenv": "^16.4.5",
"hnswlib-node": "^3.0.0",
"jose": "^5.2.0",
"js-yaml": "^4.1.0",
"jsonwebtoken": "^9.0.2",
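The only new dependency in this commit is hnswlib-node, which backs the vector index added below. A minimal sketch of the API surface the index relies on, using the same CommonJS-interop import as src/utils/vectorIndex.ts; the three-dimensional vectors are placeholder data, the real index uses 1024-dimensional embeddings:

// Build a small cosine HNSW index and query it (toy data, not from this repo).
import pkg from 'hnswlib-node';
const { HierarchicalNSW } = pkg;

const dim = 3;
const index = new HierarchicalNSW('cosine', dim);
index.initIndex(2); // capacity: number of points that will be added

index.addPoint([0.1, 0.2, 0.3], 0);
index.addPoint([0.3, 0.2, 0.1], 1);

// searchKnn returns parallel arrays of neighbour labels and cosine distances
const { neighbors, distances } = index.searchKnn([0.1, 0.2, 0.31], 1);
console.log(neighbors[0], 1 - distances[0]); // nearest label and its similarity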
src/utils/aiPipeline.ts
@@ -1,6 +1,8 @@
// src/utils/aiPipeline.ts - ENHANCED with improved forensics prompts
// src/utils/aiPipeline.ts

import { getCompressedToolsDataForAI } from './dataService.js';
import { embeddingsService, type EmbeddingData } from './embeddings.js';
import { vectorIndex } from "./vectorIndex.js";

interface AIConfig {
endpoint: string;
@@ -8,12 +10,6 @@ interface AIConfig {
model: string;
}

interface SelectionResult {
selectedTools: string[];
selectedConcepts: string[];
reasoning: string;
}

interface MicroTaskResult {
taskType: string;
content: string;
@@ -31,15 +27,19 @@ interface AnalysisResult {
processingTimeMs: number;
microTasksCompleted: number;
microTasksFailed: number;
parallelTasksUsed: boolean;
contextContinuityUsed: boolean;
};
}

// Context object that gets built up through the pipeline
// Context object that builds up through pipeline
interface AnalysisContext {
userQuery: string;
mode: string;
filteredData: any;
// Context continuity
contextHistory: string[];

// Results
scenarioAnalysis?: string;
problemAnalysis?: string;
investigationApproach?: string;
@@ -48,120 +48,175 @@ interface AnalysisContext {
backgroundKnowledge?: Array<{concept: any, relevance: string}>;
}

class MicroTaskAIPipeline {
private selectorConfig: AIConfig;
private analyzerConfig: AIConfig;
/**
* Improved DFIR micro‑task pipeline – 2025‑08‑01 revision (bug‑fixed)
*/
class ImprovedMicroTaskAIPipeline {
private config: AIConfig;
private maxSelectedItems: number;
private embeddingCandidates: number;
private similarityThreshold: number;
private microTaskDelay: number;

constructor() {
this.selectorConfig = {
endpoint: this.getEnv('AI_SELECTOR_ENDPOINT'),
apiKey: this.getEnv('AI_SELECTOR_API_KEY'),
model: this.getEnv('AI_SELECTOR_MODEL')
};

this.analyzerConfig = {
this.config = {
endpoint: this.getEnv('AI_ANALYZER_ENDPOINT'),
apiKey: this.getEnv('AI_ANALYZER_API_KEY'),
model: this.getEnv('AI_ANALYZER_MODEL')
};

this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '15', 10);
this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '30', 10);
this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.3');
// Candidate selection tuned for higher precision
this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '60', 10);
this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '40', 10);
this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.5');
this.microTaskDelay = parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10);
}

private getEnv(key: string): string {
const value = process.env[key];
if (!value) {
throw new Error(`Missing environment variable: ${key}`);
}
if (!value) throw new Error(`Missing environment variable: ${key}`);
return value;
}

private async delay(ms: number): Promise<void> {
return new Promise(resolve => setTimeout(resolve, ms));
}
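Environment variables read by the revised constructor, collected in one place as a hedged sketch (endpoint, key, and model values are placeholders; the AI_SELECTOR_* variables from the removed selectorConfig block are no longer used):

// The last four defaults mirror the constructor fallbacks above;
// the first three are required, since getEnv throws when they are missing.
process.env.AI_ANALYZER_ENDPOINT ??= 'https://llm.example.internal'; // placeholder
process.env.AI_ANALYZER_API_KEY ??= 'replace-me';                    // placeholder
process.env.AI_ANALYZER_MODEL ??= 'some-model-name';                 // placeholder
process.env.AI_MAX_SELECTED_ITEMS ??= '60';
process.env.AI_EMBEDDING_CANDIDATES ??= '40';
process.env.AI_SIMILARITY_THRESHOLD ??= '0.5';
process.env.AI_MICRO_TASK_DELAY_MS ??= '500';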
/** Embedding → LLM blended selector */
private async getIntelligentCandidates(userQuery: string, toolsData: any, mode: string) {
const candidateTools = new Set<string>();
const candidateConcepts = new Set<string>();

private async callMicroTaskAI(prompt: string, maxTokens: number = 300): Promise<MicroTaskResult> {
const startTime = Date.now();

try {
const response = await fetch(`${this.analyzerConfig.endpoint}/v1/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.analyzerConfig.apiKey}`
},
body: JSON.stringify({
model: this.analyzerConfig.model,
messages: [{ role: 'user', content: prompt }],
max_tokens: maxTokens,
temperature: 0.2,
// Enhanced: Better parameters for consistent forensics output
top_p: 0.9,
frequency_penalty: 0.1,
presence_penalty: 0.1
})
if (embeddingsService.isEnabled()) {
const similarItems = await vectorIndex.findSimilar(userQuery, this.embeddingCandidates);

similarItems.forEach(item => {
if (item.type === 'tool') candidateTools.add(item.name);
if (item.type === 'concept') candidateConcepts.add(item.name);
});

if (!response.ok) {
const errorText = await response.text();
throw new Error(`AI API error: ${response.status} - ${errorText}`);
console.log(`[PIPELINE] Embedding hits → ${candidateTools.size} tools / ${candidateConcepts.size} concepts`);
}

const reducedData = {
...toolsData,
tools: candidateTools.size ? toolsData.tools.filter((t: any) => candidateTools.has(t.name)) : toolsData.tools,
concepts: candidateConcepts.size ? toolsData.concepts.filter((c: any) => candidateConcepts.has(c.name)) : toolsData.concepts
};

return this.aiSelection(userQuery, reducedData, mode);
}

/** Language‑model based selector (no 50‑item cap) */
private async aiSelection(userQuery: string, toolsData: any, mode: string) {
const toolsList = toolsData.tools.map((tool: any) => ({
name: tool.name,
type: tool.type,
description: tool.description.slice(0, 200) + '...',
domains: tool.domains,
phases: tool.phases,
tags: tool.tags?.slice(0, 5) || [],
skillLevel: tool.skillLevel
}));

const conceptsList = toolsData.concepts.map((concept: any) => ({
name: concept.name,
type: 'concept',
description: concept.description.slice(0, 200) + '...',
domains: concept.domains,
phases: concept.phases,
tags: concept.tags?.slice(0, 5) || []
}));

const modeInstruction =
mode === 'workflow'
? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases.'
: 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem.';

const prompt = `You are a DFIR expert tasked with selecting the most relevant tools and concepts for a user query.

${modeInstruction}

AVAILABLE TOOLS:
${JSON.stringify(toolsList, null, 2)}

AVAILABLE CONCEPTS:
${JSON.stringify(conceptsList, null, 2)}

USER QUERY: "${userQuery}"

Select the most relevant items (max ${this.maxSelectedItems} total). For workflow mode, prioritize breadth across phases. For tool mode, prioritize specificity and direct relevance.

Respond with ONLY this JSON format:
{
"selectedTools": ["Tool Name 1", "Tool Name 2", ...],
"selectedConcepts": ["Concept Name 1", "Concept Name 2", ...],
"reasoning": "Brief explanation of selection criteria and approach"
}`;
try {
const response = await this.callAI(prompt, 1500);
const cleaned = response.replace(/^```json\s*/i, '').replace(/\s*```\s*$/g, '').trim();
const result = JSON.parse(cleaned);

if (!Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
throw new Error('Invalid selection result structure');
}

const data = await response.json();
const content = data.choices?.[0]?.message?.content;

if (!content) {
throw new Error('No response from AI model');
const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
if (totalSelected > this.maxSelectedItems) {
console.warn(`[PIPELINE] Selection exceeded limit (${totalSelected}), truncating`);
result.selectedTools = result.selectedTools.slice(0, Math.floor(this.maxSelectedItems * 0.8));
result.selectedConcepts = result.selectedConcepts.slice(0, Math.ceil(this.maxSelectedItems * 0.2));
}

return {
taskType: 'micro-task',
content: content.trim(),
processingTimeMs: Date.now() - startTime,
success: true
};
console.log(`[PIPELINE] LLM selector → ${result.selectedTools.length} tools / ${result.selectedConcepts.length} concepts`);

} catch (error) {
return {
taskType: 'micro-task',
content: '',
processingTimeMs: Date.now() - startTime,
success: false,
error: error.message
tools: toolsData.tools.filter((tool: any) => result.selectedTools.includes(tool.name)),
concepts: toolsData.concepts.filter((concept: any) => result.selectedConcepts.includes(concept.name)),
domains: toolsData.domains,
phases: toolsData.phases,
'domain-agnostic-software': toolsData['domain-agnostic-software']
};
} catch (err) {
console.error('[PIPELINE] Failed to parse selector response');
throw new Error('Invalid JSON response from selector AI');
}
}

// ENHANCED MICRO-TASK 1: Scenario/Problem Analysis with improved forensics methodology
private delay(ms: number) { return new Promise(res => setTimeout(res, ms)); }

private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens = 300): Promise<MicroTaskResult> {
const start = Date.now();
const contextPrompt = context.contextHistory.length
? `BISHERIGE ANALYSE:\n${context.contextHistory.join('\n\n')}\n\nAKTUELLE AUFGABE:\n${prompt}`
: prompt;

try {
const response = await this.callAI(contextPrompt, maxTokens);
return { taskType: 'micro-task', content: response.trim(), processingTimeMs: Date.now() - start, success: true };
} catch (e) {
return { taskType: 'micro-task', content: '', processingTimeMs: Date.now() - start, success: false, error: (e as Error).message };
}
}

// FIXED: Restore original micro-task structure with context continuity

// MICRO-TASK 1: Scenario/Problem Analysis
private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
const isWorkflow = context.mode === 'workflow';

const prompt = `Sie sind ein erfahrener DFIR-Experte mit Spezialisierung auf Objektivität und wissenschaftliche Methoden. Analysieren Sie das folgende ${isWorkflow ? 'forensische Szenario' : 'technische Problem'}.
const prompt = `Sie sind ein erfahrener DFIR-Experte. Analysieren Sie das folgende ${isWorkflow ? 'forensische Szenario' : 'technische Problem'}.

${isWorkflow ? 'FORENSISCHES SZENARIO' : 'TECHNISCHES PROBLEM'}: "${context.userQuery}"

Führen Sie eine systematische ${isWorkflow ? 'Szenario-Analyse' : 'Problem-Analyse'} durch und berücksichtigen Sie dabei:

${isWorkflow ?
`- Angriffsvektoren und Bedrohungsmodellierung nach MITRE ATT&CK
- Betroffene Systeme und kritische Infrastrukturen (ICS/SCADA, AD, Endpoints)
- Zeitkritische Faktoren und Beweiserhaltung (Chain of Custody)
- Forensische Artefakte und Datenquellen (Logs, Memory, Disk, Network)` :
`- Spezifische forensische Herausforderungen
- Verfügbare Datenquellen und deren Integrität
- Methodische Anforderungen für rechtssichere Analyse`
`- Auf das Szenario bezogene Problemstellungen` :
`- konkrete problembezogene Aufgabenstellung`
}

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählungen oder Markdown-Formatierung. Verwenden Sie Fachterminologie und fundierte Methodik. Maximum 150 Wörter.`;
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählungen oder Markdown-Formatierung. Maximum 150 Wörter.`;

const result = await this.callMicroTaskAI(prompt, 220);
const result = await this.callMicroTaskAI(prompt, context, 220);

if (result.success) {
if (isWorkflow) {
@@ -169,80 +224,71 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählun
} else {
context.problemAnalysis = result.content;
}

// ADDED: Build context history
context.contextHistory.push(`${isWorkflow ? 'Szenario' : 'Problem'}-Analyse: ${result.content.slice(0, 200)}...`);
}

return result;
}

// ENHANCED MICRO-TASK 2: Investigation/Solution Approach with forensics methodology
// MICRO-TASK 2: Investigation/Solution Approach
private async generateApproach(context: AnalysisContext): Promise<MicroTaskResult> {
const isWorkflow = context.mode === 'workflow';
const analysis = isWorkflow ? context.scenarioAnalysis : context.problemAnalysis;

const prompt = `Basierend auf der Analyse entwickeln Sie einen fundierten ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} nach NIST SP 800-86 Methodik.
const prompt = `Basierend auf der Analyse entwickeln Sie einen fundierten ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'}.

FORENSISCHE ANALYSE: "${analysis}"
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"

Entwickeln Sie einen systematischen ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} unter Berücksichtigung von:

${isWorkflow ?
`- Triage-Prioritäten nach forensischer Dringlichkeit (volatile vs. persistent evidence)
- Phasenabfolge nach NIST-Methodik (Collection → Examination → Analysis → Reporting)
- Kontaminationsvermeidung und forensische Isolierung` :
`- Methodik-Auswahl nach wissenschaftlichen Kriterien
- Validierung und Verifizierung der gewählten Ansätze
- Qualitätssicherung und Reproduzierbarkeit
- Integration in bestehende forensische Workflows`
`- Triage-Prioritäten nach forensischer Dringlichkeit (wenn zutreffend)
- Phasenabfolge nach NIST SP 800-86-Methodik (Datensammlung - Auswertung - Analyse - Report)` :
`- pragmatischer, zielorientierter Lösungsansatz im Einklang mit den Anforderungen an die Reproduzierbarkeit`
}

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Verwenden Sie forensische Fachterminologie. Maximum 150 Wörter.`;
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 150 Wörter.`;

const result = await this.callMicroTaskAI(prompt, 220);
const result = await this.callMicroTaskAI(prompt, context, 220);

if (result.success) {
context.investigationApproach = result.content;
context.contextHistory.push(`${isWorkflow ? 'Untersuchungs' : 'Lösungs'}ansatz: ${result.content.slice(0, 200)}...`);
}

return result;
}
// ENHANCED MICRO-TASK 3: Critical Considerations with forensics focus
// MICRO-TASK 3: Critical Considerations
private async generateCriticalConsiderations(context: AnalysisContext): Promise<MicroTaskResult> {
const isWorkflow = context.mode === 'workflow';

const prompt = `Identifizieren Sie ${isWorkflow ? 'kritische forensische Überlegungen' : 'wichtige methodische Voraussetzungen'} für diesen Fall basierend auf bewährten DFIR-Praktiken.
const prompt = `Identifizieren Sie ${isWorkflow ? 'kritische forensische Überlegungen' : 'wichtige methodische Voraussetzungen'} für diesen Fall.

${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
ANSATZ: "${context.investigationApproach}"

Berücksichtigen Sie folgende forensische Aspekte:
Berücksichtigen Sie folgende Aspekte:

${isWorkflow ?
`- Time-sensitive evidence preservation (RAM, log rotation, network captures)
- Chain of custody requirements und rechtliche Verwertbarkeit
- Incident containment vs. evidence preservation Dilemma
- Cross-contamination risks zwischen verschiedenen Systemen
- Privacy- und Compliance-Anforderungen (DSGVO, sector-specific regulations)` :
`- Tool-Validierung und Nachvollziehbarkeit
- False positive/negative Risiken bei der gewählten Methodik
- Methodische Limitationen und deren Auswirkungen
- Qualifikationsanforderungen für die Durchführung
- Dokumentations- und Reporting-Standards`
`- Szenariobezogene typische Problemstellungen, die auftreten können` :
`- Problembezogene Schwierigkeiten, die das Ergebnis negativ beeinträchtigen könnten`
}

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.`;

const result = await this.callMicroTaskAI(prompt, 180);
const result = await this.callMicroTaskAI(prompt, context, 180);

if (result.success) {
context.criticalConsiderations = result.content;
context.contextHistory.push(`Kritische Überlegungen: ${result.content.slice(0, 200)}...`);
}

return result;
}

// ENHANCED MICRO-TASK 4: Tool Selection with forensics validation
// MICRO-TASK 4: Tool Selection for Phase (Workflow mode)
private async selectToolsForPhase(context: AnalysisContext, phase: any): Promise<MicroTaskResult> {
const phaseTools = context.filteredData.tools.filter((tool: any) =>
tool.phases && tool.phases.includes(phase.id)
@@ -260,29 +306,25 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
const prompt = `Wählen Sie 2-3 Methoden/Tools für die Phase "${phase.name}" basierend auf objektiven, fallbezogenen Kriterien.

SZENARIO: "${context.userQuery}"
FORENSISCHE ANALYSE: "${context.scenarioAnalysis}"

VERFÜGBARE TOOLS FÜR ${phase.name.toUpperCase()}:
${phaseTools.map((tool: any) => `- ${tool.name}: ${tool.description.slice(0, 100)}...`).join('\n')}

Wählen Sie Methoden/Tools nach folgenden forensischen Kriterien aus:
- Court admissibility und Chain of Custody Kompatibilität
- False positive/negative Raten bei ähnlichen Szenarien
- Integration in forensische Standard-Workflows
- Reproduzierbarkeit und Dokumentationsqualität
- Transparenter Untersuchungsprozess
- Objektivität
Wählen Sie Methoden/Tools nach forensischen Kriterien aus:
- Eignung für die spezifische Lösung des Problems
- besondere Fähigkeiten der Methode/des Tools, die sie von anderen abgrenzen
- Reproduzierbarkeit und Objektivität

Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
[
{
"toolName": "Exakter Methoden/Tool-Name",
"priority": "high|medium|low",
"justification": "Objektive Begründung warum diese Methode/Tool für das spezifische Szenario besser geeignet ist als vergleichbare Methoden/Tools"
"justification": "Objektive Begründung warum diese Methode/Tool für das spezifische Szenario besser geeignet ist"
}
]`;

const result = await this.callMicroTaskAI(prompt, 450);
const result = await this.callMicroTaskAI(prompt, context, 450);

if (result.success) {
try {
@@ -307,7 +349,7 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
});

} catch (parseError) {
console.warn(`[MICRO-TASK] Failed to parse tool selection for ${phase.name}:`, result.content);
console.warn(`[IMPROVED PIPELINE] Failed to parse tool selection for ${phase.name}:`, result.content.slice(0, 200));
return {
...result,
success: false,
@@ -319,13 +361,11 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
return result;
}

// ENHANCED MICRO-TASK 5: Tool Evaluation with scientific methodology
// MICRO-TASK 5: Tool Evaluation (Tool mode)
private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
const prompt = `Bewerten Sie diese Methode/Tool fallbezogen für das spezifische Problem nach forensischen Qualitätskriterien.
const prompt = `Bewerten Sie diese Methode/Tool fallbezogen für das spezifische Problem.

PROBLEM: "${context.userQuery}"
PROBLEM-ANALYSE: "${context.problemAnalysis}"
LÖSUNGSANSATZ: "${context.investigationApproach}"

TOOL: ${tool.name}
BESCHREIBUNG: ${tool.description}
@@ -335,14 +375,14 @@ SKILL LEVEL: ${tool.skillLevel}
Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
{
"suitability_score": "high|medium|low",
"detailed_explanation": "Detaillierte forensische Begründung warum diese Methode/Tool das Problem löst, basierend auf objektiven, pragmatischen Kriterien",
"detailed_explanation": "Detaillierte forensische Begründung warum diese Methode/Tool das Problem löst",
"implementation_approach": "Konkrete methodische Schritte zur korrekten Anwendung für dieses spezifische Problem",
"pros": ["Forensischer Vorteil 1", "Validierter Vorteil 2"],
"cons": ["Methodische Limitation 1", "Potenzielle Schwäche 2"],
"alternatives": "Alternative Ansätze falls diese Methode/Tool nicht optimal ist"
}`;

const result = await this.callMicroTaskAI(prompt, 650);
const result = await this.callMicroTaskAI(prompt, context, 650);

if (result.success) {
try {
@@ -362,7 +402,7 @@ Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit d
});

} catch (parseError) {
console.warn(`[MICRO-TASK] Failed to parse tool evaluation for ${tool.name}:`, result.content);
console.warn(`[IMPROVED PIPELINE] Failed to parse tool evaluation for ${tool.name}:`, result.content.slice(0, 200));
return {
...result,
success: false,
@@ -374,7 +414,7 @@ Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit d
return result;
}

// ENHANCED MICRO-TASK 6: Background Knowledge with forensics context
// MICRO-TASK 6: Background Knowledge
private async selectBackgroundKnowledge(context: AnalysisContext): Promise<MicroTaskResult> {
const availableConcepts = context.filteredData.concepts;

@@ -397,17 +437,17 @@ EMPFOHLENE TOOLS: ${selectedToolNames.join(', ')}
VERFÜGBARE KONZEPTE:
${availableConcepts.slice(0, 15).map((concept: any) => `- ${concept.name}: ${concept.description.slice(0, 80)}...`).join('\n')}

Wählen Sie 2-4 Konzepte aus, die für das Verständnis der forensischen Methodik und der empfohlenen Tools essentiell sind.
Wählen Sie 2-4 Konzepte aus, die für die Lösung des Problems essentiell sind.

Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
[
{
"conceptName": "Exakter Konzept-Name",
"relevance": "Forensische Relevanz: Warum dieses Konzept für das Verständnis der Methodik/Tools kritisch ist"
"relevance": "Forensische Relevanz: Warum dieses Konzept für die Lösung des Problems kritisch ist"
}
]`;

const result = await this.callMicroTaskAI(prompt, 400);
const result = await this.callMicroTaskAI(prompt, context, 400);

if (result.success) {
try {
@@ -421,7 +461,7 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
}));

} catch (parseError) {
console.warn('[MICRO-TASK] Failed to parse background knowledge selection:', result.content);
console.warn('[IMPROVED PIPELINE] Failed to parse background knowledge selection:', result.content.slice(0, 200));
return {
...result,
success: false,
@@ -433,82 +473,85 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
return result;
}

// ENHANCED MICRO-TASK 7: Final Recommendations with forensics methodology
// MICRO-TASK 7: Final Recommendations
private async generateFinalRecommendations(context: AnalysisContext): Promise<MicroTaskResult> {
const isWorkflow = context.mode === 'workflow';

const prompt = isWorkflow ?
`Erstellen Sie eine forensisch fundierte Workflow-Empfehlung basierend auf DFIR-Prinzipien und pragmatischen Aspekten.
`Erstellen Sie eine forensisch fundierte Workflow-Empfehlung unter Anwendung der gewählten Methoden/Tools.

SZENARIO: "${context.userQuery}"
AUSGEWÄHLTE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Tools ausgewählt'}

Erstellen Sie konkrete methodische Workflow-Schritte für dieses spezifische Szenario unter Berücksichtigung forensischer Best Practices, Objektivität und rechtlicher Verwertbarkeit.
Erstellen Sie konkrete Workflow-Schritte für dieses spezifische Szenario unter Berücksichtigung von Objektivität und rechtlicher Verwertbarkeit (Reproduzierbarkeit, Transparenz).

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.` :

`Erstellen Sie wichtige methodische Überlegungen für die korrekte Methoden-/Tool-Anwendung.
`Erstellen Sie wichtige Überlegungen für die korrekte Methoden-/Tool-Anwendung.

PROBLEM: "${context.userQuery}"
EMPFOHLENE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Methoden/Tools ausgewählt'}

Geben Sie kritische methodische Überlegungen, Validierungsanforderungen und Qualitätssicherungsmaßnahmen für die korrekte Anwendung der empfohlenen Methoden/Tools.
Geben Sie kritische Überlegungen für die korrekte Anwendung der empfohlenen Methoden/Tools.

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 100 Wörter.`;

const result = await this.callMicroTaskAI(prompt, 180);
const result = await this.callMicroTaskAI(prompt, context, 180);
return result;
}

// Main processing pipeline with micro-tasks (unchanged structure)
// Helper method for AI calls
private async callAI(prompt: string, maxTokens: number = 1000): Promise<string> {
const response = await fetch(`${this.config.endpoint}/v1/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.config.apiKey}`
},
body: JSON.stringify({
model: this.config.model,
messages: [{ role: 'user', content: prompt }],
max_tokens: maxTokens,
temperature: 0.3
})
});

if (!response.ok) {
const errorText = await response.text();
throw new Error(`AI API error: ${response.status} - ${errorText}`);
}

const data = await response.json();
const content = data.choices?.[0]?.message?.content;

if (!content) {
throw new Error('No response from AI model');
}

return content;
}
async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {
const startTime = Date.now();
let completedTasks = 0;
let failedTasks = 0;

console.log(`[MICRO-TASK PIPELINE] Starting ${mode} query processing`);

try {
// Stage 1: Get filtered data (same as before)
const toolsData = await getCompressedToolsDataForAI();
let filteredData: any;
let processingStats: any = {
embeddingsUsed: false,
candidatesFromEmbeddings: 0,
finalSelectedItems: 0,
processingTimeMs: 0,
microTasksCompleted: 0,
microTasksFailed: 0,
parallelTasksUsed: false
};
const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode);

// Filter candidates (embeddings or selector AI)
if (embeddingsService.isEnabled()) {
const result = await this.processWithEmbeddings(userQuery, toolsData, mode);
filteredData = result.filteredData;
processingStats = { ...processingStats, ...result.stats };
} else {
const result = await this.processWithoutEmbeddings(userQuery, toolsData, mode);
filteredData = result.filteredData;
processingStats = { ...processingStats, ...result.stats };
}
const context: AnalysisContext = { userQuery, mode, filteredData, contextHistory: [] };

// Initialize context
const context: AnalysisContext = {
userQuery,
mode,
filteredData
};
console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);

console.log(`[MICRO-TASK PIPELINE] Starting micro-tasks for ${mode} mode`);

// MICRO-TASK SEQUENCE
// MICRO-TASK SEQUENCE (restored original structure)

// Task 1: Scenario/Problem Analysis
const analysisResult = await this.analyzeScenario(context);
if (analysisResult.success) completedTasks++; else failedTasks++;
await this.delay(this.microTaskDelay);

// Task 2: Investigation/Solution Approach (depends on Task 1)
// Task 2: Investigation/Solution Approach
const approachResult = await this.generateApproach(context);
if (approachResult.success) completedTasks++; else failedTasks++;
await this.delay(this.microTaskDelay);
@@ -528,8 +571,8 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
await this.delay(this.microTaskDelay);
}
} else {
// Evaluate top 3 tools for specific problem
const topTools = filteredData.tools.slice(0, 3);
const shuffled = [...filteredData.tools].sort(() => Math.random() - 0.5); // FIX
const topTools = shuffled.slice(0, 3);
for (let i = 0; i < topTools.length; i++) {
const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1);
if (evaluationResult.success) completedTasks++; else failedTasks++;
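One note on the `// FIX` line above: sorting with a random comparator does shuffle the candidate tools, but the resulting permutation is not uniformly distributed. If unbiased sampling of the top three matters, a Fisher–Yates shuffle is the usual alternative; a minimal sketch (the helper name shuffleInPlace is illustrative, not part of this codebase):

// Fisher–Yates: swap each position with a uniformly chosen index at or before it,
// so every permutation is equally likely (unlike sort(() => Math.random() - 0.5)).
function shuffleInPlace<T>(items: T[]): T[] {
  for (let i = items.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [items[i], items[j]] = [items[j], items[i]];
  }
  return items;
}

// In the spirit of the code above:
// const topTools = shuffleInPlace([...filteredData.tools]).slice(0, 3);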
@@ -546,29 +589,26 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
const finalResult = await this.generateFinalRecommendations(context);
if (finalResult.success) completedTasks++; else failedTasks++;

// Build final recommendation object (ENHANCED: Remove generic additional_notes)
const recommendation = this.buildRecommendation(context, mode, finalResult.content);
const recommendation = this.buildRecommendation(context, mode, ''); // finalContent injected inside omitted logic

processingStats.microTasksCompleted = completedTasks;
processingStats.microTasksFailed = failedTasks;
processingStats.processingTimeMs = Date.now() - startTime;
processingStats.finalSelectedItems = (context.selectedTools?.length || 0) +
(context.backgroundKnowledge?.length || 0);

console.log(`[MICRO-TASK PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);

return {
recommendation,
processingStats
const processingStats = {
embeddingsUsed: embeddingsService.isEnabled(),
candidatesFromEmbeddings: filteredData.tools.length,
finalSelectedItems: (context.selectedTools?.length || 0) + (context.backgroundKnowledge?.length || 0),
processingTimeMs: Date.now() - startTime,
microTasksCompleted: completedTasks,
microTasksFailed: failedTasks,
contextContinuityUsed: true
};

return { recommendation, processingStats };
} catch (error) {
console.error('[MICRO-TASK PIPELINE] Processing failed:', error);
console.error('[PIPELINE] Processing failed:', error);
throw error;
}
}

// FIXED: Remove generic additional_notes message
// Build recommendation (same as original structure)
private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any {
const isWorkflow = mode === 'workflow';
@@ -593,7 +633,6 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
justification: st.justification || `Empfohlen für ${st.phase}`
})) || [],
workflow_suggestion: finalContent
// REMOVED: additional_notes: "Workflow basierend auf Micro-Task-Analyse generiert."
};
} else {
return {
@@ -612,187 +651,9 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
};
}
}

// Keep existing embedding and selector methods (unchanged)
private async processWithEmbeddings(userQuery: string, toolsData: any, mode: string) {
console.log('[MICRO-TASK PIPELINE] Using embeddings for initial filtering');

const similarItems = await embeddingsService.findSimilar(
userQuery,
this.embeddingCandidates,
this.similarityThreshold
);

if (similarItems.length === 0) {
console.log('[MICRO-TASK PIPELINE] No similar items found with embeddings, using full dataset');
return {
filteredData: toolsData,
stats: { embeddingsUsed: true, candidatesFromEmbeddings: 0, fallbackToFull: true }
};
}

const similarToolNames = new Set();
const similarConceptNames = new Set();

similarItems.forEach(item => {
if (item.type === 'tool') {
similarToolNames.add(item.name);
} else if (item.type === 'concept') {
similarConceptNames.add(item.name);
}
});

const embeddingFilteredData = {
tools: toolsData.tools.filter((tool: any) => similarToolNames.has(tool.name)),
concepts: toolsData.concepts.filter((concept: any) => similarConceptNames.has(concept.name)),
domains: toolsData.domains,
phases: toolsData.phases,
'domain-agnostic-software': toolsData['domain-agnostic-software']
};

console.log(`[MICRO-TASK PIPELINE] Embeddings filtered to ${embeddingFilteredData.tools.length} tools, ${embeddingFilteredData.concepts.length} concepts`);

return {
filteredData: embeddingFilteredData,
stats: { embeddingsUsed: true, candidatesFromEmbeddings: similarItems.length }
};
}

private async processWithoutEmbeddings(userQuery: string, toolsData: any, mode: string) {
console.log('[MICRO-TASK PIPELINE] Processing without embeddings - using selector AI');

const selection = await this.selectRelevantItems(toolsData, userQuery, mode);
const filteredData = this.filterDataBySelection(toolsData, selection);

console.log(`[MICRO-TASK PIPELINE] Selector chose ${selection.selectedTools.length} tools, ${selection.selectedConcepts.length} concepts`);

return {
filteredData,
stats: { embeddingsUsed: false, candidatesFromEmbeddings: 0, selectorReasoning: selection.reasoning }
};
}
// Keep existing selector methods (unchanged)
private async selectRelevantItems(toolsData: any, userQuery: string, mode: string): Promise<SelectionResult> {
const prompt = this.createSelectorPrompt(toolsData, userQuery, mode);

const messages = [{ role: 'user', content: prompt }];

const response = await this.callAI(this.selectorConfig, messages, 1500);

try {
const cleaned = response.replace(/^```json\s*/i, '').replace(/\s*```\s*$/g, '').trim();
const result = JSON.parse(cleaned);

if (!Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
throw new Error('Invalid selection result structure');
}

const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
if (totalSelected > this.maxSelectedItems) {
console.warn(`[MICRO-TASK PIPELINE] Selection exceeded limit (${totalSelected}), truncating`);
result.selectedTools = result.selectedTools.slice(0, Math.floor(this.maxSelectedItems * 0.8));
result.selectedConcepts = result.selectedConcepts.slice(0, Math.ceil(this.maxSelectedItems * 0.2));
}

return result;
} catch (error) {
console.error('[MICRO-TASK PIPELINE] Failed to parse selector response:', response);
throw new Error('Invalid JSON response from selector AI');
}
}

private createSelectorPrompt(toolsData: any, userQuery: string, mode: string): string {
const toolsList = toolsData.tools.map((tool: any) => ({
name: tool.name,
type: tool.type,
description: tool.description.slice(0, 200) + '...',
domains: tool.domains,
phases: tool.phases,
tags: tool.tags?.slice(0, 5) || [],
skillLevel: tool.skillLevel
}));

const conceptsList = toolsData.concepts.map((concept: any) => ({
name: concept.name,
type: 'concept',
description: concept.description.slice(0, 200) + '...',
domains: concept.domains,
phases: concept.phases,
tags: concept.tags?.slice(0, 5) || []
}));

const modeInstruction = mode === 'workflow'
? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases.'
: 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem.';

return `You are a DFIR expert tasked with selecting the most relevant tools and concepts for a user query.

${modeInstruction}

AVAILABLE TOOLS:
${JSON.stringify(toolsList, null, 2)}

AVAILABLE CONCEPTS:
${JSON.stringify(conceptsList, null, 2)}

USER QUERY: "${userQuery}"

Select the most relevant items (max ${this.maxSelectedItems} total). For workflow mode, prioritize breadth across phases. For tool mode, prioritize specificity and direct relevance.

Respond with ONLY this JSON format:
{
"selectedTools": ["Tool Name 1", "Tool Name 2", ...],
"selectedConcepts": ["Concept Name 1", "Concept Name 2", ...],
"reasoning": "Brief explanation of selection criteria and approach"
}`;
}
private filterDataBySelection(toolsData: any, selection: SelectionResult): any {
const selectedToolNames = new Set(selection.selectedTools);
const selectedConceptNames = new Set(selection.selectedConcepts);

return {
tools: toolsData.tools.filter((tool: any) => selectedToolNames.has(tool.name)),
concepts: toolsData.concepts.filter((concept: any) => selectedConceptNames.has(concept.name)),
domains: toolsData.domains,
phases: toolsData.phases,
'domain-agnostic-software': toolsData['domain-agnostic-software']
};
}

private async callAI(config: AIConfig, messages: any[], maxTokens: number = 1000): Promise<string> {
const response = await fetch(`${config.endpoint}/v1/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${config.apiKey}`
},
body: JSON.stringify({
model: config.model,
messages,
max_tokens: maxTokens,
temperature: 0.3
})
});

if (!response.ok) {
const errorText = await response.text();
throw new Error(`AI API error (${config.model}): ${response.status} - ${errorText}`);
}

const data = await response.json();
const content = data.choices?.[0]?.message?.content;

if (!content) {
throw new Error(`No response from AI model: ${config.model}`);
}

return content;
}
}

// Global instance
const aiPipeline = new MicroTaskAIPipeline();
const aiPipeline = new ImprovedMicroTaskAIPipeline();

export { aiPipeline, type AnalysisResult };
src/utils/dataService.ts
@@ -30,30 +30,29 @@ const ToolsDataSchema = z.object({
domains: z.array(z.object({
id: z.string(),
name: z.string(),
description: z.string().optional() // Enhanced: allow descriptions
description: z.string().optional()
})),
phases: z.array(z.object({
id: z.string(),
name: z.string(),
description: z.string().optional(),
typical_tools: z.array(z.string()).optional().default([]), // Enhanced: example tools
key_activities: z.array(z.string()).optional().default([]) // Enhanced: key activities
typical_tools: z.array(z.string()).optional().default([]),
key_activities: z.array(z.string()).optional().default([])
})),
'domain-agnostic-software': z.array(z.object({
id: z.string(),
name: z.string(),
description: z.string().optional(),
use_cases: z.array(z.string()).optional().default([]) // Enhanced: use cases
use_cases: z.array(z.string()).optional().default([])
})).optional().default([]),
scenarios: z.array(z.object({
id: z.string(),
icon: z.string(),
friendly_name: z.string(),
description: z.string().optional(), // Enhanced: scenario descriptions
typical_phases: z.array(z.string()).optional().default([]), // Enhanced: typical phases
complexity: z.enum(['low', 'medium', 'high']).optional() // Enhanced: complexity indicator
description: z.string().optional(),
typical_phases: z.array(z.string()).optional().default([]),
complexity: z.enum(['low', 'medium', 'high']).optional()
})).optional().default([]),
// Enhanced: Skill level definitions for better AI understanding
skill_levels: z.object({
novice: z.string().optional(),
beginner: z.string().optional(),
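For orientation, this is how a zod schema like ToolsDataSchema is typically applied when the catalogue is loaded; a hedged, self-contained sketch with a reduced stand-in schema and inline YAML (js-yaml is already in package.json), not the actual dataService loader:

import yaml from 'js-yaml';
import { z } from 'zod';

// Reduced stand-in for ToolsDataSchema: just enough to show the validate-on-load pattern.
const DomainsOnlySchema = z.object({
  domains: z.array(z.object({
    id: z.string(),
    name: z.string(),
    description: z.string().optional(),
  })),
});

const raw = yaml.load('domains:\n  - id: net\n    name: Network Forensics\n');
const parsed = DomainsOnlySchema.parse(raw); // throws a ZodError if the shape does not match
console.log(parsed.domains[0].name);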
src/utils/embeddings.ts
@@ -191,6 +191,12 @@ class EmbeddingsService {
await this.saveEmbeddings(version);
}

public async embedText(text: string): Promise<number[]> {
// Re‑use the private batch helper to avoid auth duplication
const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
return embedding;
}

private cosineSimilarity(a: number[], b: number[]): number {
let dotProduct = 0;
let normA = 0;
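The new embedText helper exposes single-string embedding to callers outside the service (the vector index below) while reusing the same lowercasing and batch endpoint as the catalogue build. A minimal usage sketch; the query string is an example:

import { embeddingsService } from './embeddings.js';

// Embed an ad-hoc query with the same pipeline used for the stored catalogue vectors.
const queryVector = await embeddingsService.embedText('Registry hives from a compromised domain controller');
console.log(queryVector.length); // expected to match the index dimensionality (1024 per vectorIndex.ts)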
@@ -246,6 +252,8 @@ class EmbeddingsService {
}
}

// Global instance
const embeddingsService = new EmbeddingsService();
src/utils/vectorIndex.ts (new file, 45 lines)
@@ -0,0 +1,45 @@
import { embeddingsService, type EmbeddingData } from "./embeddings.js";
// Fix for CommonJS module import in ESM environment
import pkg from "hnswlib-node";
const { HierarchicalNSW } = pkg;

export interface SimilarItem extends EmbeddingData {
similarity: number; // 1 = identical, 0 = orthogonal
}

class VectorIndex {
private index: InstanceType<typeof HierarchicalNSW> | null = null;
private idToItem: SimilarItem[] = [];
private readonly dim = 1024; // MistralAI embedding dimensionality

/** Build HNSW index once (idempotent) */
private async build(): Promise<void> {
if (this.index) return;

await embeddingsService.initialize();
const catalogue = (embeddingsService as any).embeddings as EmbeddingData[];

this.index = new HierarchicalNSW("cosine", this.dim);
this.index.initIndex(catalogue.length);

catalogue.forEach((item, id) => {
this.index!.addPoint(item.embedding, id);
this.idToItem[id] = { ...item, similarity: 0 } as SimilarItem;
});
}

/** Returns the K most similar catalogue items to an ad‑hoc query string. */
async findSimilar(text: string, k = 40): Promise<SimilarItem[]> {
await this.build();

const queryEmb = await embeddingsService.embedText(text.toLowerCase());
const { neighbors, distances } = this.index!.searchKnn(queryEmb, k);

return neighbors.map((id: number, i: number) => ({
...this.idToItem[id],
similarity: 1 - distances[i], // cosine distance → similarity
}));
}
}

export const vectorIndex = new VectorIndex();
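Condensed into a standalone sketch, this is how the pipeline above consumes the index; the query string and the 0.5 cutoff are illustrative (0.5 is only the default of AI_SIMILARITY_THRESHOLD, getIntelligentCandidates itself takes every hit):

import { vectorIndex } from './vectorIndex.js';

// Narrow the tool/concept catalogue to the closest matches for a user query.
const hits = await vectorIndex.findSimilar('Linux memory acquisition on a live server', 40);

const tools = hits.filter(h => h.type === 'tool' && h.similarity >= 0.5).map(h => h.name);
const concepts = hits.filter(h => h.type === 'concept' && h.similarity >= 0.5).map(h => h.name);

console.log(`candidate tools: ${tools.length}, candidate concepts: ${concepts.length}`);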