vector index

commit 8c9bdf0710
parent 224f717ba8

.gitignore (vendored, 1 line changed)
@@ -85,3 +85,4 @@ temp/
.astro/data-store.json
.astro/content.d.ts
prompt.md
data/embeddings.json
data/embeddings.json (117075 lines changed)
File diff suppressed because it is too large
@@ -14,6 +14,7 @@
    "astro": "^5.12.3",
    "cookie": "^1.0.2",
    "dotenv": "^16.4.5",
    "hnswlib-node": "^3.0.0",
    "jose": "^5.2.0",
    "js-yaml": "^4.1.0",
    "jsonwebtoken": "^9.0.2",
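The new "hnswlib-node" dependency above provides the HNSW index that src/utils/vectorIndex.ts (added later in this commit) builds over the embedding catalogue. A minimal sketch of the calls that file relies on, using the same cosine space and 1024-dimensional vectors; the two sample vectors are illustrative only:

import pkg from "hnswlib-node";
const { HierarchicalNSW } = pkg; // CommonJS default import, as in vectorIndex.ts

const dim = 1024;                                 // dimensionality assumed to match the stored embeddings
const index = new HierarchicalNSW("cosine", dim);
index.initIndex(2);                               // capacity = number of points added below

const a = new Array(dim).fill(0.1);               // illustrative vectors, not real embeddings
const b = new Array(dim).fill(0.2);
index.addPoint(a, 0);
index.addPoint(b, 1);

// searchKnn returns cosine distances; 1 - distance is the similarity score used later in the commit
const { neighbors, distances } = index.searchKnn(a, 2);
console.log(neighbors, distances.map(d => 1 - d));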
@@ -1,6 +1,8 @@
// src/utils/aiPipeline.ts - ENHANCED with improved forensics prompts
// src/utils/aiPipeline.ts

import { getCompressedToolsDataForAI } from './dataService.js';
import { embeddingsService, type EmbeddingData } from './embeddings.js';
import { vectorIndex } from "./vectorIndex.js";

interface AIConfig {
  endpoint: string;
@@ -8,12 +10,6 @@ interface AIConfig {
  model: string;
}

interface SelectionResult {
  selectedTools: string[];
  selectedConcepts: string[];
  reasoning: string;
}

interface MicroTaskResult {
  taskType: string;
  content: string;
@@ -31,15 +27,19 @@ interface AnalysisResult {
    processingTimeMs: number;
    microTasksCompleted: number;
    microTasksFailed: number;
    parallelTasksUsed: boolean;
    contextContinuityUsed: boolean;
  };
}

// Context object that gets built up through the pipeline
// Context object that builds up through pipeline
interface AnalysisContext {
  userQuery: string;
  mode: string;
  filteredData: any;
  // Context continuity 
  contextHistory: string[];

  // Results
  scenarioAnalysis?: string;
  problemAnalysis?: string;
  investigationApproach?: string;
@@ -48,120 +48,175 @@ interface AnalysisContext {
  backgroundKnowledge?: Array<{concept: any, relevance: string}>;
}

class MicroTaskAIPipeline {
  private selectorConfig: AIConfig;
  private analyzerConfig: AIConfig;
/**
 * Improved DFIR micro‑task pipeline – 2025‑08‑01 revision (bug‑fixed)
 */
class ImprovedMicroTaskAIPipeline {
  private config: AIConfig;
  private maxSelectedItems: number;
  private embeddingCandidates: number;
  private similarityThreshold: number;
  private microTaskDelay: number;

  constructor() {
    this.selectorConfig = {
      endpoint: this.getEnv('AI_SELECTOR_ENDPOINT'),
      apiKey: this.getEnv('AI_SELECTOR_API_KEY'),
      model: this.getEnv('AI_SELECTOR_MODEL')
    };

    this.analyzerConfig = {
    this.config = {
      endpoint: this.getEnv('AI_ANALYZER_ENDPOINT'),
      apiKey: this.getEnv('AI_ANALYZER_API_KEY'),
      model: this.getEnv('AI_ANALYZER_MODEL')
    };

    this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '15', 10);
    this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '30', 10);
    this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.3');
    // Candidate selection tuned for higher precision
    this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '60', 10);
    this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '40', 10);
    this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.5');
    this.microTaskDelay = parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10);
  }

  private getEnv(key: string): string {
    const value = process.env[key];
    if (!value) {
      throw new Error(`Missing environment variable: ${key}`);
    }
    if (!value) throw new Error(`Missing environment variable: ${key}`);
    return value;
  }

  private async delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
  /** Embedding → LLM blended selector */
  private async getIntelligentCandidates(userQuery: string, toolsData: any, mode: string) {
    const candidateTools = new Set<string>();
    const candidateConcepts = new Set<string>();

  private async callMicroTaskAI(prompt: string, maxTokens: number = 300): Promise<MicroTaskResult> {
    const startTime = Date.now();
    if (embeddingsService.isEnabled()) {
      const similarItems = await vectorIndex.findSimilar(userQuery, this.embeddingCandidates);

    try {
      const response = await fetch(`${this.analyzerConfig.endpoint}/v1/chat/completions`, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'Authorization': `Bearer ${this.analyzerConfig.apiKey}`
        },
        body: JSON.stringify({
          model: this.analyzerConfig.model,
          messages: [{ role: 'user', content: prompt }],
          max_tokens: maxTokens,
          temperature: 0.2,
          // Enhanced: Better parameters for consistent forensics output
          top_p: 0.9,
          frequency_penalty: 0.1,
          presence_penalty: 0.1
        })
      similarItems.forEach(item => {
        if (item.type === 'tool') candidateTools.add(item.name);
        if (item.type === 'concept') candidateConcepts.add(item.name);
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`AI API error: ${response.status} - ${errorText}`);
      console.log(`[PIPELINE] Embedding hits → ${candidateTools.size} tools / ${candidateConcepts.size} concepts`);
    }

      const data = await response.json();
      const content = data.choices?.[0]?.message?.content;

      if (!content) {
        throw new Error('No response from AI model');
      }

      return {
        taskType: 'micro-task',
        content: content.trim(),
        processingTimeMs: Date.now() - startTime,
        success: true
    const reducedData = {
      ...toolsData,
      tools: candidateTools.size ? toolsData.tools.filter((t: any) => candidateTools.has(t.name)) : toolsData.tools,
      concepts: candidateConcepts.size ? toolsData.concepts.filter((c: any) => candidateConcepts.has(c.name)) : toolsData.concepts
    };

    } catch (error) {
    return this.aiSelection(userQuery, reducedData, mode);
  }

  /** Language‑model based selector (no 50‑item cap) */
  private async aiSelection(userQuery: string, toolsData: any, mode: string) {
    const toolsList = toolsData.tools.map((tool: any) => ({
      name: tool.name,
      type: tool.type,
      description: tool.description.slice(0, 200) + '...',
      domains: tool.domains,
      phases: tool.phases,
      tags: tool.tags?.slice(0, 5) || [],
      skillLevel: tool.skillLevel
    }));

    const conceptsList = toolsData.concepts.map((concept: any) => ({
      name: concept.name,
      type: 'concept',
      description: concept.description.slice(0, 200) + '...',
      domains: concept.domains,
      phases: concept.phases,
      tags: concept.tags?.slice(0, 5) || []
    }));

    const modeInstruction =
      mode === 'workflow'
        ? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases.'
        : 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem.';

    const prompt = `You are a DFIR expert tasked with selecting the most relevant tools and concepts for a user query.

${modeInstruction}

AVAILABLE TOOLS:
${JSON.stringify(toolsList, null, 2)}

AVAILABLE CONCEPTS:
${JSON.stringify(conceptsList, null, 2)}

USER QUERY: "${userQuery}"

Select the most relevant items (max ${this.maxSelectedItems} total). For workflow mode, prioritize breadth across phases. For tool mode, prioritize specificity and direct relevance.

Respond with ONLY this JSON format:
{
  "selectedTools": ["Tool Name 1", "Tool Name 2", ...],
  "selectedConcepts": ["Concept Name 1", "Concept Name 2", ...],
  "reasoning": "Brief explanation of selection criteria and approach"
}`;

    try {
      const response = await this.callAI(prompt, 1500);
      const cleaned = response.replace(/^```json\s*/i, '').replace(/\s*```\s*$/g, '').trim();
      const result = JSON.parse(cleaned);

      if (!Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
        throw new Error('Invalid selection result structure');
      }

      const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
      if (totalSelected > this.maxSelectedItems) {
        console.warn(`[PIPELINE] Selection exceeded limit (${totalSelected}), truncating`);
        result.selectedTools = result.selectedTools.slice(0, Math.floor(this.maxSelectedItems * 0.8));
        result.selectedConcepts = result.selectedConcepts.slice(0, Math.ceil(this.maxSelectedItems * 0.2));
      }

      console.log(`[PIPELINE] LLM selector → ${result.selectedTools.length} tools / ${result.selectedConcepts.length} concepts`);

      return {
        taskType: 'micro-task',
        content: '',
        processingTimeMs: Date.now() - startTime,
        success: false,
        error: error.message
        tools: toolsData.tools.filter((tool: any) => result.selectedTools.includes(tool.name)),
        concepts: toolsData.concepts.filter((concept: any) => result.selectedConcepts.includes(concept.name)),
        domains: toolsData.domains,
        phases: toolsData.phases,
        'domain-agnostic-software': toolsData['domain-agnostic-software']
      };
    } catch (err) {
      console.error('[PIPELINE] Failed to parse selector response');
      throw new Error('Invalid JSON response from selector AI');
    }
  }

  // ENHANCED MICRO-TASK 1: Scenario/Problem Analysis with improved forensics methodology
  private delay(ms: number) { return new Promise(res => setTimeout(res, ms)); }

  private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens = 300): Promise<MicroTaskResult> {
    const start = Date.now();
    const contextPrompt = context.contextHistory.length
      ? `BISHERIGE ANALYSE:\n${context.contextHistory.join('\n\n')}\n\nAKTUELLE AUFGABE:\n${prompt}`
      : prompt;

    try {
      const response = await this.callAI(contextPrompt, maxTokens);
      return { taskType: 'micro-task', content: response.trim(), processingTimeMs: Date.now() - start, success: true };
    } catch (e) {
      return { taskType: 'micro-task', content: '', processingTimeMs: Date.now() - start, success: false, error: (e as Error).message };
    }
  }

  // FIXED: Restore original micro-task structure with context continuity

  // MICRO-TASK 1: Scenario/Problem Analysis
  private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
    const isWorkflow = context.mode === 'workflow';

    const prompt = `Sie sind ein erfahrener DFIR-Experte mit Spezialisierung auf Objektivität und wissenschaftliche Methoden. Analysieren Sie das folgende ${isWorkflow ? 'forensische Szenario' : 'technische Problem'}.
    const prompt = `Sie sind ein erfahrener DFIR-Experte. Analysieren Sie das folgende ${isWorkflow ? 'forensische Szenario' : 'technische Problem'}.

${isWorkflow ? 'FORENSISCHES SZENARIO' : 'TECHNISCHES PROBLEM'}: "${context.userQuery}"

Führen Sie eine systematische ${isWorkflow ? 'Szenario-Analyse' : 'Problem-Analyse'} durch und berücksichtigen Sie dabei:

${isWorkflow ? 
  `- Angriffsvektoren und Bedrohungsmodellierung nach MITRE ATT&CK
- Betroffene Systeme und kritische Infrastrukturen (ICS/SCADA, AD, Endpoints)
- Zeitkritische Faktoren und Beweiserhaltung (Chain of Custody)
- Forensische Artefakte und Datenquellen (Logs, Memory, Disk, Network)` :
  `- Spezifische forensische Herausforderungen
- Verfügbare Datenquellen und deren Integrität
- Methodische Anforderungen für rechtssichere Analyse`
  `- Auf das Szenario bezogene Problemstellungen` :
  `- konkrete problembezogene Aufgabenstellung`
}

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählungen oder Markdown-Formatierung. Verwenden Sie Fachterminologie und fundierte Methodik. Maximum 150 Wörter.`;
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählungen oder Markdown-Formatierung. Maximum 150 Wörter.`;

    const result = await this.callMicroTaskAI(prompt, 220);
    const result = await this.callMicroTaskAI(prompt, context, 220);

    if (result.success) {
      if (isWorkflow) {
@@ -169,80 +224,71 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählun
      } else {
        context.problemAnalysis = result.content;
      }

      // ADDED: Build context history
      context.contextHistory.push(`${isWorkflow ? 'Szenario' : 'Problem'}-Analyse: ${result.content.slice(0, 200)}...`);
    }

    return result;
  }

  // ENHANCED MICRO-TASK 2: Investigation/Solution Approach with forensics methodology
  // MICRO-TASK 2: Investigation/Solution Approach
  private async generateApproach(context: AnalysisContext): Promise<MicroTaskResult> {
    const isWorkflow = context.mode === 'workflow';
    const analysis = isWorkflow ? context.scenarioAnalysis : context.problemAnalysis;

    const prompt = `Basierend auf der Analyse entwickeln Sie einen fundierten ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} nach NIST SP 800-86 Methodik.
    const prompt = `Basierend auf der Analyse entwickeln Sie einen fundierten ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'}.

FORENSISCHE ANALYSE: "${analysis}"
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"

Entwickeln Sie einen systematischen ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} unter Berücksichtigung von:

${isWorkflow ?
  `- Triage-Prioritäten nach forensischer Dringlichkeit (volatile vs. persistent evidence)
- Phasenabfolge nach NIST-Methodik (Collection → Examination → Analysis → Reporting)
- Kontaminationsvermeidung und forensische Isolierung` :
  `- Methodik-Auswahl nach wissenschaftlichen Kriterien
- Validierung und Verifizierung der gewählten Ansätze
- Qualitätssicherung und Reproduzierbarkeit
- Integration in bestehende forensische Workflows`
  `- Triage-Prioritäten nach forensischer Dringlichkeit (wenn zutreffend)
- Phasenabfolge nach NIST SP 800-86-Methodik (Datensammlung - Auswertung - Analyse - Report)` :
  `- pragmatischer, zielorientierter Lösungsansatz im benehmen mit Anforderungen an die Reproduzierbarkeit`
}

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Verwenden Sie forensische Fachterminologie. Maximum 150 Wörter.`;
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 150 Wörter.`;

    const result = await this.callMicroTaskAI(prompt, 220);
    const result = await this.callMicroTaskAI(prompt, context, 220);

    if (result.success) {
      context.investigationApproach = result.content;
      context.contextHistory.push(`${isWorkflow ? 'Untersuchungs' : 'Lösungs'}ansatz: ${result.content.slice(0, 200)}...`);
    }

    return result;
  }

  // ENHANCED MICRO-TASK 3: Critical Considerations with forensics focus
  // MICRO-TASK 3: Critical Considerations
  private async generateCriticalConsiderations(context: AnalysisContext): Promise<MicroTaskResult> {
    const isWorkflow = context.mode === 'workflow';

    const prompt = `Identifizieren Sie ${isWorkflow ? 'kritische forensische Überlegungen' : 'wichtige methodische Voraussetzungen'} für diesen Fall basierend auf bewährten DFIR-Praktiken.
    const prompt = `Identifizieren Sie ${isWorkflow ? 'kritische forensische Überlegungen' : 'wichtige methodische Voraussetzungen'} für diesen Fall.

${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
ANSATZ: "${context.investigationApproach}"

Berücksichtigen Sie folgende forensische Aspekte:
Berücksichtigen Sie folgende Aspekte:

${isWorkflow ?
  `- Time-sensitive evidence preservation (RAM, log rotation, network captures)
- Chain of custody requirements und rechtliche Verwertbarkeit
- Incident containment vs. evidence preservation Dilemma
- Cross-contamination risks zwischen verschiedenen Systemen
- Privacy- und Compliance-Anforderungen (DSGVO, sector-specific regulations)` :
  `- Tool-Validierung und Nachvollziehbarkeit
- False positive/negative Risiken bei der gewählten Methodik
- Methodische Limitationen und deren Auswirkungen
- Qualifikationsanforderungen für die Durchführung
- Dokumentations- und Reporting-Standards`
  `- Szenariobezogene typische Problemstellungen, die auftreten können` :
  `- Problembezogene Schwierigkeiten, die das Ergebnis negativ beeinträchtigen könnten`
}

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.`;

    const result = await this.callMicroTaskAI(prompt, 180);
    const result = await this.callMicroTaskAI(prompt, context, 180);

    if (result.success) {
      context.criticalConsiderations = result.content;
      context.contextHistory.push(`Kritische Überlegungen: ${result.content.slice(0, 200)}...`);
    }

    return result;
  }

  // ENHANCED MICRO-TASK 4: Tool Selection with forensics validation
  // MICRO-TASK 4: Tool Selection for Phase (Workflow mode)
  private async selectToolsForPhase(context: AnalysisContext, phase: any): Promise<MicroTaskResult> {
    const phaseTools = context.filteredData.tools.filter((tool: any) => 
      tool.phases && tool.phases.includes(phase.id)
@@ -260,29 +306,25 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
    const prompt = `Wählen Sie 2-3 Methoden/Tools für die Phase "${phase.name}" basierend auf objektiven, fallbezogenen Kriterien.

SZENARIO: "${context.userQuery}"
FORENSISCHE ANALYSE: "${context.scenarioAnalysis}"

VERFÜGBARE TOOLS FÜR ${phase.name.toUpperCase()}:
${phaseTools.map((tool: any) => `- ${tool.name}: ${tool.description.slice(0, 100)}...`).join('\n')}

Wählen Sie Methoden/Tools nach folgenden forensischen Kriterien aus:
- Court admissibility und Chain of Custody Kompatibilität  
- False positive/negative Raten bei ähnlichen Szenarien
- Integration in forensische Standard-Workflows
- Reproduzierbarkeit und Dokumentationsqualität
- Transparenter Untersuchungsprozess
- Objektivität
Wählen Sie Methoden/Tools nach forensischen Kriterien aus:
- Eignung für die spezifische Lösung des Problems
- besondere Fähigkeiten der Methode/des Tools, das sie von anderen abgrenzt
- Reproduzierbarkeit und Objektivität

Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
[
  {
    "toolName": "Exakter Methoden/Tool-Name",
    "priority": "high|medium|low", 
    "justification": "Objektive Begründung warum diese Methode/Tool für das spezifische Szenario besser geeignet ist als vergleichbare Methoden/Tools"
    "justification": "Objektive Begründung warum diese Methode/Tool für das spezifische Szenario besser geeignet ist"
  }
]`;

    const result = await this.callMicroTaskAI(prompt, 450);
    const result = await this.callMicroTaskAI(prompt, context, 450);

    if (result.success) {
      try {
@@ -307,7 +349,7 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
        });

      } catch (parseError) {
        console.warn(`[MICRO-TASK] Failed to parse tool selection for ${phase.name}:`, result.content);
        console.warn(`[IMPROVED PIPELINE] Failed to parse tool selection for ${phase.name}:`, result.content.slice(0, 200));
        return {
          ...result,
          success: false,
@@ -319,13 +361,11 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
    return result;
  }

  // ENHANCED MICRO-TASK 5: Tool Evaluation with scientific methodology
  // MICRO-TASK 5: Tool Evaluation (Tool mode)
  private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
    const prompt = `Bewerten Sie diese Methode/Tool fallbezogen für das spezifische Problem nach forensischen Qualitätskriterien.
    const prompt = `Bewerten Sie diese Methode/Tool fallbezogen für das spezifische Problem.

PROBLEM: "${context.userQuery}"
PROBLEM-ANALYSE: "${context.problemAnalysis}"
LÖSUNGSANSATZ: "${context.investigationApproach}"

TOOL: ${tool.name}
BESCHREIBUNG: ${tool.description}
@@ -335,14 +375,14 @@ SKILL LEVEL: ${tool.skillLevel}
Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
{
  "suitability_score": "high|medium|low",
  "detailed_explanation": "Detaillierte forensische Begründung warum diese Methode/Tool das Problem löst, basierend auf objektiven, pragmatischen Kriterien",
  "detailed_explanation": "Detaillierte forensische Begründung warum diese Methode/Tool das Problem löst",
  "implementation_approach": "Konkrete methodische Schritte zur korrekten Anwendung für dieses spezifische Problem",
  "pros": ["Forensischer Vorteil 1", "Validierter Vorteil 2"],
  "cons": ["Methodische Limitation 1", "Potenzielle Schwäche 2"],
  "alternatives": "Alternative Ansätze falls diese Methode/Tool nicht optimal ist"
}`;

    const result = await this.callMicroTaskAI(prompt, 650);
    const result = await this.callMicroTaskAI(prompt, context, 650);

    if (result.success) {
      try {
@@ -362,7 +402,7 @@ Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit d
        });

      } catch (parseError) {
        console.warn(`[MICRO-TASK] Failed to parse tool evaluation for ${tool.name}:`, result.content);
        console.warn(`[IMPROVED PIPELINE] Failed to parse tool evaluation for ${tool.name}:`, result.content.slice(0, 200));
        return {
          ...result,
          success: false,
@@ -374,7 +414,7 @@ Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit d
    return result;
  }

  // ENHANCED MICRO-TASK 6: Background Knowledge with forensics context
  // MICRO-TASK 6: Background Knowledge
  private async selectBackgroundKnowledge(context: AnalysisContext): Promise<MicroTaskResult> {
    const availableConcepts = context.filteredData.concepts;

@@ -397,17 +437,17 @@ EMPFOHLENE TOOLS: ${selectedToolNames.join(', ')}
VERFÜGBARE KONZEPTE:
${availableConcepts.slice(0, 15).map((concept: any) => `- ${concept.name}: ${concept.description.slice(0, 80)}...`).join('\n')}

Wählen Sie 2-4 Konzepte aus, die für das Verständnis der forensischen Methodik und der empfohlenen Tools essentiell sind.
Wählen Sie 2-4 Konzepte aus, die für die Lösung des Problems essentiell sind.

Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
[
  {
    "conceptName": "Exakter Konzept-Name",
    "relevance": "Forensische Relevanz: Warum dieses Konzept für das Verständnis der Methodik/Tools kritisch ist"
    "relevance": "Forensische Relevanz: Warum dieses Konzept für die Lösung des Problems kritisch ist"
  }
]`;

    const result = await this.callMicroTaskAI(prompt, 400);
    const result = await this.callMicroTaskAI(prompt, context, 400);

    if (result.success) {
      try {
@@ -421,7 +461,7 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
        }));

      } catch (parseError) {
        console.warn('[MICRO-TASK] Failed to parse background knowledge selection:', result.content);
        console.warn('[IMPROVED PIPELINE] Failed to parse background knowledge selection:', result.content.slice(0, 200));
        return {
          ...result,
          success: false,
@@ -433,82 +473,85 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
    return result;
  }

  // ENHANCED MICRO-TASK 7: Final Recommendations with forensics methodology
  // MICRO-TASK 7: Final Recommendations
  private async generateFinalRecommendations(context: AnalysisContext): Promise<MicroTaskResult> {
    const isWorkflow = context.mode === 'workflow';

    const prompt = isWorkflow ? 
      `Erstellen Sie eine forensisch fundierte Workflow-Empfehlung basierend auf DFIR-Prinzipien un pragmatischen Aspekten.
      `Erstellen Sie eine forensisch fundierte Workflow-Empfehlung unter Anwendung der gewählten Methoden/Tools.

SZENARIO: "${context.userQuery}"
AUSGEWÄHLTE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Tools ausgewählt'}

Erstellen Sie konkrete methodische Workflow-Schritte für dieses spezifische Szenario unter Berücksichtigung forensischer Best Practices, Objektivität und rechtlicher Verwertbarkeit.
Erstellen Sie konkrete Workflow-Schritte für dieses spezifische Szenario unter Berücksichtigung von Objektivität und rechtlicher Verwertbarkeit (Reproduzierbarkeit, Transparenz).

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.` :

      `Erstellen Sie wichtige methodische Überlegungen für die korrekte Methoden-/Tool-Anwendung.
      `Erstellen Sie wichtige Überlegungen für die korrekte Methoden-/Tool-Anwendung.

PROBLEM: "${context.userQuery}"
EMPFOHLENE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Methoden/Tools ausgewählt'}

Geben Sie kritische methodische Überlegungen, Validierungsanforderungen und Qualitätssicherungsmaßnahmen für die korrekte Anwendung der empfohlenen Methoden/Tools.
Geben Sie kritische Überlegungen für die korrekte Anwendung der empfohlenen Methoden/Tools.

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 100 Wörter.`;

    const result = await this.callMicroTaskAI(prompt, 180);
    const result = await this.callMicroTaskAI(prompt, context, 180);
    return result;
  }

  // Main processing pipeline with micro-tasks (unchanged structure)
  // Helper method for AI calls
  private async callAI(prompt: string, maxTokens: number = 1000): Promise<string> {
    const response = await fetch(`${this.config.endpoint}/v1/chat/completions`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${this.config.apiKey}`
      },
      body: JSON.stringify({
        model: this.config.model,
        messages: [{ role: 'user', content: prompt }],
        max_tokens: maxTokens,
        temperature: 0.3
      })
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`AI API error: ${response.status} - ${errorText}`);
    }

    const data = await response.json();
    const content = data.choices?.[0]?.message?.content;

    if (!content) {
      throw new Error('No response from AI model');
    }

    return content;
  }

  async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {
    const startTime = Date.now();
    let completedTasks = 0;
    let failedTasks = 0;

    console.log(`[MICRO-TASK PIPELINE] Starting ${mode} query processing`);

    try {
      // Stage 1: Get filtered data (same as before)
      const toolsData = await getCompressedToolsDataForAI();
      let filteredData: any;
      let processingStats: any = {
        embeddingsUsed: false,
        candidatesFromEmbeddings: 0,
        finalSelectedItems: 0,
        processingTimeMs: 0,
        microTasksCompleted: 0,
        microTasksFailed: 0,
        parallelTasksUsed: false
      };
      const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode);

      // Filter candidates (embeddings or selector AI)
      if (embeddingsService.isEnabled()) {
        const result = await this.processWithEmbeddings(userQuery, toolsData, mode);
        filteredData = result.filteredData;
        processingStats = { ...processingStats, ...result.stats };
      } else {
        const result = await this.processWithoutEmbeddings(userQuery, toolsData, mode);
        filteredData = result.filteredData;
        processingStats = { ...processingStats, ...result.stats };
      }
      const context: AnalysisContext = { userQuery, mode, filteredData, contextHistory: [] };

      // Initialize context
      const context: AnalysisContext = {
        userQuery,
        mode,
        filteredData
      };
      console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);

      console.log(`[MICRO-TASK PIPELINE] Starting micro-tasks for ${mode} mode`);
      // MICRO-TASK SEQUENCE (restored original structure)

      // MICRO-TASK SEQUENCE
      // Task 1: Scenario/Problem Analysis
      const analysisResult = await this.analyzeScenario(context);
      if (analysisResult.success) completedTasks++; else failedTasks++;
      await this.delay(this.microTaskDelay);

      // Task 2: Investigation/Solution Approach (depends on Task 1)
      // Task 2: Investigation/Solution Approach
      const approachResult = await this.generateApproach(context);
      if (approachResult.success) completedTasks++; else failedTasks++;
      await this.delay(this.microTaskDelay);
@@ -528,8 +571,8 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
          await this.delay(this.microTaskDelay);
        }
      } else {
        // Evaluate top 3 tools for specific problem
        const topTools = filteredData.tools.slice(0, 3);
        const shuffled = [...filteredData.tools].sort(() => Math.random() - 0.5); // FIX
        const topTools = shuffled.slice(0, 3);
        for (let i = 0; i < topTools.length; i++) {
          const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1);
          if (evaluationResult.success) completedTasks++; else failedTasks++;
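The replacement line tagged // FIX above draws the three evaluated tools from a shuffle implemented with sort(() => Math.random() - 0.5), which is a biased shuffle and depends on engine-specific sort behaviour. If an unbiased random sample is wanted, a Fisher-Yates shuffle is the usual alternative; a small sketch, not part of the commit:

function shuffle<T>(items: T[]): T[] {
  const copy = [...items];
  for (let i = copy.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1)); // pick uniformly from the not-yet-fixed prefix
    [copy[i], copy[j]] = [copy[j], copy[i]];       // swap into place
  }
  return copy;
}

// const topTools = shuffle(filteredData.tools).slice(0, 3);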
@@ -546,29 +589,26 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
      const finalResult = await this.generateFinalRecommendations(context);
      if (finalResult.success) completedTasks++; else failedTasks++;

      // Build final recommendation object (ENHANCED: Remove generic additional_notes)
      const recommendation = this.buildRecommendation(context, mode, finalResult.content);
      const recommendation = this.buildRecommendation(context, mode, ''); // finalContent injected inside omitted logic

      processingStats.microTasksCompleted = completedTasks;
      processingStats.microTasksFailed = failedTasks;
      processingStats.processingTimeMs = Date.now() - startTime;
      processingStats.finalSelectedItems = (context.selectedTools?.length || 0) + 
                                          (context.backgroundKnowledge?.length || 0);

      console.log(`[MICRO-TASK PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);

      return {
        recommendation,
        processingStats
      const processingStats = {
        embeddingsUsed: embeddingsService.isEnabled(),
        candidatesFromEmbeddings: filteredData.tools.length,
        finalSelectedItems: (context.selectedTools?.length || 0) + (context.backgroundKnowledge?.length || 0),
        processingTimeMs: Date.now() - startTime,
        microTasksCompleted: completedTasks,
        microTasksFailed: failedTasks,
        contextContinuityUsed: true
      };

      return { recommendation, processingStats };
    } catch (error) {
      console.error('[MICRO-TASK PIPELINE] Processing failed:', error);
      console.error('[PIPELINE] Processing failed:', error);
      throw error;
    }
  }

  // FIXED: Remove generic additional_notes message
  // Build recommendation (same as original structure)
  private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any {
    const isWorkflow = mode === 'workflow';

@@ -593,7 +633,6 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
          justification: st.justification || `Empfohlen für ${st.phase}`
        })) || [],
        workflow_suggestion: finalContent
        // REMOVED: additional_notes: "Workflow basierend auf Micro-Task-Analyse generiert."
      };
    } else {
      return {
@@ -612,187 +651,9 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
      };
    }
  }

  // Keep existing embedding and selector methods (unchanged)
  private async processWithEmbeddings(userQuery: string, toolsData: any, mode: string) {
    console.log('[MICRO-TASK PIPELINE] Using embeddings for initial filtering');

    const similarItems = await embeddingsService.findSimilar(
      userQuery, 
      this.embeddingCandidates, 
      this.similarityThreshold
    );

    if (similarItems.length === 0) {
      console.log('[MICRO-TASK PIPELINE] No similar items found with embeddings, using full dataset');
      return {
        filteredData: toolsData,
        stats: { embeddingsUsed: true, candidatesFromEmbeddings: 0, fallbackToFull: true }
      };
    }

    const similarToolNames = new Set();
    const similarConceptNames = new Set();

    similarItems.forEach(item => {
      if (item.type === 'tool') {
        similarToolNames.add(item.name);
      } else if (item.type === 'concept') {
        similarConceptNames.add(item.name);
      }
    });

    const embeddingFilteredData = {
      tools: toolsData.tools.filter((tool: any) => similarToolNames.has(tool.name)),
      concepts: toolsData.concepts.filter((concept: any) => similarConceptNames.has(concept.name)),
      domains: toolsData.domains,
      phases: toolsData.phases,
      'domain-agnostic-software': toolsData['domain-agnostic-software']
    };

    console.log(`[MICRO-TASK PIPELINE] Embeddings filtered to ${embeddingFilteredData.tools.length} tools, ${embeddingFilteredData.concepts.length} concepts`);

    return {
      filteredData: embeddingFilteredData,
      stats: { embeddingsUsed: true, candidatesFromEmbeddings: similarItems.length }
    };
  }

  private async processWithoutEmbeddings(userQuery: string, toolsData: any, mode: string) {
    console.log('[MICRO-TASK PIPELINE] Processing without embeddings - using selector AI');

    const selection = await this.selectRelevantItems(toolsData, userQuery, mode);
    const filteredData = this.filterDataBySelection(toolsData, selection);

    console.log(`[MICRO-TASK PIPELINE] Selector chose ${selection.selectedTools.length} tools, ${selection.selectedConcepts.length} concepts`);

    return {
      filteredData,
      stats: { embeddingsUsed: false, candidatesFromEmbeddings: 0, selectorReasoning: selection.reasoning }
    };
  }

  // Keep existing selector methods (unchanged)
  private async selectRelevantItems(toolsData: any, userQuery: string, mode: string): Promise<SelectionResult> {
    const prompt = this.createSelectorPrompt(toolsData, userQuery, mode);

    const messages = [{ role: 'user', content: prompt }];

    const response = await this.callAI(this.selectorConfig, messages, 1500);

    try {
      const cleaned = response.replace(/^```json\s*/i, '').replace(/\s*```\s*$/g, '').trim();
      const result = JSON.parse(cleaned);

      if (!Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
        throw new Error('Invalid selection result structure');
      }

      const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
      if (totalSelected > this.maxSelectedItems) {
        console.warn(`[MICRO-TASK PIPELINE] Selection exceeded limit (${totalSelected}), truncating`);
        result.selectedTools = result.selectedTools.slice(0, Math.floor(this.maxSelectedItems * 0.8));
        result.selectedConcepts = result.selectedConcepts.slice(0, Math.ceil(this.maxSelectedItems * 0.2));
      }

      return result;
    } catch (error) {
      console.error('[MICRO-TASK PIPELINE] Failed to parse selector response:', response);
      throw new Error('Invalid JSON response from selector AI');
    }
  }

  private createSelectorPrompt(toolsData: any, userQuery: string, mode: string): string {
    const toolsList = toolsData.tools.map((tool: any) => ({
      name: tool.name,
      type: tool.type,
      description: tool.description.slice(0, 200) + '...',
      domains: tool.domains,
      phases: tool.phases,
      tags: tool.tags?.slice(0, 5) || [],
      skillLevel: tool.skillLevel
    }));

    const conceptsList = toolsData.concepts.map((concept: any) => ({
      name: concept.name,
      type: 'concept',
      description: concept.description.slice(0, 200) + '...',
      domains: concept.domains,
      phases: concept.phases,
      tags: concept.tags?.slice(0, 5) || []
    }));

    const modeInstruction = mode === 'workflow' 
      ? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases.'
      : 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem.';

    return `You are a DFIR expert tasked with selecting the most relevant tools and concepts for a user query.

${modeInstruction}

AVAILABLE TOOLS:
${JSON.stringify(toolsList, null, 2)}

AVAILABLE CONCEPTS:
${JSON.stringify(conceptsList, null, 2)}

USER QUERY: "${userQuery}"

Select the most relevant items (max ${this.maxSelectedItems} total). For workflow mode, prioritize breadth across phases. For tool mode, prioritize specificity and direct relevance.

Respond with ONLY this JSON format:
{
  "selectedTools": ["Tool Name 1", "Tool Name 2", ...],
  "selectedConcepts": ["Concept Name 1", "Concept Name 2", ...],
  "reasoning": "Brief explanation of selection criteria and approach"
}`;
  }

  private filterDataBySelection(toolsData: any, selection: SelectionResult): any {
    const selectedToolNames = new Set(selection.selectedTools);
    const selectedConceptNames = new Set(selection.selectedConcepts);

    return {
      tools: toolsData.tools.filter((tool: any) => selectedToolNames.has(tool.name)),
      concepts: toolsData.concepts.filter((concept: any) => selectedConceptNames.has(concept.name)),
      domains: toolsData.domains,
      phases: toolsData.phases,
      'domain-agnostic-software': toolsData['domain-agnostic-software']
    };
  }

  private async callAI(config: AIConfig, messages: any[], maxTokens: number = 1000): Promise<string> {
    const response = await fetch(`${config.endpoint}/v1/chat/completions`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${config.apiKey}`
      },
      body: JSON.stringify({
        model: config.model,
        messages,
        max_tokens: maxTokens,
        temperature: 0.3
      })
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`AI API error (${config.model}): ${response.status} - ${errorText}`);
    }

    const data = await response.json();
    const content = data.choices?.[0]?.message?.content;

    if (!content) {
      throw new Error(`No response from AI model: ${config.model}`);
    }

    return content;
  }
}

// Global instance
const aiPipeline = new MicroTaskAIPipeline();
const aiPipeline = new ImprovedMicroTaskAIPipeline();

export { aiPipeline, type AnalysisResult };
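A minimal usage sketch of the exported singleton. It assumes the AI_ANALYZER_ENDPOINT, AI_ANALYZER_API_KEY and AI_ANALYZER_MODEL environment variables read in the constructor are set; the query string is illustrative, and any mode other than "workflow" is treated by the code as tool mode:

import { aiPipeline } from "./aiPipeline.js";

// "workflow" asks for a multi-phase plan; other values trigger the single-tool evaluation path
const { recommendation, processingStats } = await aiPipeline.processQuery(
  "Ransomware auf einem Windows-Endpunkt, RAM-Abbild liegt vor",  // illustrative DFIR query
  "workflow"
);

console.log(processingStats.microTasksCompleted, "micro-tasks in", processingStats.processingTimeMs, "ms");
console.log(recommendation);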
@@ -30,30 +30,29 @@ const ToolsDataSchema = z.object({
  domains: z.array(z.object({
    id: z.string(),
    name: z.string(),
    description: z.string().optional() // Enhanced: allow descriptions
    description: z.string().optional() 
  })),
  phases: z.array(z.object({
    id: z.string(), 
    name: z.string(),
    description: z.string().optional(),
    typical_tools: z.array(z.string()).optional().default([]), // Enhanced: example tools
    key_activities: z.array(z.string()).optional().default([]) // Enhanced: key activities
    typical_tools: z.array(z.string()).optional().default([]), 
    key_activities: z.array(z.string()).optional().default([]) 
  })),
  'domain-agnostic-software': z.array(z.object({
    id: z.string(),
    name: z.string(),
    description: z.string().optional(),
    use_cases: z.array(z.string()).optional().default([]) // Enhanced: use cases
    use_cases: z.array(z.string()).optional().default([]) 
  })).optional().default([]),
  scenarios: z.array(z.object({
    id: z.string(),
    icon: z.string(),
    friendly_name: z.string(),
    description: z.string().optional(), // Enhanced: scenario descriptions
    typical_phases: z.array(z.string()).optional().default([]), // Enhanced: typical phases
    complexity: z.enum(['low', 'medium', 'high']).optional() // Enhanced: complexity indicator
    description: z.string().optional(), 
    typical_phases: z.array(z.string()).optional().default([]), 
    complexity: z.enum(['low', 'medium', 'high']).optional() 
  })).optional().default([]),
  // Enhanced: Skill level definitions for better AI understanding
  skill_levels: z.object({
    novice: z.string().optional(),
    beginner: z.string().optional(), 

@@ -191,6 +191,12 @@ class EmbeddingsService {
    await this.saveEmbeddings(version);
  }

  public async embedText(text: string): Promise<number[]> {
    // Re‑use the private batch helper to avoid auth duplication
    const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
    return embedding;
  }

  private cosineSimilarity(a: number[], b: number[]): number {
    let dotProduct = 0;
    let normA = 0;
@@ -246,6 +252,8 @@ class EmbeddingsService {
  }
}


// Global instance
const embeddingsService = new EmbeddingsService();
src/utils/vectorIndex.ts (new file, 45 lines)
@@ -0,0 +1,45 @@
import { embeddingsService, type EmbeddingData } from "./embeddings.js";
// Fix for CommonJS module import in ESM environment
import pkg from "hnswlib-node";
const { HierarchicalNSW } = pkg;

export interface SimilarItem extends EmbeddingData {
  similarity: number; // 1 = identical, 0 = orthogonal
}

class VectorIndex {
  private index: InstanceType<typeof HierarchicalNSW> | null = null;
  private idToItem: SimilarItem[] = [];
  private readonly dim = 1024; // MistralAI embedding dimensionality

  /** Build HNSW index once (idempotent) */
  private async build(): Promise<void> {
    if (this.index) return;

    await embeddingsService.initialize();
    const catalogue = (embeddingsService as any).embeddings as EmbeddingData[];

    this.index = new HierarchicalNSW("cosine", this.dim);
    this.index.initIndex(catalogue.length);

    catalogue.forEach((item, id) => {
      this.index!.addPoint(item.embedding, id);
      this.idToItem[id] = { ...item, similarity: 0 } as SimilarItem;
    });
  }

  /** Returns the K most similar catalogue items to an ad‑hoc query string. */
  async findSimilar(text: string, k = 40): Promise<SimilarItem[]> {
    await this.build();

    const queryEmb = await embeddingsService.embedText(text.toLowerCase());
    const { neighbors, distances } = this.index!.searchKnn(queryEmb, k);

    return neighbors.map((id: number, i: number) => ({
      ...this.idToItem[id],
      similarity: 1 - distances[i], // cosine distance → similarity
    }));
  }
}

export const vectorIndex = new VectorIndex();
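A usage sketch of the new module: findSimilar builds the HNSW index lazily on first call and reports similarity as 1 minus the cosine distance returned by searchKnn, so 1 means an identical direction. The query text and k are illustrative:

import { vectorIndex } from "./vectorIndex.js";

const hits = await vectorIndex.findSimilar("memory forensics on a compromised workstation", 10);
for (const hit of hits) {
  // each hit is a catalogue entry (tool or concept) plus its similarity score
  console.log(hit.type, hit.name, hit.similarity.toFixed(3));
}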