2025-08-17 22:59:31 +00:00
7 changed files with 149 additions and 167 deletions
--- a/.env.example
+++ b/.env.example
@@ -60,7 +60,7 @@ FORENSIC_AUDIT_MAX_ENTRIES=50

 # === AI SEMANTIC SEARCH ===
 # Enable semantic search (highly recommended for better results)
-AI_EMBEDDINGS_ENABLED=true
+REMOVE_AI_EMBEDDINGS_ENABLED=true
 AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
 AI_EMBEDDINGS_API_KEY=your-embeddings-api-key-here
 AI_EMBEDDINGS_MODEL=mistral-embed
@@ -122,8 +122,8 @@ AI_EMBEDDINGS_BATCH_SIZE=10
 AI_EMBEDDINGS_BATCH_DELAY_MS=1000

 # === Context Management ===
-AI_MAX_CONTEXT_TOKENS=4000
+REMOVE_AI_MAX_CONTEXT_TOKENS=4000
 AI_MAX_PROMPT_TOKENS=2500

 # === Confidence Scoring ===
 CONFIDENCE_SEMANTIC_WEIGHT=0.5
--- a/src/config/prompts.ts
+++ b/src/config/prompts.ts
@@ -2,17 +2,15 @@

 export const AI_PROMPTS = {
  
-  toolSelection: (mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number) => {
+  toolSelection: (mode: string, userQuery: string, maxSelectedItems: number) => {
    const modeInstruction = mode === 'workflow' 
      ? 'Workflow mit 15-25 Items über alle Phasen. PFLICHT: Mindestens 40% Methoden, Rest Tools/Konzepte.'
      : 'Spezifische Lösung mit 4-10 Items. PFLICHT: Mindestens 30% Methoden wenn verfügbar.';

    return `Du bist ein DFIR-Experte. Wähle die BESTEN Items aus dem vorgefilterten Set.

-AUSWAHLMETHODE: ${selectionMethod}
-${selectionMethod === 'embeddings_candidates' ? 
-  '✓ Semantisch relevante Items bereits vorgefiltert\n✓ Wähle die BESTEN für die konkrete Aufgabe' :
-  '✓ Vollständige Datenbank verfügbar\n✓ Wähle die relevantesten Items'}
+AUSWAHLMETHODE:
+✓ Semantisch relevante Items bereits vorgefiltert\n✓ Wähle die BESTEN für die konkrete Aufgabe

 ${modeInstruction}

@@ -309,7 +307,7 @@ Antwort: Fließtext ohne Listen, max ${isWorkflow ? '100' : '80'} Wörter.`;
  }
 } as const;

-export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number): string;
+export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, maxSelectedItems: number): string;
 export function getPrompt(key: 'toolSelectionWithData', basePrompt: string, toolsToSend: any[], conceptsToSend: any[]): string;
 export function getPrompt(key: 'scenarioAnalysis', isWorkflow: boolean, userQuery: string): string;
 export function getPrompt(key: 'investigationApproach', isWorkflow: boolean, userQuery: string): string;
--- a/src/utils/aiPipeline.ts
+++ b/src/utils/aiPipeline.ts
@@ -11,7 +11,7 @@ import 'dotenv/config';

 interface PipelineConfig {
  microTaskDelay: number;
-  maxContextTokens: number;
+  //maxContextTokens: number;
  maxPromptTokens: number;
  taskRelevanceModeration: {
    maxInitialScore: number;
@@ -36,7 +36,7 @@ interface MicroTaskResult {
 interface AnalysisResult {
  recommendation: any;
  processingStats: {
-    embeddingsUsed: boolean;
+    //embeddingsUsed: boolean;
    candidatesFromEmbeddings: number;
    finalSelectedItems: number;
    processingTimeMs: number;
@@ -57,7 +57,7 @@ interface PipelineContext {
  mode: string;
  filteredData: any;
  contextHistory: string[];
-  maxContextLength: number;
+  //maxContextLength: number;
  currentContextLength: number;
  scenarioAnalysis?: string;
  problemAnalysis?: string;
@@ -91,7 +91,7 @@ class AIPipeline {
  constructor() {
    this.config = {
      microTaskDelay: parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10),
-      maxContextTokens: parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10),
+      //maxContextTokens: parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10),
      maxPromptTokens: parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10),
      taskRelevanceModeration: {
        maxInitialScore: 85,
@@ -123,7 +123,7 @@ class AIPipeline {
        mode,
        filteredData: {},
        contextHistory: [],
-        maxContextLength: this.config.maxContextTokens,
+        //maxContextLength: this.config.maxContextTokens,
        currentContextLength: 0,
        seenToolNames: new Set<string>(),
        embeddingsSimilarities: new Map<string, number>(),
@@ -138,20 +138,20 @@ class AIPipeline {
      const selectionConfidence = this.calculateToolSelectionConfidence(
        candidateData.tools.length,
        toolsData.tools.length,
-        candidateData.selectionMethod,
+        //candidateData.selectionMethod,
        candidateData.concepts.length
      );
      
      auditService.addToolSelection(
        candidateData.tools.map(t => t.name),
        toolsData.tools.map(t => t.name),
-        candidateData.selectionMethod,
+        //candidateData.selectionMethod,
        selectionConfidence,
        candidateSelectionStart,
        {
-          embeddingsUsed: embeddingsService.isEnabled(),
+          //embeddingsUsed: embeddingsService.isEnabled(),
          totalCandidatesFound: candidateData.tools.length + candidateData.concepts.length,
-          selectionMethod: candidateData.selectionMethod,
+          //selectionMethod: candidateData.selectionMethod,
          reductionRatio: candidateData.tools.length / toolsData.tools.length
        }
      );
@@ -201,7 +201,7 @@ class AIPipeline {
      const recommendation = this.buildRecommendation(context, mode, finalResult.content);

      const processingStats = {
-        embeddingsUsed: embeddingsService.isEnabled(),
+        //embeddingsUsed: embeddingsService.isEnabled(),
        candidatesFromEmbeddings: candidateData.tools.length,
        finalSelectedItems: (context.selectedTools?.length || 0) + (context.backgroundKnowledge?.length || 0),
        processingTimeMs: Date.now() - startTime,
@@ -213,7 +213,7 @@ class AIPipeline {
        aiModel: aiConfig.model,
        toolsDataHash,
        temperature: 0.3,
-        maxTokensUsed: 2500
+        maxTokensUsed: 32768
      };

      console.log('[AI-PIPELINE] Pipeline completed successfully:', {
@@ -292,7 +292,7 @@ class AIPipeline {
  private calculateToolSelectionConfidence(
    selectedCount: number,
    totalCount: number,
-    method: string,
+    //method: string,
    conceptsCount: number
  ): number {
    let confidence = 50;
@@ -307,9 +307,9 @@ class AIPipeline {
      confidence -= 15;
    }
    
-    if (method.includes('embeddings')) {
-      confidence += 15;
-    }
+    //if (method.includes('embeddings')) {
+    //confidence += 15;
+    //}
    
    if (conceptsCount > 0) {
      confidence += 10;
@@ -1280,10 +1280,8 @@ class AIPipeline {
     context.contextHistory.push(newEntry);
     context.currentContextLength += entryTokens;
     
-    while (context.currentContextLength > this.config.maxContextTokens && context.contextHistory.length > 1) {
-      const removed = context.contextHistory.shift()!;
-      context.currentContextLength -= aiService.estimateTokens(removed);
-    }
+    // No eviction here: the context-token cap (maxContextTokens) is disabled in
+    // PipelineConfig, so every pushed entry stays in contextHistory.
  }

  private addToolToSelection(
--- a/src/utils/aiService.ts
+++ b/src/utils/aiService.ts
@@ -34,7 +34,7 @@ class AIService {
    };

    this.defaultOptions = {
-      maxTokens: 1500,
+      maxTokens: 32768,
      temperature: 0.3,
      timeout: 30000
    };
--- a/src/utils/auditService.ts
+++ b/src/utils/auditService.ts
@@ -26,7 +26,7 @@ export interface AuditEntry {
    completionTokens?: number;
    toolsDataHash?: string;
    embeddingsUsed?: boolean;
-    selectionMethod?: string;
+    //selectionMethod?: string;
    microTaskType?: string;
    confidenceFactors?: string[];
    reasoning?: string;
@@ -146,7 +146,7 @@ class AuditService {
  addToolSelection(
    selectedTools: string[],
    availableTools: string[],
-    selectionMethod: string,
+    //selectionMethod: string,
    confidence: number,
    startTime: number,
    metadata: Record<string, any> = {}
@@ -154,17 +154,22 @@ class AuditService {
    const calculatedConfidence = this.calculateSelectionConfidence(
      selectedTools, 
      availableTools, 
-      selectionMethod, 
+      //selectionMethod, 
      metadata
    );

+    const decisionBasis =
+      metadata.embeddingsUsed || metadata.similarityScores
+        ? 'semantic-search'
+        : (metadata.aiPrompt || metadata.microTaskType ? 'ai-analysis' : 'rule-based');
+
    this.addEntry(
      'tool-selection',
      'selection-decision',
      { 
        availableTools: availableTools.slice(0, 10),
        totalAvailable: availableTools.length,
-        selectionMethod: selectionMethod
+        //selectionMethod: selectionMethod
      },
      { 
        selectedTools: selectedTools,
@@ -174,10 +179,11 @@ class AuditService {
      startTime,
      {
        ...metadata,
-        selectionMethod,
+        //selectionMethod,
        availableToolsCount: availableTools.length,
        selectedToolsCount: selectedTools.length,
-        decisionBasis: selectionMethod.includes('embeddings') ? 'semantic-search' : 'ai-analysis'
+        //decisionBasis: selectionMethod.includes('embeddings') ? 'semantic-search' : 'ai-analysis'
+        decisionBasis
      }
    );
  }
@@ -282,7 +288,7 @@ class AuditService {
  private calculateSelectionConfidence(
    selectedTools: string[], 
    availableTools: string[], 
-    selectionMethod: string, 
+    //selectionMethod: string, 
    metadata: Record<string, any>
  ): number {
    let confidence = 50;
@@ -297,9 +303,9 @@ class AuditService {
      confidence -= 20;
    }
    
-    if (selectionMethod.includes('embeddings')) {
+    /*if (selectionMethod.includes('embeddings')) {
      confidence += 15;
-    }
+    }*/
    
    if (selectedTools.length >= 5 && selectedTools.length <= 25) {
      confidence += 10;
@@ -589,7 +595,8 @@ class AuditService {
  }

  private inferDecisionBasis(metadata: Record<string, any>): string {
-    if (metadata.embeddingsUsed || metadata.selectionMethod?.includes('embeddings')) return 'semantic-search';
+    if (metadata.embeddingsUsed) return 'semantic-search';
+    //if (metadata.embeddingsUsed || metadata.selectionMethod?.includes('embeddings')) return 'semantic-search';
    if (metadata.aiPrompt || metadata.microTaskType) return 'ai-analysis';
    if (metadata.semanticQuery && metadata.aiReasoningUsed) return 'hybrid';
    return 'rule-based';
--- a/src/utils/embeddings.ts
+++ b/src/utils/embeddings.ts
@@ -31,7 +31,7 @@ interface EmbeddingsDatabase {
 }

 interface EmbeddingsConfig {
-  enabled: boolean;
+  //enabled: boolean;
  endpoint?: string;
  apiKey?: string;
  model?: string;
@@ -49,14 +49,14 @@ class EmbeddingsService {
  constructor() {
    this.config = this.loadConfig();
    console.log('[EMBEDDINGS-SERVICE] Initialized:', {
-      enabled: this.config.enabled,
+      //enabled: this.config.enabled,
      hasEndpoint: !!this.config.endpoint,
      hasModel: !!this.config.model
    });
  }

  private loadConfig(): EmbeddingsConfig {
-    const enabled = process.env.AI_EMBEDDINGS_ENABLED === 'true';
+    //const enabled = process.env.AI_EMBEDDINGS_ENABLED === 'true';
    const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
    const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
    const model = process.env.AI_EMBEDDINGS_MODEL;
@@ -64,7 +64,7 @@ class EmbeddingsService {
    const batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);

    return {
-      enabled,
+      //enabled,
      endpoint,
      apiKey,
      model,
@@ -92,10 +92,10 @@ class EmbeddingsService {
    try {
      console.log('[EMBEDDINGS-SERVICE] Starting initialization');

-      if (!this.config.enabled) {
+      /*if (!this.config.enabled) {
        console.log('[EMBEDDINGS-SERVICE] Service disabled via configuration');
        return;
-      }
+      }*/

      await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true });

@@ -263,7 +263,7 @@ class EmbeddingsService {
  }

  async embedText(text: string): Promise<number[]> {
-    if (!this.isEnabled() || !this.isInitialized) {
+    if (!this.isInitialized) {
      throw new Error('Embeddings service not available');
    }
    
@@ -272,9 +272,9 @@ class EmbeddingsService {
  }

  async waitForInitialization(): Promise<void> {
-    if (!this.config.enabled) {
+    /*if (!this.config.enabled) {
      return Promise.resolve();
-    }
+    }*/

    if (this.isInitialized) {
      return Promise.resolve();
@@ -303,10 +303,10 @@ class EmbeddingsService {
  }

  async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<SimilarityResult[]> {
-    if (!this.config.enabled) {
+    /*if (!this.config.enabled) {
      console.log('[EMBEDDINGS-SERVICE] Service disabled, returning empty results');
      return [];
-    }
+    }*/

    if (!this.isInitialized || this.embeddings.length === 0) {
      console.log('[EMBEDDINGS-SERVICE] Not initialized or no embeddings available');
@@ -349,16 +349,24 @@ class EmbeddingsService {
    }
  }

-  isEnabled(): boolean {
+  /*isEnabled(): boolean {
    return this.config.enabled;
-  }
+  }*/

-  getStats(): { enabled: boolean; initialized: boolean; count: number } {
+  /*getStats(): { enabled: boolean; initialized: boolean; count: number } {
    return {
      enabled: this.config.enabled,
      initialized: this.isInitialized,
      count: this.embeddings.length
    };
+  }*/
+
+  getStats(): {initialized: boolean; count: number } {
+    return {
+      //enabled: this.config.enabled,
+      initialized: this.isInitialized,
+      count: this.embeddings.length
+    };
  }

  getConfig(): EmbeddingsConfig {
--- a/src/utils/toolSelector.ts
+++ b/src/utils/toolSelector.ts
@@ -38,7 +38,7 @@ export interface SelectionContext {
 export interface ToolSelectionResult {
  selectedTools: any[];
  selectedConcepts: any[];
-  selectionMethod: string;
+  //selectionMethod: string;
  confidence: number;
 }

@@ -84,13 +84,13 @@ class ToolSelector {
    domains: any[];
    phases: any[];
    'domain-agnostic-software': any[];
-    selectionMethod: string;
+    //selectionMethod: string;
  }> {
    console.log('[TOOL-SELECTOR] Getting intelligent candidates for query');
    
    let candidateTools: any[] = [];
    let candidateConcepts: any[] = [];
-    let selectionMethod = 'unknown';
+    //let selectionMethod = 'unknown';
    
    context.embeddingsSimilarities.clear();
    
@@ -100,7 +100,6 @@ class ToolSelector {
      console.error('[TOOL-SELECTOR] Embeddings initialization failed:', error);
    }
    
-    if (embeddingsService.isEnabled()) {
    console.log('[TOOL-SELECTOR] Using embeddings for candidate selection');
    
    const similarItems = await embeddingsService.findSimilar(
@@ -134,28 +133,23 @@ class ToolSelector {
    if (similarTools.length >= this.config.embeddingsMinTools && reductionRatio <= this.config.embeddingsMaxReductionRatio) {
      candidateTools = similarTools;
      candidateConcepts = similarConcepts;
-        selectionMethod = 'embeddings_candidates';
+      //selectionMethod = 'embeddings_candidates';
      
      console.log('[TOOL-SELECTOR] Using embeddings filtering:', totalAvailableTools, '→', similarTools.length, 'tools');
    } else {
      console.log('[TOOL-SELECTOR] Embeddings filtering insufficient, using full dataset');
      candidateTools = toolsData.tools;
      candidateConcepts = toolsData.concepts;
-        selectionMethod = 'full_dataset';
-      }
-    } else {
-      console.log('[TOOL-SELECTOR] Embeddings disabled, using full dataset');
-      candidateTools = toolsData.tools;
-      candidateConcepts = toolsData.concepts;
-      selectionMethod = 'full_dataset';
+      //selectionMethod = 'full_dataset';
    }

+
    const selection = await this.performAISelection(
      userQuery,
      candidateTools,
      candidateConcepts,
      mode,
-      selectionMethod,
+      //selectionMethod,
      context
    );
    
@@ -165,7 +159,7 @@ class ToolSelector {
      domains: toolsData.domains,
      phases: toolsData.phases,
      'domain-agnostic-software': toolsData['domain-agnostic-software'],
-      selectionMethod
+      //selectionMethod
    };
  }

@@ -174,7 +168,6 @@ class ToolSelector {
    candidateTools: any[],
    candidateConcepts: any[],
    mode: string,
-    selectionMethod: string,
    context: SelectionContext
  ): Promise<ToolSelectionResult> {
    console.log('[TOOL-SELECTOR] Performing AI selection');
@@ -188,53 +181,29 @@ class ToolSelector {
    const softwareWithFullData = candidateSoftware.map(this.createToolData);
    const conceptsWithFullData = candidateConcepts.map(this.createConceptData);

-    let toolsToSend: any[];
-    let conceptsToSend: any[];
+    // Unified selection limits (method-agnostic)
+    const maxTools = Math.min(this.config.embeddingSelectionLimit, this.config.noEmbeddingsToolLimit);
+    const maxConcepts = Math.min(this.config.embeddingConceptsLimit, this.config.noEmbeddingsConceptLimit);
+    const methodLimit = Math.ceil(maxTools * this.config.methodSelectionRatio);
+    const softwareLimit = Math.floor(maxTools * this.config.softwareSelectionRatio);

-    if (selectionMethod === 'embeddings_candidates') {
-      const totalLimit = this.config.embeddingSelectionLimit;
-      const methodLimit = Math.ceil(totalLimit * this.config.methodSelectionRatio);
-      const softwareLimit = Math.floor(totalLimit * this.config.softwareSelectionRatio);
-      
-      toolsToSend = [
+    // Build tool list to send
+    const toolsToSend: any[] = [
      ...methodsWithFullData.slice(0, methodLimit),
-        ...softwareWithFullData.slice(0, softwareLimit)
-      ];
-      
-      const remainingCapacity = totalLimit - toolsToSend.length;
-      if (remainingCapacity > 0) {
-        if (methodsWithFullData.length > methodLimit) {
-          toolsToSend.push(...methodsWithFullData.slice(methodLimit, methodLimit + remainingCapacity));
-        } else if (softwareWithFullData.length > softwareLimit) {
-          toolsToSend.push(...softwareWithFullData.slice(softwareLimit, softwareLimit + remainingCapacity));
-        }
-      }
-      
-      conceptsToSend = conceptsWithFullData.slice(0, this.config.embeddingConceptsLimit);
-    } else {
-      const maxTools = this.config.noEmbeddingsToolLimit;
-      const maxConcepts = this.config.noEmbeddingsConceptLimit;
-      const methodLimit = Math.ceil(maxTools * 0.4);
-      const softwareLimit = Math.floor(maxTools * 0.5);
-      
-      toolsToSend = [
-        ...methodsWithFullData.slice(0, methodLimit),
-        ...softwareWithFullData.slice(0, softwareLimit)
+      ...softwareWithFullData.slice(0, softwareLimit),
    ];

    const remainingCapacity = maxTools - toolsToSend.length;
    if (remainingCapacity > 0) {
-        if (methodsWithFullData.length > methodLimit) {
-          toolsToSend.push(...methodsWithFullData.slice(methodLimit, methodLimit + remainingCapacity));
-        } else if (softwareWithFullData.length > softwareLimit) {
-          toolsToSend.push(...softwareWithFullData.slice(softwareLimit, softwareLimit + remainingCapacity));
-        }
+      // Fill remainder from whichever bucket still has items
+      const extraMethods = methodsWithFullData.slice(methodLimit, methodLimit + remainingCapacity);
+      const extraSoftware = softwareWithFullData.slice(softwareLimit, softwareLimit + (remainingCapacity - extraMethods.length));
+      toolsToSend.push(...extraMethods, ...extraSoftware);
    }

-      conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);
-    }
+    const conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);

-    const basePrompt = getPrompt('toolSelection', mode, userQuery, selectionMethod, this.config.maxSelectedItems);
+    const basePrompt = getPrompt('toolSelection', mode, userQuery, this.config.maxSelectedItems);
    const prompt = getPrompt('toolSelectionWithData', basePrompt, toolsToSend, conceptsToSend);

    aiService.validatePromptLength(prompt);
@@ -246,7 +215,7 @@ class ToolSelector {
    );

    try {
-      const response = await aiService.callAI(prompt, { maxTokens: 2500 });
+      const response = await aiService.callAI(prompt, { maxTokens: 32768 });
      const result = JSONParser.safeParseJSON(response.content, null);

      if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
@@ -275,21 +244,19 @@ class ToolSelector {

      console.log('[TOOL-SELECTOR] AI selected:', selectedMethods.length, 'methods,', selectedSoftware.length, 'software,', selectedConcepts.length, 'concepts');

-      const confidence = confidenceScoring.calculateSelectionConfidence(result, candidateTools.length + candidateConcepts.length);
-      
-      return { 
-        selectedTools, 
-        selectedConcepts, 
-        selectionMethod,
-        confidence
-      };
+      const confidence = confidenceScoring.calculateSelectionConfidence(
+        result,
+        candidateTools.length + candidateConcepts.length
+      );

+      return { selectedTools, selectedConcepts, confidence };
    } catch (error) {
      console.error('[TOOL-SELECTOR] AI selection failed:', error);
      throw error;
    }
  }

+
  async selectToolsForPhase(
    userQuery: string,
    phase: any,