fix tool mode ai pipeline logic

2025-08-29 12:27:15 +02:00
parent 4ee1cc4984
commit b14ca1d243
4 changed files with 253 additions and 147 deletions
--- a/.env.example
+++ b/.env.example
@@ -100,17 +100,11 @@ AI_SOFTWARE_SELECTION_RATIO=0.5  # 50% software tools (increase for more tool re
 # AI selection limits
 AI_MAX_SELECTED_ITEMS=25
 AI_MAX_TOOLS_TO_ANALYZE=20
 AI_MAX_CONCEPTS_TO_ANALYZE=10
 # Efficiency thresholds
 AI_EMBEDDINGS_MIN_TOOLS=8
 AI_EMBEDDINGS_MAX_REDUCTION_RATIO=0.75
 # Fallback limits when embeddings are disabled
 AI_NO_EMBEDDINGS_TOOL_LIMIT=25
 AI_NO_EMBEDDINGS_CONCEPT_LIMIT=10
 # === Rate Limiting & Timing ===
 AI_MICRO_TASK_TOTAL_LIMIT=30
 AI_MICRO_TASK_DELAY_MS=500
--- a/src/config/prompts.ts
+++ b/src/config/prompts.ts
@@ -3,14 +3,16 @@
 export const AI_PROMPTS = {
  toolSelection: (mode: string, userQuery: string, maxSelectedItems: number) => {
-    const modeInstruction = mode === 'workflow' 
+    const modeInstruction =
-      ? 'Workflow mit 15-25 Items über alle Phasen. PFLICHT: Mindestens 40% Methoden, Rest Tools/Konzepte.'
+      mode === 'workflow'
-      : 'Spezifische Lösung mit 4-10 Items. PFLICHT: Mindestens 30% Methoden wenn verfügbar.';
+        ? 'Workflow mit 15–25 Items über alle Phasen. PFLICHT: Mindestens 40% Methoden, Rest Tools/Konzepte.'
        : 'Spezifische Lösung mit 4–10 Items. PFLICHT: Mindestens 30% Methoden wenn verfügbar.';
    return `Du bist ein DFIR-Experte. Wähle die BESTEN Items aus dem vorgefilterten Set.
 AUSWAHLMETHODE:
-  '✓ Semantisch relevante Items bereits vorgefiltert\n✓ Wähle die BESTEN für die konkrete Aufgabe'}
+  ✓ Semantisch relevante Items bereits vorgefiltert
  ✓ Wähle die BESTEN für die konkrete Aufgabe
 ${modeInstruction}
@@ -22,28 +24,27 @@ VERFÜGBARE ITEM-TYPEN:
 AUSWAHLSTRATEGIE:
 1. **ERSTE PRIORITÄT: Relevanz zur Anfrage**
-  - Direkt anwendbar auf das Problem
+   - Direkt anwendbar auf das Problem
-  - Löst die Kernherausforderung
+   - Löst die Kernherausforderung
 2. **ZWEITE PRIORITÄT: Ausgewogene Mischung**
-  - Tools/Methoden für praktische Umsetzung → selectedTools
+   - Tools/Methoden für praktische Umsetzung → selectedTools
-  - Konzepte für methodisches Verständnis → selectedConcepts
+   - Konzepte für methodisches Verständnis → selectedConcepts
-  - WICHTIG: Auch Konzepte auswählen, nicht nur Tools!
+   - WICHTIG: Auch Konzepte auswählen, nicht nur Tools!
 3. **QUALITÄT > QUANTITÄT**
-  - Lieber weniger perfekte Items als viele mittelmäßige
+   - Lieber weniger perfekte Items als viele mittelmäßige
-  - Jedes Item muss begründbar sein
+   - Jedes Item muss begründbar sein
-4. **TASK RELEVANCE REALISM**
+4. **(Skalenharmonisierung – Info)**
-  - Gib realistische Bewertungen (50-85% typisch)
+   - Spätere Schritte vergeben "taskRelevance" nach EINHEITLICHER Skala:
-  - Vermeide übertriebene 90-100% Scores
+     55–65 (Basis), 66–75 (gut), 76–85 (sehr gut), >85 nur bei perfekter Übereinstimmung.
  - Nur bei perfekter Übereinstimmung >85%
 AUSWAHLREGELN:
- Wähle ${mode === 'workflow' ? '15-25' : '4-10'} Items total, max ${maxSelectedItems}
+- Wähle ${mode === 'workflow' ? '15–25' : '4–10'} Items total, max ${maxSelectedItems}
 - BEIDE Arrays füllen: selectedTools UND selectedConcepts
- Mindestens 1-2 Konzepte auswählen für methodische Fundierung
+- Mindestens 1–2 Konzepte auswählen für methodische Fundierung
- Tools: 40% Methoden (type="method"), Rest Software (type="software")
+- Tools: ca. 40% Methoden (type="method"), Rest Software (type="software"), wenn verfügbar
 ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
 {
@@ -56,26 +57,26 @@ ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
  toolSelectionWithData: (basePrompt: string, toolsToSend: any[], conceptsToSend: any[]) => {
    return `${basePrompt}
-VERFÜGBARE TOOLS (${toolsToSend.length} Items - Methoden und Software):
+VERFÜGBARE TOOLS (${toolsToSend.length} Items – Methoden und Software):
 ${JSON.stringify(toolsToSend, null, 2)}
-VERFÜGBARE KONZEPTE (${conceptsToSend.length} Items - theoretisches Wissen):
+VERFÜGBARE KONZEPTE (${conceptsToSend.length} Items – theoretisches Wissen):
 ${JSON.stringify(conceptsToSend, null, 2)}
 WICHTIGER HINWEIS: Wähle sowohl aus TOOLS als auch aus KONZEPTEN aus! Konzepte sind essentiell für methodische Fundierung.
-TASK RELEVANCE GUIDELINES:
+(Hinweis zur späteren Relevanz-Skala – EINHEITLICH ÜBER ALLE MODI)
- 50-65%: Grundlegend relevant, aber nicht optimal
+- 55–65: Grundlegend relevant
- 66-75%: Gut geeignet für die Aufgabe
+- 66–75: Gut geeignet
- 76-85%: Sehr gut geeignet, klare Vorteile
+- 76–85: Sehr gut geeignet
- 86-100%: NUR für perfekte Übereinstimmung verwenden`;
+- >85 : NUR bei nahezu perfekter Übereinstimmung`;
  },
  scenarioAnalysis: (isWorkflow: boolean, userQuery: string) => {
    const analysisType = isWorkflow ? 'Szenario' : 'Problem';
-    const focus = isWorkflow ? 
+    const focus = isWorkflow
-      'Angriffsvektoren, betroffene Systeme, Zeitkritikalität' :
+      ? 'Angriffsvektoren, betroffene Systeme, Zeitkritikalität'
-      'Kernherausforderung, verfügbare Daten, methodische Anforderungen';
+      : 'Kernherausforderung, verfügbare Daten, methodische Anforderungen';
    return `DFIR-Experte: Analysiere das ${analysisType}.
@@ -88,9 +89,9 @@ Antwort: Fließtext ohne Listen, max 100 Wörter.`;
  investigationApproach: (isWorkflow: boolean, userQuery: string) => {
    const approachType = isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz';
-    const focus = isWorkflow ?
+    const focus = isWorkflow
-      'Triage-Prioritäten, Phasenabfolge, Kontaminationsvermeidung' :
+      ? 'Triage-Prioritäten, Phasenabfolge, Kontaminationsvermeidung'
-      'Methodenauswahl, Validierung, Integration';
+      : 'Methodenauswahl, Validierung, Integration';
    return `Entwickle einen ${approachType}.
@@ -102,9 +103,9 @@ Antwort: Fließtext ohne Listen, max 100 Wörter.`;
  },
  criticalConsiderations: (isWorkflow: boolean, userQuery: string) => {
-    const focus = isWorkflow ? 
+    const focus = isWorkflow
-      'Beweissicherung vs. Gründlichkeit, Chain of Custody' : 
+      ? 'Beweissicherung vs. Gründlichkeit, Chain of Custody'
-      'Tool-Validierung, False Positives/Negatives, Qualifikationen';
+      : 'Tool-Validierung, False Positives/Negatives, Qualifikationen';
    return `Identifiziere kritische Überlegungen.
@@ -123,7 +124,7 @@ Antwort: Fließtext ohne Listen, max 100 Wörter.`;
      return `Keine Methoden/Tools für Phase "${phase.name}" verfügbar. Antworte mit leerem Array: []`;
    }
-    return `Du bist ein DFIR-Experte. Wähle die 2-3 BESTEN Items für Phase "${phase.name}".
+    return `Du bist ein DFIR-Experte. Wähle die 2–3 BESTEN Items für Phase "${phase.name}".
 SZENARIO: "${userQuery}"
 PHASE: ${phase.name} - ${phase.description || ''}
@@ -152,16 +153,16 @@ ${tools.map((tool: any) =>
 ` : 'Keine Software-Tools verfügbar'}
 AUSWAHLREGELN FÜR PHASE "${phase.name}":
-1. Wähle die 2-3 BESTEN Items für diese spezifische Phase
+1. Wähle die 2–3 BESTEN Items für diese spezifische Phase
 2. Priorisiere Items, die DIREKT für "${phase.name}" relevant sind
 3. Mindestens 1 Methode wenn verfügbar, Rest Software-Tools
 4. Begründe WARUM jedes Item für diese Phase optimal ist
-TASK RELEVANCE GUIDELINES:
+TASK RELEVANCE – EINHEITLICHE SKALA (GANZZAHL 0–100):
- 60-70%: Grundlegend für diese Phase geeignet
+- 55–65: Basis/ausreichend für diese Phase
- 71-80%: Gut geeignet, klare Phasenrelevanz
+- 66–75: Gut geeignet, klare Phasenrelevanz
- 81-90%: Sehr gut geeignet, optimal für Phase
+- 76–85: Sehr gut geeignet, optimaler Fit
- 91-100%: NUR für perfekte Phasenübereinstimmung
+- >85 : Nur bei nahezu perfekter Übereinstimmung
 WICHTIG: Verwende EXAKT die Namen wie oben aufgelistet (ohne Präfixe wie M1./T2.)!
@@ -169,36 +170,44 @@ ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB:
 [
  {
    "toolName": "Exakter Name aus der Liste oben",
-    "taskRelevance": 75,
+    "taskRelevance": 0,
-    "justification": "Detaillierte Begründung (60-80 Wörter) warum optimal für ${phase.name} - erkläre Anwendung, Vorteile und spezifische Relevanz",
+    "justification": "Detaillierte Begründung (60–80 Wörter) warum optimal für ${phase.name} – erkläre Anwendung, Vorteile und spezifische Relevanz",
    "limitations": ["Mögliche Einschränkung für diese Phase"]
  }
 ]`;
  },
-  toolEvaluation: (userQuery: string, tool: any, rank: number, taskRelevance: number) => {
+  toolEvaluation: (userQuery: string, tool: any, rank: number) => {
    const itemType = tool.type === 'method' ? 'Methode' : 'Tool';
-    return `Erkläre die Anwendung dieser/dieses ${itemType}.
+    return `Erkläre die Anwendung dieser/dieses ${itemType} für die Aufgabe.
 PROBLEM: "${userQuery}"
-${itemType.toUpperCase()}: ${tool.name} (${taskRelevance}% Eignung)
+${itemType.toUpperCase()}: ${tool.name}
 TYP: ${tool.type}
-Bereits als Rang ${rank} bewertet.
+ANWEISUNGEN:
 - Beurteile ausschließlich entlang der fachlichen Eignung zum PROBLEM.
 - Nutze nur die dir vorliegenden Texte/Metadaten (keine externen Annahmen).
 - Bestimme eine EIGENE ganzzahlige Bewertung "taskRelevance" von 0–100 (ohne %).
 - Verwende die EINHEITLICHE SKALA:
  55–65 = Basis/ok · 66–75 = gut · 76–85 = sehr gut · >85 = nur bei nahezu perfekter Übereinstimmung.
 - Bevorzuge realistische Scores im Bereich 60–80 für gute Fits.
 Gib nur JSON im folgenden Schema zurück. Keine Texte davor oder danach.
 ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB DER JSON-STRUKTUR:
 {
  "detailed_explanation": "Warum und wie einsetzen",
  "implementation_approach": "Konkrete Schritte",
  "pros": ["Vorteil 1", "Vorteil 2"],
  "limitations": ["Einschränkung 1"],
-  "alternatives": "Alternative Ansätze"
+  "alternatives": "Alternative Ansätze",
  "taskRelevance": 0
 }`;
  },
  backgroundKnowledgeSelection: (userQuery: string, mode: string, selectedToolNames: string[], availableConcepts: any[]) => {
-    return `Wähle 2-4 relevante Konzepte.
+    return `Wähle 2–4 relevante Konzepte.
 ${mode === 'workflow' ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
 AUSGEWÄHLTE TOOLS: ${selectedToolNames.join(', ')}
@@ -253,7 +262,7 @@ Antwort: Prägnanter Fließtext, knappe Begründung für Nachergänzung. Vermeid
    candidateTools: any[],
    candidateConcepts: any[]
  ): string {
-    return `Du bist ein DFIR-Experte. Die initiale KI-Auswahl war zu spezifisch - die Phase "${phase.name}" ist unterrepräsentiert.
+    return `Du bist ein DFIR-Experte. Die initiale KI-Auswahl war zu spezifisch – die Phase "${phase.name}" ist unterrepräsentiert.
 KONTEXT: Die Hauptauswahl hat zu wenige Tools für "${phase.name}" identifiziert. Wähle jetzt ergänzende Tools aus semantischer Nachsuche.
@@ -278,10 +287,10 @@ ${candidateConcepts.map((concept: any) => `
 ` : ''}
 AUSWAHLREGELN FÜR NACHERGÄNZUNG:
-1. Wähle 1-2 BESTE Methoden/Tools die die ${phase.name}-Phase optimal ergänzen
+1. Wähle 1–2 BESTE Methoden/Tools die die ${phase.name}-Phase optimal ergänzen
 2. Methoden/Tools müssen für die ursprüngliche Anfrage relevant sein
-3. Ergänzen, nicht ersetzen - erweitere die zu spezifische Erstauswahl
+3. Ergänzen, nicht ersetzen – erweitere die zu spezifische Erstauswahl
-4. Realistische Task Relevance (70-85% typisch für Nachergänzungen)
+4. Relevanz-Skala weiterhin EINHEITLICH: 55–65/66–75/76–85/>85 nur bei perfekter Übereinstimmung
 ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
 {
@@ -292,9 +301,9 @@ ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
  },
  finalRecommendations: (isWorkflow: boolean, userQuery: string, selectedToolNames: string[]) => {
-    const focus = isWorkflow ? 
+    const focus = isWorkflow
-      'Workflow-Schritte, Best Practices, Objektivität' :
+      ? 'Workflow-Schritte, Best Practices, Objektivität'
-      'Methodische Überlegungen, Validierung, Qualitätssicherung';
+      : 'Methodische Überlegungen, Validierung, Qualitätssicherung';
    return `Erstelle ${isWorkflow ? 'Workflow-Empfehlung' : 'methodische Überlegungen'}.
@@ -313,11 +322,12 @@ export function getPrompt(key: 'scenarioAnalysis', isWorkflow: boolean, userQuer
 export function getPrompt(key: 'investigationApproach', isWorkflow: boolean, userQuery: string): string;
 export function getPrompt(key: 'criticalConsiderations', isWorkflow: boolean, userQuery: string): string;
 export function getPrompt(key: 'phaseToolSelection', userQuery: string, phase: any, phaseTools: any[]): string;
-export function getPrompt(key: 'toolEvaluation', userQuery: string, tool: any, rank: number, taskRelevance: number): string;
+export function getPrompt(key: 'toolEvaluation', userQuery: string, tool: any, rank: number): string;
 export function getPrompt(key: 'backgroundKnowledgeSelection', userQuery: string, mode: string, selectedToolNames: string[], availableConcepts: any[]): string;
 export function getPrompt(key: 'phaseCompletionReasoning', originalQuery: string, phase: any, selectedToolName: string, tool: any, completionContext: string): string;
 export function getPrompt(key: 'finalRecommendations', isWorkflow: boolean, userQuery: string, selectedToolNames: string[]): string;
 export function getPrompt(key: 'generatePhaseCompletionPrompt', originalQuery: string, phase: any, candidateTools: any[], candidateConcepts: any[]): string;
 export function getPrompt(promptKey: keyof typeof AI_PROMPTS, ...args: any[]): string {
  try {
    const promptFunction = AI_PROMPTS[promptKey];
--- a/src/utils/aiPipeline.ts
+++ b/src/utils/aiPipeline.ts
@@ -470,15 +470,42 @@ class AIPipeline {
    pipelineStart: number,
    toolsDataHash: string
  ): Promise<{ completed: number; failed: number }> {
-    const topTools = context.filteredData.tools.slice(0, 3);
+    // Evaluate ALL candidates handed over by the embeddings pre-filter.
    const candidates = context.filteredData.tools || [];
    if (!Array.isArray(candidates) || candidates.length === 0) {
      return { completed: completedTasks, failed: failedTasks };
    }
-    for (let i = 0; i < topTools.length; i++) {
+    // Evaluate every candidate (no slicing here)
-      const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1, pipelineStart, toolsDataHash);
+    for (let i = 0; i < candidates.length; i++) {
      const evaluationResult = await this.evaluateSpecificTool(context, candidates[i], i + 1, pipelineStart, toolsDataHash);
      if (evaluationResult.success) completedTasks++; else failedTasks++;
      this.trackTokenUsage(evaluationResult.aiUsage);
      await this.delay(this.config.microTaskDelay);
    }
    // At this point, context.selectedTools may contain 0..N evaluated items (added by evaluateSpecificTool).
    // Now we sort them by AI-derived taskRelevance (after moderation) and keep ONLY the top 3 for UI.
    if (Array.isArray(context.selectedTools) && context.selectedTools.length > 0) {
      context.selectedTools.sort((a: any, b: any) => {
        const ar = typeof a.taskRelevance === 'number' ? a.taskRelevance : -1;
        const br = typeof b.taskRelevance === 'number' ? b.taskRelevance : -1;
        if (br !== ar) return br - ar;
        // tie-breakers without domain heuristics:
        const aLen = (a.justification || '').length;
        const bLen = (b.justification || '').length;
        if (bLen !== aLen) return bLen - aLen;
        const aRank = a.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
        const bRank = b.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
        return aRank - bRank;
      });
      // Keep top 3 only
      context.selectedTools = context.selectedTools.slice(0, 3);
    }
    return { completed: completedTasks, failed: failedTasks };
  }
@@ -849,68 +876,113 @@ class AIPipeline {
    toolsDataHash: string
  ): Promise<MicroTaskResult> {
    const taskStart = Date.now();
    const existingSelection = context.selectedTools?.find((st: any) => st.tool && st.tool.name === tool.name);
    const originalTaskRelevance = existingSelection?.taskRelevance || 70;
    const moderatedTaskRelevance = this.moderateTaskRelevance(originalTaskRelevance);
    const priority = this.derivePriorityFromScore(moderatedTaskRelevance);
-    const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank, moderatedTaskRelevance);
+    // Build prompt WITHOUT any baseline score
    const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank);
    const result = await this.callMicroTaskAI(prompt, context, 'tool-evaluation');
-    if (result.success) {
+    if (!result.success) {
-      const evaluation = JSONParser.safeParseJSON(result.content, {
+      return result;
-        detailed_explanation: 'Evaluation failed',
+    }
        implementation_approach: '',
        pros: [],
        limitations: [],
        alternatives: ''
      });
-      this.addToolToSelection(context, {
+    // Parse strictly; do NOT provide a default with a score.
-        ...tool,
+    const evaluation = JSONParser.safeParseJSON(result.content, null);
        evaluation: {
          ...evaluation,
          rank,
          task_relevance: moderatedTaskRelevance
        }
      }, 'evaluation', priority, evaluation.detailed_explanation, moderatedTaskRelevance, evaluation.limitations);
-      const responseConfidence = auditService.calculateAIResponseConfidence(
+    // Require a numeric score produced by the model; otherwise, don't add this tool.
-        result.content,
+    const aiProvided = evaluation && typeof evaluation.taskRelevance === 'number' && Number.isFinite(evaluation.taskRelevance)
-        { min: 200, max: 800 },
+      ? Math.round(evaluation.taskRelevance)
-        'tool-evaluation'
+      : null;
      );
      const finalConfidence = Math.max(responseConfidence, moderatedTaskRelevance);
    if (aiProvided === null) {
      // Log the malformed output but avoid injecting a synthetic score.
      auditService.addAIDecision(
        'tool-evaluation',
        prompt,
        result.content,
-        finalConfidence,
+        0,
-        `Bewertete Tool "${tool.name}" (Rang ${rank}) - Analysierte Eignung für spezifische Aufgabenstellung mit Fokus auf praktische Anwendbarkeit und methodische Integration`,
+        `Bewertung für "${tool.name}" ignoriert: fehlender/ungültiger taskRelevance`,
        taskStart,
        {
-          toolsDataHash: toolsDataHash,
+          toolsDataHash,
          microTaskType: 'tool-evaluation',
          toolName: tool.name,
          toolType: tool.type,
          rank,
-          originalTaskRelevance,
+          evaluationParsed: false,
          moderatedTaskRelevance,
          responseConfidence,
          finalConfidence,
          moderationApplied: originalTaskRelevance !== moderatedTaskRelevance,
          evaluationParsed: !!evaluation.detailed_explanation,
          prosCount: evaluation.pros?.length || 0,
          limitationsCount: evaluation.limitations?.length || 0,
          decisionBasis: 'ai-analysis',
          aiModel: aiService.getConfig().model,
-          ...result.aiUsage
+          ...(result.aiUsage || {})
        }
      );
      return result;
    }
    const moderatedTaskRelevance = this.moderateTaskRelevance(aiProvided);
    const priority = this.derivePriorityFromScore(moderatedTaskRelevance);
    // Keep original fields if present; coerce to strings/arrays safely.
    const detailed_explanation = String(evaluation?.detailed_explanation || '').trim();
    const implementation_approach = String(evaluation?.implementation_approach || '').trim();
    const pros = Array.isArray(evaluation?.pros) ? evaluation.pros : [];
    const limitations = Array.isArray(evaluation?.limitations) ? evaluation.limitations : [];
    const alternatives = String(evaluation?.alternatives || '').trim();
    this.addToolToSelection(
      context,
      {
        ...tool,
        evaluation: {
          detailed_explanation,
          implementation_approach,
          pros,
          limitations,
          alternatives,
          rank,
          task_relevance: moderatedTaskRelevance
        }
      },
      'evaluation',
      priority,
      detailed_explanation,
      moderatedTaskRelevance,
      limitations
    );
    const responseConfidence = auditService.calculateAIResponseConfidence(
      result.content,
      { min: 200, max: 800 },
      'tool-evaluation'
    );
    const finalConfidence = Math.max(responseConfidence, moderatedTaskRelevance);
    auditService.addAIDecision(
      'tool-evaluation',
      prompt,
      result.content,
      finalConfidence,
      `Bewertete Tool "${tool.name}" (Rang ${rank}) – AI-Score ${aiProvided}, moderiert ${moderatedTaskRelevance}`,
      taskStart,
      {
        toolsDataHash,
        microTaskType: 'tool-evaluation',
        toolName: tool.name,
        toolType: tool.type,
        rank,
        aiProvidedTaskRelevance: aiProvided,
        moderatedTaskRelevance,
        responseConfidence,
        finalConfidence,
        moderationApplied: aiProvided !== moderatedTaskRelevance,
        evaluationParsed: true,
        prosCount: pros.length,
        limitationsCount: limitations.length,
        decisionBasis: 'ai-analysis',
        aiModel: aiService.getConfig().model,
        ...(result.aiUsage || {})
      }
    );
    return result;
  }
--- a/src/utils/toolSelector.ts
+++ b/src/utils/toolSelector.ts
@@ -12,14 +12,13 @@ export interface ToolSelectionConfig {
  similarityThreshold: number;
  embeddingSelectionLimit: number;
  embeddingConceptsLimit: number;
  noEmbeddingsToolLimit: number;
  noEmbeddingsConceptLimit: number;
  embeddingsMinTools: number;
  embeddingsMaxReductionRatio: number;
  methodSelectionRatio: number;
  softwareSelectionRatio: number;
 }
 export interface SelectionContext {
  userQuery: string;
  mode: string;
@@ -51,14 +50,11 @@ class ToolSelector {
      similarityThreshold: this.getEnvFloat('AI_SIMILARITY_THRESHOLD', 0.3),
      embeddingSelectionLimit: this.getEnvInt('AI_EMBEDDING_SELECTION_LIMIT', 30),
      embeddingConceptsLimit: this.getEnvInt('AI_EMBEDDING_CONCEPTS_LIMIT', 15),
      noEmbeddingsToolLimit: this.getEnvInt('AI_NO_EMBEDDINGS_TOOL_LIMIT', 25),
      noEmbeddingsConceptLimit: this.getEnvInt('AI_NO_EMBEDDINGS_CONCEPT_LIMIT', 10),
      embeddingsMinTools: this.getEnvInt('AI_EMBEDDINGS_MIN_TOOLS', 8),
      embeddingsMaxReductionRatio: this.getEnvFloat('AI_EMBEDDINGS_MAX_REDUCTION_RATIO', 0.75),
      methodSelectionRatio: this.getEnvFloat('AI_METHOD_SELECTION_RATIO', 0.4),
-      softwareSelectionRatio: this.getEnvFloat('AI_SOFTWARE_SELECTION_RATIO', 0.5)
+      softwareSelectionRatio: this.getEnvFloat('AI_SOFTWARE_SELECTION_RATIO', 0.5),
    };
    console.log('[TOOL-SELECTOR] Initialized with config:', this.config);
  }
@@ -185,43 +181,73 @@ class ToolSelector {
  ): Promise<ToolSelectionResult> {
    console.log('[TOOL-SELECTOR] Performing AI selection');
-    const candidateMethods = candidateTools.filter((tool: any) => tool && tool.type === 'method');
+    const candidateMethods = candidateTools.filter((t: any) => t && t.type === 'method');
-    const candidateSoftware = candidateTools.filter((tool: any) => tool && tool.type === 'software');
+    const candidateSoftware = candidateTools.filter((t: any) => t && t.type === 'software');
-    console.log('[TOOL-SELECTOR] Candidates:', candidateMethods.length, 'methods,', candidateSoftware.length, 'software,', candidateConcepts.length, 'concepts');
+    console.log('[TOOL-SELECTOR] Candidates:',
      candidateMethods.length, 'methods,',
      candidateSoftware.length, 'software,',
      candidateConcepts.length, 'concepts'
    );
    const methodsWithFullData = candidateMethods.map(this.createToolData);
    const softwareWithFullData = candidateSoftware.map(this.createToolData);
    const conceptsWithFullData = candidateConcepts.map(this.createConceptData);
-    const maxTools = Math.min(this.config.embeddingSelectionLimit, this.config.noEmbeddingsToolLimit);
+    // Embeddings are always ON → only use embedding limits
-    const maxConcepts = Math.min(this.config.embeddingConceptsLimit, this.config.noEmbeddingsConceptLimit);
+    const maxTools = Math.min(this.config.embeddingSelectionLimit, candidateTools.length);
-    const methodLimit = Math.ceil(maxTools * this.config.methodSelectionRatio);
+    const maxConcepts = Math.min(this.config.embeddingConceptsLimit, candidateConcepts.length);
    const softwareLimit = Math.floor(maxTools * this.config.softwareSelectionRatio);
-    const toolsToSend: any[] = [
+    // Respect ratios first, then fill the remaining capacity
-      ...methodsWithFullData.slice(0, methodLimit),
+    const methodRatio = Math.max(0, Math.min(1, this.config.methodSelectionRatio));
-      ...softwareWithFullData.slice(0, softwareLimit),
+    const softwareRatio = Math.max(0, Math.min(1, this.config.softwareSelectionRatio));
    ];
-    const remainingCapacity = maxTools - toolsToSend.length;
+    let methodLimit = Math.round(maxTools * methodRatio);
-    if (remainingCapacity > 0) {
+    let softwareLimit = Math.round(maxTools * softwareRatio);
-      const extraMethods = methodsWithFullData.slice(methodLimit, methodLimit + remainingCapacity);
+
-      const extraSoftware = softwareWithFullData.slice(softwareLimit, softwareLimit + (remainingCapacity - extraMethods.length));
+    // If rounded sum exceeds maxTools, scale down proportionally
-      toolsToSend.push(...extraMethods, ...extraSoftware);
+    if (methodLimit + softwareLimit > maxTools) {
      const scale = maxTools / (methodLimit + softwareLimit);
      methodLimit = Math.floor(methodLimit * scale);
      softwareLimit = Math.floor(softwareLimit * scale);
    }
    const methodsPrimary = methodsWithFullData.slice(0, methodLimit);
    const softwarePrimary = softwareWithFullData.slice(0, softwareLimit);
    const toolsToSend: any[] = [...methodsPrimary, ...softwarePrimary];
    // Fill any remaining capacity from whichever pool still has candidates
    let mIdx = methodsPrimary.length;
    let sIdx = softwarePrimary.length;
    while (toolsToSend.length < maxTools && (mIdx < methodsWithFullData.length || sIdx < softwareWithFullData.length)) {
      const remM = methodsWithFullData.length - mIdx;
      const remS = softwareWithFullData.length - sIdx;
      if (remS >= remM && sIdx < softwareWithFullData.length) {
        toolsToSend.push(softwareWithFullData[sIdx++]);
      } else if (mIdx < methodsWithFullData.length) {
        toolsToSend.push(methodsWithFullData[mIdx++]);
      } else if (sIdx < softwareWithFullData.length) {
        toolsToSend.push(softwareWithFullData[sIdx++]);
      } else {
        break;
      }
    }
    const conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);
-    const basePrompt = getPrompt('toolSelection', mode, userQuery, this.config.maxSelectedItems);
+    console.log('[TOOL-SELECTOR-DEBUG] maxTools:', maxTools, 'maxConcepts:', maxConcepts);
    const prompt = getPrompt('toolSelectionWithData', basePrompt, toolsToSend, conceptsToSend);
    console.log('[TOOL-SELECTOR] Sending to AI:',
      toolsToSend.filter((t: any) => t.type === 'method').length, 'methods,',
      toolsToSend.filter((t: any) => t.type === 'software').length, 'software,',
      conceptsToSend.length, 'concepts'
    );
    const basePrompt = getPrompt('toolSelection', mode, userQuery, this.config.maxSelectedItems);
    const prompt = getPrompt('toolSelectionWithData', basePrompt, toolsToSend, conceptsToSend);
    try {
      const response = await aiService.callAI(prompt);
      const result = JSONParser.safeParseJSON(response.content, null);
@@ -250,7 +276,11 @@ class ToolSelector {
      const selectedMethods = selectedTools.filter((t: any) => t && t.type === 'method');
      const selectedSoftware = selectedTools.filter((t: any) => t && t.type === 'software');
-      console.log('[TOOL-SELECTOR] AI selected:', selectedMethods.length, 'methods,', selectedSoftware.length, 'software,', selectedConcepts.length, 'concepts');
+      console.log('[TOOL-SELECTOR] AI selected:',
        selectedMethods.length, 'methods,',
        selectedSoftware.length, 'software,',
        selectedConcepts.length, 'concepts'
      );
      const confidence = confidenceScoring.calculateSelectionConfidence(
        result,