Fix tool-mode AI pipeline logic

2025-08-29 12:27:15 +02:00
parent 4ee1cc4984
commit b14ca1d243
4 changed files with 253 additions and 147 deletions
--- a/.env.example
+++ b/.env.example
@@ -100,17 +100,11 @@ AI_SOFTWARE_SELECTION_RATIO=0.5  # 50% software tools (increase for more tool re

 # AI selection limits
 AI_MAX_SELECTED_ITEMS=25
-AI_MAX_TOOLS_TO_ANALYZE=20
-AI_MAX_CONCEPTS_TO_ANALYZE=10

 # Efficiency thresholds
 AI_EMBEDDINGS_MIN_TOOLS=8
 AI_EMBEDDINGS_MAX_REDUCTION_RATIO=0.75

-# Fallback limits when embeddings are disabled
-AI_NO_EMBEDDINGS_TOOL_LIMIT=25
-AI_NO_EMBEDDINGS_CONCEPT_LIMIT=10
-
 # === Rate Limiting & Timing ===
 AI_MICRO_TASK_TOTAL_LIMIT=30
 AI_MICRO_TASK_DELAY_MS=500
--- a/src/config/prompts.ts
+++ b/src/config/prompts.ts
@@ -3,14 +3,16 @@
 export const AI_PROMPTS = {
  
  toolSelection: (mode: string, userQuery: string, maxSelectedItems: number) => {
-    const modeInstruction = mode === 'workflow' 
-      ? 'Workflow mit 15-25 Items über alle Phasen. PFLICHT: Mindestens 40% Methoden, Rest Tools/Konzepte.'
-      : 'Spezifische Lösung mit 4-10 Items. PFLICHT: Mindestens 30% Methoden wenn verfügbar.';
+    const modeInstruction =
+      mode === 'workflow'
+        ? 'Workflow mit 15–25 Items über alle Phasen. PFLICHT: Mindestens 40% Methoden, Rest Tools/Konzepte.'
+        : 'Spezifische Lösung mit 4–10 Items. PFLICHT: Mindestens 30% Methoden wenn verfügbar.';

    return `Du bist ein DFIR-Experte. Wähle die BESTEN Items aus dem vorgefilterten Set.

 AUSWAHLMETHODE:
-  '✓ Semantisch relevante Items bereits vorgefiltert\n✓ Wähle die BESTEN für die konkrete Aufgabe'}
+  ✓ Semantisch relevante Items bereits vorgefiltert
+  ✓ Wähle die BESTEN für die konkrete Aufgabe

 ${modeInstruction}

@@ -22,28 +24,27 @@ VERFÜGBARE ITEM-TYPEN:

 AUSWAHLSTRATEGIE:
 1. **ERSTE PRIORITÄT: Relevanz zur Anfrage**
-  - Direkt anwendbar auf das Problem
-  - Löst die Kernherausforderung
+   - Direkt anwendbar auf das Problem
+   - Löst die Kernherausforderung

 2. **ZWEITE PRIORITÄT: Ausgewogene Mischung**
-  - Tools/Methoden für praktische Umsetzung → selectedTools
-  - Konzepte für methodisches Verständnis → selectedConcepts
-  - WICHTIG: Auch Konzepte auswählen, nicht nur Tools!
+   - Tools/Methoden für praktische Umsetzung → selectedTools
+   - Konzepte für methodisches Verständnis → selectedConcepts
+   - WICHTIG: Auch Konzepte auswählen, nicht nur Tools!

 3. **QUALITÄT > QUANTITÄT**
-  - Lieber weniger perfekte Items als viele mittelmäßige
-  - Jedes Item muss begründbar sein
+   - Lieber weniger perfekte Items als viele mittelmäßige
+   - Jedes Item muss begründbar sein

-4. **TASK RELEVANCE REALISM**
-  - Gib realistische Bewertungen (50-85% typisch)
-  - Vermeide übertriebene 90-100% Scores
-  - Nur bei perfekter Übereinstimmung >85%
+4. **(Skalenharmonisierung – Info)**
+   - Spätere Schritte vergeben "taskRelevance" nach EINHEITLICHER Skala:
+     55–65 (Basis), 66–75 (gut), 76–85 (sehr gut), >85 nur bei perfekter Übereinstimmung.

 AUSWAHLREGELN:
- Wähle ${mode === 'workflow' ? '15-25' : '4-10'} Items total, max ${maxSelectedItems}
+- Wähle ${mode === 'workflow' ? '15–25' : '4–10'} Items total, max ${maxSelectedItems}
 - BEIDE Arrays füllen: selectedTools UND selectedConcepts
- Mindestens 1-2 Konzepte auswählen für methodische Fundierung
- Tools: 40% Methoden (type="method"), Rest Software (type="software")
+- Mindestens 1–2 Konzepte auswählen für methodische Fundierung
+- Tools: ca. 40% Methoden (type="method"), Rest Software (type="software"), wenn verfügbar

 ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
 {
@@ -56,26 +57,26 @@ ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
  toolSelectionWithData: (basePrompt: string, toolsToSend: any[], conceptsToSend: any[]) => {
    return `${basePrompt}

-VERFÜGBARE TOOLS (${toolsToSend.length} Items - Methoden und Software):
+VERFÜGBARE TOOLS (${toolsToSend.length} Items – Methoden und Software):
 ${JSON.stringify(toolsToSend, null, 2)}

-VERFÜGBARE KONZEPTE (${conceptsToSend.length} Items - theoretisches Wissen):
+VERFÜGBARE KONZEPTE (${conceptsToSend.length} Items – theoretisches Wissen):
 ${JSON.stringify(conceptsToSend, null, 2)}

 WICHTIGER HINWEIS: Wähle sowohl aus TOOLS als auch aus KONZEPTEN aus! Konzepte sind essentiell für methodische Fundierung.

-TASK RELEVANCE GUIDELINES:
- 50-65%: Grundlegend relevant, aber nicht optimal
- 66-75%: Gut geeignet für die Aufgabe
- 76-85%: Sehr gut geeignet, klare Vorteile
- 86-100%: NUR für perfekte Übereinstimmung verwenden`;
+(Hinweis zur späteren Relevanz-Skala – EINHEITLICH ÜBER ALLE MODI)
+- 55–65: Grundlegend relevant
+- 66–75: Gut geeignet
+- 76–85: Sehr gut geeignet
+- >85 : NUR bei nahezu perfekter Übereinstimmung`;
  },

  scenarioAnalysis: (isWorkflow: boolean, userQuery: string) => {
    const analysisType = isWorkflow ? 'Szenario' : 'Problem';
-    const focus = isWorkflow ? 
-      'Angriffsvektoren, betroffene Systeme, Zeitkritikalität' :
-      'Kernherausforderung, verfügbare Daten, methodische Anforderungen';
+    const focus = isWorkflow
+      ? 'Angriffsvektoren, betroffene Systeme, Zeitkritikalität'
+      : 'Kernherausforderung, verfügbare Daten, methodische Anforderungen';

    return `DFIR-Experte: Analysiere das ${analysisType}.

@@ -88,9 +89,9 @@ Antwort: Fließtext ohne Listen, max 100 Wörter.`;

  investigationApproach: (isWorkflow: boolean, userQuery: string) => {
    const approachType = isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz';
-    const focus = isWorkflow ?
-      'Triage-Prioritäten, Phasenabfolge, Kontaminationsvermeidung' :
-      'Methodenauswahl, Validierung, Integration';
+    const focus = isWorkflow
+      ? 'Triage-Prioritäten, Phasenabfolge, Kontaminationsvermeidung'
+      : 'Methodenauswahl, Validierung, Integration';

    return `Entwickle einen ${approachType}.

@@ -102,9 +103,9 @@ Antwort: Fließtext ohne Listen, max 100 Wörter.`;
  },

  criticalConsiderations: (isWorkflow: boolean, userQuery: string) => {
-    const focus = isWorkflow ? 
-      'Beweissicherung vs. Gründlichkeit, Chain of Custody' : 
-      'Tool-Validierung, False Positives/Negatives, Qualifikationen';
+    const focus = isWorkflow
+      ? 'Beweissicherung vs. Gründlichkeit, Chain of Custody'
+      : 'Tool-Validierung, False Positives/Negatives, Qualifikationen';

    return `Identifiziere kritische Überlegungen.

@@ -123,7 +124,7 @@ Antwort: Fließtext ohne Listen, max 100 Wörter.`;
      return `Keine Methoden/Tools für Phase "${phase.name}" verfügbar. Antworte mit leerem Array: []`;
    }

-    return `Du bist ein DFIR-Experte. Wähle die 2-3 BESTEN Items für Phase "${phase.name}".
+    return `Du bist ein DFIR-Experte. Wähle die 2–3 BESTEN Items für Phase "${phase.name}".

 SZENARIO: "${userQuery}"
 PHASE: ${phase.name} - ${phase.description || ''}
@@ -152,16 +153,16 @@ ${tools.map((tool: any) =>
 ` : 'Keine Software-Tools verfügbar'}

 AUSWAHLREGELN FÜR PHASE "${phase.name}":
-1. Wähle die 2-3 BESTEN Items für diese spezifische Phase
+1. Wähle die 2–3 BESTEN Items für diese spezifische Phase
 2. Priorisiere Items, die DIREKT für "${phase.name}" relevant sind
 3. Mindestens 1 Methode wenn verfügbar, Rest Software-Tools
 4. Begründe WARUM jedes Item für diese Phase optimal ist

-TASK RELEVANCE GUIDELINES:
- 60-70%: Grundlegend für diese Phase geeignet
- 71-80%: Gut geeignet, klare Phasenrelevanz
- 81-90%: Sehr gut geeignet, optimal für Phase
- 91-100%: NUR für perfekte Phasenübereinstimmung
+TASK RELEVANCE – EINHEITLICHE SKALA (GANZZAHL 0–100):
+- 55–65: Basis/ausreichend für diese Phase
+- 66–75: Gut geeignet, klare Phasenrelevanz
+- 76–85: Sehr gut geeignet, optimaler Fit
+- >85 : Nur bei nahezu perfekter Übereinstimmung

 WICHTIG: Verwende EXAKT die Namen wie oben aufgelistet (ohne Präfixe wie M1./T2.)!

@@ -169,36 +170,44 @@ ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB:
 [
  {
    "toolName": "Exakter Name aus der Liste oben",
-    "taskRelevance": 75,
-    "justification": "Detaillierte Begründung (60-80 Wörter) warum optimal für ${phase.name} - erkläre Anwendung, Vorteile und spezifische Relevanz",
+    "taskRelevance": 0,
+    "justification": "Detaillierte Begründung (60–80 Wörter) warum optimal für ${phase.name} – erkläre Anwendung, Vorteile und spezifische Relevanz",
    "limitations": ["Mögliche Einschränkung für diese Phase"]
  }
 ]`;
  },

-  toolEvaluation: (userQuery: string, tool: any, rank: number, taskRelevance: number) => {
+  toolEvaluation: (userQuery: string, tool: any, rank: number) => {
    const itemType = tool.type === 'method' ? 'Methode' : 'Tool';

-    return `Erkläre die Anwendung dieser/dieses ${itemType}.
+    return `Erkläre die Anwendung dieser/dieses ${itemType} für die Aufgabe.

 PROBLEM: "${userQuery}"
-${itemType.toUpperCase()}: ${tool.name} (${taskRelevance}% Eignung)
+${itemType.toUpperCase()}: ${tool.name}
 TYP: ${tool.type}

-Bereits als Rang ${rank} bewertet.
+ANWEISUNGEN:
+- Beurteile ausschließlich entlang der fachlichen Eignung zum PROBLEM.
+- Nutze nur die dir vorliegenden Texte/Metadaten (keine externen Annahmen).
+- Bestimme eine EIGENE ganzzahlige Bewertung "taskRelevance" von 0–100 (ohne %).
+- Verwende die EINHEITLICHE SKALA:
+  55–65 = Basis/ok · 66–75 = gut · 76–85 = sehr gut · >85 = nur bei nahezu perfekter Übereinstimmung.
+- Bevorzuge realistische Scores im Bereich 60–80 für gute Fits.
+
+Gib nur JSON im folgenden Schema zurück. Keine Texte davor oder danach.

-ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB DER JSON-STRUKTUR:
 {
  "detailed_explanation": "Warum und wie einsetzen",
  "implementation_approach": "Konkrete Schritte",
  "pros": ["Vorteil 1", "Vorteil 2"],
  "limitations": ["Einschränkung 1"],
-  "alternatives": "Alternative Ansätze"
+  "alternatives": "Alternative Ansätze",
+  "taskRelevance": 0
 }`;
  },

  backgroundKnowledgeSelection: (userQuery: string, mode: string, selectedToolNames: string[], availableConcepts: any[]) => {
-    return `Wähle 2-4 relevante Konzepte.
+    return `Wähle 2–4 relevante Konzepte.

 ${mode === 'workflow' ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
 AUSGEWÄHLTE TOOLS: ${selectedToolNames.join(', ')}
@@ -253,7 +262,7 @@ Antwort: Prägnanter Fließtext, knappe Begründung für Nachergänzung. Vermeid
    candidateTools: any[],
    candidateConcepts: any[]
  ): string {
-    return `Du bist ein DFIR-Experte. Die initiale KI-Auswahl war zu spezifisch - die Phase "${phase.name}" ist unterrepräsentiert.
+    return `Du bist ein DFIR-Experte. Die initiale KI-Auswahl war zu spezifisch – die Phase "${phase.name}" ist unterrepräsentiert.

 KONTEXT: Die Hauptauswahl hat zu wenige Tools für "${phase.name}" identifiziert. Wähle jetzt ergänzende Tools aus semantischer Nachsuche.

@@ -278,10 +287,10 @@ ${candidateConcepts.map((concept: any) => `
 ` : ''}

 AUSWAHLREGELN FÜR NACHERGÄNZUNG:
-1. Wähle 1-2 BESTE Methoden/Tools die die ${phase.name}-Phase optimal ergänzen
+1. Wähle 1–2 BESTE Methoden/Tools die die ${phase.name}-Phase optimal ergänzen
 2. Methoden/Tools müssen für die ursprüngliche Anfrage relevant sein
-3. Ergänzen, nicht ersetzen - erweitere die zu spezifische Erstauswahl
-4. Realistische Task Relevance (70-85% typisch für Nachergänzungen)
+3. Ergänzen, nicht ersetzen – erweitere die zu spezifische Erstauswahl
+4. Relevanz-Skala weiterhin EINHEITLICH: 55–65/66–75/76–85/>85 nur bei perfekter Übereinstimmung

 ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
 {
@@ -292,9 +301,9 @@ ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
  },

  finalRecommendations: (isWorkflow: boolean, userQuery: string, selectedToolNames: string[]) => {
-    const focus = isWorkflow ? 
-      'Workflow-Schritte, Best Practices, Objektivität' :
-      'Methodische Überlegungen, Validierung, Qualitätssicherung';
+    const focus = isWorkflow
+      ? 'Workflow-Schritte, Best Practices, Objektivität'
+      : 'Methodische Überlegungen, Validierung, Qualitätssicherung';

    return `Erstelle ${isWorkflow ? 'Workflow-Empfehlung' : 'methodische Überlegungen'}.

@@ -313,11 +322,12 @@ export function getPrompt(key: 'scenarioAnalysis', isWorkflow: boolean, userQuer
 export function getPrompt(key: 'investigationApproach', isWorkflow: boolean, userQuery: string): string;
 export function getPrompt(key: 'criticalConsiderations', isWorkflow: boolean, userQuery: string): string;
 export function getPrompt(key: 'phaseToolSelection', userQuery: string, phase: any, phaseTools: any[]): string;
-export function getPrompt(key: 'toolEvaluation', userQuery: string, tool: any, rank: number, taskRelevance: number): string;
+export function getPrompt(key: 'toolEvaluation', userQuery: string, tool: any, rank: number): string;
 export function getPrompt(key: 'backgroundKnowledgeSelection', userQuery: string, mode: string, selectedToolNames: string[], availableConcepts: any[]): string;
 export function getPrompt(key: 'phaseCompletionReasoning', originalQuery: string, phase: any, selectedToolName: string, tool: any, completionContext: string): string;
 export function getPrompt(key: 'finalRecommendations', isWorkflow: boolean, userQuery: string, selectedToolNames: string[]): string;
 export function getPrompt(key: 'generatePhaseCompletionPrompt', originalQuery: string, phase: any, candidateTools: any[], candidateConcepts: any[]): string;
+
 export function getPrompt(promptKey: keyof typeof AI_PROMPTS, ...args: any[]): string {
  try {
    const promptFunction = AI_PROMPTS[promptKey];
--- a/src/utils/aiPipeline.ts
+++ b/src/utils/aiPipeline.ts
@@ -470,15 +470,42 @@ class AIPipeline {
    pipelineStart: number,
    toolsDataHash: string
  ): Promise<{ completed: number; failed: number }> {
-    const topTools = context.filteredData.tools.slice(0, 3);
+    // Evaluate ALL candidates handed over by the embeddings pre-filter.
+    const candidates = context.filteredData.tools || [];
+    if (!Array.isArray(candidates) || candidates.length === 0) {
+      return { completed: completedTasks, failed: failedTasks };
+    }

-    for (let i = 0; i < topTools.length; i++) {
-      const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1, pipelineStart, toolsDataHash);
+    // Evaluate every candidate (no slicing here)
+    for (let i = 0; i < candidates.length; i++) {
+      const evaluationResult = await this.evaluateSpecificTool(context, candidates[i], i + 1, pipelineStart, toolsDataHash);
      if (evaluationResult.success) completedTasks++; else failedTasks++;
      this.trackTokenUsage(evaluationResult.aiUsage);
      await this.delay(this.config.microTaskDelay);
    }

+    // At this point, context.selectedTools may contain 0..N evaluated items (added by evaluateSpecificTool).
+    // Now we sort them by AI-derived taskRelevance (after moderation) and keep ONLY the top 3 for UI.
+    if (Array.isArray(context.selectedTools) && context.selectedTools.length > 0) {
+      context.selectedTools.sort((a: any, b: any) => {
+        const ar = typeof a.taskRelevance === 'number' ? a.taskRelevance : -1;
+        const br = typeof b.taskRelevance === 'number' ? b.taskRelevance : -1;
+        if (br !== ar) return br - ar;
+
+        // tie-breakers without domain heuristics:
+        const aLen = (a.justification || '').length;
+        const bLen = (b.justification || '').length;
+        if (bLen !== aLen) return bLen - aLen;
+
+        const aRank = a.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
+        const bRank = b.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
+        return aRank - bRank;
+      });
+
+      // Keep top 3 only
+      context.selectedTools = context.selectedTools.slice(0, 3);
+    }
+
    return { completed: completedTasks, failed: failedTasks };
  }

@@ -849,68 +876,113 @@ class AIPipeline {
    toolsDataHash: string
  ): Promise<MicroTaskResult> {
    const taskStart = Date.now();
-    const existingSelection = context.selectedTools?.find((st: any) => st.tool && st.tool.name === tool.name);
-    const originalTaskRelevance = existingSelection?.taskRelevance || 70;
-    const moderatedTaskRelevance = this.moderateTaskRelevance(originalTaskRelevance);
-    const priority = this.derivePriorityFromScore(moderatedTaskRelevance);

-    const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank, moderatedTaskRelevance);
+    // Build prompt WITHOUT any baseline score
+    const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank);
    const result = await this.callMicroTaskAI(prompt, context, 'tool-evaluation');

-    if (result.success) {
-      const evaluation = JSONParser.safeParseJSON(result.content, {
-        detailed_explanation: 'Evaluation failed',
-        implementation_approach: '',
-        pros: [],
-        limitations: [],
-        alternatives: ''
-      });
+    if (!result.success) {
+      return result;
+    }

-      this.addToolToSelection(context, {
-        ...tool,
-        evaluation: {
-          ...evaluation,
-          rank,
-          task_relevance: moderatedTaskRelevance
-        }
-      }, 'evaluation', priority, evaluation.detailed_explanation, moderatedTaskRelevance, evaluation.limitations);
+    // Parse strictly; do NOT provide a default with a score.
+    const evaluation = JSONParser.safeParseJSON(result.content, null);

-      const responseConfidence = auditService.calculateAIResponseConfidence(
-        result.content,
-        { min: 200, max: 800 },
-        'tool-evaluation'
-      );
-      
-      const finalConfidence = Math.max(responseConfidence, moderatedTaskRelevance);
+    // Require a numeric score produced by the model; otherwise, don't add this tool.
+    const aiProvided = evaluation && typeof evaluation.taskRelevance === 'number' && Number.isFinite(evaluation.taskRelevance)
+      ? Math.round(evaluation.taskRelevance)
+      : null;

+    if (aiProvided === null) {
+      // Log the malformed output but avoid injecting a synthetic score.
      auditService.addAIDecision(
        'tool-evaluation',
        prompt,
        result.content,
-        finalConfidence,
-        `Bewertete Tool "${tool.name}" (Rang ${rank}) - Analysierte Eignung für spezifische Aufgabenstellung mit Fokus auf praktische Anwendbarkeit und methodische Integration`,
+        0,
+        `Bewertung für "${tool.name}" ignoriert: fehlender/ungültiger taskRelevance`,
        taskStart,
        {
-          toolsDataHash: toolsDataHash,
+          toolsDataHash,
          microTaskType: 'tool-evaluation',
          toolName: tool.name,
          toolType: tool.type,
          rank,
-          originalTaskRelevance,
-          moderatedTaskRelevance,
-          responseConfidence,
-          finalConfidence,
-          moderationApplied: originalTaskRelevance !== moderatedTaskRelevance,
-          evaluationParsed: !!evaluation.detailed_explanation,
-          prosCount: evaluation.pros?.length || 0,
-          limitationsCount: evaluation.limitations?.length || 0,
+          evaluationParsed: false,
          decisionBasis: 'ai-analysis',
          aiModel: aiService.getConfig().model,
-          ...result.aiUsage
+          ...(result.aiUsage || {})
        }
      );
+      return result;
    }

+    const moderatedTaskRelevance = this.moderateTaskRelevance(aiProvided);
+    const priority = this.derivePriorityFromScore(moderatedTaskRelevance);
+
+    // Keep original fields if present; coerce to strings/arrays safely.
+    const detailed_explanation = String(evaluation?.detailed_explanation || '').trim();
+    const implementation_approach = String(evaluation?.implementation_approach || '').trim();
+    const pros = Array.isArray(evaluation?.pros) ? evaluation.pros : [];
+    const limitations = Array.isArray(evaluation?.limitations) ? evaluation.limitations : [];
+    const alternatives = String(evaluation?.alternatives || '').trim();
+
+    this.addToolToSelection(
+      context,
+      {
+        ...tool,
+        evaluation: {
+          detailed_explanation,
+          implementation_approach,
+          pros,
+          limitations,
+          alternatives,
+          rank,
+          task_relevance: moderatedTaskRelevance
+        }
+      },
+      'evaluation',
+      priority,
+      detailed_explanation,
+      moderatedTaskRelevance,
+      limitations
+    );
+
+    const responseConfidence = auditService.calculateAIResponseConfidence(
+      result.content,
+      { min: 200, max: 800 },
+      'tool-evaluation'
+    );
+
+    const finalConfidence = Math.max(responseConfidence, moderatedTaskRelevance);
+
+    auditService.addAIDecision(
+      'tool-evaluation',
+      prompt,
+      result.content,
+      finalConfidence,
+      `Bewertete Tool "${tool.name}" (Rang ${rank}) – AI-Score ${aiProvided}, moderiert ${moderatedTaskRelevance}`,
+      taskStart,
+      {
+        toolsDataHash,
+        microTaskType: 'tool-evaluation',
+        toolName: tool.name,
+        toolType: tool.type,
+        rank,
+        aiProvidedTaskRelevance: aiProvided,
+        moderatedTaskRelevance,
+        responseConfidence,
+        finalConfidence,
+        moderationApplied: aiProvided !== moderatedTaskRelevance,
+        evaluationParsed: true,
+        prosCount: pros.length,
+        limitationsCount: limitations.length,
+        decisionBasis: 'ai-analysis',
+        aiModel: aiService.getConfig().model,
+        ...(result.aiUsage || {})
+      }
+    );
+
    return result;
  }

--- a/src/utils/toolSelector.ts
+++ b/src/utils/toolSelector.ts
@@ -12,14 +12,13 @@ export interface ToolSelectionConfig {
  similarityThreshold: number;
  embeddingSelectionLimit: number;
  embeddingConceptsLimit: number;
-  noEmbeddingsToolLimit: number;
-  noEmbeddingsConceptLimit: number;
  embeddingsMinTools: number;
  embeddingsMaxReductionRatio: number;
  methodSelectionRatio: number;
  softwareSelectionRatio: number;
 }

+
 export interface SelectionContext {
  userQuery: string;
  mode: string;
@@ -51,14 +50,11 @@ class ToolSelector {
      similarityThreshold: this.getEnvFloat('AI_SIMILARITY_THRESHOLD', 0.3),
      embeddingSelectionLimit: this.getEnvInt('AI_EMBEDDING_SELECTION_LIMIT', 30),
      embeddingConceptsLimit: this.getEnvInt('AI_EMBEDDING_CONCEPTS_LIMIT', 15),
-      noEmbeddingsToolLimit: this.getEnvInt('AI_NO_EMBEDDINGS_TOOL_LIMIT', 25),
-      noEmbeddingsConceptLimit: this.getEnvInt('AI_NO_EMBEDDINGS_CONCEPT_LIMIT', 10),
      embeddingsMinTools: this.getEnvInt('AI_EMBEDDINGS_MIN_TOOLS', 8),
      embeddingsMaxReductionRatio: this.getEnvFloat('AI_EMBEDDINGS_MAX_REDUCTION_RATIO', 0.75),
      methodSelectionRatio: this.getEnvFloat('AI_METHOD_SELECTION_RATIO', 0.4),
-      softwareSelectionRatio: this.getEnvFloat('AI_SOFTWARE_SELECTION_RATIO', 0.5)
+      softwareSelectionRatio: this.getEnvFloat('AI_SOFTWARE_SELECTION_RATIO', 0.5),
    };
-
    console.log('[TOOL-SELECTOR] Initialized with config:', this.config);
  }

@@ -185,43 +181,73 @@ class ToolSelector {
  ): Promise<ToolSelectionResult> {
    console.log('[TOOL-SELECTOR] Performing AI selection');

-    const candidateMethods = candidateTools.filter((tool: any) => tool && tool.type === 'method');
-    const candidateSoftware = candidateTools.filter((tool: any) => tool && tool.type === 'software');
+    const candidateMethods = candidateTools.filter((t: any) => t && t.type === 'method');
+    const candidateSoftware = candidateTools.filter((t: any) => t && t.type === 'software');

-    console.log('[TOOL-SELECTOR] Candidates:', candidateMethods.length, 'methods,', candidateSoftware.length, 'software,', candidateConcepts.length, 'concepts');
+    console.log('[TOOL-SELECTOR] Candidates:',
+      candidateMethods.length, 'methods,',
+      candidateSoftware.length, 'software,',
+      candidateConcepts.length, 'concepts'
+    );

    const methodsWithFullData = candidateMethods.map(this.createToolData);
    const softwareWithFullData = candidateSoftware.map(this.createToolData);
    const conceptsWithFullData = candidateConcepts.map(this.createConceptData);

-    const maxTools = Math.min(this.config.embeddingSelectionLimit, this.config.noEmbeddingsToolLimit);
-    const maxConcepts = Math.min(this.config.embeddingConceptsLimit, this.config.noEmbeddingsConceptLimit);
-    const methodLimit = Math.ceil(maxTools * this.config.methodSelectionRatio);
-    const softwareLimit = Math.floor(maxTools * this.config.softwareSelectionRatio);
+    // Embeddings are always ON → only use embedding limits
+    const maxTools = Math.min(this.config.embeddingSelectionLimit, candidateTools.length);
+    const maxConcepts = Math.min(this.config.embeddingConceptsLimit, candidateConcepts.length);

-    const toolsToSend: any[] = [
-      ...methodsWithFullData.slice(0, methodLimit),
-      ...softwareWithFullData.slice(0, softwareLimit),
-    ];
+    // Respect ratios first, then fill the remaining capacity
+    const methodRatio = Math.max(0, Math.min(1, this.config.methodSelectionRatio));
+    const softwareRatio = Math.max(0, Math.min(1, this.config.softwareSelectionRatio));

-    const remainingCapacity = maxTools - toolsToSend.length;
-    if (remainingCapacity > 0) {
-      const extraMethods = methodsWithFullData.slice(methodLimit, methodLimit + remainingCapacity);
-      const extraSoftware = softwareWithFullData.slice(softwareLimit, softwareLimit + (remainingCapacity - extraMethods.length));
-      toolsToSend.push(...extraMethods, ...extraSoftware);
+    let methodLimit = Math.round(maxTools * methodRatio);
+    let softwareLimit = Math.round(maxTools * softwareRatio);
+
+    // If rounded sum exceeds maxTools, scale down proportionally
+    if (methodLimit + softwareLimit > maxTools) {
+      const scale = maxTools / (methodLimit + softwareLimit);
+      methodLimit = Math.floor(methodLimit * scale);
+      softwareLimit = Math.floor(softwareLimit * scale);
+    }
+
+    const methodsPrimary = methodsWithFullData.slice(0, methodLimit);
+    const softwarePrimary = softwareWithFullData.slice(0, softwareLimit);
+
+    const toolsToSend: any[] = [...methodsPrimary, ...softwarePrimary];
+
+    // Fill any remaining capacity from whichever pool still has candidates
+    let mIdx = methodsPrimary.length;
+    let sIdx = softwarePrimary.length;
+
+    while (toolsToSend.length < maxTools && (mIdx < methodsWithFullData.length || sIdx < softwareWithFullData.length)) {
+      const remM = methodsWithFullData.length - mIdx;
+      const remS = softwareWithFullData.length - sIdx;
+
+      if (remS >= remM && sIdx < softwareWithFullData.length) {
+        toolsToSend.push(softwareWithFullData[sIdx++]);
+      } else if (mIdx < methodsWithFullData.length) {
+        toolsToSend.push(methodsWithFullData[mIdx++]);
+      } else if (sIdx < softwareWithFullData.length) {
+        toolsToSend.push(softwareWithFullData[sIdx++]);
+      } else {
+        break;
+      }
    }

    const conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);

-    const basePrompt = getPrompt('toolSelection', mode, userQuery, this.config.maxSelectedItems);
-    const prompt = getPrompt('toolSelectionWithData', basePrompt, toolsToSend, conceptsToSend);
-
+    console.log('[TOOL-SELECTOR-DEBUG] maxTools:', maxTools, 'maxConcepts:', maxConcepts);
    console.log('[TOOL-SELECTOR] Sending to AI:',
      toolsToSend.filter((t: any) => t.type === 'method').length, 'methods,',
      toolsToSend.filter((t: any) => t.type === 'software').length, 'software,',
      conceptsToSend.length, 'concepts'
    );

+    const basePrompt = getPrompt('toolSelection', mode, userQuery, this.config.maxSelectedItems);
+    const prompt = getPrompt('toolSelectionWithData', basePrompt, toolsToSend, conceptsToSend);
+
    try {
      const response = await aiService.callAI(prompt);
      const result = JSONParser.safeParseJSON(response.content, null);
@@ -250,7 +276,11 @@ class ToolSelector {
      const selectedMethods = selectedTools.filter((t: any) => t && t.type === 'method');
      const selectedSoftware = selectedTools.filter((t: any) => t && t.type === 'software');

-      console.log('[TOOL-SELECTOR] AI selected:', selectedMethods.length, 'methods,', selectedSoftware.length, 'software,', selectedConcepts.length, 'concepts');
+      console.log('[TOOL-SELECTOR] AI selected:',
+        selectedMethods.length, 'methods,',
+        selectedSoftware.length, 'software,',
+        selectedConcepts.length, 'concepts'
+      );

      const confidence = confidenceScoring.calculateSelectionConfidence(
        result,