fix tool mode AI pipeline logic
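
Previously the tool-evaluation stage only looked at the top 3 candidates from
the embeddings pre-filter and seeded every prompt with a synthetic baseline
score (default 70). Now all pre-filtered candidates are evaluated, the model
itself must return a numeric taskRelevance (responses without one are audited
and skipped instead of being given a default), and the evaluated tools are
sorted by the moderated score afterwards, keeping only the top 3 for the UI.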

overcuriousity
2025-08-29 12:27:15 +02:00
parent 4ee1cc4984
commit b14ca1d243
4 changed files with 253 additions and 147 deletions


@@ -470,15 +470,42 @@ class AIPipeline {
     pipelineStart: number,
     toolsDataHash: string
   ): Promise<{ completed: number; failed: number }> {
-    const topTools = context.filteredData.tools.slice(0, 3);
-    for (let i = 0; i < topTools.length; i++) {
-      const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1, pipelineStart, toolsDataHash);
+    // Evaluate ALL candidates handed over by the embeddings pre-filter.
+    const candidates = context.filteredData.tools || [];
+    if (!Array.isArray(candidates) || candidates.length === 0) {
+      return { completed: completedTasks, failed: failedTasks };
+    }
+    // Evaluate every candidate (no slicing here)
+    for (let i = 0; i < candidates.length; i++) {
+      const evaluationResult = await this.evaluateSpecificTool(context, candidates[i], i + 1, pipelineStart, toolsDataHash);
       if (evaluationResult.success) completedTasks++; else failedTasks++;
       this.trackTokenUsage(evaluationResult.aiUsage);
       await this.delay(this.config.microTaskDelay);
     }
+    // At this point, context.selectedTools may contain 0..N evaluated items (added by evaluateSpecificTool).
+    // Now we sort them by AI-derived taskRelevance (after moderation) and keep ONLY the top 3 for the UI.
+    if (Array.isArray(context.selectedTools) && context.selectedTools.length > 0) {
+      context.selectedTools.sort((a: any, b: any) => {
+        const ar = typeof a.taskRelevance === 'number' ? a.taskRelevance : -1;
+        const br = typeof b.taskRelevance === 'number' ? b.taskRelevance : -1;
+        if (br !== ar) return br - ar;
+        // tie-breakers without domain heuristics:
+        const aLen = (a.justification || '').length;
+        const bLen = (b.justification || '').length;
+        if (bLen !== aLen) return bLen - aLen;
+        const aRank = a.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
+        const bRank = b.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
+        return aRank - bRank;
+      });
+      // Keep top 3 only
+      context.selectedTools = context.selectedTools.slice(0, 3);
+    }
     return { completed: completedTasks, failed: failedTasks };
   }
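For reference, here is the new ranking rule from the hunk above in isolation. This is a minimal sketch: the `SelectedTool` shape is assumed from the fields the comparator reads, and unlike the pipeline (which sorts `context.selectedTools` in place) it returns a copy, which makes it easier to unit-test.

```ts
// Minimal sketch; the SelectedTool shape is assumed, not taken from the repo.
interface SelectedTool {
  taskRelevance?: number;
  justification?: string;
  tool?: { evaluation?: { rank?: number } };
}

function rankSelectedTools(selected: SelectedTool[], keep = 3): SelectedTool[] {
  return [...selected]
    .sort((a, b) => {
      // Primary: AI-derived relevance, descending; missing scores sort last (-1 sentinel).
      const ar = typeof a.taskRelevance === 'number' ? a.taskRelevance : -1;
      const br = typeof b.taskRelevance === 'number' ? b.taskRelevance : -1;
      if (br !== ar) return br - ar;
      // Tie-breaker 1: longer justification first (no domain heuristics).
      const aLen = (a.justification || '').length;
      const bLen = (b.justification || '').length;
      if (bLen !== aLen) return bLen - aLen;
      // Tie-breaker 2: better (lower) pre-filter rank first.
      const aRank = a.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
      const bRank = b.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
      return aRank - bRank;
    })
    .slice(0, keep);
}
```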
@@ -849,68 +876,113 @@ class AIPipeline {
     toolsDataHash: string
   ): Promise<MicroTaskResult> {
     const taskStart = Date.now();
-    const existingSelection = context.selectedTools?.find((st: any) => st.tool && st.tool.name === tool.name);
-    const originalTaskRelevance = existingSelection?.taskRelevance || 70;
-    const moderatedTaskRelevance = this.moderateTaskRelevance(originalTaskRelevance);
-    const priority = this.derivePriorityFromScore(moderatedTaskRelevance);
-    const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank, moderatedTaskRelevance);
+    // Build prompt WITHOUT any baseline score
+    const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank);
     const result = await this.callMicroTaskAI(prompt, context, 'tool-evaluation');
-    if (result.success) {
-      const evaluation = JSONParser.safeParseJSON(result.content, {
-        detailed_explanation: 'Evaluation failed',
-        implementation_approach: '',
-        pros: [],
-        limitations: [],
-        alternatives: ''
-      });
-      this.addToolToSelection(context, {
-        ...tool,
-        evaluation: {
-          ...evaluation,
-          rank,
-          task_relevance: moderatedTaskRelevance
-        }
-      }, 'evaluation', priority, evaluation.detailed_explanation, moderatedTaskRelevance, evaluation.limitations);
-      const responseConfidence = auditService.calculateAIResponseConfidence(
-        result.content,
-        { min: 200, max: 800 },
-        'tool-evaluation'
-      );
-      const finalConfidence = Math.max(responseConfidence, moderatedTaskRelevance);
+    if (!result.success) {
+      return result;
+    }
+    // Parse strictly; do NOT provide a default with a score.
+    const evaluation = JSONParser.safeParseJSON(result.content, null);
+    // Require a numeric score produced by the model; otherwise, don't add this tool.
+    const aiProvided = evaluation && typeof evaluation.taskRelevance === 'number' && Number.isFinite(evaluation.taskRelevance)
+      ? Math.round(evaluation.taskRelevance)
+      : null;
+    if (aiProvided === null) {
+      // Log the malformed output but avoid injecting a synthetic score.
       auditService.addAIDecision(
         'tool-evaluation',
         prompt,
         result.content,
-        finalConfidence,
-        `Evaluated tool "${tool.name}" (rank ${rank}) - analyzed suitability for the specific task with a focus on practical applicability and methodological integration`,
+        0,
+        `Evaluation for "${tool.name}" ignored: missing/invalid taskRelevance`,
         taskStart,
         {
-          toolsDataHash: toolsDataHash,
+          toolsDataHash,
           microTaskType: 'tool-evaluation',
           toolName: tool.name,
           toolType: tool.type,
           rank,
-          originalTaskRelevance,
-          moderatedTaskRelevance,
-          responseConfidence,
-          finalConfidence,
-          moderationApplied: originalTaskRelevance !== moderatedTaskRelevance,
-          evaluationParsed: !!evaluation.detailed_explanation,
-          prosCount: evaluation.pros?.length || 0,
-          limitationsCount: evaluation.limitations?.length || 0,
+          evaluationParsed: false,
           decisionBasis: 'ai-analysis',
           aiModel: aiService.getConfig().model,
-          ...result.aiUsage
+          ...(result.aiUsage || {})
         }
       );
+      return result;
+    }
+    const moderatedTaskRelevance = this.moderateTaskRelevance(aiProvided);
+    const priority = this.derivePriorityFromScore(moderatedTaskRelevance);
+    // Keep original fields if present; coerce to strings/arrays safely.
+    const detailed_explanation = String(evaluation?.detailed_explanation || '').trim();
+    const implementation_approach = String(evaluation?.implementation_approach || '').trim();
+    const pros = Array.isArray(evaluation?.pros) ? evaluation.pros : [];
+    const limitations = Array.isArray(evaluation?.limitations) ? evaluation.limitations : [];
+    const alternatives = String(evaluation?.alternatives || '').trim();
+    this.addToolToSelection(
+      context,
+      {
+        ...tool,
+        evaluation: {
+          detailed_explanation,
+          implementation_approach,
+          pros,
+          limitations,
+          alternatives,
+          rank,
+          task_relevance: moderatedTaskRelevance
+        }
+      },
+      'evaluation',
+      priority,
+      detailed_explanation,
+      moderatedTaskRelevance,
+      limitations
+    );
+    const responseConfidence = auditService.calculateAIResponseConfidence(
+      result.content,
+      { min: 200, max: 800 },
+      'tool-evaluation'
+    );
+    const finalConfidence = Math.max(responseConfidence, moderatedTaskRelevance);
+    auditService.addAIDecision(
+      'tool-evaluation',
+      prompt,
+      result.content,
+      finalConfidence,
+      `Evaluated tool "${tool.name}" (rank ${rank}): AI score ${aiProvided}, moderated ${moderatedTaskRelevance}`,
+      taskStart,
+      {
+        toolsDataHash,
+        microTaskType: 'tool-evaluation',
+        toolName: tool.name,
+        toolType: tool.type,
+        rank,
+        aiProvidedTaskRelevance: aiProvided,
+        moderatedTaskRelevance,
+        responseConfidence,
+        finalConfidence,
+        moderationApplied: aiProvided !== moderatedTaskRelevance,
+        evaluationParsed: true,
+        prosCount: pros.length,
+        limitationsCount: limitations.length,
+        decisionBasis: 'ai-analysis',
+        aiModel: aiService.getConfig().model,
+        ...(result.aiUsage || {})
+      }
+    );
     return result;
   }
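The new code leans on two `AIPipeline` helpers that are not part of this changeset, `moderateTaskRelevance` and `derivePriorityFromScore`. Their real implementations are not shown here; the sketch below only illustrates the contract the diff relies on, and the clamping range and thresholds are assumptions, not the repository's actual values.

```ts
// ASSUMPTION: illustrative stand-ins for helpers defined elsewhere on AIPipeline.
function moderateTaskRelevance(score: number): number {
  // Clamp to 0..100 so an inflated model score cannot dominate the ranking.
  return Math.min(100, Math.max(0, Math.round(score)));
}

function derivePriorityFromScore(score: number): 'high' | 'medium' | 'low' {
  // Thresholds are assumptions for illustration only.
  if (score >= 80) return 'high';
  if (score >= 50) return 'medium';
  return 'low';
}
```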
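Similarly, `JSONParser.safeParseJSON` is project-internal. Assuming it behaves like the usual try/catch wrapper below, passing `null` as the fallback is what lets the caller treat a parse failure and a parsed-but-scoreless response the same way: both end up with `aiProvided === null`, and the tool is skipped instead of receiving a synthetic score.

```ts
// ASSUMPTION: stand-in for the project's JSONParser.safeParseJSON; returns the
// fallback instead of throwing when the model emits something that is not JSON.
function safeParseJSON<T>(text: string, fallback: T): any {
  try {
    return JSON.parse(text);
  } catch {
    return fallback;
  }
}
```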