fix tool mode AI pipeline logic
@@ -470,15 +470,42 @@ class AIPipeline {
     pipelineStart: number,
     toolsDataHash: string
   ): Promise<{ completed: number; failed: number }> {
-    const topTools = context.filteredData.tools.slice(0, 3);
-
-    for (let i = 0; i < topTools.length; i++) {
-      const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1, pipelineStart, toolsDataHash);
+    // Evaluate ALL candidates handed over by the embeddings pre-filter.
+    const candidates = context.filteredData.tools || [];
+    if (!Array.isArray(candidates) || candidates.length === 0) {
+      return { completed: completedTasks, failed: failedTasks };
+    }
+
+    // Evaluate every candidate (no slicing here)
+    for (let i = 0; i < candidates.length; i++) {
+      const evaluationResult = await this.evaluateSpecificTool(context, candidates[i], i + 1, pipelineStart, toolsDataHash);
       if (evaluationResult.success) completedTasks++; else failedTasks++;
       this.trackTokenUsage(evaluationResult.aiUsage);
       await this.delay(this.config.microTaskDelay);
     }
+
+    // At this point, context.selectedTools may contain 0..N evaluated items (added by evaluateSpecificTool).
+    // Now we sort them by AI-derived taskRelevance (after moderation) and keep ONLY the top 3 for UI.
+    if (Array.isArray(context.selectedTools) && context.selectedTools.length > 0) {
+      context.selectedTools.sort((a: any, b: any) => {
+        const ar = typeof a.taskRelevance === 'number' ? a.taskRelevance : -1;
+        const br = typeof b.taskRelevance === 'number' ? b.taskRelevance : -1;
+        if (br !== ar) return br - ar;
+
+        // tie-breakers without domain heuristics:
+        const aLen = (a.justification || '').length;
+        const bLen = (b.justification || '').length;
+        if (bLen !== aLen) return bLen - aLen;
+
+        const aRank = a.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
+        const bRank = b.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
+        return aRank - bRank;
+      });
+
+      // Keep top 3 only
+      context.selectedTools = context.selectedTools.slice(0, 3);
+    }
+
     return { completed: completedTasks, failed: failedTasks };
   }
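
The selection rule added in this hunk can be read as a pure function: sort by score, break ties, keep three. Below is a minimal standalone sketch of that comparator, assuming a simplified item shape; `SelectedToolLike` and `rankSelections` are illustrative names, not identifiers from this commit.

// Sketch of the ranking introduced above: taskRelevance descending,
// then justification length, then the pre-filter rank.
interface SelectedToolLike {
  taskRelevance?: number;
  justification?: string;
  tool?: { evaluation?: { rank?: number } };
}

function rankSelections(items: SelectedToolLike[], keep = 3): SelectedToolLike[] {
  return [...items]
    .sort((a, b) => {
      const ar = typeof a.taskRelevance === 'number' ? a.taskRelevance : -1;
      const br = typeof b.taskRelevance === 'number' ? b.taskRelevance : -1;
      if (br !== ar) return br - ar; // higher score wins
      const aLen = (a.justification || '').length;
      const bLen = (b.justification || '').length;
      if (bLen !== aLen) return bLen - aLen; // longer justification wins
      const aRank = a.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
      const bRank = b.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
      return aRank - bRank; // earlier pre-filter rank wins
    })
    .slice(0, keep);
}

// An item without a numeric score sorts below every scored item (the -1 sentinel):
const top = rankSelections([
  { taskRelevance: 62, justification: 'short' },
  { justification: 'unscored, never displaces a scored item' },
  { taskRelevance: 88, justification: 'best match' },
  { taskRelevance: 62, justification: 'same score, longer justification' }
]);
// => 88, then 62 (longer justification), then 62 (shorter)
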
@@ -849,68 +876,113 @@ class AIPipeline {
     toolsDataHash: string
   ): Promise<MicroTaskResult> {
     const taskStart = Date.now();
-    const existingSelection = context.selectedTools?.find((st: any) => st.tool && st.tool.name === tool.name);
-    const originalTaskRelevance = existingSelection?.taskRelevance || 70;
-    const moderatedTaskRelevance = this.moderateTaskRelevance(originalTaskRelevance);
-    const priority = this.derivePriorityFromScore(moderatedTaskRelevance);
-
-    const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank, moderatedTaskRelevance);
+    // Build prompt WITHOUT any baseline score
+    const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank);
     const result = await this.callMicroTaskAI(prompt, context, 'tool-evaluation');

-    if (result.success) {
-      const evaluation = JSONParser.safeParseJSON(result.content, {
-        detailed_explanation: 'Evaluation failed',
-        implementation_approach: '',
-        pros: [],
-        limitations: [],
-        alternatives: ''
-      });
-
-      this.addToolToSelection(context, {
-        ...tool,
-        evaluation: {
-          ...evaluation,
-          rank,
-          task_relevance: moderatedTaskRelevance
-        }
-      }, 'evaluation', priority, evaluation.detailed_explanation, moderatedTaskRelevance, evaluation.limitations);
-
-      const responseConfidence = auditService.calculateAIResponseConfidence(
-        result.content,
-        { min: 200, max: 800 },
-        'tool-evaluation'
-      );
-
-      const finalConfidence = Math.max(responseConfidence, moderatedTaskRelevance);
-
+    if (!result.success) {
+      return result;
+    }
+
+    // Parse strictly; do NOT provide a default with a score.
+    const evaluation = JSONParser.safeParseJSON(result.content, null);
+
+    // Require a numeric score produced by the model; otherwise, don't add this tool.
+    const aiProvided = evaluation && typeof evaluation.taskRelevance === 'number' && Number.isFinite(evaluation.taskRelevance)
+      ? Math.round(evaluation.taskRelevance)
+      : null;
+
+    if (aiProvided === null) {
+      // Log the malformed output but avoid injecting a synthetic score.
       auditService.addAIDecision(
         'tool-evaluation',
         prompt,
         result.content,
-        finalConfidence,
-        `Bewertete Tool "${tool.name}" (Rang ${rank}) - Analysierte Eignung für spezifische Aufgabenstellung mit Fokus auf praktische Anwendbarkeit und methodische Integration`,
+        0,
+        `Bewertung für "${tool.name}" ignoriert: fehlender/ungültiger taskRelevance`,
         taskStart,
         {
-          toolsDataHash: toolsDataHash,
+          toolsDataHash,
           microTaskType: 'tool-evaluation',
           toolName: tool.name,
           toolType: tool.type,
           rank,
-          originalTaskRelevance,
-          moderatedTaskRelevance,
-          responseConfidence,
-          finalConfidence,
-          moderationApplied: originalTaskRelevance !== moderatedTaskRelevance,
-          evaluationParsed: !!evaluation.detailed_explanation,
-          prosCount: evaluation.pros?.length || 0,
-          limitationsCount: evaluation.limitations?.length || 0,
+          evaluationParsed: false,
           decisionBasis: 'ai-analysis',
           aiModel: aiService.getConfig().model,
-          ...result.aiUsage
+          ...(result.aiUsage || {})
         }
       );
+      return result;
     }

+    const moderatedTaskRelevance = this.moderateTaskRelevance(aiProvided);
+    const priority = this.derivePriorityFromScore(moderatedTaskRelevance);
+
+    // Keep original fields if present; coerce to strings/arrays safely.
+    const detailed_explanation = String(evaluation?.detailed_explanation || '').trim();
+    const implementation_approach = String(evaluation?.implementation_approach || '').trim();
+    const pros = Array.isArray(evaluation?.pros) ? evaluation.pros : [];
+    const limitations = Array.isArray(evaluation?.limitations) ? evaluation.limitations : [];
+    const alternatives = String(evaluation?.alternatives || '').trim();
+
+    this.addToolToSelection(
+      context,
+      {
+        ...tool,
+        evaluation: {
+          detailed_explanation,
+          implementation_approach,
+          pros,
+          limitations,
+          alternatives,
+          rank,
+          task_relevance: moderatedTaskRelevance
+        }
+      },
+      'evaluation',
+      priority,
+      detailed_explanation,
+      moderatedTaskRelevance,
+      limitations
+    );
+
+    const responseConfidence = auditService.calculateAIResponseConfidence(
+      result.content,
+      { min: 200, max: 800 },
+      'tool-evaluation'
+    );
+
+    const finalConfidence = Math.max(responseConfidence, moderatedTaskRelevance);
+
+    auditService.addAIDecision(
+      'tool-evaluation',
+      prompt,
+      result.content,
+      finalConfidence,
+      `Bewertete Tool "${tool.name}" (Rang ${rank}) – AI-Score ${aiProvided}, moderiert ${moderatedTaskRelevance}`,
+      taskStart,
+      {
+        toolsDataHash,
+        microTaskType: 'tool-evaluation',
+        toolName: tool.name,
+        toolType: tool.type,
+        rank,
+        aiProvidedTaskRelevance: aiProvided,
+        moderatedTaskRelevance,
+        responseConfidence,
+        finalConfidence,
+        moderationApplied: aiProvided !== moderatedTaskRelevance,
+        evaluationParsed: true,
+        prosCount: pros.length,
+        limitationsCount: limitations.length,
+        decisionBasis: 'ai-analysis',
+        aiModel: aiService.getConfig().model,
+        ...(result.aiUsage || {})
+      }
+    );
+
     return result;
   }
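
The heart of the second hunk is the strict-acceptance rule: a tool is only added when the model itself returned a finite numeric taskRelevance, and nothing synthetic is substituted. A minimal sketch of that rule in isolation; the `extractTaskRelevance` helper is a hypothetical name for illustration, not part of the commit.

// Accept a score only if the parsed model output contains a finite number;
// otherwise signal "no usable score" with null, mirroring the aiProvided check.
function extractTaskRelevance(parsed: unknown): number | null {
  const value = (parsed as { taskRelevance?: unknown } | null)?.taskRelevance;
  return typeof value === 'number' && Number.isFinite(value)
    ? Math.round(value)
    : null;
}

console.log(extractTaskRelevance({ taskRelevance: 73.6 })); // 73 (accepted, rounded)
console.log(extractTaskRelevance({ taskRelevance: '85' })); // null (string, not a number)
console.log(extractTaskRelevance({ taskRelevance: NaN }));  // null (not finite)
console.log(extractTaskRelevance(null));                    // null (JSON parse failed)

Compared with the old `existingSelection?.taskRelevance || 70` baseline, a malformed response now yields an audit entry and an early return rather than a fabricated score.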