fix tool mode AI pipeline logic
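
Previously the tool-evaluation stage only looked at the top 3 candidates from
the embeddings pre-filter and seeded every prompt with a synthetic baseline
score (default 70). Now all pre-filtered candidates are evaluated, the model
itself must return a numeric taskRelevance (responses without one are audited
and skipped instead of being given a default), and the evaluated tools are
sorted by the moderated score afterwards, keeping only the top 3 for the UI.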

overcuriousity
2025-08-29 12:27:15 +02:00
parent 4ee1cc4984
commit b14ca1d243
4 changed files with 253 additions and 147 deletions


@@ -470,15 +470,42 @@ class AIPipeline {
     pipelineStart: number,
     toolsDataHash: string
   ): Promise<{ completed: number; failed: number }> {
-    const topTools = context.filteredData.tools.slice(0, 3);
-    for (let i = 0; i < topTools.length; i++) {
-      const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1, pipelineStart, toolsDataHash);
+    // Evaluate ALL candidates handed over by the embeddings pre-filter.
+    const candidates = context.filteredData.tools || [];
+    if (!Array.isArray(candidates) || candidates.length === 0) {
+      return { completed: completedTasks, failed: failedTasks };
+    }
+    // Evaluate every candidate (no slicing here)
+    for (let i = 0; i < candidates.length; i++) {
+      const evaluationResult = await this.evaluateSpecificTool(context, candidates[i], i + 1, pipelineStart, toolsDataHash);
       if (evaluationResult.success) completedTasks++; else failedTasks++;
       this.trackTokenUsage(evaluationResult.aiUsage);
       await this.delay(this.config.microTaskDelay);
     }
+    // At this point, context.selectedTools may contain 0..N evaluated items (added by evaluateSpecificTool).
+    // Now we sort them by AI-derived taskRelevance (after moderation) and keep ONLY the top 3 for the UI.
+    if (Array.isArray(context.selectedTools) && context.selectedTools.length > 0) {
+      context.selectedTools.sort((a: any, b: any) => {
+        const ar = typeof a.taskRelevance === 'number' ? a.taskRelevance : -1;
+        const br = typeof b.taskRelevance === 'number' ? b.taskRelevance : -1;
+        if (br !== ar) return br - ar;
+        // tie-breakers without domain heuristics:
+        const aLen = (a.justification || '').length;
+        const bLen = (b.justification || '').length;
+        if (bLen !== aLen) return bLen - aLen;
+        const aRank = a.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
+        const bRank = b.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
+        return aRank - bRank;
+      });
+      // Keep top 3 only
+      context.selectedTools = context.selectedTools.slice(0, 3);
+    }
     return { completed: completedTasks, failed: failedTasks };
   }
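For reference, here is the new ranking rule from the hunk above in isolation. This is a minimal sketch: the `SelectedTool` shape is assumed from the fields the comparator reads, and unlike the pipeline (which sorts `context.selectedTools` in place) it returns a copy, which makes it easier to unit-test.

```ts
// Minimal sketch; the SelectedTool shape is assumed, not taken from the repo.
interface SelectedTool {
  taskRelevance?: number;
  justification?: string;
  tool?: { evaluation?: { rank?: number } };
}

function rankSelectedTools(selected: SelectedTool[], keep = 3): SelectedTool[] {
  return [...selected]
    .sort((a, b) => {
      // Primary: AI-derived relevance, descending; missing scores sort last (-1 sentinel).
      const ar = typeof a.taskRelevance === 'number' ? a.taskRelevance : -1;
      const br = typeof b.taskRelevance === 'number' ? b.taskRelevance : -1;
      if (br !== ar) return br - ar;
      // Tie-breaker 1: longer justification first (no domain heuristics).
      const aLen = (a.justification || '').length;
      const bLen = (b.justification || '').length;
      if (bLen !== aLen) return bLen - aLen;
      // Tie-breaker 2: better (lower) pre-filter rank first.
      const aRank = a.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
      const bRank = b.tool?.evaluation?.rank ?? Number.MAX_SAFE_INTEGER;
      return aRank - bRank;
    })
    .slice(0, keep);
}
```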
@@ -849,68 +876,113 @@ class AIPipeline {
     toolsDataHash: string
   ): Promise<MicroTaskResult> {
     const taskStart = Date.now();
-    const existingSelection = context.selectedTools?.find((st: any) => st.tool && st.tool.name === tool.name);
-    const originalTaskRelevance = existingSelection?.taskRelevance || 70;
-    const moderatedTaskRelevance = this.moderateTaskRelevance(originalTaskRelevance);
-    const priority = this.derivePriorityFromScore(moderatedTaskRelevance);
-    const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank, moderatedTaskRelevance);
+    // Build prompt WITHOUT any baseline score
+    const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank);
     const result = await this.callMicroTaskAI(prompt, context, 'tool-evaluation');
-    if (result.success) {
-      const evaluation = JSONParser.safeParseJSON(result.content, {
-        detailed_explanation: 'Evaluation failed',
-        implementation_approach: '',
-        pros: [],
-        limitations: [],
-        alternatives: ''
-      });
-      this.addToolToSelection(context, {
-        ...tool,
-        evaluation: {
-          ...evaluation,
-          rank,
-          task_relevance: moderatedTaskRelevance
-        }
-      }, 'evaluation', priority, evaluation.detailed_explanation, moderatedTaskRelevance, evaluation.limitations);
-      const responseConfidence = auditService.calculateAIResponseConfidence(
-        result.content,
-        { min: 200, max: 800 },
-        'tool-evaluation'
-      );
-      const finalConfidence = Math.max(responseConfidence, moderatedTaskRelevance);
+    if (!result.success) {
+      return result;
+    }
+    // Parse strictly; do NOT provide a default with a score.
+    const evaluation = JSONParser.safeParseJSON(result.content, null);
+    // Require a numeric score produced by the model; otherwise, don't add this tool.
+    const aiProvided = evaluation && typeof evaluation.taskRelevance === 'number' && Number.isFinite(evaluation.taskRelevance)
+      ? Math.round(evaluation.taskRelevance)
+      : null;
+    if (aiProvided === null) {
+      // Log the malformed output but avoid injecting a synthetic score.
       auditService.addAIDecision(
         'tool-evaluation',
         prompt,
         result.content,
-        finalConfidence,
-        `Evaluated tool "${tool.name}" (rank ${rank}) - analyzed suitability for the specific task with a focus on practical applicability and methodological integration`,
+        0,
+        `Evaluation for "${tool.name}" ignored: missing/invalid taskRelevance`,
         taskStart,
         {
-          toolsDataHash: toolsDataHash,
+          toolsDataHash,
           microTaskType: 'tool-evaluation',
           toolName: tool.name,
           toolType: tool.type,
           rank,
-          originalTaskRelevance,
-          moderatedTaskRelevance,
-          responseConfidence,
-          finalConfidence,
-          moderationApplied: originalTaskRelevance !== moderatedTaskRelevance,
-          evaluationParsed: !!evaluation.detailed_explanation,
-          prosCount: evaluation.pros?.length || 0,
-          limitationsCount: evaluation.limitations?.length || 0,
+          evaluationParsed: false,
           decisionBasis: 'ai-analysis',
           aiModel: aiService.getConfig().model,
-          ...result.aiUsage
+          ...(result.aiUsage || {})
         }
       );
+      return result;
+    }
+    const moderatedTaskRelevance = this.moderateTaskRelevance(aiProvided);
+    const priority = this.derivePriorityFromScore(moderatedTaskRelevance);
+    // Keep original fields if present; coerce to strings/arrays safely.
+    const detailed_explanation = String(evaluation?.detailed_explanation || '').trim();
+    const implementation_approach = String(evaluation?.implementation_approach || '').trim();
+    const pros = Array.isArray(evaluation?.pros) ? evaluation.pros : [];
+    const limitations = Array.isArray(evaluation?.limitations) ? evaluation.limitations : [];
+    const alternatives = String(evaluation?.alternatives || '').trim();
+    this.addToolToSelection(
+      context,
+      {
+        ...tool,
+        evaluation: {
+          detailed_explanation,
+          implementation_approach,
+          pros,
+          limitations,
+          alternatives,
+          rank,
+          task_relevance: moderatedTaskRelevance
+        }
+      },
+      'evaluation',
+      priority,
+      detailed_explanation,
+      moderatedTaskRelevance,
+      limitations
+    );
+    const responseConfidence = auditService.calculateAIResponseConfidence(
+      result.content,
+      { min: 200, max: 800 },
+      'tool-evaluation'
+    );
+    const finalConfidence = Math.max(responseConfidence, moderatedTaskRelevance);
+    auditService.addAIDecision(
+      'tool-evaluation',
+      prompt,
+      result.content,
+      finalConfidence,
+      `Evaluated tool "${tool.name}" (rank ${rank}): AI score ${aiProvided}, moderated ${moderatedTaskRelevance}`,
+      taskStart,
+      {
+        toolsDataHash,
+        microTaskType: 'tool-evaluation',
+        toolName: tool.name,
+        toolType: tool.type,
+        rank,
+        aiProvidedTaskRelevance: aiProvided,
+        moderatedTaskRelevance,
+        responseConfidence,
+        finalConfidence,
+        moderationApplied: aiProvided !== moderatedTaskRelevance,
+        evaluationParsed: true,
+        prosCount: pros.length,
+        limitationsCount: limitations.length,
+        decisionBasis: 'ai-analysis',
+        aiModel: aiService.getConfig().model,
+        ...(result.aiUsage || {})
+      }
+    );
     return result;
   }
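The new code leans on two `AIPipeline` helpers that are not part of this changeset, `moderateTaskRelevance` and `derivePriorityFromScore`. Their real implementations are not shown here; the sketch below only illustrates the contract the diff relies on, and the clamping range and thresholds are assumptions, not the repository's actual values.

```ts
// ASSUMPTION: illustrative stand-ins for helpers defined elsewhere on AIPipeline.
function moderateTaskRelevance(score: number): number {
  // Clamp to 0..100 so an inflated model score cannot dominate the ranking.
  return Math.min(100, Math.max(0, Math.round(score)));
}

function derivePriorityFromScore(score: number): 'high' | 'medium' | 'low' {
  // Thresholds are assumptions for illustration only.
  if (score >= 80) return 'high';
  if (score >= 50) return 'medium';
  return 'low';
}
```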
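Similarly, `JSONParser.safeParseJSON` is project-internal. Assuming it behaves like the usual try/catch wrapper below, passing `null` as the fallback is what lets the caller treat a parse failure and a parsed-but-scoreless response the same way: both end up with `aiProvided === null`, and the tool is skipped instead of receiving a synthetic score.

```ts
// ASSUMPTION: stand-in for the project's JSONParser.safeParseJSON; returns the
// fallback instead of throwing when the model emits something that is not JSON.
function safeParseJSON<T>(text: string, fallback: T): any {
  try {
    return JSON.parse(text);
  } catch {
    return fallback;
  }
}
```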