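fine-tuning: rename tool-evaluation "cons" to "limitations", drop the Windows-only platform check, and render tool results in the workflow container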

2025-08-05 21:47:16 +02:00
parent fe1be323bb
commit 769c223d39
3 changed files with 15 additions and 19 deletions
--- a/src/components/AIQueryInterface.astro
+++ b/src/components/AIQueryInterface.astro
@@ -741,7 +741,7 @@ class AIQueryInterface {

  displayToolResults(recommendation, originalQuery) {
    const html = `
-      <div class="tool-results-container">
+      <div class="workflow-container">
        ${this.renderHeader('Handlungsempfehlung', originalQuery)}
        ${this.renderContextualAnalysis(recommendation, 'tool')}
        ${this.renderBackgroundKnowledge(recommendation.background_knowledge)}
--- a/src/config/prompts.ts
+++ b/src/config/prompts.ts
@@ -169,11 +169,14 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
  "detailed_explanation": "Detaillierte Erklärung warum und wie dieses Tool für diese spezifische Aufgabe eingesetzt wird",
  "implementation_approach": "Konkrete Schritt-für-Schritt Anleitung zur korrekten Anwendung",
  "pros": ["Spezifischer Vorteil 1", "Spezifischer Vorteil 2"],
-  "cons": ["Bekannte Limitation 1", "Bekannte Limitation 2"],
+  "limitations": ["Spezifische Einschränkung 1", "Spezifische Einschränkung 2"],
  "alternatives": "Alternative Ansätze oder Tools falls dieses nicht verfügbar ist"
 }

-WICHTIG: Keine erneute Bewertung - nur detaillierte Erklärung der bereits bewerteten Eignung.`;
+WICHTIG: 
+- Keine erneute Bewertung - nur detaillierte Erklärung der bereits bewerteten Eignung
+- "limitations" soll spezifische technische/methodische Einschränkungen des Tools auflisten
+- "pros" soll die Stärken für diese spezifische Aufgabe hervorheben`;
  },

  // Background knowledge selection prompt
--- a/src/utils/aiPipeline.ts
+++ b/src/utils/aiPipeline.ts
@@ -776,11 +776,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
      factors.push('Einsteiger-Tool für komplexe Analyse - könnte funktionale Limitierungen haben');
    }
    
-    // Platform availability concerns
-    if (tool.platforms && tool.platforms.length === 1 && tool.platforms[0] === 'Windows' && /linux|unix|server/i.test(context.userQuery)) {
-      factors.push('Nur Windows-Tool bei möglicher Linux/Server-Umgebung - Plattform-Inkompatibilität');
-    }
-    
    // Access and deployment concerns
    if (tool.type === 'software' && !isToolHosted(tool) && tool.accessType === 'download') {
      factors.push('Installation und Setup erforderlich');
@@ -939,7 +934,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
  }

  private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
-    // Get existing task relevance from previous phase selection
    const existingSelection = context.selectedTools?.find(st => st.tool.name === tool.name);
    const taskRelevance = existingSelection?.taskRelevance || 70;
    const priority = this.derivePriorityFromScore(taskRelevance);
@@ -953,11 +947,10 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
        detailed_explanation: 'Evaluation failed',
        implementation_approach: '',
        pros: [],
-        cons: [],
+        limitations: [], 
        alternatives: ''
      });
      
-      // Store evaluation without re-scoring
      this.addToolToSelection(context, {
        ...tool,
        evaluation: {
@@ -966,7 +959,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
          task_relevance: taskRelevance
        }
      }, 'evaluation', priority, evaluation.detailed_explanation, 
-      taskRelevance, existingSelection?.limitations);
+      taskRelevance, evaluation.limitations); 
      
      this.addAuditEntry(context, 'micro-task', 'tool-evaluation',
        { toolName: tool.name, rank, existingTaskRelevance: taskRelevance, derivedPriority: priority },
@@ -974,11 +967,12 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
          hasExplanation: !!evaluation.detailed_explanation,
          hasImplementationApproach: !!evaluation.implementation_approach,
          prosCount: evaluation.pros?.length || 0,
-          consCount: evaluation.cons?.length || 0
+          limitationsCount: evaluation.limitations?.length || 0, // ← Updated field name
+          hasLimitations: Array.isArray(evaluation.limitations) && evaluation.limitations.length > 0
        },
        70,
        Date.now() - result.processingTimeMs,
-        { toolType: tool.type, explanationOnly: true, priorityDerived: true }
+        { toolType: tool.type, explanationOnly: true, priorityDerived: true, limitationsExtracted: true }
      );
    }
    
@@ -1226,7 +1220,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;

    if (isWorkflow) {
      const recommendedToolsWithConfidence = context.selectedTools?.map(st => {
-        // Calculate enhanced confidence for each tool
        const confidence = this.calculateRecommendationConfidence(
          st.tool,
          context,
@@ -1234,7 +1227,6 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
          st.limitations || []
        );
        
-        // Add audit entry for confidence calculation
        this.addAuditEntry(context, 'validation', 'confidence-scoring',
          { toolName: st.tool.name, phase: st.phase },
          { 
@@ -1279,7 +1271,8 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
          { toolName: st.tool.name, rank: st.tool.evaluation?.rank || 1 },
          { 
            overall: confidence.overall,
-            suitabilityAlignment: st.priority === 'high' && confidence.overall >= this.confidenceConfig.highThreshold
+            suitabilityAlignment: st.priority === 'high' && confidence.overall >= this.confidenceConfig.highThreshold,
+            limitationsUsed: st.limitations?.length || 0
          },
          confidence.overall,
          Date.now(),
@@ -1293,7 +1286,7 @@ ${JSON.stringify(conceptsToSend, null, 2)}`;
          detailed_explanation: st.tool.evaluation?.detailed_explanation || '',
          implementation_approach: st.tool.evaluation?.implementation_approach || '',
          pros: st.tool.evaluation?.pros || [],
-          cons: st.tool.evaluation?.cons || [],
+          cons: st.tool.evaluation?.limitations || [], // ← FIXED: Use limitations as cons for display
          alternatives: st.tool.evaluation?.alternatives || '',
          confidence: confidence,
          recommendationStrength: confidence.overall >= this.confidenceConfig.highThreshold ? 'strong' :