Bug fixes in the embeddings API

This commit is contained in:
overcuriousity
2025-08-04 15:11:30 +02:00
parent 6c73a20dff
commit ec1969b2e2
4 changed files with 400 additions and 138 deletions

View File

@@ -66,6 +66,11 @@ interface AnalysisContext {
auditTrail: AuditEntry[];
}
interface SimilarityResult extends EmbeddingData {
similarity: number;
}
class ImprovedMicroTaskAIPipeline {
private config: AIConfig;
private maxSelectedItems: number;
@@ -267,39 +272,62 @@ class ImprovedMicroTaskAIPipeline {
userQuery,
this.embeddingCandidates,
this.similarityThreshold
);
) as SimilarityResult[]; // Type assertion for similarity property
const toolNames = new Set<string>();
const conceptNames = new Set<string>();
console.log(`[IMPROVED PIPELINE] Embeddings found ${similarItems.length} similar items`);
similarItems.forEach(item => {
if (item.type === 'tool') toolNames.add(item.name);
if (item.type === 'concept') conceptNames.add(item.name);
});
// FIXED: Create lookup maps for O(1) access while preserving original data
const toolsMap = new Map<string, any>(toolsData.tools.map((tool: any) => [tool.name, tool]));
const conceptsMap = new Map<string, any>(toolsData.concepts.map((concept: any) => [concept.name, concept]));
console.log(`[IMPROVED PIPELINE] Embeddings found: ${toolNames.size} tools, ${conceptNames.size} concepts`);
// FIXED: Process in similarity order, preserving the ranking
const similarTools = similarItems
.filter((item): item is SimilarityResult => item.type === 'tool')
.map(item => toolsMap.get(item.name))
.filter((tool): tool is any => tool !== undefined); // Proper type guard
if (toolNames.size >= 15) {
candidateTools = toolsData.tools.filter((tool: any) => toolNames.has(tool.name));
candidateConcepts = toolsData.concepts.filter((concept: any) => conceptNames.has(concept.name));
const similarConcepts = similarItems
.filter((item): item is SimilarityResult => item.type === 'concept')
.map(item => conceptsMap.get(item.name))
.filter((concept): concept is any => concept !== undefined); // Proper type guard
console.log(`[IMPROVED PIPELINE] Similarity-ordered results: ${similarTools.length} tools, ${similarConcepts.length} concepts`);
// Log the first few tools to verify ordering is preserved
if (similarTools.length > 0) {
console.log(`[IMPROVED PIPELINE] Top similar tools (in similarity order):`);
similarTools.slice(0, 5).forEach((tool, idx) => {
const originalSimilarItem = similarItems.find(item => item.name === tool.name);
console.log(` ${idx + 1}. ${tool.name} (similarity: ${originalSimilarItem?.similarity?.toFixed(4) || 'N/A'})`);
});
}
if (similarTools.length >= 15) {
candidateTools = similarTools;
candidateConcepts = similarConcepts;
selectionMethod = 'embeddings_candidates';
console.log(`[IMPROVED PIPELINE] Using embeddings candidates: ${candidateTools.length} tools`);
console.log(`[IMPROVED PIPELINE] Using embeddings candidates in similarity order: ${candidateTools.length} tools`);
} else {
console.log(`[IMPROVED PIPELINE] Embeddings insufficient (${toolNames.size} < 15), using full dataset`);
console.log(`[IMPROVED PIPELINE] Embeddings insufficient (${similarTools.length} < 15), using full dataset`);
candidateTools = toolsData.tools;
candidateConcepts = toolsData.concepts;
selectionMethod = 'full_dataset';
}
// NEW: Add Audit Entry for Embeddings Search
// NEW: Add Audit Entry for Embeddings Search with ordering verification
if (this.auditConfig.enabled) {
this.addAuditEntry(null, 'retrieval', 'embeddings-search',
{ query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates },
{ candidatesFound: similarItems.length, toolNames: Array.from(toolNames), conceptNames: Array.from(conceptNames) },
similarItems.length >= 15 ? 85 : 60, // Confidence based on result quality
{
candidatesFound: similarItems.length,
toolsInOrder: similarTools.slice(0, 3).map((t: any) => t.name),
conceptsInOrder: similarConcepts.slice(0, 3).map((c: any) => c.name),
orderingPreserved: true
},
similarTools.length >= 15 ? 85 : 60,
embeddingsStart,
{ selectionMethod, embeddingsEnabled: true }
{ selectionMethod, embeddingsEnabled: true, orderingFixed: true }
);
}
} else {
@@ -309,7 +337,7 @@ class ImprovedMicroTaskAIPipeline {
selectionMethod = 'full_dataset';
}
console.log(`[IMPROVED PIPELINE] AI will analyze FULL DATA of ${candidateTools.length} candidate tools`);
console.log(`[IMPROVED PIPELINE] AI will analyze ${candidateTools.length} candidate tools (ordering preserved: ${selectionMethod === 'embeddings_candidates'})`);
const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod);
return {
@@ -735,33 +763,59 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
}
private async callAI(prompt: string, maxTokens: number = 1000): Promise<string> {
const response = await fetch(`${this.config.endpoint}/v1/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.config.apiKey}`
},
body: JSON.stringify({
model: this.config.model,
messages: [{ role: 'user', content: prompt }],
max_tokens: maxTokens,
temperature: 0.3
})
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`AI API error: ${response.status} - ${errorText}`);
}
const data = await response.json();
const content = data.choices?.[0]?.message?.content;
const endpoint = this.config.endpoint;
const apiKey = this.config.apiKey;
const model = this.config.model;
if (!content) {
throw new Error('No response from AI model');
// Simple headers - add auth only if API key exists
let headers: Record<string, string> = {
'Content-Type': 'application/json'
};
// Add authentication if API key is provided
if (apiKey) {
headers['Authorization'] = `Bearer ${apiKey}`;
console.log('[AI PIPELINE] Using API key authentication');
} else {
console.log('[AI PIPELINE] No API key - making request without authentication');
}
// Simple request body
const requestBody = {
model,
messages: [{ role: 'user', content: prompt }],
max_tokens: maxTokens,
temperature: 0.3
};
try {
// FIXED: Use direct fetch since entire pipeline is already queued at query.ts level
const response = await fetch(`${endpoint}/v1/chat/completions`, {
method: 'POST',
headers,
body: JSON.stringify(requestBody)
});
return content;
if (!response.ok) {
const errorText = await response.text();
console.error(`[AI PIPELINE] AI API Error ${response.status}:`, errorText);
throw new Error(`AI API error: ${response.status} - ${errorText}`);
}
const data = await response.json();
const content = data.choices?.[0]?.message?.content;
if (!content) {
console.error('[AI PIPELINE] No response content:', data);
throw new Error('No response from AI model');
}
return content;
} catch (error) {
console.error('[AI PIPELINE] AI service call failed:', error.message);
throw error;
}
}
async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {