Bug fixes in the embeddings API

This commit is contained in:
overcuriousity
2025-08-04 15:11:30 +02:00
parent 6c73a20dff
commit ec1969b2e2
4 changed files with 400 additions and 138 deletions

View File

@@ -66,6 +66,11 @@ interface AnalysisContext {
auditTrail: AuditEntry[];
}
interface SimilarityResult extends EmbeddingData {
similarity: number;
}
class ImprovedMicroTaskAIPipeline {
private config: AIConfig;
private maxSelectedItems: number;
@@ -267,39 +272,62 @@ class ImprovedMicroTaskAIPipeline {
userQuery,
this.embeddingCandidates,
this.similarityThreshold
);
) as SimilarityResult[]; // Type assertion for similarity property
const toolNames = new Set<string>();
const conceptNames = new Set<string>();
console.log(`[IMPROVED PIPELINE] Embeddings found ${similarItems.length} similar items`);
similarItems.forEach(item => {
if (item.type === 'tool') toolNames.add(item.name);
if (item.type === 'concept') conceptNames.add(item.name);
});
// FIXED: Create lookup maps for O(1) access while preserving original data
const toolsMap = new Map<string, any>(toolsData.tools.map((tool: any) => [tool.name, tool]));
const conceptsMap = new Map<string, any>(toolsData.concepts.map((concept: any) => [concept.name, concept]));
console.log(`[IMPROVED PIPELINE] Embeddings found: ${toolNames.size} tools, ${conceptNames.size} concepts`);
// FIXED: Process in similarity order, preserving the ranking
const similarTools = similarItems
.filter((item): item is SimilarityResult => item.type === 'tool')
.map(item => toolsMap.get(item.name))
.filter((tool): tool is any => tool !== undefined); // Proper type guard
if (toolNames.size >= 15) {
candidateTools = toolsData.tools.filter((tool: any) => toolNames.has(tool.name));
candidateConcepts = toolsData.concepts.filter((concept: any) => conceptNames.has(concept.name));
const similarConcepts = similarItems
.filter((item): item is SimilarityResult => item.type === 'concept')
.map(item => conceptsMap.get(item.name))
.filter((concept): concept is any => concept !== undefined); // Proper type guard
console.log(`[IMPROVED PIPELINE] Similarity-ordered results: ${similarTools.length} tools, ${similarConcepts.length} concepts`);
// Log the first few tools to verify ordering is preserved
if (similarTools.length > 0) {
console.log(`[IMPROVED PIPELINE] Top similar tools (in similarity order):`);
similarTools.slice(0, 5).forEach((tool, idx) => {
const originalSimilarItem = similarItems.find(item => item.name === tool.name);
console.log(` ${idx + 1}. ${tool.name} (similarity: ${originalSimilarItem?.similarity?.toFixed(4) || 'N/A'})`);
});
}
if (similarTools.length >= 15) {
candidateTools = similarTools;
candidateConcepts = similarConcepts;
selectionMethod = 'embeddings_candidates';
console.log(`[IMPROVED PIPELINE] Using embeddings candidates: ${candidateTools.length} tools`);
console.log(`[IMPROVED PIPELINE] Using embeddings candidates in similarity order: ${candidateTools.length} tools`);
} else {
console.log(`[IMPROVED PIPELINE] Embeddings insufficient (${toolNames.size} < 15), using full dataset`);
console.log(`[IMPROVED PIPELINE] Embeddings insufficient (${similarTools.length} < 15), using full dataset`);
candidateTools = toolsData.tools;
candidateConcepts = toolsData.concepts;
selectionMethod = 'full_dataset';
}
// NEW: Add Audit Entry for Embeddings Search
// NEW: Add Audit Entry for Embeddings Search with ordering verification
if (this.auditConfig.enabled) {
this.addAuditEntry(null, 'retrieval', 'embeddings-search',
{ query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates },
{ candidatesFound: similarItems.length, toolNames: Array.from(toolNames), conceptNames: Array.from(conceptNames) },
similarItems.length >= 15 ? 85 : 60, // Confidence based on result quality
{
candidatesFound: similarItems.length,
toolsInOrder: similarTools.slice(0, 3).map((t: any) => t.name),
conceptsInOrder: similarConcepts.slice(0, 3).map((c: any) => c.name),
orderingPreserved: true
},
similarTools.length >= 15 ? 85 : 60,
embeddingsStart,
{ selectionMethod, embeddingsEnabled: true }
{ selectionMethod, embeddingsEnabled: true, orderingFixed: true }
);
}
} else {
@@ -309,7 +337,7 @@ class ImprovedMicroTaskAIPipeline {
selectionMethod = 'full_dataset';
}
console.log(`[IMPROVED PIPELINE] AI will analyze FULL DATA of ${candidateTools.length} candidate tools`);
console.log(`[IMPROVED PIPELINE] AI will analyze ${candidateTools.length} candidate tools (ordering preserved: ${selectionMethod === 'embeddings_candidates'})`);
const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod);
return {
@@ -735,33 +763,59 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
}
private async callAI(prompt: string, maxTokens: number = 1000): Promise<string> {
const response = await fetch(`${this.config.endpoint}/v1/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.config.apiKey}`
},
body: JSON.stringify({
model: this.config.model,
messages: [{ role: 'user', content: prompt }],
max_tokens: maxTokens,
temperature: 0.3
})
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`AI API error: ${response.status} - ${errorText}`);
}
const data = await response.json();
const content = data.choices?.[0]?.message?.content;
const endpoint = this.config.endpoint;
const apiKey = this.config.apiKey;
const model = this.config.model;
if (!content) {
throw new Error('No response from AI model');
// Simple headers - add auth only if API key exists
let headers: Record<string, string> = {
'Content-Type': 'application/json'
};
// Add authentication if API key is provided
if (apiKey) {
headers['Authorization'] = `Bearer ${apiKey}`;
console.log('[AI PIPELINE] Using API key authentication');
} else {
console.log('[AI PIPELINE] No API key - making request without authentication');
}
// Simple request body
const requestBody = {
model,
messages: [{ role: 'user', content: prompt }],
max_tokens: maxTokens,
temperature: 0.3
};
try {
// FIXED: Use direct fetch since entire pipeline is already queued at query.ts level
const response = await fetch(`${endpoint}/v1/chat/completions`, {
method: 'POST',
headers,
body: JSON.stringify(requestBody)
});
return content;
if (!response.ok) {
const errorText = await response.text();
console.error(`[AI PIPELINE] AI API Error ${response.status}:`, errorText);
throw new Error(`AI API error: ${response.status} - ${errorText}`);
}
const data = await response.json();
const content = data.choices?.[0]?.message?.content;
if (!content) {
console.error('[AI PIPELINE] No response content:', data);
throw new Error('No response from AI model');
}
return content;
} catch (error) {
console.error('[AI PIPELINE] AI service call failed:', error.message);
throw error;
}
}
async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {