From 1b59f5585e38146ad7d48b52ed8b722bea81ac8e Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Wed, 6 Aug 2025 15:06:53 +0200 Subject: [PATCH] semanticsearch --- src/components/ToolFilters.astro | 211 +++++++++++++---- src/pages/api/ai/embeddings-status.ts | 37 +++ src/pages/api/ai/embeddings.status.ts | 22 -- src/pages/api/debug-env.ts | 42 ++++ src/pages/api/search/semantic.ts | 83 +++++++ src/pages/index.astro | 279 ++++++++++++++++------ src/styles/global.css | 150 ++++++++++++ src/utils/aiPipeline.ts | 15 +- src/utils/embeddings.ts | 329 +++++++++++++++++++------- 9 files changed, 934 insertions(+), 234 deletions(-) create mode 100644 src/pages/api/ai/embeddings-status.ts delete mode 100644 src/pages/api/ai/embeddings.status.ts create mode 100644 src/pages/api/debug-env.ts create mode 100644 src/pages/api/search/semantic.ts diff --git a/src/components/ToolFilters.astro b/src/components/ToolFilters.astro index be956ea..43ed53e 100644 --- a/src/components/ToolFilters.astro +++ b/src/components/ToolFilters.astro @@ -25,33 +25,55 @@ const sortedTags = Object.entries(tagFrequency)
-
-
-
-

🔍 Suche

-
-
-
- - - - +
+
+
+

🔍 Suche

+
+
+
+
+ + + + +
+ + +
+ + + +
+ + + - -
-
@@ -289,6 +311,10 @@ const sortedTags = Object.entries(tagFrequency) const elements = { searchInput: document.getElementById('search-input'), clearSearch: document.getElementById('clear-search'), + semanticContainer: document.getElementById('semantic-search-container'), + semanticCheckbox: document.getElementById('semantic-search-enabled'), + semanticStatus: document.getElementById('semantic-status'), + semanticResultsCount: document.querySelector('.semantic-results-count'), domainSelect: document.getElementById('domain-select'), phaseSelect: document.getElementById('phase-select'), typeSelect: document.getElementById('type-select'), @@ -324,6 +350,54 @@ const sortedTags = Object.entries(tagFrequency) let selectedTags = new Set(); let selectedPhase = ''; let isTagCloudExpanded = false; + let semanticSearchEnabled = false; + let semanticSearchAvailable = false; + let lastSemanticResults = null; + +// Check embeddings availability + async function checkEmbeddingsAvailability() { + try { + const response = await fetch('/api/ai/embeddings-status'); + if (!response.ok) { + throw new Error(`HTTP ${response.status}`); + } + + const data = await response.json(); + semanticSearchAvailable = data.embeddings?.enabled && data.embeddings?.initialized; + + if (semanticSearchAvailable && elements.semanticContainer) { + elements.semanticContainer.classList.remove('hidden'); + } + } catch (error) { + console.error('[EMBEDDINGS] Status check failed:', error.message); + semanticSearchAvailable = false; + } + } + + // Semantic search function + async function performSemanticSearch(query) { + if (!semanticSearchAvailable || !query.trim()) { + return null; + } + + try { + const response = await fetch('/api/search/semantic', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ query: query.trim() }) + }); + + if (!response.ok) { + throw new Error(`HTTP ${response.status}`); + } + + const data = await response.json(); + return data.results || []; + } catch (error) { + console.error('[SEMANTIC] Search failed:', error); + return null; + } + } function toggleCollapsible(toggleBtn, content, storageKey) { const isCollapsed = toggleBtn.getAttribute('data-collapsed') === 'true'; @@ -494,7 +568,19 @@ const sortedTags = Object.entries(tagFrequency) : `${count} von ${total} Tools`; } - function filterTools() { + function updateSemanticStatus(results) { + if (!elements.semanticStatus || !elements.semanticResultsCount) return; + + if (semanticSearchEnabled && results?.length > 0) { + elements.semanticStatus.classList.remove('hidden'); + elements.semanticResultsCount.textContent = `${results.length} semantische Treffer`; + } else { + elements.semanticStatus.classList.add('hidden'); + } + } + + // FIXED: Consolidated filtering logic with semantic search support + async function filterTools() { const searchTerm = elements.searchInput.value.trim().toLowerCase(); const selectedDomain = elements.domainSelect.value; const selectedPhaseFromSelect = elements.phaseSelect.value; @@ -508,15 +594,32 @@ const sortedTags = Object.entries(tagFrequency) const activePhase = selectedPhaseFromSelect || selectedPhase; - const filtered = window.toolsData.filter(tool => { - if (searchTerm && !( - tool.name.toLowerCase().includes(searchTerm) || - tool.description.toLowerCase().includes(searchTerm) || - (tool.tags || []).some(tag => tag.toLowerCase().includes(searchTerm)) - )) { - return false; - } + let filteredTools = window.toolsData; + let semanticResults = null; + + // CONSOLIDATED: Use semantic search if enabled and search term exists + if (semanticSearchEnabled && semanticSearchAvailable && searchTerm) { + semanticResults = await performSemanticSearch(searchTerm); + lastSemanticResults = semanticResults; + if (semanticResults?.length > 0) { + filteredTools = [...semanticResults]; + } + } else { + lastSemanticResults = null; + + // Traditional text-based search + if (searchTerm) { + filteredTools = window.toolsData.filter(tool => + tool.name.toLowerCase().includes(searchTerm) || + tool.description.toLowerCase().includes(searchTerm) || + (tool.tags || []).some(tag => tag.toLowerCase().includes(searchTerm)) + ); + } + } + + // Apply additional filters to the results + filteredTools = filteredTools.filter(tool => { if (selectedDomain && !(tool.domains || []).includes(selectedDomain)) { return false; } @@ -560,13 +663,20 @@ const sortedTags = Object.entries(tagFrequency) return true; }); - const finalResults = searchTerm && window.prioritizeSearchResults - ? window.prioritizeSearchResults(filtered, searchTerm) - : filtered; + // FIXED: Preserve semantic order when semantic search is used + const finalResults = semanticSearchEnabled && lastSemanticResults + ? filteredTools // Already sorted by semantic similarity + : (searchTerm && window.prioritizeSearchResults + ? window.prioritizeSearchResults(filteredTools, searchTerm) + : filteredTools); updateResultsCounter(finalResults.length); + updateSemanticStatus(lastSemanticResults); - window.dispatchEvent(new CustomEvent('toolsFiltered', { detail: finalResults })); + window.dispatchEvent(new CustomEvent('toolsFiltered', { + detail: finalResults, + semanticSearch: semanticSearchEnabled && !!lastSemanticResults + })); } function resetPrimaryFilters() { @@ -599,12 +709,17 @@ const sortedTags = Object.entries(tagFrequency) function resetAllFilters() { elements.searchInput.value = ''; elements.clearSearch.classList.add('hidden'); + elements.semanticCheckbox.checked = false; + semanticSearchEnabled = false; + lastSemanticResults = null; + updateSemanticStatus(null); resetPrimaryFilters(); resetAdvancedFilters(); resetTags(); filterTagCloud(); } + // Event listeners elements.searchInput.addEventListener('input', (e) => { const hasValue = e.target.value.length > 0; elements.clearSearch.classList.toggle('hidden', !hasValue); @@ -619,16 +734,30 @@ const sortedTags = Object.entries(tagFrequency) filterTools(); }); + // Semantic search checkbox handler + if (elements.semanticCheckbox) { + elements.semanticCheckbox.addEventListener('change', (e) => { + semanticSearchEnabled = e.target.checked; + filterTools(); + }); + } + [elements.domainSelect, elements.phaseSelect, elements.typeSelect, elements.skillSelect, elements.platformSelect, elements.licenseSelect, elements.accessSelect].forEach(select => { - select.addEventListener('change', filterTools); + if (select) { + select.addEventListener('change', filterTools); + } }); [elements.hostedOnly, elements.knowledgebaseOnly].forEach(checkbox => { - checkbox.addEventListener('change', filterTools); + if (checkbox) { + checkbox.addEventListener('change', filterTools); + } }); - elements.tagCloudToggle.addEventListener('click', toggleTagCloud); + if (elements.tagCloudToggle) { + elements.tagCloudToggle.addEventListener('click', toggleTagCloud); + } elements.tagCloudItems.forEach(item => { item.addEventListener('click', () => { @@ -676,6 +805,8 @@ const sortedTags = Object.entries(tagFrequency) window.clearTagFilters = resetTags; window.clearAllFilters = resetAllFilters; + // Initialize + checkEmbeddingsAvailability(); initializeCollapsible(); initTagCloud(); filterTagCloud(); diff --git a/src/pages/api/ai/embeddings-status.ts b/src/pages/api/ai/embeddings-status.ts new file mode 100644 index 0000000..9d9b9c8 --- /dev/null +++ b/src/pages/api/ai/embeddings-status.ts @@ -0,0 +1,37 @@ +// src/pages/api/ai/embeddings-status.ts +import type { APIRoute } from 'astro'; + +export const prerender = false; + +export const GET: APIRoute = async () => { + try { + const { embeddingsService } = await import('../../../utils/embeddings.js'); + await embeddingsService.waitForInitialization(); + + const stats = embeddingsService.getStats(); + const status = stats.enabled && stats.initialized ? 'ready' : + stats.enabled && !stats.initialized ? 'initializing' : 'disabled'; + + return new Response(JSON.stringify({ + success: true, + embeddings: stats, + timestamp: new Date().toISOString(), + status: status + }), { + status: 200, + headers: { 'Content-Type': 'application/json' } + }); + + } catch (error) { + return new Response(JSON.stringify({ + success: false, + embeddings: { enabled: false, initialized: false, count: 0 }, + timestamp: new Date().toISOString(), + status: 'disabled', + error: error.message + }), { + status: 200, + headers: { 'Content-Type': 'application/json' } + }); + } +}; \ No newline at end of file diff --git a/src/pages/api/ai/embeddings.status.ts b/src/pages/api/ai/embeddings.status.ts deleted file mode 100644 index 2ca02c2..0000000 --- a/src/pages/api/ai/embeddings.status.ts +++ /dev/null @@ -1,22 +0,0 @@ -// src/pages/api/ai/embeddings-status.ts -import type { APIRoute } from 'astro'; -import { embeddingsService } from '../../../utils/embeddings.js'; -import { apiResponse, apiServerError } from '../../../utils/api.js'; - -export const prerender = false; - -export const GET: APIRoute = async () => { - try { - const stats = embeddingsService.getStats(); - - return apiResponse.success({ - embeddings: stats, - timestamp: new Date().toISOString(), - status: stats.enabled && stats.initialized ? 'ready' : - stats.enabled && !stats.initialized ? 'initializing' : 'disabled' - }); - } catch (error) { - console.error('Embeddings status error:', error); - return apiServerError.internal('Failed to get embeddings status'); - } -}; \ No newline at end of file diff --git a/src/pages/api/debug-env.ts b/src/pages/api/debug-env.ts new file mode 100644 index 0000000..3da0f50 --- /dev/null +++ b/src/pages/api/debug-env.ts @@ -0,0 +1,42 @@ +// src/pages/api/debug-env.ts +import type { APIRoute } from 'astro'; +import { debugEmbeddings } from '../../utils/embeddings.js'; + +export const GET: APIRoute = async () => { + const embeddingVars = Object.keys(process.env) + .filter(key => key.includes('EMBEDDINGS')) + .reduce((obj: Record, key) => { + obj[key] = process.env[key] || 'undefined'; + return obj; + }, {}); + + const aiVars = Object.keys(process.env) + .filter(key => key.includes('AI_')) + .reduce((obj: Record, key) => { + // Mask sensitive values + const value = process.env[key] || 'undefined'; + obj[key] = key.includes('KEY') || key.includes('SECRET') ? + (value.length > 10 ? `${value.slice(0, 6)}...${value.slice(-4)}` : value) : + value; + return obj; + }, {}); + + // Force recheck embeddings environment + await debugEmbeddings.recheckEnvironment(); + const embeddingsStatus = debugEmbeddings.getStatus(); + + return new Response(JSON.stringify({ + timestamp: new Date().toISOString(), + embeddingVars, + allAiVars: aiVars, + totalEnvVars: Object.keys(process.env).length, + embeddingsStatus, + nodeEnv: process.env.NODE_ENV, + platform: process.platform + }, null, 2), { + status: 200, + headers: { + 'Content-Type': 'application/json' + } + }); +}; \ No newline at end of file diff --git a/src/pages/api/search/semantic.ts b/src/pages/api/search/semantic.ts new file mode 100644 index 0000000..32d4d22 --- /dev/null +++ b/src/pages/api/search/semantic.ts @@ -0,0 +1,83 @@ +// src/pages/api/search/semantic.ts +import type { APIRoute } from 'astro'; +import { getToolsData } from '../../../utils/dataService.js'; + +export const prerender = false; + +export const POST: APIRoute = async ({ request }) => { + try { + const { query, maxResults = 50, threshold = 0.15 } = await request.json(); + + if (!query || typeof query !== 'string') { + return new Response(JSON.stringify({ + success: false, + error: 'Query is required' + }), { + status: 400, + headers: { 'Content-Type': 'application/json' } + }); + } + + // Import embeddings service dynamically + const { embeddingsService } = await import('../../../utils/embeddings.js'); + + // Check if embeddings are available + if (!embeddingsService.isEnabled()) { + return new Response(JSON.stringify({ + success: false, + error: 'Semantic search not available' + }), { + status: 400, + headers: { 'Content-Type': 'application/json' } + }); + } + + // Wait for embeddings initialization if needed + await embeddingsService.waitForInitialization(); + + // Get similar items using embeddings + const similarItems = await embeddingsService.findSimilar( + query.trim(), + maxResults, + threshold + ); + + // Get current tools data + const toolsData = await getToolsData(); + + // Map similarity results back to full tool objects, preserving similarity ranking + const rankedTools = similarItems + .map(similarItem => { + const tool = toolsData.tools.find(t => t.name === similarItem.name); + return tool ? { + ...tool, + _semanticSimilarity: similarItem.similarity, + _semanticRank: similarItems.indexOf(similarItem) + 1 + } : null; + }) + .filter(Boolean); + + return new Response(JSON.stringify({ + success: true, + query: query.trim(), + results: rankedTools, + totalFound: rankedTools.length, + semanticSearch: true, + threshold, + maxSimilarity: rankedTools.length > 0 ? rankedTools[0]._semanticSimilarity : 0 + }), { + status: 200, + headers: { 'Content-Type': 'application/json' } + }); + + } catch (error) { + console.error('Semantic search error:', error); + return new Response(JSON.stringify({ + success: false, + error: 'Semantic search failed' + }), { + status: 500, + headers: { 'Content-Type': 'application/json' } + }); + } +}; \ No newline at end of file diff --git a/src/pages/index.astro b/src/pages/index.astro index b5c8c35..78607bf 100644 --- a/src/pages/index.astro +++ b/src/pages/index.astro @@ -198,12 +198,15 @@ const phases = data.phases; \ No newline at end of file + + \ No newline at end of file diff --git a/src/styles/global.css b/src/styles/global.css index be43144..aa107a2 100644 --- a/src/styles/global.css +++ b/src/styles/global.css @@ -1689,6 +1689,156 @@ input[type="checkbox"] { font-size: 0.75rem; } +/* =================================================================== + SEMANTIC SEARCH STYLES - INLINE VERSION (REPLACE EXISTING) + ================================================================= */ + +/* Search row with inline semantic toggle */ +.search-row { + display: flex; + align-items: center; + gap: 1rem; +} + +.search-wrapper { + flex: 1; + position: relative; + display: flex; + align-items: center; +} + +/* Inline semantic search toggle */ +.semantic-search-inline { + flex-shrink: 0; +} + +.semantic-toggle-wrapper { + display: flex; + align-items: center; + gap: 0.5rem; + cursor: pointer; + padding: 0.5rem 0.75rem; + border-radius: 0.375rem; + border: 1px solid var(--color-border); + background-color: var(--color-bg-secondary); + transition: var(--transition-fast); + user-select: none; + white-space: nowrap; +} + +.semantic-toggle-wrapper:hover { + background-color: var(--color-bg-tertiary); + border-color: var(--color-accent); +} + +.semantic-toggle-wrapper input[type="checkbox"] { + display: none; +} + +.semantic-checkbox-custom { + width: 16px; + height: 16px; + border: 2px solid var(--color-border); + border-radius: 0.25rem; + background-color: var(--color-bg); + transition: var(--transition-fast); + position: relative; + flex-shrink: 0; +} + +.semantic-checkbox-custom::after { + content: ''; + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -50%) scale(0); + width: 8px; + height: 8px; + background-color: white; + border-radius: 0.125rem; + transition: var(--transition-fast); +} + +.semantic-toggle-wrapper input:checked + .semantic-checkbox-custom { + background-color: var(--color-accent); + border-color: var(--color-accent); +} + +.semantic-toggle-wrapper input:checked + .semantic-checkbox-custom::after { + transform: translate(-50%, -50%) scale(1); +} + +.semantic-toggle-label { + display: flex; + align-items: center; + gap: 0.375rem; + font-size: 0.8125rem; + font-weight: 500; + color: var(--color-text); +} + +.semantic-toggle-label svg { + width: 14px; + height: 14px; + color: var(--color-accent); + flex-shrink: 0; +} + +/* Semantic Status Display */ +.semantic-status { + margin-top: 0.75rem; + padding: 0.375rem 0.75rem; + background-color: var(--color-accent); + color: white; + border-radius: 1rem; + font-size: 0.75rem; + font-weight: 500; + text-align: center; + opacity: 0.9; +} + +.semantic-results-count { + display: flex; + align-items: center; + justify-content: center; + gap: 0.375rem; +} + +.semantic-results-count::before { + content: '🧠'; + font-size: 0.875rem; +} + +/* Responsive adjustments */ +@media (width <= 768px) { + .search-row { + flex-direction: column; + align-items: stretch; + gap: 0.75rem; + } + + .semantic-toggle-wrapper { + justify-content: center; + padding: 0.625rem; + } + + .semantic-toggle-label { + font-size: 0.875rem; + } +} + +@media (width <= 480px) { + .semantic-toggle-label span { + display: none; /* Hide "Semantisch" text on very small screens */ + } + + .semantic-toggle-wrapper { + padding: 0.5rem; + min-width: 40px; + justify-content: center; + } +} + /* =================================================================== 16. AI INTERFACE (CONSOLIDATED) ================================================================= */ diff --git a/src/utils/aiPipeline.ts b/src/utils/aiPipeline.ts index c1583c0..8a8c7fc 100644 --- a/src/utils/aiPipeline.ts +++ b/src/utils/aiPipeline.ts @@ -385,14 +385,13 @@ class ImprovedMicroTaskAIPipeline { context.embeddingsSimilarities = new Map(); - if (process.env.AI_EMBEDDINGS_ENABLED === 'true') { - try { - console.log('[AI PIPELINE] Waiting for embeddings initialization...'); - await embeddingsService.waitForInitialization(); - console.log('[AI PIPELINE] Embeddings ready, proceeding with similarity search'); - } catch (error) { - console.error('[AI PIPELINE] Embeddings initialization failed, falling back to full dataset:', error); - } + // Always try to initialize embeddings - let the service decide if it should be enabled + try { + console.log('[AI PIPELINE] Attempting embeddings initialization...'); + await embeddingsService.waitForInitialization(); + console.log('[AI PIPELINE] Embeddings initialization completed'); + } catch (error) { + console.error('[AI PIPELINE] Embeddings initialization failed:', error); } if (embeddingsService.isEnabled()) { diff --git a/src/utils/embeddings.ts b/src/utils/embeddings.ts index 165fb90..fa52128 100644 --- a/src/utils/embeddings.ts +++ b/src/utils/embeddings.ts @@ -35,12 +35,56 @@ class EmbeddingsService { private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json'); private readonly batchSize: number; private readonly batchDelay: number; - private readonly enabled: boolean; + private enabled: boolean = false; // Make mutable again constructor() { - this.enabled = process.env.AI_EMBEDDINGS_ENABLED === 'true'; this.batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20', 10); this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10); + + // Don't call async method from constructor - handle in initialize() instead + this.enabled = true; // Start optimistically enabled for development + } + + private async checkEnabledStatus(): Promise { + try { + // Add debugging to see what's actually in process.env + console.log('[EMBEDDINGS] Debug env check:', { + AI_EMBEDDINGS_ENABLED: process.env.AI_EMBEDDINGS_ENABLED, + envKeys: Object.keys(process.env).filter(k => k.includes('EMBEDDINGS')).length, + allEnvKeys: Object.keys(process.env).length + }); + + const envEnabled = process.env.AI_EMBEDDINGS_ENABLED; + + if (envEnabled === 'true') { + // Check if we have the required API configuration + const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT; + const model = process.env.AI_EMBEDDINGS_MODEL; + + if (!endpoint || !model) { + console.warn('[EMBEDDINGS] Embeddings enabled but API configuration missing - disabling'); + this.enabled = false; + return; + } + + console.log('[EMBEDDINGS] All requirements met - enabling embeddings'); + this.enabled = true; + return; + } + + // Check if embeddings file exists + try { + await fs.stat(this.embeddingsPath); + console.log('[EMBEDDINGS] Existing embeddings file found - enabling'); + this.enabled = true; + } catch { + console.log('[EMBEDDINGS] Embeddings not explicitly enabled - disabling'); + this.enabled = false; + } + } catch (error) { + console.error('[EMBEDDINGS] Error checking enabled status:', error); + this.enabled = false; + } } async initialize(): Promise { @@ -57,58 +101,55 @@ class EmbeddingsService { } private async performInitialization(): Promise { + // 1️⃣ Respect the on/off switch that the newer code introduced + await this.checkEnabledStatus(); if (!this.enabled) { console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization'); return; } + const initStart = Date.now(); try { - console.log('[EMBEDDINGS] Initializing embeddings system...'); - + console.log('[EMBEDDINGS] Initializing embeddings system…'); + + // Make sure the data folder exists await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true }); - - const toolsData = await getCompressedToolsDataForAI(); - const currentDataHash = this.hashData(toolsData); - - const existingEmbeddings = await this.loadEmbeddings(); - - if (existingEmbeddings && existingEmbeddings.version === currentDataHash) { + + // Load current tools / concepts and generate a hash + const toolsData = await getCompressedToolsDataForAI(); + const currentDataHash = this.hashData(toolsData); // <- keep the old helper + // (SHA-256, xxHash etc.) + + // Try to read an existing file + const existing = await this.loadEmbeddings(); + + const cacheIsUsable = + existing && + existing.version === currentDataHash && + Array.isArray(existing.embeddings) && + existing.embeddings.length > 0; + + if (cacheIsUsable) { console.log('[EMBEDDINGS] Using cached embeddings'); - this.embeddings = existingEmbeddings.embeddings; + this.embeddings = existing.embeddings; } else { - console.log('[EMBEDDINGS] Generating new embeddings...'); - await this.generateEmbeddings(toolsData, currentDataHash); + console.log('[EMBEDDINGS] Generating new embeddings…'); + // 2️⃣ Build and persist new vectors + await this.generateEmbeddings(toolsData, currentDataHash); // <- old helper } - + this.isInitialized = true; - console.log(`[EMBEDDINGS] Initialized with ${this.embeddings.length} embeddings`); - - } catch (error) { - console.error('[EMBEDDINGS] Failed to initialize:', error); + console.log(`[EMBEDDINGS] Initialized with ${this.embeddings.length} embeddings in ${Date.now() - initStart} ms`); + } catch (err) { + console.error('[EMBEDDINGS] Failed to initialize:', err); this.isInitialized = false; - throw error; + throw err; // Let the caller know – same behaviour as before } finally { + // 3️⃣ Always clear the promise so subsequent calls don't hang this.initializationPromise = null; } } - async waitForInitialization(): Promise { - if (!this.enabled) { - return Promise.resolve(); - } - - if (this.isInitialized) { - return Promise.resolve(); - } - - if (this.initializationPromise) { - await this.initializationPromise; - return; - } - - return this.initialize(); - } - private hashData(data: any): string { return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32); } @@ -152,7 +193,10 @@ class EmbeddingsService { const model = process.env.AI_EMBEDDINGS_MODEL; if (!endpoint || !model) { - throw new Error('Missing embeddings API configuration'); + const missing: string[] = []; + if (!endpoint) missing.push('AI_EMBEDDINGS_ENDPOINT'); + if (!model) missing.push('AI_EMBEDDINGS_MODEL'); + throw new Error(`Missing embeddings API configuration: ${missing.join(', ')}`); } const headers: Record = { @@ -240,10 +284,37 @@ class EmbeddingsService { } public async embedText(text: string): Promise { + if (!this.enabled || !this.isInitialized) { + throw new Error('Embeddings service not available'); + } const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]); return embedding; } + async waitForInitialization(): Promise { + // Always re-check environment status first in case variables loaded after initial check + await this.checkEnabledStatus(); + + if (!this.enabled || this.isInitialized) { + return Promise.resolve(); + } + + if (this.initializationPromise) { + await this.initializationPromise; + return; + } + + return this.initialize(); + } + + // Force re-check of environment status (useful for development) + async forceRecheckEnvironment(): Promise { + this.enabled = false; + this.isInitialized = false; + await this.checkEnabledStatus(); + console.log('[EMBEDDINGS] Environment status re-checked, enabled:', this.enabled); + } + private cosineSimilarity(a: number[], b: number[]): number { let dotProduct = 0; let normA = 0; @@ -259,54 +330,123 @@ class EmbeddingsService { } async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise { - if (!this.enabled || !this.isInitialized || this.embeddings.length === 0) { - console.log('[EMBEDDINGS] Service not available for similarity search'); + if (!this.enabled) { + console.log('[EMBEDDINGS] Service disabled for similarity search'); return []; } try { - const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]); - const queryEmbedding = queryEmbeddings[0]; - - console.log(`[EMBEDDINGS] Computing similarities for ${this.embeddings.length} items`); - - const similarities: SimilarityResult[] = this.embeddings.map(item => ({ - ...item, - similarity: this.cosineSimilarity(queryEmbedding, item.embedding) - })); - - const results = similarities - .filter(item => item.similarity >= threshold) - .sort((a, b) => b.similarity - a.similarity) - .slice(0, maxResults); - - const orderingValid = results.every((item, index) => { - if (index === 0) return true; - return item.similarity <= results[index - 1].similarity; - }); - - if (!orderingValid) { - console.error('[EMBEDDINGS] CRITICAL: Similarity ordering is broken!'); - results.forEach((item, idx) => { - console.error(` ${idx}: ${item.name} = ${item.similarity.toFixed(4)}`); - }); - } - - console.log(`[EMBEDDINGS] Found ${results.length} similar items (threshold: ${threshold})`); - if (results.length > 0) { - console.log('[EMBEDDINGS] Top 10 similarity matches:'); - results.slice(0, 10).forEach((item, idx) => { - console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`); - }); + // If we have embeddings data, use it + if (this.isInitialized && this.embeddings.length > 0) { + console.log(`[EMBEDDINGS] Using embeddings data for similarity search: ${query}`); - const topSimilarity = results[0].similarity; - const hasHigherSimilarity = results.some(item => item.similarity > topSimilarity); - if (hasHigherSimilarity) { - console.error('[EMBEDDINGS] CRITICAL: Top result is not actually the highest similarity!'); - } - } + const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]); + const queryEmbedding = queryEmbeddings[0]; - return results; + console.log(`[EMBEDDINGS] Computing similarities for ${this.embeddings.length} items`); + + const similarities: SimilarityResult[] = this.embeddings.map(item => ({ + ...item, + similarity: this.cosineSimilarity(queryEmbedding, item.embedding) + })); + + const results = similarities + .filter(item => item.similarity >= threshold) + .sort((a, b) => b.similarity - a.similarity) + .slice(0, maxResults); + + const orderingValid = results.every((item, index) => { + if (index === 0) return true; + return item.similarity <= results[index - 1].similarity; + }); + + if (!orderingValid) { + console.error('[EMBEDDINGS] CRITICAL: Similarity ordering is broken!'); + results.forEach((item, idx) => { + console.error(` ${idx}: ${item.name} = ${item.similarity.toFixed(4)}`); + }); + } + + console.log(`[EMBEDDINGS] Found ${results.length} similar items (threshold: ${threshold})`); + if (results.length > 0) { + console.log('[EMBEDDINGS] Top 10 similarity matches:'); + results.slice(0, 10).forEach((item, idx) => { + console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`); + }); + + const topSimilarity = results[0].similarity; + const hasHigherSimilarity = results.some(item => item.similarity > topSimilarity); + if (hasHigherSimilarity) { + console.error('[EMBEDDINGS] CRITICAL: Top result is not actually the highest similarity!'); + } + } + + return results; + + } else { + // Fallback: generate mock similarity results from actual tools data + console.log(`[EMBEDDINGS] No embeddings data, using fallback text matching: ${query}`); + + const { getToolsData } = await import('./dataService.js'); + const toolsData = await getToolsData(); + + const queryLower = query.toLowerCase(); + const queryWords = queryLower.split(/\s+/).filter(w => w.length > 2); + + const similarities: SimilarityResult[] = toolsData.tools + .map((tool: any) => { + let similarity = 0; + + // Name matching + if (tool.name.toLowerCase().includes(queryLower)) { + similarity += 0.8; + } + + // Description matching + if (tool.description && tool.description.toLowerCase().includes(queryLower)) { + similarity += 0.6; + } + + // Tag matching + if (tool.tags && Array.isArray(tool.tags)) { + const matchingTags = tool.tags.filter((tag: string) => + tag.toLowerCase().includes(queryLower) || queryLower.includes(tag.toLowerCase()) + ); + if (tool.tags.length > 0) { + similarity += (matchingTags.length / tool.tags.length) * 0.4; + } + } + + // Word-level matching + const toolText = `${tool.name} ${tool.description || ''} ${(tool.tags || []).join(' ')}`.toLowerCase(); + const matchingWords = queryWords.filter(word => toolText.includes(word)); + if (queryWords.length > 0) { + similarity += (matchingWords.length / queryWords.length) * 0.3; + } + + return { + id: `tool_${tool.name.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase()}`, + type: 'tool' as const, + name: tool.name, + content: toolText, + embedding: [], // Empty for fallback + metadata: { + domains: tool.domains || [], + phases: tool.phases || [], + tags: tool.tags || [], + skillLevel: tool.skillLevel, + type: tool.type + }, + similarity: Math.min(similarity, 1.0) + }; + }) + .filter(item => item.similarity >= threshold) + .sort((a, b) => b.similarity - a.similarity) + .slice(0, maxResults); + + console.log(`[EMBEDDINGS] Fallback found ${similarities.length} similar items`); + return similarities; + } } catch (error) { console.error('[EMBEDDINGS] Failed to find similar items:', error); @@ -315,26 +455,37 @@ class EmbeddingsService { } isEnabled(): boolean { - return this.enabled && this.isInitialized; + // If not enabled and not initialized, try re-checking environment + // This handles the case where environment variables loaded after initial check + if (!this.enabled && !this.isInitialized) { + // Don't await this, just trigger it and return current status + this.checkEnabledStatus().catch(console.error); + } + + return this.enabled; } getStats(): { enabled: boolean; initialized: boolean; count: number } { return { - enabled: this.enabled, + enabled: this.enabled, // Always true during development initialized: this.isInitialized, count: this.embeddings.length }; } } - - const embeddingsService = new EmbeddingsService(); export { embeddingsService, type EmbeddingData, type SimilarityResult }; -if (typeof window === 'undefined' && process.env.NODE_ENV !== 'test') { - embeddingsService.initialize().catch(error => { - console.error('[EMBEDDINGS] Auto-initialization failed:', error); - }); -} \ No newline at end of file +// Export utility functions for debugging +export const debugEmbeddings = { + async recheckEnvironment() { + return embeddingsService.forceRecheckEnvironment(); + }, + getStatus() { + return embeddingsService.getStats(); + } +}; + +// Remove auto-initialization - let it initialize lazily when first needed \ No newline at end of file