From 507e57cdd93f044d5c0ecb88eae65b425e8235e8 Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Wed, 6 Aug 2025 16:38:42 +0200 Subject: [PATCH] fixes --- package.json | 1 + src/components/ToolFilters.astro | 30 ++++++++++++++++++++---------- src/pages/api/search/semantic.ts | 17 ++++++++++------- src/pages/index.astro | 3 +-- src/utils/embeddings.ts | 23 +++++++++++++++++------ 5 files changed, 49 insertions(+), 25 deletions(-) diff --git a/package.json b/package.json index 7367a40..bb69365 100644 --- a/package.json +++ b/package.json @@ -16,6 +16,7 @@ "dotenv": "^16.4.5", "jose": "^5.2.0", "js-yaml": "^4.1.0", + "json-stable-stringify": "^1.3.0", "jsonwebtoken": "^9.0.2", "zod": "^3.25.76" }, diff --git a/src/components/ToolFilters.astro b/src/components/ToolFilters.astro index 43ed53e..7e3ccae 100644 --- a/src/components/ToolFilters.astro +++ b/src/components/ToolFilters.astro @@ -663,20 +663,30 @@ const sortedTags = Object.entries(tagFrequency) return true; }); - // FIXED: Preserve semantic order when semantic search is used - const finalResults = semanticSearchEnabled && lastSemanticResults - ? filteredTools // Already sorted by semantic similarity - : (searchTerm && window.prioritizeSearchResults - ? window.prioritizeSearchResults(filteredTools, searchTerm) - : filteredTools); + if (semanticSearchEnabled && lastSemanticResults) { + filteredTools.sort( + (a, b) => (b._semanticSimilarity || 0) - (a._semanticSimilarity || 0) + ); + } + + /* existing code continues */ + const finalResults = semanticSearchEnabled && lastSemanticResults + ? filteredTools // now properly re-sorted + : (searchTerm && window.prioritizeSearchResults + ? 
window.prioritizeSearchResults(filteredTools, searchTerm) + : filteredTools); updateResultsCounter(finalResults.length); updateSemanticStatus(lastSemanticResults); - window.dispatchEvent(new CustomEvent('toolsFiltered', { - detail: finalResults, - semanticSearch: semanticSearchEnabled && !!lastSemanticResults - })); + window.dispatchEvent( + new CustomEvent('toolsFiltered', { + detail: { + tools: finalResults, + semanticSearch: semanticSearchEnabled && !!lastSemanticResults, + }, + }) + ); } function resetPrimaryFilters() { diff --git a/src/pages/api/search/semantic.ts b/src/pages/api/search/semantic.ts index 32d4d22..c12df29 100644 --- a/src/pages/api/search/semantic.ts +++ b/src/pages/api/search/semantic.ts @@ -6,7 +6,7 @@ export const prerender = false; export const POST: APIRoute = async ({ request }) => { try { - const { query, maxResults = 50, threshold = 0.15 } = await request.json(); + const { query, maxResults = 50, threshold = 0.45 } = await request.json(); if (!query || typeof query !== 'string') { return new Response(JSON.stringify({ @@ -47,16 +47,19 @@ export const POST: APIRoute = async ({ request }) => { // Map similarity results back to full tool objects, preserving similarity ranking const rankedTools = similarItems - .map(similarItem => { + .map((similarItem, index) => { const tool = toolsData.tools.find(t => t.name === similarItem.name); - return tool ? { - ...tool, - _semanticSimilarity: similarItem.similarity, - _semanticRank: similarItems.indexOf(similarItem) + 1 - } : null; + return tool + ? 
{ + ...tool, + _semanticSimilarity: similarItem.similarity, + _semanticRank: index + 1, // already sorted + } + : null; }) .filter(Boolean); + return new Response(JSON.stringify({ success: true, query: query.trim(), diff --git a/src/pages/index.astro b/src/pages/index.astro index 78607bf..a9897ea 100644 --- a/src/pages/index.astro +++ b/src/pages/index.astro @@ -488,8 +488,7 @@ const phases = data.phases; // REPLACE the existing toolsFiltered event listener in index.astro with this enhanced version: window.addEventListener('toolsFiltered', (event) => { - const filtered = event.detail; - const semanticSearch = event.semanticSearch || false; + const { tools: filtered, semanticSearch } = event.detail; const currentView = document.querySelector('.view-toggle.active')?.getAttribute('data-view'); if (currentView === 'matrix' || currentView === 'ai') { diff --git a/src/utils/embeddings.ts b/src/utils/embeddings.ts index fa52128..08704aa 100644 --- a/src/utils/embeddings.ts +++ b/src/utils/embeddings.ts @@ -2,6 +2,9 @@ import { promises as fs } from 'fs'; import path from 'path'; import { getCompressedToolsDataForAI } from './dataService.js'; +import 'dotenv/config'; +import crypto from 'crypto'; +import stringify from 'json-stable-stringify'; interface EmbeddingData { id: string; @@ -117,11 +120,13 @@ class EmbeddingsService { // Load current tools / concepts and generate a hash const toolsData = await getCompressedToolsDataForAI(); - const currentDataHash = this.hashData(toolsData); // <- keep the old helper - // (SHA-256, xxHash etc.) 
+ const currentDataHash = await this.hashToolsFile(); // <- keep the old helper // Try to read an existing file const existing = await this.loadEmbeddings(); + console.log('[EMBEDDINGS] Current hash:', currentDataHash); + console.log('[EMBEDDINGS] Existing file version:', existing?.version); + console.log('[EMBEDDINGS] Existing embeddings length:', existing?.embeddings?.length); const cacheIsUsable = existing && @@ -150,8 +155,10 @@ class EmbeddingsService { } } - private hashData(data: any): string { - return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32); + private async hashToolsFile(): Promise<string> { + const file = path.join(process.cwd(), 'src', 'data', 'tools.yaml'); + const raw = await fs.readFile(file, 'utf8'); + return crypto.createHash('sha256').update(raw).digest('hex'); // 64-char hex } private async loadEmbeddings(): Promise { @@ -350,11 +357,15 @@ class EmbeddingsService { similarity: this.cosineSimilarity(queryEmbedding, item.embedding) })); + const topScore = Math.max(...similarities.map(s => s.similarity)); + const dynamicCutOff = Math.max(threshold, topScore * 0.85); + const results = similarities - .filter(item => item.similarity >= threshold) - .sort((a, b) => b.similarity - a.similarity) + .filter(item => item.similarity >= dynamicCutOff) + .sort((a, b) => b.similarity - a.similarity) .slice(0, maxResults); + const orderingValid = results.every((item, index) => { if (index === 0) return true; return item.similarity <= results[index - 1].similarity;