semanticsearch #5

Merged
mstoeck3 merged 4 commits from semanticsearch into main 2025-08-06 21:12:07 +00:00
5 changed files with 49 additions and 25 deletions
Showing only changes of commit 507e57cdd9 - Show all commits

View File

@@ -16,6 +16,7 @@
"dotenv": "^16.4.5", "dotenv": "^16.4.5",
"jose": "^5.2.0", "jose": "^5.2.0",
"js-yaml": "^4.1.0", "js-yaml": "^4.1.0",
"json-stable-stringify": "^1.3.0",
"jsonwebtoken": "^9.0.2", "jsonwebtoken": "^9.0.2",
"zod": "^3.25.76" "zod": "^3.25.76"
}, },

View File

@@ -663,9 +663,15 @@ const sortedTags = Object.entries(tagFrequency)
return true; return true;
}); });
// FIXED: Preserve semantic order when semantic search is used if (semanticSearchEnabled && lastSemanticResults) {
filteredTools.sort(
(a, b) => (b._semanticSimilarity || 0) - (a._semanticSimilarity || 0)
);
}
/* existing code continues */
const finalResults = semanticSearchEnabled && lastSemanticResults const finalResults = semanticSearchEnabled && lastSemanticResults
? filteredTools // Already sorted by semantic similarity ? filteredTools // now properly re-sorted
: (searchTerm && window.prioritizeSearchResults : (searchTerm && window.prioritizeSearchResults
? window.prioritizeSearchResults(filteredTools, searchTerm) ? window.prioritizeSearchResults(filteredTools, searchTerm)
: filteredTools); : filteredTools);
@@ -673,10 +679,14 @@ const sortedTags = Object.entries(tagFrequency)
updateResultsCounter(finalResults.length); updateResultsCounter(finalResults.length);
updateSemanticStatus(lastSemanticResults); updateSemanticStatus(lastSemanticResults);
window.dispatchEvent(new CustomEvent('toolsFiltered', { window.dispatchEvent(
detail: finalResults, new CustomEvent('toolsFiltered', {
semanticSearch: semanticSearchEnabled && !!lastSemanticResults detail: {
})); tools: finalResults,
semanticSearch: semanticSearchEnabled && !!lastSemanticResults,
},
})
);
} }
function resetPrimaryFilters() { function resetPrimaryFilters() {

View File

@@ -6,7 +6,7 @@ export const prerender = false;
export const POST: APIRoute = async ({ request }) => { export const POST: APIRoute = async ({ request }) => {
try { try {
const { query, maxResults = 50, threshold = 0.15 } = await request.json(); const { query, maxResults = 50, threshold = 0.45 } = await request.json();
if (!query || typeof query !== 'string') { if (!query || typeof query !== 'string') {
return new Response(JSON.stringify({ return new Response(JSON.stringify({
@@ -47,16 +47,19 @@ export const POST: APIRoute = async ({ request }) => {
// Map similarity results back to full tool objects, preserving similarity ranking // Map similarity results back to full tool objects, preserving similarity ranking
const rankedTools = similarItems const rankedTools = similarItems
.map(similarItem => { .map((similarItem, index) => {
const tool = toolsData.tools.find(t => t.name === similarItem.name); const tool = toolsData.tools.find(t => t.name === similarItem.name);
return tool ? { return tool
? {
...tool, ...tool,
_semanticSimilarity: similarItem.similarity, _semanticSimilarity: similarItem.similarity,
_semanticRank: similarItems.indexOf(similarItem) + 1 _semanticRank: index + 1, // already sorted
} : null; }
: null;
}) })
.filter(Boolean); .filter(Boolean);
return new Response(JSON.stringify({ return new Response(JSON.stringify({
success: true, success: true,
query: query.trim(), query: query.trim(),

View File

@@ -488,8 +488,7 @@ const phases = data.phases;
// REPLACE the existing toolsFiltered event listener in index.astro with this enhanced version: // REPLACE the existing toolsFiltered event listener in index.astro with this enhanced version:
window.addEventListener('toolsFiltered', (event) => { window.addEventListener('toolsFiltered', (event) => {
const filtered = event.detail; const { tools: filtered, semanticSearch } = event.detail;
const semanticSearch = event.semanticSearch || false;
const currentView = document.querySelector('.view-toggle.active')?.getAttribute('data-view'); const currentView = document.querySelector('.view-toggle.active')?.getAttribute('data-view');
if (currentView === 'matrix' || currentView === 'ai') { if (currentView === 'matrix' || currentView === 'ai') {

View File

@@ -2,6 +2,9 @@
import { promises as fs } from 'fs'; import { promises as fs } from 'fs';
import path from 'path'; import path from 'path';
import { getCompressedToolsDataForAI } from './dataService.js'; import { getCompressedToolsDataForAI } from './dataService.js';
import 'dotenv/config';
import crypto from 'crypto';
import stringify from 'json-stable-stringify';
interface EmbeddingData { interface EmbeddingData {
id: string; id: string;
@@ -117,11 +120,13 @@ class EmbeddingsService {
// Load current tools / concepts and generate a hash // Load current tools / concepts and generate a hash
const toolsData = await getCompressedToolsDataForAI(); const toolsData = await getCompressedToolsDataForAI();
const currentDataHash = this.hashData(toolsData); // <- keep the old helper const currentDataHash = await this.hashToolsFile(); // <- keep the old helper
// (SHA-256, xxHash etc.)
// Try to read an existing file // Try to read an existing file
const existing = await this.loadEmbeddings(); const existing = await this.loadEmbeddings();
console.log('[EMBEDDINGS] Current hash:', currentDataHash);
console.log('[EMBEDDINGS] Existing file version:', existing?.version);
console.log('[EMBEDDINGS] Existing embeddings length:', existing?.embeddings?.length);
const cacheIsUsable = const cacheIsUsable =
existing && existing &&
@@ -150,8 +155,10 @@ class EmbeddingsService {
} }
} }
private hashData(data: any): string { private async hashToolsFile(): Promise<string> {
return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32); const file = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
const raw = await fs.readFile(file, 'utf8');
return crypto.createHash('sha256').update(raw).digest('hex'); // 64-char hex
} }
private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> { private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> {
@@ -350,11 +357,15 @@ class EmbeddingsService {
similarity: this.cosineSimilarity(queryEmbedding, item.embedding) similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
})); }));
const topScore = Math.max(...similarities.map(s => s.similarity));
const dynamicCutOff = Math.max(threshold, topScore * 0.85);
const results = similarities const results = similarities
.filter(item => item.similarity >= threshold) .filter(item => item.similarity >= dynamicCutOff)
.sort((a, b) => b.similarity - a.similarity) .sort((a, b) => b.similarity - a.similarity)
.slice(0, maxResults); .slice(0, maxResults);
const orderingValid = results.every((item, index) => { const orderingValid = results.every((item, index) => {
if (index === 0) return true; if (index === 0) return true;
return item.similarity <= results[index - 1].similarity; return item.similarity <= results[index - 1].similarity;