semanticsearch #5
@ -16,6 +16,7 @@
|
||||
"dotenv": "^16.4.5",
|
||||
"jose": "^5.2.0",
|
||||
"js-yaml": "^4.1.0",
|
||||
"json-stable-stringify": "^1.3.0",
|
||||
"jsonwebtoken": "^9.0.2",
|
||||
"zod": "^3.25.76"
|
||||
},
|
||||
|
@ -663,9 +663,15 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
return true;
|
||||
});
|
||||
|
||||
// FIXED: Preserve semantic order when semantic search is used
|
||||
if (semanticSearchEnabled && lastSemanticResults) {
|
||||
filteredTools.sort(
|
||||
(a, b) => (b._semanticSimilarity || 0) - (a._semanticSimilarity || 0)
|
||||
);
|
||||
}
|
||||
|
||||
/* existing code continues */
|
||||
const finalResults = semanticSearchEnabled && lastSemanticResults
|
||||
? filteredTools // Already sorted by semantic similarity
|
||||
? filteredTools // now properly re-sorted
|
||||
: (searchTerm && window.prioritizeSearchResults
|
||||
? window.prioritizeSearchResults(filteredTools, searchTerm)
|
||||
: filteredTools);
|
||||
@ -673,10 +679,14 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
updateResultsCounter(finalResults.length);
|
||||
updateSemanticStatus(lastSemanticResults);
|
||||
|
||||
window.dispatchEvent(new CustomEvent('toolsFiltered', {
|
||||
detail: finalResults,
|
||||
semanticSearch: semanticSearchEnabled && !!lastSemanticResults
|
||||
}));
|
||||
window.dispatchEvent(
|
||||
new CustomEvent('toolsFiltered', {
|
||||
detail: {
|
||||
tools: finalResults,
|
||||
semanticSearch: semanticSearchEnabled && !!lastSemanticResults,
|
||||
},
|
||||
})
|
||||
);
|
||||
}
|
||||
|
||||
function resetPrimaryFilters() {
|
||||
|
@ -6,7 +6,7 @@ export const prerender = false;
|
||||
|
||||
export const POST: APIRoute = async ({ request }) => {
|
||||
try {
|
||||
const { query, maxResults = 50, threshold = 0.15 } = await request.json();
|
||||
const { query, maxResults = 50, threshold = 0.45 } = await request.json();
|
||||
|
||||
if (!query || typeof query !== 'string') {
|
||||
return new Response(JSON.stringify({
|
||||
@ -47,16 +47,19 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
|
||||
// Map similarity results back to full tool objects, preserving similarity ranking
|
||||
const rankedTools = similarItems
|
||||
.map(similarItem => {
|
||||
.map((similarItem, index) => {
|
||||
const tool = toolsData.tools.find(t => t.name === similarItem.name);
|
||||
return tool ? {
|
||||
return tool
|
||||
? {
|
||||
...tool,
|
||||
_semanticSimilarity: similarItem.similarity,
|
||||
_semanticRank: similarItems.indexOf(similarItem) + 1
|
||||
} : null;
|
||||
_semanticRank: index + 1, // already sorted
|
||||
}
|
||||
: null;
|
||||
})
|
||||
.filter(Boolean);
|
||||
|
||||
|
||||
return new Response(JSON.stringify({
|
||||
success: true,
|
||||
query: query.trim(),
|
||||
|
@ -488,8 +488,7 @@ const phases = data.phases;
|
||||
// REPLACE the existing toolsFiltered event listener in index.astro with this enhanced version:
|
||||
|
||||
window.addEventListener('toolsFiltered', (event) => {
|
||||
const filtered = event.detail;
|
||||
const semanticSearch = event.semanticSearch || false;
|
||||
const { tools: filtered, semanticSearch } = event.detail;
|
||||
const currentView = document.querySelector('.view-toggle.active')?.getAttribute('data-view');
|
||||
|
||||
if (currentView === 'matrix' || currentView === 'ai') {
|
||||
|
@ -2,6 +2,9 @@
|
||||
import { promises as fs } from 'fs';
|
||||
import path from 'path';
|
||||
import { getCompressedToolsDataForAI } from './dataService.js';
|
||||
import 'dotenv/config';
|
||||
import crypto from 'crypto';
|
||||
import stringify from 'json-stable-stringify';
|
||||
|
||||
interface EmbeddingData {
|
||||
id: string;
|
||||
@ -117,11 +120,13 @@ class EmbeddingsService {
|
||||
|
||||
// Load current tools / concepts and generate a hash
|
||||
const toolsData = await getCompressedToolsDataForAI();
|
||||
const currentDataHash = this.hashData(toolsData); // <- keep the old helper
|
||||
// (SHA-256, xxHash etc.)
|
||||
const currentDataHash = await this.hashToolsFile(); // SHA-256 hex digest of the raw tools.yaml file
|
||||
|
||||
// Try to read an existing file
|
||||
const existing = await this.loadEmbeddings();
|
||||
console.log('[EMBEDDINGS] Current hash:', currentDataHash);
|
||||
console.log('[EMBEDDINGS] Existing file version:', existing?.version);
|
||||
console.log('[EMBEDDINGS] Existing embeddings length:', existing?.embeddings?.length);
|
||||
|
||||
const cacheIsUsable =
|
||||
existing &&
|
||||
@ -150,8 +155,10 @@ class EmbeddingsService {
|
||||
}
|
||||
}
|
||||
|
||||
private hashData(data: any): string {
|
||||
return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32);
|
||||
/**
 * Computes a SHA-256 digest over the raw text of `src/data/tools.yaml`,
 * resolved relative to the current working directory.
 *
 * @returns The digest encoded as a hexadecimal string.
 * @throws Any error from `fs.readFile` propagates to the caller (no catch here).
 */
private async hashToolsFile(): Promise<string> {
|
||||
// Path is built from process.cwd(), so the result depends on where the process was started.
const file = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
|
||||
// Read the file as UTF-8 text; the hash is taken over this text as-is, without parsing it.
const raw = await fs.readFile(file, 'utf8');
|
||||
return crypto.createHash('sha256').update(raw).digest('hex'); // 64-char hex
|
||||
}
|
||||
|
||||
private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> {
|
||||
@ -350,11 +357,15 @@ class EmbeddingsService {
|
||||
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
|
||||
}));
|
||||
|
||||
const topScore = Math.max(...similarities.map(s => s.similarity));
|
||||
const dynamicCutOff = Math.max(threshold, topScore * 0.85);
|
||||
|
||||
const results = similarities
|
||||
.filter(item => item.similarity >= threshold)
|
||||
.filter(item => item.similarity >= dynamicCutOff)
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, maxResults);
|
||||
|
||||
|
||||
const orderingValid = results.every((item, index) => {
|
||||
if (index === 0) return true;
|
||||
return item.similarity <= results[index - 1].similarity;
|
||||
|
Loading…
x
Reference in New Issue
Block a user