main #11

Merged
mstoeck3 merged 66 commits from main into forensic-ai 2025-08-11 12:02:56 +00:00
5 changed files with 49 additions and 25 deletions
Showing only changes of commit 507e57cdd9 - Show all commits

View File

@ -16,6 +16,7 @@
"dotenv": "^16.4.5",
"jose": "^5.2.0",
"js-yaml": "^4.1.0",
"json-stable-stringify": "^1.3.0",
"jsonwebtoken": "^9.0.2",
"zod": "^3.25.76"
},

View File

@ -663,20 +663,30 @@ const sortedTags = Object.entries(tagFrequency)
return true;
});
// Preserve semantic order when semantic search is used: re-sort the filtered tools by descending similarity
if (semanticSearchEnabled && lastSemanticResults) {
filteredTools.sort(
(a, b) => (b._semanticSimilarity || 0) - (a._semanticSimilarity || 0)
);
}
/* existing code continues */
const finalResults = semanticSearchEnabled && lastSemanticResults
? filteredTools // Already sorted by semantic similarity
? filteredTools // now properly re-sorted
: (searchTerm && window.prioritizeSearchResults
? window.prioritizeSearchResults(filteredTools, searchTerm)
: filteredTools);
? window.prioritizeSearchResults(filteredTools, searchTerm)
: filteredTools);
updateResultsCounter(finalResults.length);
updateSemanticStatus(lastSemanticResults);
window.dispatchEvent(new CustomEvent('toolsFiltered', {
detail: finalResults,
semanticSearch: semanticSearchEnabled && !!lastSemanticResults
}));
window.dispatchEvent(
new CustomEvent('toolsFiltered', {
detail: {
tools: finalResults,
semanticSearch: semanticSearchEnabled && !!lastSemanticResults,
},
})
);
}
function resetPrimaryFilters() {

View File

@ -6,7 +6,7 @@ export const prerender = false;
export const POST: APIRoute = async ({ request }) => {
try {
const { query, maxResults = 50, threshold = 0.15 } = await request.json();
const { query, maxResults = 50, threshold = 0.45 } = await request.json();
if (!query || typeof query !== 'string') {
return new Response(JSON.stringify({
@ -47,16 +47,19 @@ export const POST: APIRoute = async ({ request }) => {
// Map similarity results back to full tool objects, preserving similarity ranking
const rankedTools = similarItems
.map(similarItem => {
.map((similarItem, index) => {
const tool = toolsData.tools.find(t => t.name === similarItem.name);
return tool ? {
...tool,
_semanticSimilarity: similarItem.similarity,
_semanticRank: similarItems.indexOf(similarItem) + 1
} : null;
return tool
? {
...tool,
_semanticSimilarity: similarItem.similarity,
_semanticRank: index + 1, // already sorted
}
: null;
})
.filter(Boolean);
return new Response(JSON.stringify({
success: true,
query: query.trim(),

View File

@ -488,8 +488,7 @@ const phases = data.phases;
// Handle 'toolsFiltered' events dispatched after the tool list has been filtered:
window.addEventListener('toolsFiltered', (event) => {
const filtered = event.detail;
const semanticSearch = event.semanticSearch || false;
const { tools: filtered, semanticSearch } = event.detail;
const currentView = document.querySelector('.view-toggle.active')?.getAttribute('data-view');
if (currentView === 'matrix' || currentView === 'ai') {

View File

@ -2,6 +2,9 @@
import { promises as fs } from 'fs';
import path from 'path';
import { getCompressedToolsDataForAI } from './dataService.js';
import 'dotenv/config';
import crypto from 'crypto';
import stringify from 'json-stable-stringify';
interface EmbeddingData {
id: string;
@ -117,11 +120,13 @@ class EmbeddingsService {
// Load current tools / concepts and generate a hash
const toolsData = await getCompressedToolsDataForAI();
const currentDataHash = this.hashData(toolsData); // <- keep the old helper
// (SHA-256, xxHash etc.)
const currentDataHash = await this.hashToolsFile(); // SHA-256 hex digest of src/data/tools.yaml
// Try to read an existing file
const existing = await this.loadEmbeddings();
console.log('[EMBEDDINGS] Current hash:', currentDataHash);
console.log('[EMBEDDINGS] Existing file version:', existing?.version);
console.log('[EMBEDDINGS] Existing embeddings length:', existing?.embeddings?.length);
const cacheIsUsable =
existing &&
@ -150,8 +155,10 @@ class EmbeddingsService {
}
}
private hashData(data: any): string {
return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32);
/**
 * Fingerprints the tools definition file.
 *
 * Reads `src/data/tools.yaml`, resolved against the current working
 * directory of the process, as UTF-8 text and hashes it with SHA-256.
 *
 * @returns The digest as a hex string (64 characters for SHA-256).
 * @throws Rejects with the underlying `fs.readFile` error when the file
 *         cannot be read.
 */
private async hashToolsFile(): Promise<string> {
  const toolsPath = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
  const contents = await fs.readFile(toolsPath, 'utf8');

  const hasher = crypto.createHash('sha256');
  hasher.update(contents);
  return hasher.digest('hex');
}
private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> {
@ -350,11 +357,15 @@ class EmbeddingsService {
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
}));
const topScore = Math.max(...similarities.map(s => s.similarity));
const dynamicCutOff = Math.max(threshold, topScore * 0.85);
const results = similarities
.filter(item => item.similarity >= threshold)
.filter(item => item.similarity >= dynamicCutOff)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, maxResults);
const orderingValid = results.every((item, index) => {
if (index === 0) return true;
return item.similarity <= results[index - 1].similarity;