From a0955c2e580b33db928fd06d9faa99b14aecb073 Mon Sep 17 00:00:00 2001 From: overcuriousity Date: Mon, 4 Aug 2025 21:05:15 +0200 Subject: [PATCH] Progress --- src/components/AIQueryInterface.astro | 24 +++++------ src/styles/global.css | 1 + src/utils/aiPipeline.ts | 13 +++++- src/utils/embeddings.ts | 57 ++++++++++++++++++++------- 4 files changed, 67 insertions(+), 28 deletions(-) diff --git a/src/components/AIQueryInterface.astro b/src/components/AIQueryInterface.astro index 0c8d86e..9b80036 100644 --- a/src/components/AIQueryInterface.astro +++ b/src/components/AIQueryInterface.astro @@ -15,7 +15,7 @@ const domainAgnosticSoftware = data['domain-agnostic-software'] || []; - KI-gestützte Workflow-Empfehlungen + Forensic AI

Beschreiben Sie Ihr forensisches Szenario und erhalten Sie maßgeschneiderte Workflow-Empfehlungen @@ -169,16 +169,16 @@ const domainAgnosticSoftware = data['domain-agnostic-software'] || [];

@@ -292,13 +292,13 @@ class AIQueryInterface { return { workflow: { placeholder: "Beschreiben Sie Ihr forensisches Szenario... z.B. 'Verdacht auf Ransomware-Angriff auf Windows-Domänencontroller'", - description: "Beschreiben Sie Ihr forensisches Szenario und erhalten Sie maßgeschneiderte Workflow-Empfehlungen.", + description: "Beschreiben Sie Ihre Untersuchungssituation und erhalten Empfehlungen für alle Phasen der Untersuchung.", submitText: "Empfehlungen generieren", loadingText: "Analysiere Szenario und generiere Empfehlungen..." }, tool: { placeholder: "Beschreiben Sie Ihr Problem... z.B. 'Analyse von Android-Backups mit WhatsApp-Nachrichten'", - description: "Beschreiben Sie Ihr Problem und erhalten Sie 1-3 gezielt passende Empfehlungen.", + description: "Beschreiben Sie Ihre Untersuchungssituation und erhalten Empfehlungen für eine spezifische Aufgabenstellung.", submitText: "Empfehlungen finden", loadingText: "Analysiere Anforderungen und suche passende Methode..." } @@ -706,7 +706,7 @@ class AIQueryInterface { const html = `
- ${this.renderHeader('Empfohlener DFIR-Workflow', originalQuery)} + ${this.renderHeader('Untersuchungsansatz', originalQuery)} ${this.renderContextualAnalysis(recommendation, 'workflow')} ${this.renderBackgroundKnowledge(recommendation.background_knowledge)} ${this.renderWorkflowPhases(toolsByPhase, phaseOrder, phaseNames)} @@ -721,7 +721,7 @@ class AIQueryInterface { displayToolResults(recommendation, originalQuery) { const html = `
- ${this.renderHeader('Passende Empfehlungen', originalQuery)} + ${this.renderHeader('Handlungsempfehlung', originalQuery)} ${this.renderContextualAnalysis(recommendation, 'tool')} ${this.renderBackgroundKnowledge(recommendation.background_knowledge)} ${this.renderToolRecommendations(recommendation.recommended_tools)} diff --git a/src/styles/global.css b/src/styles/global.css index ca7ceee..79d1dce 100644 --- a/src/styles/global.css +++ b/src/styles/global.css @@ -2015,6 +2015,7 @@ input[type="checkbox"] { gap: 1rem; max-width: 1200px; margin: 0 auto; + margin-top: 1rem; } .phase-header { diff --git a/src/utils/aiPipeline.ts b/src/utils/aiPipeline.ts index 34dd1ee..5b14b4d 100644 --- a/src/utils/aiPipeline.ts +++ b/src/utils/aiPipeline.ts @@ -357,6 +357,17 @@ class ImprovedMicroTaskAIPipeline { let candidateConcepts: any[] = []; let selectionMethod = 'unknown'; + // WAIT for embeddings initialization if embeddings are enabled + if (process.env.AI_EMBEDDINGS_ENABLED === 'true') { + try { + console.log('[AI PIPELINE] Waiting for embeddings initialization...'); + await embeddingsService.waitForInitialization(); + console.log('[AI PIPELINE] Embeddings ready, proceeding with similarity search'); + } catch (error) { + console.error('[AI PIPELINE] Embeddings initialization failed, falling back to full dataset:', error); + } + } + if (embeddingsService.isEnabled()) { const embeddingsStart = Date.now(); const similarItems = await embeddingsService.findSimilar( @@ -425,7 +436,7 @@ class ImprovedMicroTaskAIPipeline { ); } } else { - console.log(`[AI PIPELINE] Embeddings disabled, using full dataset`); + console.log(`[AI PIPELINE] Embeddings disabled or not ready, using full dataset`); candidateTools = toolsData.tools; candidateConcepts = toolsData.concepts; selectionMethod = 'full_dataset'; diff --git a/src/utils/embeddings.ts b/src/utils/embeddings.ts index b7bbbaa..d70d0ca 100644 --- a/src/utils/embeddings.ts +++ b/src/utils/embeddings.ts @@ -31,6 +31,7 @@ interface SimilarityResult extends EmbeddingData { class EmbeddingsService { private embeddings: EmbeddingData[] = []; private isInitialized = false; + private initializationPromise: Promise | null = null; // ADD THIS LINE private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json'); private readonly batchSize: number; private readonly batchDelay: number; @@ -42,7 +43,25 @@ class EmbeddingsService { this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10); } + // REPLACE the existing initialize method with this: async initialize(): Promise { + // If initialization is already in progress, wait for it + if (this.initializationPromise) { + return this.initializationPromise; + } + + // If already initialized, return immediately + if (this.isInitialized) { + return Promise.resolve(); + } + + // Start initialization and store the promise + this.initializationPromise = this.performInitialization(); + return this.initializationPromise; + } + + // ADD THIS NEW METHOD: + private async performInitialization(): Promise { if (!this.enabled) { console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization'); return; @@ -74,9 +93,29 @@ class EmbeddingsService { } catch (error) { console.error('[EMBEDDINGS] Failed to initialize:', error); this.isInitialized = false; + throw error; + } finally { + this.initializationPromise = null; } } + async waitForInitialization(): Promise { + if (!this.enabled) { + return Promise.resolve(); + } + + if (this.isInitialized) { + return Promise.resolve(); + } + + if (this.initializationPromise) { + await this.initializationPromise; + return; + } + + return this.initialize(); + } + private hashData(data: any): string { return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32); } @@ -127,7 +166,6 @@ class EmbeddingsService { 'Content-Type': 'application/json' }; - // API key is optional for Ollama but required for Mistral/OpenAI if (apiKey) { headers['Authorization'] = `Bearer ${apiKey}`; } @@ -148,12 +186,10 @@ class EmbeddingsService { const data = await response.json(); - // Detect Ollama format if (Array.isArray(data.embeddings)) { return data.embeddings; } - // Detect OpenAI/Mistral format if (Array.isArray(data.data)) { return data.data.map((item: any) => item.embedding); } @@ -170,7 +206,6 @@ class EmbeddingsService { const contents = allItems.map(item => this.createContentString(item)); this.embeddings = []; - // Process in batches to respect rate limits for (let i = 0; i < contents.length; i += this.batchSize) { const batch = contents.slice(i, i + this.batchSize); const batchItems = allItems.slice(i, i + this.batchSize); @@ -198,7 +233,6 @@ class EmbeddingsService { }); }); - // Rate limiting delay between batches if (i + this.batchSize < contents.length) { await new Promise(resolve => setTimeout(resolve, this.batchDelay)); } @@ -213,7 +247,6 @@ class EmbeddingsService { } public async embedText(text: string): Promise { - // Re‑use the private batch helper to avoid auth duplication const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]); return embedding; } @@ -239,25 +272,21 @@ class EmbeddingsService { } try { - // Generate embedding for query const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]); const queryEmbedding = queryEmbeddings[0]; console.log(`[EMBEDDINGS] Computing similarities for ${this.embeddings.length} items`); - // Calculate similarities - properly typed const similarities: SimilarityResult[] = this.embeddings.map(item => ({ ...item, similarity: this.cosineSimilarity(queryEmbedding, item.embedding) })); - // Filter by threshold and sort by similarity (descending - highest first) const results = similarities .filter(item => item.similarity >= threshold) - .sort((a, b) => b.similarity - a.similarity) // CRITICAL: Ensure descending order + .sort((a, b) => b.similarity - a.similarity) .slice(0, maxResults); - // ENHANCED: Verify ordering is correct const orderingValid = results.every((item, index) => { if (index === 0) return true; return item.similarity <= results[index - 1].similarity; @@ -270,15 +299,13 @@ class EmbeddingsService { }); } - // ENHANCED: Log top results for debugging console.log(`[EMBEDDINGS] Found ${results.length} similar items (threshold: ${threshold})`); if (results.length > 0) { - console.log('[EMBEDDINGS] Top 5 similarity matches:'); - results.slice(0, 5).forEach((item, idx) => { + console.log('[EMBEDDINGS] Top 10 similarity matches:'); + results.slice(0, 10).forEach((item, idx) => { console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`); }); - // Verify first result is indeed the highest const topSimilarity = results[0].similarity; const hasHigherSimilarity = results.some(item => item.similarity > topSimilarity); if (hasHigherSimilarity) {