From a0955c2e580b33db928fd06d9faa99b14aecb073 Mon Sep 17 00:00:00 2001
From: overcuriousity <overcuriousity@posteo.org>
Date: Mon, 4 Aug 2025 21:05:15 +0200
Subject: [PATCH] Wait for embeddings initialization in AI pipeline; relabel AI query UI

---
 src/components/AIQueryInterface.astro | 24 +++++------
 src/styles/global.css                 |  1 +
 src/utils/aiPipeline.ts               | 13 +++++-
 src/utils/embeddings.ts               | 57 ++++++++++++++++++++-------
 4 files changed, 67 insertions(+), 28 deletions(-)
diff --git a/src/components/AIQueryInterface.astro b/src/components/AIQueryInterface.astro
index 0c8d86e..9b80036 100644
--- a/src/components/AIQueryInterface.astro
+++ b/src/components/AIQueryInterface.astro
@@ -15,7 +15,7 @@ const domainAgnosticSoftware = data['domain-agnostic-software'] || [];
           <path d="M9 11H5a2 2 0 0 0-2 2v7a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7a2 2 0 0 0-2-2h-4"/>
           <path d="M9 11V7a3 3 0 0 1 6 0v4"/>
         </svg>
-        KI-gestützte Workflow-Empfehlungen
+        Forensic AI
       </h2>
       <p id="ai-description" class="text-muted" style="max-width: 700px; margin: 0 auto; line-height: 1.6;">
         Beschreiben Sie Ihr forensisches Szenario und erhalten Sie maßgeschneiderte Workflow-Empfehlungen 
@@ -169,16 +169,16 @@ const domainAgnosticSoftware = data['domain-agnostic-software'] || [];
           <!-- Micro-task Progress -->
           <div id="micro-task-progress" class="micro-task-progress hidden">
             <div class="micro-task-header">
-              <span class="micro-task-label">🔬 Micro-Task Analyse</span>
+              <span class="micro-task-label">🔬 micro-Agent-Analysis</span>
               <span id="micro-task-counter" class="micro-task-counter">1/6</span>
             </div>
             <div class="micro-task-steps">
-              <div class="micro-step" data-step="scenario">📋 Szenario</div>
-              <div class="micro-step" data-step="approach">🎯 Ansatz</div>
-              <div class="micro-step" data-step="considerations">⚠️ Kritisches</div>
-              <div class="micro-step" data-step="tools">🔧 Tools</div>
-              <div class="micro-step" data-step="knowledge">📚 Wissen</div>
-              <div class="micro-step" data-step="final">✅ Final</div>
+              <div class="micro-step" data-step="scenario">📋 Problemanalyse</div>
+              <div class="micro-step" data-step="approach">🎯 Ermittlungsansatz</div>
+              <div class="micro-step" data-step="considerations">⚠️ Herausforderungen</div>
+              <div class="micro-step" data-step="tools">🔧 Methoden</div>
+              <div class="micro-step" data-step="knowledge">📚 Evaluation</div>
+              <div class="micro-step" data-step="final">✅ Audit-Trail</div>
             </div>
           </div>
           
@@ -292,13 +292,13 @@ class AIQueryInterface {
     return {
       workflow: {
         placeholder: "Beschreiben Sie Ihr forensisches Szenario... z.B. 'Verdacht auf Ransomware-Angriff auf Windows-Domänencontroller'",
-        description: "Beschreiben Sie Ihr forensisches Szenario und erhalten Sie maßgeschneiderte Workflow-Empfehlungen.",
+        description: "Beschreiben Sie Ihre Untersuchungssituation und erhalten Empfehlungen für alle Phasen der Untersuchung.",
         submitText: "Empfehlungen generieren",
         loadingText: "Analysiere Szenario und generiere Empfehlungen..."
       },
       tool: {
         placeholder: "Beschreiben Sie Ihr Problem... z.B. 'Analyse von Android-Backups mit WhatsApp-Nachrichten'",
-        description: "Beschreiben Sie Ihr Problem und erhalten Sie 1-3 gezielt passende Empfehlungen.",
+        description: "Beschreiben Sie Ihre Untersuchungssituation und erhalten Empfehlungen für eine spezifische Aufgabenstellung.",
         submitText: "Empfehlungen finden",
         loadingText: "Analysiere Anforderungen und suche passende Methode..."
       }
@@ -706,7 +706,7 @@ class AIQueryInterface {
 
     const html = `
       <div class="workflow-container">
-        ${this.renderHeader('Empfohlener DFIR-Workflow', originalQuery)}
+        ${this.renderHeader('Untersuchungsansatz', originalQuery)}
         ${this.renderContextualAnalysis(recommendation, 'workflow')}
         ${this.renderBackgroundKnowledge(recommendation.background_knowledge)}
         ${this.renderWorkflowPhases(toolsByPhase, phaseOrder, phaseNames)}
@@ -721,7 +721,7 @@ class AIQueryInterface {
   displayToolResults(recommendation, originalQuery) {
     const html = `
       <div class="tool-results-container">
-        ${this.renderHeader('Passende Empfehlungen', originalQuery)}
+        ${this.renderHeader('Handlungsempfehlung', originalQuery)}
         ${this.renderContextualAnalysis(recommendation, 'tool')}
         ${this.renderBackgroundKnowledge(recommendation.background_knowledge)}
         ${this.renderToolRecommendations(recommendation.recommended_tools)}
diff --git a/src/styles/global.css b/src/styles/global.css
index ca7ceee..79d1dce 100644
--- a/src/styles/global.css
+++ b/src/styles/global.css
@@ -2015,6 +2015,7 @@ input[type="checkbox"] {
   gap: 1rem;
   max-width: 1200px;
   margin: 0 auto;
+  margin-top: 1rem;
 }
 
 .phase-header {
diff --git a/src/utils/aiPipeline.ts b/src/utils/aiPipeline.ts
index 34dd1ee..5b14b4d 100644
--- a/src/utils/aiPipeline.ts
+++ b/src/utils/aiPipeline.ts
@@ -357,6 +357,17 @@ class ImprovedMicroTaskAIPipeline {
     let candidateConcepts: any[] = [];
     let selectionMethod = 'unknown';
     
+    // Wait for embeddings initialization before the similarity search; on failure, log and continue
+    if (process.env.AI_EMBEDDINGS_ENABLED === 'true') {
+      try {
+        console.log('[AI PIPELINE] Waiting for embeddings initialization...');
+        await embeddingsService.waitForInitialization();
+        console.log('[AI PIPELINE] Embeddings ready, proceeding with similarity search');
+      } catch (error) {
+        console.error('[AI PIPELINE] Embeddings initialization failed, falling back to full dataset:', error);
+      }
+    }
+    
     if (embeddingsService.isEnabled()) {
       const embeddingsStart = Date.now();
       const similarItems = await embeddingsService.findSimilar(
@@ -425,7 +436,7 @@ class ImprovedMicroTaskAIPipeline {
         );
       }
     } else {
-      console.log(`[AI PIPELINE] Embeddings disabled, using full dataset`);
+      console.log(`[AI PIPELINE] Embeddings disabled or not ready, using full dataset`);
       candidateTools = toolsData.tools;
       candidateConcepts = toolsData.concepts;
       selectionMethod = 'full_dataset';
diff --git a/src/utils/embeddings.ts b/src/utils/embeddings.ts
index b7bbbaa..d70d0ca 100644
--- a/src/utils/embeddings.ts
+++ b/src/utils/embeddings.ts
@@ -31,6 +31,7 @@ interface SimilarityResult extends EmbeddingData {
 class EmbeddingsService {
   private embeddings: EmbeddingData[] = [];
   private isInitialized = false;
+  private initializationPromise: Promise<void> | null = null; // in-flight initialization, shared by concurrent callers
   private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
   private readonly batchSize: number;
   private readonly batchDelay: number;
@@ -42,7 +43,25 @@ class EmbeddingsService {
     this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
   }
 
+  // Starts initialization at most once at a time; concurrent callers receive the same in-flight promise.
   async initialize(): Promise<void> {
+    // If initialization is already in progress, wait for it
+    if (this.initializationPromise) {
+      return this.initializationPromise;
+    }
+
+    // If already initialized, return immediately
+    if (this.isInitialized) {
+      return Promise.resolve();
+    }
+
+    // Start initialization and store the promise
+    this.initializationPromise = this.performInitialization();
+    return this.initializationPromise;
+  }
+
+  // Performs the actual initialization work; invoked only through initialize().
+  private async performInitialization(): Promise<void> {
     if (!this.enabled) {
       console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
       return;
@@ -74,9 +93,29 @@ class EmbeddingsService {
     } catch (error) {
       console.error('[EMBEDDINGS] Failed to initialize:', error);
       this.isInitialized = false;
+      throw error;
+    } finally {
+      this.initializationPromise = null;
     }
   }
 
+  async waitForInitialization(): Promise<void> {
+    if (!this.enabled) {
+      return Promise.resolve();
+    }
+
+    if (this.isInitialized) {
+      return Promise.resolve();
+    }
+
+    if (this.initializationPromise) {
+      await this.initializationPromise;
+      return;
+    }
+
+    return this.initialize();
+  }
+
   private hashData(data: any): string {
     return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32);
   }
@@ -127,7 +166,6 @@ class EmbeddingsService {
       'Content-Type': 'application/json'
     };
 
-    // API key is optional for Ollama but required for Mistral/OpenAI
     if (apiKey) {
       headers['Authorization'] = `Bearer ${apiKey}`;
     }
@@ -148,12 +186,10 @@ class EmbeddingsService {
 
     const data = await response.json();
 
-    // Detect Ollama format
     if (Array.isArray(data.embeddings)) {
       return data.embeddings;
     }
 
-    // Detect OpenAI/Mistral format
     if (Array.isArray(data.data)) {
       return data.data.map((item: any) => item.embedding);
     }
@@ -170,7 +206,6 @@ class EmbeddingsService {
     const contents = allItems.map(item => this.createContentString(item));
     this.embeddings = [];
 
-    // Process in batches to respect rate limits
     for (let i = 0; i < contents.length; i += this.batchSize) {
       const batch = contents.slice(i, i + this.batchSize);
       const batchItems = allItems.slice(i, i + this.batchSize);
@@ -198,7 +233,6 @@ class EmbeddingsService {
           });
         });
         
-        // Rate limiting delay between batches
         if (i + this.batchSize < contents.length) {
           await new Promise(resolve => setTimeout(resolve, this.batchDelay));
         }
@@ -213,7 +247,6 @@ class EmbeddingsService {
   }
 
   public async embedText(text: string): Promise<number[]> {
-    // Re‑use the private batch helper to avoid auth duplication
     const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
     return embedding;
   }
@@ -239,25 +272,21 @@ class EmbeddingsService {
     }
 
     try {
-      // Generate embedding for query
       const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
       const queryEmbedding = queryEmbeddings[0];
 
       console.log(`[EMBEDDINGS] Computing similarities for ${this.embeddings.length} items`);
 
-      // Calculate similarities - properly typed
       const similarities: SimilarityResult[] = this.embeddings.map(item => ({
         ...item,
         similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
       }));
 
-      // Filter by threshold and sort by similarity (descending - highest first)
       const results = similarities
         .filter(item => item.similarity >= threshold)
-        .sort((a, b) => b.similarity - a.similarity) // CRITICAL: Ensure descending order
+        .sort((a, b) => b.similarity - a.similarity) 
         .slice(0, maxResults);
 
-      // ENHANCED: Verify ordering is correct
       const orderingValid = results.every((item, index) => {
         if (index === 0) return true;
         return item.similarity <= results[index - 1].similarity;
@@ -270,15 +299,13 @@ class EmbeddingsService {
         });
       }
 
-      // ENHANCED: Log top results for debugging
       console.log(`[EMBEDDINGS] Found ${results.length} similar items (threshold: ${threshold})`);
       if (results.length > 0) {
-        console.log('[EMBEDDINGS] Top 5 similarity matches:');
-        results.slice(0, 5).forEach((item, idx) => {
+        console.log('[EMBEDDINGS] Top 10 similarity matches:');
+        results.slice(0, 10).forEach((item, idx) => {
           console.log(`  ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`);
         });
         
-        // Verify first result is indeed the highest
         const topSimilarity = results[0].similarity;
         const hasHigherSimilarity = results.some(item => item.similarity > topSimilarity);
         if (hasHigherSimilarity) {