This commit is contained in:
overcuriousity 2025-08-04 21:05:15 +02:00
parent 4b0d208ef5
commit a0955c2e58
4 changed files with 67 additions and 28 deletions

View File

@ -15,7 +15,7 @@ const domainAgnosticSoftware = data['domain-agnostic-software'] || [];
<path d="M9 11H5a2 2 0 0 0-2 2v7a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7a2 2 0 0 0-2-2h-4"/> <path d="M9 11H5a2 2 0 0 0-2 2v7a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7a2 2 0 0 0-2-2h-4"/>
<path d="M9 11V7a3 3 0 0 1 6 0v4"/> <path d="M9 11V7a3 3 0 0 1 6 0v4"/>
</svg> </svg>
KI-gestützte Workflow-Empfehlungen Forensic AI
</h2> </h2>
<p id="ai-description" class="text-muted" style="max-width: 700px; margin: 0 auto; line-height: 1.6;"> <p id="ai-description" class="text-muted" style="max-width: 700px; margin: 0 auto; line-height: 1.6;">
Beschreiben Sie Ihr forensisches Szenario und erhalten Sie maßgeschneiderte Workflow-Empfehlungen Beschreiben Sie Ihr forensisches Szenario und erhalten Sie maßgeschneiderte Workflow-Empfehlungen
@ -169,16 +169,16 @@ const domainAgnosticSoftware = data['domain-agnostic-software'] || [];
<!-- Micro-task Progress --> <!-- Micro-task Progress -->
<div id="micro-task-progress" class="micro-task-progress hidden"> <div id="micro-task-progress" class="micro-task-progress hidden">
<div class="micro-task-header"> <div class="micro-task-header">
<span class="micro-task-label">🔬 Micro-Task Analyse</span> <span class="micro-task-label">🔬 micro-Agent-Analysis</span>
<span id="micro-task-counter" class="micro-task-counter">1/6</span> <span id="micro-task-counter" class="micro-task-counter">1/6</span>
</div> </div>
<div class="micro-task-steps"> <div class="micro-task-steps">
<div class="micro-step" data-step="scenario">📋 Szenario</div> <div class="micro-step" data-step="scenario">📋 Problemanalyse</div>
<div class="micro-step" data-step="approach">🎯 Ansatz</div> <div class="micro-step" data-step="approach">🎯 Ermittlungsansatz</div>
<div class="micro-step" data-step="considerations">⚠️ Kritisches</div> <div class="micro-step" data-step="considerations">⚠️ Herausforderungen</div>
<div class="micro-step" data-step="tools">🔧 Tools</div> <div class="micro-step" data-step="tools">🔧 Methoden</div>
<div class="micro-step" data-step="knowledge">📚 Wissen</div> <div class="micro-step" data-step="knowledge">📚 Evaluation</div>
<div class="micro-step" data-step="final">✅ Final</div> <div class="micro-step" data-step="final">✅ Audit-Trail</div>
</div> </div>
</div> </div>
@ -292,13 +292,13 @@ class AIQueryInterface {
return { return {
workflow: { workflow: {
placeholder: "Beschreiben Sie Ihr forensisches Szenario... z.B. 'Verdacht auf Ransomware-Angriff auf Windows-Domänencontroller'", placeholder: "Beschreiben Sie Ihr forensisches Szenario... z.B. 'Verdacht auf Ransomware-Angriff auf Windows-Domänencontroller'",
description: "Beschreiben Sie Ihr forensisches Szenario und erhalten Sie maßgeschneiderte Workflow-Empfehlungen.", description: "Beschreiben Sie Ihre Untersuchungssituation und erhalten Empfehlungen für alle Phasen der Untersuchung.",
submitText: "Empfehlungen generieren", submitText: "Empfehlungen generieren",
loadingText: "Analysiere Szenario und generiere Empfehlungen..." loadingText: "Analysiere Szenario und generiere Empfehlungen..."
}, },
tool: { tool: {
placeholder: "Beschreiben Sie Ihr Problem... z.B. 'Analyse von Android-Backups mit WhatsApp-Nachrichten'", placeholder: "Beschreiben Sie Ihr Problem... z.B. 'Analyse von Android-Backups mit WhatsApp-Nachrichten'",
description: "Beschreiben Sie Ihr Problem und erhalten Sie 1-3 gezielt passende Empfehlungen.", description: "Beschreiben Sie Ihre Untersuchungssituation und erhalten Empfehlungen für eine spezifische Aufgabenstellung.",
submitText: "Empfehlungen finden", submitText: "Empfehlungen finden",
loadingText: "Analysiere Anforderungen und suche passende Methode..." loadingText: "Analysiere Anforderungen und suche passende Methode..."
} }
@ -706,7 +706,7 @@ class AIQueryInterface {
const html = ` const html = `
<div class="workflow-container"> <div class="workflow-container">
${this.renderHeader('Empfohlener DFIR-Workflow', originalQuery)} ${this.renderHeader('Untersuchungsansatz', originalQuery)}
${this.renderContextualAnalysis(recommendation, 'workflow')} ${this.renderContextualAnalysis(recommendation, 'workflow')}
${this.renderBackgroundKnowledge(recommendation.background_knowledge)} ${this.renderBackgroundKnowledge(recommendation.background_knowledge)}
${this.renderWorkflowPhases(toolsByPhase, phaseOrder, phaseNames)} ${this.renderWorkflowPhases(toolsByPhase, phaseOrder, phaseNames)}
@ -721,7 +721,7 @@ class AIQueryInterface {
displayToolResults(recommendation, originalQuery) { displayToolResults(recommendation, originalQuery) {
const html = ` const html = `
<div class="tool-results-container"> <div class="tool-results-container">
${this.renderHeader('Passende Empfehlungen', originalQuery)} ${this.renderHeader('Handlungsempfehlung', originalQuery)}
${this.renderContextualAnalysis(recommendation, 'tool')} ${this.renderContextualAnalysis(recommendation, 'tool')}
${this.renderBackgroundKnowledge(recommendation.background_knowledge)} ${this.renderBackgroundKnowledge(recommendation.background_knowledge)}
${this.renderToolRecommendations(recommendation.recommended_tools)} ${this.renderToolRecommendations(recommendation.recommended_tools)}

View File

@ -2015,6 +2015,7 @@ input[type="checkbox"] {
gap: 1rem; gap: 1rem;
max-width: 1200px; max-width: 1200px;
margin: 0 auto; margin: 0 auto;
margin-top: 1rem;
} }
.phase-header { .phase-header {

View File

@ -357,6 +357,17 @@ class ImprovedMicroTaskAIPipeline {
let candidateConcepts: any[] = []; let candidateConcepts: any[] = [];
let selectionMethod = 'unknown'; let selectionMethod = 'unknown';
// WAIT for embeddings initialization if embeddings are enabled
if (process.env.AI_EMBEDDINGS_ENABLED === 'true') {
try {
console.log('[AI PIPELINE] Waiting for embeddings initialization...');
await embeddingsService.waitForInitialization();
console.log('[AI PIPELINE] Embeddings ready, proceeding with similarity search');
} catch (error) {
console.error('[AI PIPELINE] Embeddings initialization failed, falling back to full dataset:', error);
}
}
if (embeddingsService.isEnabled()) { if (embeddingsService.isEnabled()) {
const embeddingsStart = Date.now(); const embeddingsStart = Date.now();
const similarItems = await embeddingsService.findSimilar( const similarItems = await embeddingsService.findSimilar(
@ -425,7 +436,7 @@ class ImprovedMicroTaskAIPipeline {
); );
} }
} else { } else {
console.log(`[AI PIPELINE] Embeddings disabled, using full dataset`); console.log(`[AI PIPELINE] Embeddings disabled or not ready, using full dataset`);
candidateTools = toolsData.tools; candidateTools = toolsData.tools;
candidateConcepts = toolsData.concepts; candidateConcepts = toolsData.concepts;
selectionMethod = 'full_dataset'; selectionMethod = 'full_dataset';

View File

@ -31,6 +31,7 @@ interface SimilarityResult extends EmbeddingData {
class EmbeddingsService { class EmbeddingsService {
private embeddings: EmbeddingData[] = []; private embeddings: EmbeddingData[] = [];
private isInitialized = false; private isInitialized = false;
// Promise of the initialization run currently stored by initialize(); null when none is stored.
private initializationPromise: Promise<void> | null = null;
private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json'); private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
private readonly batchSize: number; private readonly batchSize: number;
private readonly batchDelay: number; private readonly batchDelay: number;
@ -42,7 +43,25 @@ class EmbeddingsService {
this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10); this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
} }
// Public entry point: starts initialization at most once and hands the same promise to concurrent callers.
/**
 * Starts embeddings initialization, or joins a run whose promise is already stored.
 *
 * The stored-promise check comes first, so callers arriving while a promise is
 * stored all receive that same promise instead of starting a second run.
 *
 * @returns The stored promise if one exists, an already-resolved promise if
 *          `isInitialized` is set, otherwise the promise of a newly started run.
 */
async initialize(): Promise<void> { async initialize(): Promise<void> {
// A run is already stored: share its promise with this caller
if (this.initializationPromise) {
return this.initializationPromise;
}
// Nothing left to do once initialization has completed
if (this.isInitialized) {
return Promise.resolve();
}
// Start a new run and remember its promise so later callers can join it
this.initializationPromise = this.performInitialization();
return this.initializationPromise;
}
// Does the actual initialization work; invoked only through initialize().
private async performInitialization(): Promise<void> {
if (!this.enabled) { if (!this.enabled) {
console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization'); console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
return; return;
@ -74,9 +93,29 @@ class EmbeddingsService {
} catch (error) { } catch (error) {
console.error('[EMBEDDINGS] Failed to initialize:', error); console.error('[EMBEDDINGS] Failed to initialize:', error);
this.isInitialized = false; this.isInitialized = false;
throw error;
} finally {
this.initializationPromise = null;
} }
} }
/**
 * Resolves once there is nothing left to wait for before using the embeddings.
 *
 * Returns immediately when the service is disabled or already initialized.
 * Otherwise it awaits the stored initialization promise, or starts a run via
 * initialize() when none is stored.
 *
 * @returns A promise that settles together with the awaited initialization;
 *          a rejection of that run propagates to the caller.
 */
async waitForInitialization(): Promise<void> {
  const nothingToWaitFor = !this.enabled || this.isInitialized;
  if (nothingToWaitFor) {
    return;
  }

  // Join the stored run when there is one, otherwise kick one off.
  const pendingRun = this.initializationPromise || this.initialize();
  await pendingRun;
}
/**
 * Builds a short fingerprint string for `data`.
 *
 * The value is JSON-serialized, base64-encoded, and truncated to the first
 * 32 characters of that encoding.
 *
 * NOTE(review): 32 base64 characters cover only the first 24 bytes of the JSON
 * text, so inputs that differ only after that prefix produce the same result.
 * If callers rely on this for change detection, a real digest (e.g. SHA-256)
 * would be needed — confirm against the call sites.
 *
 * @param data Any JSON-serializable value.
 * @returns A string of at most 32 base64 characters.
 */
private hashData(data: any): string { private hashData(data: any): string {
return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32); return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32);
} }
@ -127,7 +166,6 @@ class EmbeddingsService {
'Content-Type': 'application/json' 'Content-Type': 'application/json'
}; };
// API key is optional for Ollama but required for Mistral/OpenAI
if (apiKey) { if (apiKey) {
headers['Authorization'] = `Bearer ${apiKey}`; headers['Authorization'] = `Bearer ${apiKey}`;
} }
@ -148,12 +186,10 @@ class EmbeddingsService {
const data = await response.json(); const data = await response.json();
// Detect Ollama format
if (Array.isArray(data.embeddings)) { if (Array.isArray(data.embeddings)) {
return data.embeddings; return data.embeddings;
} }
// Detect OpenAI/Mistral format
if (Array.isArray(data.data)) { if (Array.isArray(data.data)) {
return data.data.map((item: any) => item.embedding); return data.data.map((item: any) => item.embedding);
} }
@ -170,7 +206,6 @@ class EmbeddingsService {
const contents = allItems.map(item => this.createContentString(item)); const contents = allItems.map(item => this.createContentString(item));
this.embeddings = []; this.embeddings = [];
// Process in batches to respect rate limits
for (let i = 0; i < contents.length; i += this.batchSize) { for (let i = 0; i < contents.length; i += this.batchSize) {
const batch = contents.slice(i, i + this.batchSize); const batch = contents.slice(i, i + this.batchSize);
const batchItems = allItems.slice(i, i + this.batchSize); const batchItems = allItems.slice(i, i + this.batchSize);
@ -198,7 +233,6 @@ class EmbeddingsService {
}); });
}); });
// Rate limiting delay between batches
if (i + this.batchSize < contents.length) { if (i + this.batchSize < contents.length) {
await new Promise(resolve => setTimeout(resolve, this.batchDelay)); await new Promise(resolve => setTimeout(resolve, this.batchDelay));
} }
@ -213,7 +247,6 @@ class EmbeddingsService {
} }
/**
 * Produces the embedding vector for a single piece of text.
 *
 * The text is lower-cased and sent as a one-element batch through
 * generateEmbeddingsBatch(); the first vector of the response is returned.
 *
 * @param text The text to embed.
 * @returns The first embedding returned for the one-element batch.
 */
public async embedText(text: string): Promise<number[]> { public async embedText(text: string): Promise<number[]> {
// Reuse the private batch helper to avoid auth duplication
const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]); const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
return embedding; return embedding;
} }
@ -239,25 +272,21 @@ class EmbeddingsService {
} }
try { try {
// Generate embedding for query
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]); const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
const queryEmbedding = queryEmbeddings[0]; const queryEmbedding = queryEmbeddings[0];
console.log(`[EMBEDDINGS] Computing similarities for ${this.embeddings.length} items`); console.log(`[EMBEDDINGS] Computing similarities for ${this.embeddings.length} items`);
// Calculate similarities - properly typed
const similarities: SimilarityResult[] = this.embeddings.map(item => ({ const similarities: SimilarityResult[] = this.embeddings.map(item => ({
...item, ...item,
similarity: this.cosineSimilarity(queryEmbedding, item.embedding) similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
})); }));
// Filter by threshold and sort by similarity (descending - highest first)
const results = similarities const results = similarities
.filter(item => item.similarity >= threshold) .filter(item => item.similarity >= threshold)
.sort((a, b) => b.similarity - a.similarity) // CRITICAL: Ensure descending order .sort((a, b) => b.similarity - a.similarity)
.slice(0, maxResults); .slice(0, maxResults);
// ENHANCED: Verify ordering is correct
const orderingValid = results.every((item, index) => { const orderingValid = results.every((item, index) => {
if (index === 0) return true; if (index === 0) return true;
return item.similarity <= results[index - 1].similarity; return item.similarity <= results[index - 1].similarity;
@ -270,15 +299,13 @@ class EmbeddingsService {
}); });
} }
// ENHANCED: Log top results for debugging
console.log(`[EMBEDDINGS] Found ${results.length} similar items (threshold: ${threshold})`); console.log(`[EMBEDDINGS] Found ${results.length} similar items (threshold: ${threshold})`);
if (results.length > 0) { if (results.length > 0) {
console.log('[EMBEDDINGS] Top 5 similarity matches:'); console.log('[EMBEDDINGS] Top 10 similarity matches:');
results.slice(0, 5).forEach((item, idx) => { results.slice(0, 10).forEach((item, idx) => {
console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`); console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`);
}); });
// Verify first result is indeed the highest
const topSimilarity = results[0].similarity; const topSimilarity = results[0].similarity;
const hasHigherSimilarity = results.some(item => item.similarity > topSimilarity); const hasHigherSimilarity = results.some(item => item.similarity > topSimilarity);
if (hasHigherSimilarity) { if (hasHigherSimilarity) {