progress
This commit is contained in:
		
							parent
							
								
									78779fc8da
								
							
						
					
					
						commit
						895c476476
					
				
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										117075
									
								
								data/embeddings.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										117075
									
								
								data/embeddings.json
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										22
									
								
								src/pages/api/ai/embeddings.status.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								src/pages/api/ai/embeddings.status.ts
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,22 @@
 | 
				
			|||||||
 | 
					// src/pages/api/ai/embeddings-status.ts
 | 
				
			||||||
 | 
					import type { APIRoute } from 'astro';
 | 
				
			||||||
 | 
					import { embeddingsService } from '../../../utils/embeddings.js';
 | 
				
			||||||
 | 
					import { apiResponse, apiServerError } from '../../../utils/api.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export const prerender = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export const GET: APIRoute = async () => {
 | 
				
			||||||
 | 
					  try {
 | 
				
			||||||
 | 
					    const stats = embeddingsService.getStats();
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    return apiResponse.success({
 | 
				
			||||||
 | 
					      embeddings: stats,
 | 
				
			||||||
 | 
					      timestamp: new Date().toISOString(),
 | 
				
			||||||
 | 
					      status: stats.enabled && stats.initialized ? 'ready' : 
 | 
				
			||||||
 | 
					             stats.enabled && !stats.initialized ? 'initializing' : 'disabled'
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					  } catch (error) {
 | 
				
			||||||
 | 
					    console.error('Embeddings status error:', error);
 | 
				
			||||||
 | 
					    return apiServerError.internal('Failed to get embeddings status');
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
@ -14,7 +14,11 @@ function getEnv(key: string): string {
 | 
				
			|||||||
  return value;
 | 
					  return value;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const AI_MODEL = getEnv('AI_MODEL');
 | 
					// Use the analyzer AI for smart prompting (smaller, faster model)
 | 
				
			||||||
 | 
					const AI_ENDPOINT = getEnv('AI_ANALYZER_ENDPOINT');
 | 
				
			||||||
 | 
					const AI_API_KEY = getEnv('AI_ANALYZER_API_KEY');
 | 
				
			||||||
 | 
					const AI_MODEL = getEnv('AI_ANALYZER_MODEL');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
 | 
					const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
 | 
				
			||||||
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
 | 
					const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
 | 
				
			||||||
const RATE_LIMIT_MAX = 5; // 5 enhancement requests per minute per user
 | 
					const RATE_LIMIT_MAX = 5; // 5 enhancement requests per minute per user
 | 
				
			||||||
@ -59,29 +63,38 @@ function cleanupExpiredRateLimits() {
 | 
				
			|||||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
 | 
					setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function createEnhancementPrompt(input: string): string {
 | 
					function createEnhancementPrompt(input: string): string {
 | 
				
			||||||
  return `
 | 
					  return `Du bist eine KI für digitale Forensik-Anfragen. Der Nutzer beschreibt ein forensisches Szenario oder Problem. Analysiere die Eingabe auf Vollständigkeit und Klarheit.
 | 
				
			||||||
Du bist eine KI für digitale Forensik. Der Nutzer beschreibt ein forensisches Szenario. Analysiere die Eingabe.
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Wenn die Beschreibung unvollständig oder vage ist, stelle bis zu drei präzise Rückfragen im JSON-Array-Format, um wichtige Details zu klären (z. B. Vorfalltyp, System, Ziel, Datenquellen, Zeit, Beteiligte, rechtlicher Rahmen).
 | 
					ANALYSIERE DIESE KATEGORIEN:
 | 
				
			||||||
 | 
					1. **Vorfalltyp**: Was ist passiert? (Malware, Datendiebstahl, Compliance-Verstoß, etc.)
 | 
				
			||||||
 | 
					2. **Betroffene Systeme**: Welche Technologien/Plattformen? (Windows, Linux, Mobile, Cloud, etc.)
 | 
				
			||||||
 | 
					3. **Verfügbare Datenquellen**: Was kann untersucht werden? (Logs, Images, Memory Dumps, etc.)
 | 
				
			||||||
 | 
					4. **Untersuchungsziel**: Was soll erreicht werden? (IOCs finden, Timeline erstellen, etc.)
 | 
				
			||||||
 | 
					5. **Zeitrahmen & Dringlichkeit**: Wann ist etwas passiert? Wie dringend?
 | 
				
			||||||
 | 
					6. **Ressourcen & Constraints**: Budget, Skills, Tools, rechtliche Aspekte
 | 
				
			||||||
 | 
					7. **Beweisziele**: Dokumentation, Gerichtsverfahren, interne Aufklärung?
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Wenn die Eingabe bereits klar, spezifisch und vollständig ist, gib stattdessen nur eine leere Liste [] zurück.
 | 
					WENN die Beschreibung vollständig und spezifisch ist: Gib eine leere Liste [] zurück.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Antwortformat strikt:
 | 
					WENN wichtige Details fehlen: Formuliere 2-3 präzise Fragen, die die kritischsten Lücken schließen. Fokussiere auf Details, die die Tool-/Methoden-Auswahl stark beeinflussen.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
\`\`\`json
 | 
					FRAGE-QUALITÄT:
 | 
				
			||||||
 | 
					- Spezifisch, nicht allgemein (❌ "Mehr Details?" ✅ "Welche Betriebssysteme sind betroffen?")
 | 
				
			||||||
 | 
					- Handlungsrelevant (❌ "Wann passierte das?" ✅ "Haben Sie Logs aus der Vorfallzeit verfügbar?")
 | 
				
			||||||
 | 
					- Priorisiert nach Wichtigkeit für die forensische Analyse
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ANTWORTFORMAT (NUR JSON):
 | 
				
			||||||
[
 | 
					[
 | 
				
			||||||
  "Frage 1?",
 | 
					  "Spezifische Frage 1?",
 | 
				
			||||||
  "Frage 2?",
 | 
					  "Spezifische Frage 2?",
 | 
				
			||||||
  "Frage 3?"
 | 
					  "Spezifische Frage 3?"
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
\`\`\`
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Nutzer-Eingabe:
 | 
					NUTZER-EINGABE:
 | 
				
			||||||
${input}
 | 
					${input}
 | 
				
			||||||
  `.trim();
 | 
					  `.trim();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
export const POST: APIRoute = async ({ request }) => {
 | 
					export const POST: APIRoute = async ({ request }) => {
 | 
				
			||||||
  try {
 | 
					  try {
 | 
				
			||||||
    const authResult = await withAPIAuth(request, 'ai');
 | 
					    const authResult = await withAPIAuth(request, 'ai');
 | 
				
			||||||
@ -98,12 +111,12 @@ export const POST: APIRoute = async ({ request }) => {
 | 
				
			|||||||
    const body = await request.json();
 | 
					    const body = await request.json();
 | 
				
			||||||
    const { input } = body;
 | 
					    const { input } = body;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (!input || typeof input !== 'string' || input.length < 20) {
 | 
					    if (!input || typeof input !== 'string' || input.length < 40) {
 | 
				
			||||||
      return apiError.badRequest('Input too short for enhancement');
 | 
					      return apiError.badRequest('Input too short for enhancement (minimum 40 characters)');
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const sanitizedInput = sanitizeInput(input);
 | 
					    const sanitizedInput = sanitizeInput(input);
 | 
				
			||||||
    if (sanitizedInput.length < 20) {
 | 
					    if (sanitizedInput.length < 40) {
 | 
				
			||||||
      return apiError.badRequest('Input too short after sanitization');
 | 
					      return apiError.badRequest('Input too short after sanitization');
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -111,11 +124,11 @@ export const POST: APIRoute = async ({ request }) => {
 | 
				
			|||||||
    const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
 | 
					    const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    const aiResponse = await enqueueApiCall(() =>
 | 
					    const aiResponse = await enqueueApiCall(() =>
 | 
				
			||||||
      fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
 | 
					      fetch(`${AI_ENDPOINT}/v1/chat/completions`, {
 | 
				
			||||||
        method: 'POST',
 | 
					        method: 'POST',
 | 
				
			||||||
        headers: {
 | 
					        headers: {
 | 
				
			||||||
          'Content-Type': 'application/json',
 | 
					          'Content-Type': 'application/json',
 | 
				
			||||||
          'Authorization': `Bearer ${process.env.AI_API_KEY}`
 | 
					          'Authorization': `Bearer ${AI_API_KEY}`
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        body: JSON.stringify({
 | 
					        body: JSON.stringify({
 | 
				
			||||||
          model: AI_MODEL,
 | 
					          model: AI_MODEL,
 | 
				
			||||||
@ -125,7 +138,7 @@ export const POST: APIRoute = async ({ request }) => {
 | 
				
			|||||||
              content: systemPrompt
 | 
					              content: systemPrompt
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
          ],
 | 
					          ],
 | 
				
			||||||
          max_tokens: 200,
 | 
					          max_tokens: 300,
 | 
				
			||||||
          temperature: 0.7
 | 
					          temperature: 0.7
 | 
				
			||||||
        })
 | 
					        })
 | 
				
			||||||
      }), taskId);
 | 
					      }), taskId);
 | 
				
			||||||
@ -144,28 +157,32 @@ export const POST: APIRoute = async ({ request }) => {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    let questions;
 | 
					    let questions;
 | 
				
			||||||
    try {
 | 
					    try {
 | 
				
			||||||
    const cleanedContent = aiContent
 | 
					      const cleanedContent = aiContent
 | 
				
			||||||
        .replace(/^```json\s*/i, '')
 | 
					        .replace(/^```json\s*/i, '')
 | 
				
			||||||
        .replace(/\s*```\s*$/, '')
 | 
					        .replace(/\s*```\s*$/, '')
 | 
				
			||||||
        .trim();
 | 
					        .trim();
 | 
				
			||||||
    questions = JSON.parse(cleanedContent);
 | 
					      questions = JSON.parse(cleanedContent);
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      if (!Array.isArray(questions) || questions.length === 0) {
 | 
					      if (!Array.isArray(questions)) {
 | 
				
			||||||
        throw new Error('Invalid questions format');
 | 
					        throw new Error('Response is not an array');
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      // Validate and clean questions
 | 
					      // Enhanced validation and cleaning
 | 
				
			||||||
      questions = questions
 | 
					      questions = questions
 | 
				
			||||||
        .filter(q => typeof q === 'string' && q.length > 5 && q.length < 120)
 | 
					        .filter(q => typeof q === 'string' && q.length > 10 && q.length < 150) // More reasonable length limits
 | 
				
			||||||
        .slice(0, 3);
 | 
					        .filter(q => q.includes('?')) // Must be a question
 | 
				
			||||||
 | 
					        .map(q => q.trim())
 | 
				
			||||||
 | 
					        .slice(0, 3); // Max 3 questions
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
 | 
					      // If no valid questions, return empty array (means input is complete)
 | 
				
			||||||
      if (questions.length === 0) {
 | 
					      if (questions.length === 0) {
 | 
				
			||||||
        throw new Error('No valid questions found');
 | 
					        questions = [];
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    } catch (error) {
 | 
					    } catch (error) {
 | 
				
			||||||
      console.error('Failed to parse enhancement response:', aiContent);
 | 
					      console.error('Failed to parse enhancement response:', aiContent);
 | 
				
			||||||
      return apiServerError.unavailable('Invalid enhancement response format');
 | 
					      // If parsing fails, assume input is complete enough
 | 
				
			||||||
 | 
					      questions = [];
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    console.log(`[AI Enhancement] User: ${userId}, Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
 | 
					    console.log(`[AI Enhancement] User: ${userId}, Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
 | 
				
			||||||
@ -173,7 +190,8 @@ export const POST: APIRoute = async ({ request }) => {
 | 
				
			|||||||
    return new Response(JSON.stringify({
 | 
					    return new Response(JSON.stringify({
 | 
				
			||||||
      success: true,
 | 
					      success: true,
 | 
				
			||||||
      questions,
 | 
					      questions,
 | 
				
			||||||
      taskId
 | 
					      taskId,
 | 
				
			||||||
 | 
					      inputComplete: questions.length === 0 // Flag to indicate if input seems complete
 | 
				
			||||||
    }), {
 | 
					    }), {
 | 
				
			||||||
      status: 200,
 | 
					      status: 200,
 | 
				
			||||||
      headers: { 'Content-Type': 'application/json' }
 | 
					      headers: { 'Content-Type': 'application/json' }
 | 
				
			||||||
 | 
				
			|||||||
@ -1,21 +1,12 @@
 | 
				
			|||||||
// src/pages/api/ai/query.ts
 | 
					// src/pages/api/ai/query.ts
 | 
				
			||||||
import type { APIRoute } from 'astro';
 | 
					import type { APIRoute } from 'astro';
 | 
				
			||||||
import { withAPIAuth } from '../../../utils/auth.js';
 | 
					import { withAPIAuth } from '../../../utils/auth.js';
 | 
				
			||||||
import { getCompressedToolsDataForAI } from '../../../utils/dataService.js';
 | 
					 | 
				
			||||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
 | 
					import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
 | 
				
			||||||
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';   
 | 
					import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';   
 | 
				
			||||||
 | 
					import { aiPipeline } from '../../../utils/aiPipeline.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
export const prerender = false;
 | 
					export const prerender = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function getEnv(key: string): string {
 | 
					 | 
				
			||||||
  const value = process.env[key];
 | 
					 | 
				
			||||||
  if (!value) {
 | 
					 | 
				
			||||||
    throw new Error(`Missing environment variable: ${key}`);
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  return value;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
const AI_MODEL = getEnv('AI_MODEL');
 | 
					 | 
				
			||||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
 | 
					const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
 | 
				
			||||||
const RATE_LIMIT_WINDOW = 60 * 1000; 
 | 
					const RATE_LIMIT_WINDOW = 60 * 1000; 
 | 
				
			||||||
const RATE_LIMIT_MAX = 10; 
 | 
					const RATE_LIMIT_MAX = 10; 
 | 
				
			||||||
@ -33,13 +24,6 @@ function sanitizeInput(input: string): string {
 | 
				
			|||||||
  return sanitized;
 | 
					  return sanitized;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function stripMarkdownJson(content: string): string {
 | 
					 | 
				
			||||||
  return content
 | 
					 | 
				
			||||||
    .replace(/^```json\s*/i, '')
 | 
					 | 
				
			||||||
    .replace(/\s*```\s*$/, '')
 | 
					 | 
				
			||||||
    .trim();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
function checkRateLimit(userId: string): boolean {
 | 
					function checkRateLimit(userId: string): boolean {
 | 
				
			||||||
  const now = Date.now();
 | 
					  const now = Date.now();
 | 
				
			||||||
  const userLimit = rateLimitStore.get(userId);
 | 
					  const userLimit = rateLimitStore.get(userId);
 | 
				
			||||||
@ -68,209 +52,6 @@ function cleanupExpiredRateLimits() {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
 | 
					setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
async function loadToolsDatabase() {
 | 
					 | 
				
			||||||
  try {
 | 
					 | 
				
			||||||
    return await getCompressedToolsDataForAI();
 | 
					 | 
				
			||||||
  } catch (error) {
 | 
					 | 
				
			||||||
    console.error('Failed to load tools database:', error);
 | 
					 | 
				
			||||||
    throw new Error('Database unavailable');
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
function createWorkflowSystemPrompt(toolsData: any): string {
 | 
					 | 
				
			||||||
  const toolsList = toolsData.tools.map((tool: any) => ({
 | 
					 | 
				
			||||||
    name: tool.name,
 | 
					 | 
				
			||||||
    description: tool.description,
 | 
					 | 
				
			||||||
    domains: tool.domains,
 | 
					 | 
				
			||||||
    phases: tool.phases,
 | 
					 | 
				
			||||||
    domainAgnostic: tool['domain-agnostic-software'],
 | 
					 | 
				
			||||||
    platforms: tool.platforms,
 | 
					 | 
				
			||||||
    skillLevel: tool.skillLevel,
 | 
					 | 
				
			||||||
    license: tool.license,
 | 
					 | 
				
			||||||
    tags: tool.tags,
 | 
					 | 
				
			||||||
    related_concepts: tool.related_concepts || []
 | 
					 | 
				
			||||||
  }));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const conceptsList = toolsData.concepts.map((concept: any) => ({
 | 
					 | 
				
			||||||
    name: concept.name,
 | 
					 | 
				
			||||||
    description: concept.description,
 | 
					 | 
				
			||||||
    domains: concept.domains,
 | 
					 | 
				
			||||||
    phases: concept.phases,
 | 
					 | 
				
			||||||
    skillLevel: concept.skillLevel,
 | 
					 | 
				
			||||||
    tags: concept.tags
 | 
					 | 
				
			||||||
  }));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const regularPhases = toolsData.phases || [];
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  const domainAgnosticSoftware = toolsData['domain-agnostic-software'] || [];
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  const allPhaseItems = [
 | 
					 | 
				
			||||||
    ...regularPhases,
 | 
					 | 
				
			||||||
    ...domainAgnosticSoftware
 | 
					 | 
				
			||||||
  ];
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  const phasesDescription = allPhaseItems.map((phase: any) => 
 | 
					 | 
				
			||||||
    `- ${phase.id}: ${phase.name}`
 | 
					 | 
				
			||||||
  ).join('\n');
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const domainsDescription = toolsData.domains.map((domain: any) => 
 | 
					 | 
				
			||||||
    `- ${domain.id}: ${domain.name}`
 | 
					 | 
				
			||||||
  ).join('\n');
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const phaseDescriptions = regularPhases.map((phase: any) => 
 | 
					 | 
				
			||||||
    `- ${phase.name}: ${phase.description || 'Tools/Methods for this phase'}`
 | 
					 | 
				
			||||||
  ).join('\n');
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const domainAgnosticDescriptions = domainAgnosticSoftware.map((section: any) => 
 | 
					 | 
				
			||||||
    `- ${section.name}: ${section.description || 'Cross-cutting software and platforms'}`
 | 
					 | 
				
			||||||
  ).join('\n');
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const validPhases = [
 | 
					 | 
				
			||||||
    ...regularPhases.map((p: any) => p.id),
 | 
					 | 
				
			||||||
    ...domainAgnosticSoftware.map((s: any) => s.id)
 | 
					 | 
				
			||||||
  ].join('|');
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte, der Ermittlern bei der Auswahl von Software und Methoden hilft.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERFÜGBARE TOOLS/METHODEN:
 | 
					 | 
				
			||||||
${JSON.stringify(toolsList, null, 2)}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERFÜGBARE HINTERGRUNDWISSEN-KONZEPTE:
 | 
					 | 
				
			||||||
${JSON.stringify(conceptsList, null, 2)}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
UNTERSUCHUNGSPHASEN (NIST Framework):
 | 
					 | 
				
			||||||
${phasesDescription}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
FORENSISCHE DOMÄNEN:
 | 
					 | 
				
			||||||
${domainsDescription}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
WICHTIGE REGELN:
 | 
					 | 
				
			||||||
1. Pro Phase 2-3 Tools/Methoden empfehlen (immer mindestens 2 wenn verfügbar)
 | 
					 | 
				
			||||||
2. Tools/Methoden können in MEHREREN Phasen empfohlen werden wenn sinnvoll - versuche ein Tool/Methode für jede Phase zu empfehlen, selbst wenn die Priorität "low" ist.
 | 
					 | 
				
			||||||
3. Für Reporting-Phase: Visualisierungs- und Dokumentationssoftware einschließen
 | 
					 | 
				
			||||||
4. Gib stets dem spezieller für den Fall geeigneten Werkzeug den Vorzug.
 | 
					 | 
				
			||||||
5. Deutsche Antworten für deutsche Anfragen, English for English queries
 | 
					 | 
				
			||||||
6. Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software.
 | 
					 | 
				
			||||||
7. Bevorzuge alles, was nicht proprietär ist (license != "Proprietary"), aber erkenne an, wenn proprietäre Software besser geeignet ist.
 | 
					 | 
				
			||||||
8. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
 | 
					 | 
				
			||||||
9. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ENHANCED CONTEXTUAL ANALYSIS:
 | 
					 | 
				
			||||||
10. Analysiere das Szenario detailliert und identifiziere Schlüsselelemente, Bedrohungen und forensische Herausforderungen
 | 
					 | 
				
			||||||
11. Entwickle einen strategischen Untersuchungsansatz basierend auf dem spezifischen Szenario
 | 
					 | 
				
			||||||
12. Identifiziere zeitkritische oder besonders wichtige Faktoren für diesen Fall
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
SOFTWARE/METHODEN-AUSWAHL NACH PHASE:
 | 
					 | 
				
			||||||
${phaseDescriptions}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
DOMÄNENAGNOSTISCHE SOFTWARE/METHODEN:
 | 
					 | 
				
			||||||
${domainAgnosticDescriptions}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ANTWORT-FORMAT (strict JSON):
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  "scenario_analysis": "Detaillierte Analyse des Szenarios: Erkannte Schlüsselelemente, Art des Vorfalls, betroffene Systeme, potentielle Bedrohungen und forensische Herausforderungen",
 | 
					 | 
				
			||||||
  "investigation_approach": "Strategischer Untersuchungsansatz für dieses spezifische Szenario: Prioritäten, Reihenfolge der Phasen, besondere Überlegungen",
 | 
					 | 
				
			||||||
  "critical_considerations": "Zeitkritische Faktoren, wichtige Sicherheitsaspekte oder besondere Vorsichtsmaßnahmen für diesen Fall",
 | 
					 | 
				
			||||||
  "recommended_tools": [
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
      "name": "EXAKTER Name aus der Tools-Database",
 | 
					 | 
				
			||||||
      "priority": "high|medium|low", 
 | 
					 | 
				
			||||||
      "phase": "${validPhases}",
 | 
					 | 
				
			||||||
      "justification": "Warum diese Methode für diese Phase und dieses spezifische Szenario geeignet ist - mit Bezug zu den erkannten Schlüsselelementen"
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  ],
 | 
					 | 
				
			||||||
  "workflow_suggestion": "Vorgeschlagener Untersuchungsablauf mit konkreten Schritten für dieses Szenario",
 | 
					 | 
				
			||||||
  "background_knowledge": [
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
      "concept_name": "EXAKTER Name aus der Konzepte-Database",
 | 
					 | 
				
			||||||
      "relevance": "Warum dieses Konzept für das Szenario relevant ist, und bei welchen der empfohlenen Methoden/Tools."
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  ],
 | 
					 | 
				
			||||||
  "additional_notes": "Wichtige Überlegungen und Hinweise"
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
function createToolSystemPrompt(toolsData: any): string {
 | 
					 | 
				
			||||||
  const toolsList = toolsData.tools.map((tool: any) => ({
 | 
					 | 
				
			||||||
    name: tool.name,
 | 
					 | 
				
			||||||
    description: tool.description,
 | 
					 | 
				
			||||||
    domains: tool.domains,
 | 
					 | 
				
			||||||
    phases: tool.phases,
 | 
					 | 
				
			||||||
    platforms: tool.platforms,
 | 
					 | 
				
			||||||
    skillLevel: tool.skillLevel,
 | 
					 | 
				
			||||||
    license: tool.license,
 | 
					 | 
				
			||||||
    tags: tool.tags,
 | 
					 | 
				
			||||||
    url: tool.url,
 | 
					 | 
				
			||||||
    projectUrl: tool.projectUrl,
 | 
					 | 
				
			||||||
    related_concepts: tool.related_concepts || []
 | 
					 | 
				
			||||||
  }));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const conceptsList = toolsData.concepts.map((concept: any) => ({
 | 
					 | 
				
			||||||
    name: concept.name,
 | 
					 | 
				
			||||||
    description: concept.description,
 | 
					 | 
				
			||||||
    domains: concept.domains,
 | 
					 | 
				
			||||||
    phases: concept.phases,
 | 
					 | 
				
			||||||
    skillLevel: concept.skillLevel,
 | 
					 | 
				
			||||||
    tags: concept.tags
 | 
					 | 
				
			||||||
  }));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte, der bei der Auswahl spezifischer Software/Methoden für konkrete Probleme hilft.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERFÜGBARE TOOLS/METHODEN:
 | 
					 | 
				
			||||||
${JSON.stringify(toolsList, null, 2)}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERFÜGBARE HINTERGRUNDWISSEN-KONZEPTE:
 | 
					 | 
				
			||||||
${JSON.stringify(conceptsList, null, 2)}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
WICHTIGE REGELN:
 | 
					 | 
				
			||||||
1. Analysiere das spezifische Problem/die Anforderung sorgfältig
 | 
					 | 
				
			||||||
2. Empfehle 1-3 Methoden/Tools, sortiert nach Eignung (beste Empfehlung zuerst)
 | 
					 | 
				
			||||||
3. Gib detaillierte Erklärungen, WARUM und WIE jede Methode/Tool das Problem löst
 | 
					 | 
				
			||||||
4. Berücksichtige praktische Aspekte: Skill Level, Plattformen, Verfügbarkeit
 | 
					 | 
				
			||||||
5. Deutsche Antworten für deutsche Anfragen, English for English queries
 | 
					 | 
				
			||||||
6. Gib konkrete Anwendungshinweise, nicht nur allgemeine Beschreibungen - Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software.
 | 
					 | 
				
			||||||
7. Erwähne sowohl Stärken als auch Schwächen/Limitationen
 | 
					 | 
				
			||||||
8. Schlage alternative Ansätze vor, wenn sinnvoll
 | 
					 | 
				
			||||||
9. Gib grundsätzliche Hinweise, WIE die Methode/Tool konkret eingesetzt wird
 | 
					 | 
				
			||||||
10. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
 | 
					 | 
				
			||||||
11. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ENHANCED CONTEXTUAL ANALYSIS:
 | 
					 | 
				
			||||||
12. Analysiere das Problem detailliert und identifiziere technische Anforderungen, Herausforderungen und Erfolgsfaktoren
 | 
					 | 
				
			||||||
13. Entwickle einen strategischen Lösungsansatz basierend auf dem spezifischen Problem
 | 
					 | 
				
			||||||
14. Identifiziere wichtige Voraussetzungen oder Warnungen für die Anwendung
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ANTWORT-FORMAT (strict JSON):
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  "problem_analysis": "Detaillierte Analyse des Problems: Erkannte technische Anforderungen, Herausforderungen, benötigte Fähigkeiten und Erfolgsfaktoren",
 | 
					 | 
				
			||||||
  "investigation_approach": "Strategischer Lösungsansatz für dieses spezifische Problem: Herangehensweise, Prioritäten, optimale Anwendungsreihenfolge",
 | 
					 | 
				
			||||||
  "critical_considerations": "Wichtige Voraussetzungen, potentielle Fallstricke oder Warnungen für die Anwendung der empfohlenen Lösungen",
 | 
					 | 
				
			||||||
  "recommended_tools": [
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
      "name": "EXAKTER Name aus der Tools-Database",
 | 
					 | 
				
			||||||
      "rank": 1,
 | 
					 | 
				
			||||||
      "suitability_score": "high|medium|low",
 | 
					 | 
				
			||||||
      "detailed_explanation": "Detaillierte Erklärung, warum dieses Tool/diese Methode das spezifische Problem löst - mit Bezug zu den erkannten Anforderungen",
 | 
					 | 
				
			||||||
      "implementation_approach": "Konkrete Schritte/Ansatz zur Anwendung für dieses spezifische Problem",
 | 
					 | 
				
			||||||
      "pros": ["Spezifische Vorteile für diesen Anwendungsfall", "Weitere Vorteile"],
 | 
					 | 
				
			||||||
      "cons": ["Potentielle Nachteile oder Limitationen", "Weitere Einschränkungen"],
 | 
					 | 
				
			||||||
      "alternatives": "Alternative Ansätze oder ergänzende Tools/Methoden, falls relevant"
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  ],
 | 
					 | 
				
			||||||
  "background_knowledge": [
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
      "concept_name": "EXAKTER Name aus der Konzepte-Database",
 | 
					 | 
				
			||||||
      "relevance": "Warum dieses Konzept für die empfohlenen Tools/das Problem relevant ist, und für welche der empfohlenen Methoden/Tools."
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  ],
 | 
					 | 
				
			||||||
  "additional_considerations": "Wichtige Überlegungen, Voraussetzungen oder Warnungen"
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
export const POST: APIRoute = async ({ request }) => {
 | 
					export const POST: APIRoute = async ({ request }) => {
 | 
				
			||||||
  try {
 | 
					  try {
 | 
				
			||||||
    const authResult = await withAPIAuth(request, 'ai');
 | 
					    const authResult = await withAPIAuth(request, 'ai');
 | 
				
			||||||
@ -287,7 +68,6 @@ export const POST: APIRoute = async ({ request }) => {
 | 
				
			|||||||
    const body = await request.json();
 | 
					    const body = await request.json();
 | 
				
			||||||
    const { query, mode = 'workflow', taskId: clientTaskId } = body;
 | 
					    const { query, mode = 'workflow', taskId: clientTaskId } = body;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // ADD THIS DEBUG LOGGING
 | 
					 | 
				
			||||||
    console.log(`[AI API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
 | 
					    console.log(`[AI API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (!query || typeof query !== 'string') {
 | 
					    if (!query || typeof query !== 'string') {
 | 
				
			||||||
@ -306,128 +86,31 @@ export const POST: APIRoute = async ({ request }) => {
 | 
				
			|||||||
      return apiError.badRequest('Invalid input detected');
 | 
					      return apiError.badRequest('Invalid input detected');
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const toolsData = await loadToolsDatabase();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    const systemPrompt = mode === 'workflow' 
 | 
					 | 
				
			||||||
      ? createWorkflowSystemPrompt(toolsData)
 | 
					 | 
				
			||||||
      : createToolSystemPrompt(toolsData);
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
 | 
					    const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    console.log(`[AI API] About to enqueue task ${taskId}`);
 | 
					    console.log(`[AI API] About to enqueue task ${taskId}`);
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    
 | 
					    // Use the new AI pipeline instead of direct API calls
 | 
				
			||||||
    const aiResponse = await enqueueApiCall(() =>
 | 
					    const result = await enqueueApiCall(() => 
 | 
				
			||||||
      fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
 | 
					      aiPipeline.processQuery(sanitizedQuery, mode)
 | 
				
			||||||
        method: 'POST',
 | 
					 | 
				
			||||||
        headers: {
 | 
					 | 
				
			||||||
          'Content-Type': 'application/json',
 | 
					 | 
				
			||||||
          'Authorization': `Bearer ${process.env.AI_API_KEY}`
 | 
					 | 
				
			||||||
        },
 | 
					 | 
				
			||||||
        body: JSON.stringify({
 | 
					 | 
				
			||||||
          model: AI_MODEL,
 | 
					 | 
				
			||||||
          messages: [
 | 
					 | 
				
			||||||
            {
 | 
					 | 
				
			||||||
              role: 'system',
 | 
					 | 
				
			||||||
              content: systemPrompt
 | 
					 | 
				
			||||||
            },
 | 
					 | 
				
			||||||
            {
 | 
					 | 
				
			||||||
              role: 'user',
 | 
					 | 
				
			||||||
              content: sanitizedQuery
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
          ],
 | 
					 | 
				
			||||||
          max_tokens: 3500,
 | 
					 | 
				
			||||||
          temperature: 0.3
 | 
					 | 
				
			||||||
        })
 | 
					 | 
				
			||||||
      })
 | 
					 | 
				
			||||||
    , taskId);
 | 
					    , taskId);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (!aiResponse.ok) {
 | 
					    if (!result || !result.recommendation) {
 | 
				
			||||||
      console.error('AI API error:', await aiResponse.text());
 | 
					      return apiServerError.unavailable('No response from AI pipeline');
 | 
				
			||||||
      return apiServerError.unavailable('AI service unavailable');
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const aiData = await aiResponse.json();
 | 
					    // Add processing statistics to the response for debugging/monitoring
 | 
				
			||||||
    const aiContent = aiData.choices?.[0]?.message?.content;
 | 
					    console.log(`[AI Query] Mode: ${mode}, User: ${userId}, Query length: ${sanitizedQuery.length}`);
 | 
				
			||||||
 | 
					    console.log(`[AI Query] Processing stats:`, result.processingStats);
 | 
				
			||||||
    if (!aiContent) {
 | 
					    console.log(`[AI Query] Tools: ${result.recommendation.recommended_tools?.length || 0}, Concepts: ${result.recommendation.background_knowledge?.length || 0}`);
 | 
				
			||||||
      return apiServerError.unavailable('No response from AI');
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    let recommendation;
 | 
					 | 
				
			||||||
    try {
 | 
					 | 
				
			||||||
      const cleanedContent = stripMarkdownJson(aiContent);
 | 
					 | 
				
			||||||
      recommendation = JSON.parse(cleanedContent);
 | 
					 | 
				
			||||||
    } catch (error) {
 | 
					 | 
				
			||||||
      console.error('Failed to parse AI response:', aiContent);
 | 
					 | 
				
			||||||
      return apiServerError.unavailable('Invalid AI response format');
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    const validToolNames = new Set(toolsData.tools.map((t: any) => t.name));
 | 
					 | 
				
			||||||
    const validConceptNames = new Set(toolsData.concepts.map((c: any) => c.name));
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    let validatedRecommendation;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (mode === 'workflow') {
 | 
					 | 
				
			||||||
      validatedRecommendation = {
 | 
					 | 
				
			||||||
        ...recommendation,
 | 
					 | 
				
			||||||
        // Ensure all new fields are included with fallbacks
 | 
					 | 
				
			||||||
        scenario_analysis: recommendation.scenario_analysis || recommendation.problem_analysis || '',
 | 
					 | 
				
			||||||
        investigation_approach: recommendation.investigation_approach || '',
 | 
					 | 
				
			||||||
        critical_considerations: recommendation.critical_considerations || '',
 | 
					 | 
				
			||||||
        recommended_tools: recommendation.recommended_tools?.filter((tool: any) => {
 | 
					 | 
				
			||||||
          if (!validToolNames.has(tool.name)) {
 | 
					 | 
				
			||||||
            console.warn(`AI recommended unknown tool: ${tool.name}`);
 | 
					 | 
				
			||||||
            return false;
 | 
					 | 
				
			||||||
          }
 | 
					 | 
				
			||||||
          return true;
 | 
					 | 
				
			||||||
        }) || [],
 | 
					 | 
				
			||||||
        background_knowledge: recommendation.background_knowledge?.filter((concept: any) => {
 | 
					 | 
				
			||||||
          if (!validConceptNames.has(concept.concept_name)) {
 | 
					 | 
				
			||||||
            console.warn(`AI referenced unknown concept: ${concept.concept_name}`);
 | 
					 | 
				
			||||||
            return false;
 | 
					 | 
				
			||||||
          }
 | 
					 | 
				
			||||||
          return true;
 | 
					 | 
				
			||||||
        }) || []
 | 
					 | 
				
			||||||
      };
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
      validatedRecommendation = {
 | 
					 | 
				
			||||||
        ...recommendation,
 | 
					 | 
				
			||||||
        // Ensure all new fields are included with fallbacks
 | 
					 | 
				
			||||||
        problem_analysis: recommendation.problem_analysis || recommendation.scenario_analysis || '',
 | 
					 | 
				
			||||||
        investigation_approach: recommendation.investigation_approach || '',
 | 
					 | 
				
			||||||
        critical_considerations: recommendation.critical_considerations || '',
 | 
					 | 
				
			||||||
        recommended_tools: recommendation.recommended_tools?.filter((tool: any) => {
 | 
					 | 
				
			||||||
          if (!validToolNames.has(tool.name)) {
 | 
					 | 
				
			||||||
            console.warn(`AI recommended unknown tool: ${tool.name}`);
 | 
					 | 
				
			||||||
            return false;
 | 
					 | 
				
			||||||
          }
 | 
					 | 
				
			||||||
          return true;
 | 
					 | 
				
			||||||
        }).map((tool: any, index: number) => ({
 | 
					 | 
				
			||||||
          ...tool,
 | 
					 | 
				
			||||||
          rank: tool.rank || (index + 1),
 | 
					 | 
				
			||||||
          suitability_score: tool.suitability_score || 'medium',
 | 
					 | 
				
			||||||
          pros: Array.isArray(tool.pros) ? tool.pros : [],
 | 
					 | 
				
			||||||
          cons: Array.isArray(tool.cons) ? tool.cons : []
 | 
					 | 
				
			||||||
        })) || [],
 | 
					 | 
				
			||||||
        background_knowledge: recommendation.background_knowledge?.filter((concept: any) => {
 | 
					 | 
				
			||||||
          if (!validConceptNames.has(concept.concept_name)) {
 | 
					 | 
				
			||||||
            console.warn(`AI referenced unknown concept: ${concept.concept_name}`);
 | 
					 | 
				
			||||||
            return false;
 | 
					 | 
				
			||||||
          }
 | 
					 | 
				
			||||||
          return true;
 | 
					 | 
				
			||||||
        }) || []
 | 
					 | 
				
			||||||
      };
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    console.log(`[AI Query] Mode: ${mode}, User: ${userId}, Query length: ${sanitizedQuery.length}, Tools: ${validatedRecommendation.recommended_tools.length}, Concepts: ${validatedRecommendation.background_knowledge?.length || 0}`);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return new Response(JSON.stringify({
 | 
					    return new Response(JSON.stringify({
 | 
				
			||||||
      success: true,
 | 
					      success: true,
 | 
				
			||||||
      mode,
 | 
					      mode,
 | 
				
			||||||
      taskId,
 | 
					      taskId,
 | 
				
			||||||
      recommendation: validatedRecommendation,
 | 
					      recommendation: result.recommendation,
 | 
				
			||||||
      query: sanitizedQuery
 | 
					      query: sanitizedQuery,
 | 
				
			||||||
 | 
					      processingStats: result.processingStats // Include stats for monitoring
 | 
				
			||||||
    }), {
 | 
					    }), {
 | 
				
			||||||
      status: 200,
 | 
					      status: 200,
 | 
				
			||||||
      headers: { 'Content-Type': 'application/json' }
 | 
					      headers: { 'Content-Type': 'application/json' }
 | 
				
			||||||
@ -435,6 +118,16 @@ export const POST: APIRoute = async ({ request }) => {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
  } catch (error) {
 | 
					  } catch (error) {
 | 
				
			||||||
    console.error('AI query error:', error);
 | 
					    console.error('AI query error:', error);
 | 
				
			||||||
    return apiServerError.internal('Internal server error');
 | 
					    
 | 
				
			||||||
 | 
					    // Provide more specific error messages based on error type
 | 
				
			||||||
 | 
					    if (error.message.includes('embeddings')) {
 | 
				
			||||||
 | 
					      return apiServerError.unavailable('Embeddings service error - falling back to basic processing');
 | 
				
			||||||
 | 
					    } else if (error.message.includes('selector')) {
 | 
				
			||||||
 | 
					      return apiServerError.unavailable('AI selector service error');
 | 
				
			||||||
 | 
					    } else if (error.message.includes('analyzer')) {
 | 
				
			||||||
 | 
					      return apiServerError.unavailable('AI analyzer service error');
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      return apiServerError.internal('Internal server error');
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
							
								
								
									
										521
									
								
								src/utils/aiPipeline.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										521
									
								
								src/utils/aiPipeline.ts
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,521 @@
 | 
				
			|||||||
 | 
					// src/utils/aiPipeline.ts
 | 
				
			||||||
 | 
					import { getCompressedToolsDataForAI } from './dataService.js';
 | 
				
			||||||
 | 
					import { embeddingsService, type EmbeddingData } from './embeddings.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Connection settings for one chat-completions backend.
					 */
					interface AIConfig {
					  /** Base URL; `/v1/chat/completions` is appended when a request is sent. */
					  endpoint: string;
					  /** Secret sent as a bearer token in the `Authorization` header. */
					  apiKey: string;
					  /** Model identifier placed in the request body. */
					  model: string;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Parsed answer of the selector model: which tools and concepts it picked
					 * from the offered lists, plus its free-text justification.
					 */
					interface SelectionResult {
					  /** Names of the chosen tools. */
					  selectedTools: string[];
					  /** Names of the chosen concepts. */
					  selectedConcepts: string[];
					  /** The model's explanation of its selection. */
					  reasoning: string;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Final pipeline output: the recommendation object together with
					 * bookkeeping about how it was produced.
					 * NOTE(review): presumably the return shape of the pipeline's public entry
					 * point, which is outside this excerpt - confirm.
					 */
					interface AnalysisResult {
					  /** Recommendation payload; its shape is not constrained by this type. */
					  recommendation: any;
					  /** Counters and timing collected while processing one query. */
					  processingStats: {
					    /** Whether embeddings were used for pre-filtering. */
					    embeddingsUsed: boolean;
					    /** Number of candidates returned by the embeddings lookup. */
					    candidatesFromEmbeddings: number;
					    /** Number of items that remained after selection. */
					    finalSelectedItems: number;
					    /** Processing duration in milliseconds. */
					    processingTimeMs: number;
					  };
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class AIProcessingPipeline {
 | 
				
			||||||
 | 
					  private selectorConfig: AIConfig;
 | 
				
			||||||
 | 
					  private analyzerConfig: AIConfig;
 | 
				
			||||||
 | 
					  private maxSelectedItems: number;
 | 
				
			||||||
 | 
					  private embeddingCandidates: number;
 | 
				
			||||||
 | 
					  private similarityThreshold: number;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Reads the pipeline configuration from environment variables.
					   *
					   * Endpoint, API key and model are required for both the selector and the
					   * analyzer backend. The three numeric tuning values are optional and fall
					   * back to their defaults when unset, empty or not parseable as a number.
					   *
					   * @throws Error when one of the required variables is missing.
					   */
					  constructor() {
					    this.selectorConfig = {
					      endpoint: this.getEnv('AI_SELECTOR_ENDPOINT'),
					      apiKey: this.getEnv('AI_SELECTOR_API_KEY'),
					      model: this.getEnv('AI_SELECTOR_MODEL')
					    };

					    this.analyzerConfig = {
					      endpoint: this.getEnv('AI_ANALYZER_ENDPOINT'),
					      apiKey: this.getEnv('AI_ANALYZER_API_KEY'),
					      model: this.getEnv('AI_ANALYZER_MODEL')
					    };

					    // parseInt/parseFloat yield NaN for garbage input. NaN compares false
					    // against everything, so any limit based on it would silently stop
					    // applying; use the documented default instead.
					    const numericEnv = (key: string, fallback: number, parse: (raw: string) => number): number => {
					      const raw = process.env[key];
					      if (!raw) {
					        return fallback;
					      }
					      const parsed = parse(raw);
					      if (Number.isFinite(parsed)) {
					        return parsed;
					      }
					      console.warn(`[AI PIPELINE] Ignoring non-numeric value for ${key}, using default ${fallback}`);
					      return fallback;
					    };

					    this.maxSelectedItems = numericEnv('AI_MAX_SELECTED_ITEMS', 15, (raw) => parseInt(raw, 10));
					    this.embeddingCandidates = numericEnv('AI_EMBEDDING_CANDIDATES', 30, (raw) => parseInt(raw, 10));
					    this.similarityThreshold = numericEnv('AI_SIMILARITY_THRESHOLD', 0.3, (raw) => parseFloat(raw));
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Looks up a required environment variable.
					   *
					   * @param key Name of the variable in `process.env`.
					   * @returns The variable's value.
					   * @throws Error if the variable is unset or an empty string.
					   */
					  private getEnv(key: string): string {
					    const found = process.env[key];
					    if (found) {
					      return found;
					    }
					    throw new Error(`Missing environment variable: ${key}`);
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Sends a chat-completions request to the given backend and returns the
					   * text of the first choice.
					   *
					   * @param config Backend to call (endpoint, key, model).
					   * @param messages Chat messages forwarded unchanged in the request body.
					   * @param maxTokens Value for `max_tokens`; defaults to 1000.
					   * @returns Content of `choices[0].message` from the response.
					   * @throws Error on a non-OK HTTP status, or when the response has no content.
					   */
					  private async callAI(config: AIConfig, messages: any[], maxTokens: number = 1000): Promise<string> {
					    const url = `${config.endpoint}/v1/chat/completions`;
					    const headers = {
					      'Content-Type': 'application/json',
					      'Authorization': `Bearer ${config.apiKey}`
					    };
					    const payload = JSON.stringify({
					      model: config.model,
					      messages,
					      max_tokens: maxTokens,
					      temperature: 0.3
					    });

					    const reply = await fetch(url, { method: 'POST', headers, body: payload });
					    if (!reply.ok) {
					      const detail = await reply.text();
					      throw new Error(`AI API error (${config.model}): ${reply.status} - ${detail}`);
					    }

					    const parsed = await reply.json();
					    const text = parsed.choices?.[0]?.message?.content;
					    if (text) {
					      return text;
					    }
					    throw new Error(`No response from AI model: ${config.model}`);
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Builds the prompt that asks the selector model to pick the most relevant
					   * tools and concepts for a query.
					   *
					   * Each tool and concept is reduced to a compact summary (description capped
					   * at 200 characters, at most five tags) before being serialized into the
					   * prompt.
					   *
					   * @param toolsData Dataset providing `tools` and `concepts` arrays.
					   * @param userQuery The user's query, embedded verbatim in the prompt.
					   * @param mode `'workflow'` selects the workflow instruction; any other value
					   *   selects the specific-tools instruction.
					   * @returns The complete prompt text.
					   */
					  private createSelectorPrompt(toolsData: any, userQuery: string, mode: string): string {
					    // Only mark a description as truncated when it really was, and tolerate
					    // entries without a description instead of throwing on `.slice`.
					    const preview = (text: unknown): string => {
					      const full = typeof text === 'string' ? text : '';
					      return full.length > 200 ? full.slice(0, 200) + '...' : full;
					    };

					    const toolsList = toolsData.tools.map((tool: any) => ({
					      name: tool.name,
					      type: tool.type,
					      description: preview(tool.description),
					      domains: tool.domains,
					      phases: tool.phases,
					      tags: tool.tags?.slice(0, 5) || [],
					      skillLevel: tool.skillLevel
					    }));

					    const conceptsList = toolsData.concepts.map((concept: any) => ({
					      name: concept.name,
					      type: 'concept',
					      description: preview(concept.description),
					      domains: concept.domains,
					      phases: concept.phases,
					      tags: concept.tags?.slice(0, 5) || []
					    }));

					    const modeInstruction = mode === 'workflow'
					      ? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases.'
					      : 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem.';

					    return `You are a DFIR expert tasked with selecting the most relevant tools and concepts for a user query.

${modeInstruction}

AVAILABLE TOOLS:
${JSON.stringify(toolsList, null, 2)}

AVAILABLE CONCEPTS:
${JSON.stringify(conceptsList, null, 2)}

USER QUERY: "${userQuery}"

Select the most relevant items (max ${this.maxSelectedItems} total). For workflow mode, prioritize breadth across phases. For tool mode, prioritize specificity and direct relevance.

Respond with ONLY this JSON format:
{
  "selectedTools": ["Tool Name 1", "Tool Name 2", ...],
  "selectedConcepts": ["Concept Name 1", "Concept Name 2", ...],
  "reasoning": "Brief explanation of selection criteria and approach"
}`;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Asks the selector model which tools and concepts are relevant to the
					   * query and returns its parsed, size-limited answer.
					   *
					   * @param toolsData Dataset offered to the model.
					   * @param userQuery The user's query.
					   * @param mode Processing mode forwarded to the prompt builder.
					   * @returns The selection, truncated to `maxSelectedItems` entries
					   *   (about 80% tools, 20% concepts) when the model returned too many.
					   * @throws Error `'Invalid JSON response from selector AI'` when the reply is
					   *   not valid JSON or lacks the two selection arrays; errors from the
					   *   model call itself propagate unchanged.
					   */
					  private async selectRelevantItems(toolsData: any, userQuery: string, mode: string): Promise<SelectionResult> {
					    const prompt = this.createSelectorPrompt(toolsData, userQuery, mode);
					    const messages = [{ role: 'user', content: prompt }];

					    const response = await this.callAI(this.selectorConfig, messages, 1500);

					    try {
					      // Models frequently wrap JSON in a Markdown fence. Accept the fence with
					      // or without a language tag, and with surrounding whitespace.
					      const cleaned = response
					        .trim()
					        .replace(/^```(?:json)?\s*/i, '')
					        .replace(/\s*```\s*$/, '')
					        .trim();
					      const result = JSON.parse(cleaned);

					      // Validate the structure
					      if (!Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
					        throw new Error('Invalid selection result structure');
					      }

					      // Limit selections
					      const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
					      if (totalSelected > this.maxSelectedItems) {
					        console.warn(`[AI PIPELINE] Selection exceeded limit (${totalSelected}), truncating`);
					        result.selectedTools = result.selectedTools.slice(0, Math.floor(this.maxSelectedItems * 0.8));
					        result.selectedConcepts = result.selectedConcepts.slice(0, Math.ceil(this.maxSelectedItems * 0.2));
					      }

					      return result;
					    } catch (error) {
					      // Covers both JSON syntax errors and the structure check above.
					      console.error('[AI PIPELINE] Failed to parse selector response:', response);
					      throw new Error('Invalid JSON response from selector AI');
					    }
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Reduces the dataset to the tools and concepts named in a selection.
					   * Domains, phases and the domain-agnostic software list are passed through
					   * unchanged.
					   *
					   * @param toolsData Full dataset.
					   * @param selection Names chosen by the selector model.
					   * @returns A new dataset object containing only the selected entries.
					   */
					  private filterDataBySelection(toolsData: any, selection: SelectionResult): any {
					    const wantedTools = new Set(selection.selectedTools);
					    const wantedConcepts = new Set(selection.selectedConcepts);

					    const tools = toolsData.tools.filter((entry: any) => wantedTools.has(entry.name));
					    const concepts = toolsData.concepts.filter((entry: any) => wantedConcepts.has(entry.name));

					    return {
					      tools,
					      concepts,
					      domains: toolsData.domains,
					      phases: toolsData.phases,
					      'domain-agnostic-software': toolsData['domain-agnostic-software']
					    };
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Pre-filters the dataset to the entries that the embeddings service
					   * reports as similar to the query.
					   *
					   * When the lookup returns nothing, the full dataset is returned and the
					   * stats carry `fallbackToFull: true`.
					   *
					   * @param userQuery The user's query.
					   * @param toolsData Full dataset.
					   * @param mode Accepted for signature parity; not used in this method.
					   * @returns The (possibly reduced) dataset and embedding statistics.
					   */
					  private async processWithEmbeddings(userQuery: string, toolsData: any, mode: string): Promise<{ filteredData: any; stats: any }> {
					    console.log('[AI PIPELINE] Using embeddings for initial filtering');

					    const matches = await embeddingsService.findSimilar(
					      userQuery,
					      this.embeddingCandidates,
					      this.similarityThreshold
					    );

					    if (matches.length === 0) {
					      console.log('[AI PIPELINE] No similar items found with embeddings, using full dataset');
					      const fallbackStats = { embeddingsUsed: true, candidatesFromEmbeddings: 0, fallbackToFull: true };
					      return { filteredData: toolsData, stats: fallbackStats };
					    }

					    // Split the matches into tool names and concept names; other types are ignored.
					    const toolNames = new Set();
					    const conceptNames = new Set();
					    matches.forEach(match => {
					      switch (match.type) {
					        case 'tool':
					          toolNames.add(match.name);
					          break;
					        case 'concept':
					          conceptNames.add(match.name);
					          break;
					      }
					    });

					    const tools = toolsData.tools.filter((entry: any) => toolNames.has(entry.name));
					    const concepts = toolsData.concepts.filter((entry: any) => conceptNames.has(entry.name));
					    const embeddingFilteredData = {
					      tools,
					      concepts,
					      domains: toolsData.domains,
					      phases: toolsData.phases,
					      'domain-agnostic-software': toolsData['domain-agnostic-software']
					    };

					    console.log(`[AI PIPELINE] Embeddings filtered to ${embeddingFilteredData.tools.length} tools, ${embeddingFilteredData.concepts.length} concepts`);

					    const stats = { embeddingsUsed: true, candidatesFromEmbeddings: matches.length };
					    return { filteredData: embeddingFilteredData, stats };
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Filters the dataset by letting the selector model choose the relevant
					   * entries, without any embeddings lookup.
					   *
					   * @param userQuery The user's query.
					   * @param toolsData Full dataset.
					   * @param mode Processing mode forwarded to the selector.
					   * @returns The reduced dataset and stats, including the selector's reasoning.
					   */
					  private async processWithoutEmbeddings(userQuery: string, toolsData: any, mode: string): Promise<{ filteredData: any; stats: any }> {
					    console.log('[AI PIPELINE] Processing without embeddings - using selector AI');

					    const chosen = await this.selectRelevantItems(toolsData, userQuery, mode);
					    const filteredData = this.filterDataBySelection(toolsData, chosen);

					    const toolCount = chosen.selectedTools.length;
					    const conceptCount = chosen.selectedConcepts.length;
					    console.log(`[AI PIPELINE] Selector chose ${toolCount} tools, ${conceptCount} concepts`);
					    console.log(`[AI PIPELINE] Selection reasoning: ${chosen.reasoning}`);

					    const stats = {
					      embeddingsUsed: false,
					      candidatesFromEmbeddings: 0,
					      selectorReasoning: chosen.reasoning
					    };
					    return { filteredData, stats };
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Picks the analyzer prompt builder that matches the mode.
					   *
					   * @param filteredData Pre-filtered dataset to embed in the prompt.
					   * @param userQuery The user's query.
					   * @param mode `'workflow'` uses the workflow prompt; anything else uses the
					   *   tool prompt.
					   * @returns The prompt text produced by the chosen builder.
					   */
					  private createAnalyzerPrompt(filteredData: any, userQuery: string, mode: string): string {
					    return mode === 'workflow'
					      ? this.createWorkflowAnalyzerPrompt(filteredData, userQuery)
					      : this.createToolAnalyzerPrompt(filteredData, userQuery);
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  private createWorkflowAnalyzerPrompt(toolsData: any, userQuery: string): string {
 | 
				
			||||||
 | 
					    const toolsList = toolsData.tools.map((tool: any) => ({
 | 
				
			||||||
 | 
					      name: tool.name,
 | 
				
			||||||
 | 
					      description: tool.description,
 | 
				
			||||||
 | 
					      domains: tool.domains,
 | 
				
			||||||
 | 
					      phases: tool.phases,
 | 
				
			||||||
 | 
					      domainAgnostic: tool['domain-agnostic-software'],
 | 
				
			||||||
 | 
					      platforms: tool.platforms,
 | 
				
			||||||
 | 
					      skillLevel: tool.skillLevel,
 | 
				
			||||||
 | 
					      license: tool.license,
 | 
				
			||||||
 | 
					      tags: tool.tags,
 | 
				
			||||||
 | 
					      related_concepts: tool.related_concepts || []
 | 
				
			||||||
 | 
					    }));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const conceptsList = toolsData.concepts.map((concept: any) => ({
 | 
				
			||||||
 | 
					      name: concept.name,
 | 
				
			||||||
 | 
					      description: concept.description,
 | 
				
			||||||
 | 
					      domains: concept.domains,
 | 
				
			||||||
 | 
					      phases: concept.phases,
 | 
				
			||||||
 | 
					      skillLevel: concept.skillLevel,
 | 
				
			||||||
 | 
					      tags: concept.tags
 | 
				
			||||||
 | 
					    }));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const regularPhases = toolsData.phases || [];
 | 
				
			||||||
 | 
					    const domainAgnosticSoftware = toolsData['domain-agnostic-software'] || [];
 | 
				
			||||||
 | 
					    const allPhaseItems = [...regularPhases, ...domainAgnosticSoftware];
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    const phasesDescription = allPhaseItems.map((phase: any) => 
 | 
				
			||||||
 | 
					      `- ${phase.id}: ${phase.name}`
 | 
				
			||||||
 | 
					    ).join('\n');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const domainsDescription = toolsData.domains.map((domain: any) => 
 | 
				
			||||||
 | 
					      `- ${domain.id}: ${domain.name}`
 | 
				
			||||||
 | 
					    ).join('\n');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const validPhases = [...regularPhases.map((p: any) => p.id), ...domainAgnosticSoftware.map((s: any) => s.id)].join('|');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte. Du erhältst eine vorgefilterte Auswahl relevanter Tools und Konzepte und sollst daraus eine optimale Empfehlung erstellen.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					VERFÜGBARE TOOLS/METHODEN (VORGEFILTERT):
 | 
				
			||||||
 | 
					${JSON.stringify(toolsList, null, 2)}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					VERFÜGBARE KONZEPTE (VORGEFILTERT):
 | 
				
			||||||
 | 
					${JSON.stringify(conceptsList, null, 2)}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					UNTERSUCHUNGSPHASEN:
 | 
				
			||||||
 | 
					${phasesDescription}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					FORENSISCHE DOMÄNEN:
 | 
				
			||||||
 | 
					${domainsDescription}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					WICHTIGE REGELN:
 | 
				
			||||||
 | 
					1. Pro Phase 2-3 Tools/Methoden empfehlen (immer mindestens 2 wenn verfügbar)
 | 
				
			||||||
 | 
					2. Tools/Methoden können in MEHREREN Phasen empfohlen werden wenn sinnvoll
 | 
				
			||||||
 | 
					3. Für Reporting-Phase: Visualisierungs- und Dokumentationssoftware einschließen
 | 
				
			||||||
 | 
					4. Gib stets dem spezieller für den Fall geeigneten Werkzeug den Vorzug
 | 
				
			||||||
 | 
					5. Deutsche Antworten für deutsche Anfragen, English for English queries
 | 
				
			||||||
 | 
					6. Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software
 | 
				
			||||||
 | 
					7. Bevorzuge alles, was nicht proprietär ist (license != "Proprietary"), aber erkenne an, wenn proprietäre Software besser geeignet ist
 | 
				
			||||||
 | 
					8. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
 | 
				
			||||||
 | 
					9. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ENHANCED CONTEXTUAL ANALYSIS:
 | 
				
			||||||
 | 
					10. Analysiere das Szenario detailliert und identifiziere Schlüsselelemente, Bedrohungen und forensische Herausforderungen
 | 
				
			||||||
 | 
					11. Entwickle einen strategischen Untersuchungsansatz basierend auf dem spezifischen Szenario
 | 
				
			||||||
 | 
					12. Identifiziere zeitkritische oder besonders wichtige Faktoren für diesen Fall
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					USER QUERY: "${userQuery}"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ANTWORT-FORMAT (strict JSON):
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					  "scenario_analysis": "Detaillierte Analyse des Szenarios: Erkannte Schlüsselelemente, Art des Vorfalls, betroffene Systeme, potentielle Bedrohungen und forensische Herausforderungen",
 | 
				
			||||||
 | 
					  "investigation_approach": "Strategischer Untersuchungsansatz für dieses spezifische Szenario: Prioritäten, Reihenfolge der Phasen, besondere Überlegungen",
 | 
				
			||||||
 | 
					  "critical_considerations": "Zeitkritische Faktoren, wichtige Sicherheitsaspekte oder besondere Vorsichtsmaßnahmen für diesen Fall",
 | 
				
			||||||
 | 
					  "recommended_tools": [
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      "name": "EXAKTER Name aus der Tools-Database",
 | 
				
			||||||
 | 
					      "priority": "high|medium|low", 
 | 
				
			||||||
 | 
					      "phase": "${validPhases}",
 | 
				
			||||||
 | 
					      "justification": "Warum diese Methode für diese Phase und dieses spezifische Szenario geeignet ist - mit Bezug zu den erkannten Schlüsselelementen"
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  ],
 | 
				
			||||||
 | 
					  "workflow_suggestion": "Vorgeschlagener Untersuchungsablauf mit konkreten Schritten für dieses Szenario",
 | 
				
			||||||
 | 
					  "background_knowledge": [
 | 
				
			||||||
 | 
					    {
 | 
				
			||||||
 | 
					      "concept_name": "EXAKTER Name aus der Konzepte-Database",
 | 
				
			||||||
 | 
					      "relevance": "Warum dieses Konzept für das Szenario relevant ist, und bei welchen der empfohlenen Methoden/Tools."
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  ],
 | 
				
			||||||
 | 
					  "additional_notes": "Wichtige Überlegungen und Hinweise"
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Builds the analyzer prompt used when the user asks about one specific problem.
					   *
					   * Every tool and concept in `toolsData` is projected onto a reduced set of
					   * fields, both lists are embedded as pretty-printed JSON, and the rules, the
					   * user query and the expected JSON answer schema are appended.
					   *
					   * @param toolsData - Object providing `tools` and `concepts` arrays (the prompt
					   *   text describes them as pre-filtered).
					   * @param userQuery - Query text; interpolated verbatim into the prompt.
					   * @returns The complete prompt string.
					   */
					  private createToolAnalyzerPrompt(toolsData: any, userQuery: string): string {
					    // Expose only the listed tool fields; a missing related_concepts becomes [].
					    const toolsList = toolsData.tools.map((tool: any) => ({
					      name: tool.name,
					      description: tool.description,
					      domains: tool.domains,
					      phases: tool.phases,
					      platforms: tool.platforms,
					      skillLevel: tool.skillLevel,
					      license: tool.license,
					      tags: tool.tags,
					      url: tool.url,
					      projectUrl: tool.projectUrl,
					      related_concepts: tool.related_concepts || []
					    }));
					
					    // Concepts carry a smaller field set than tools.
					    const conceptsList = toolsData.concepts.map((concept: any) => ({
					      name: concept.name,
					      description: concept.description,
					      domains: concept.domains,
					      phases: concept.phases,
					      skillLevel: concept.skillLevel,
					      tags: concept.tags
					    }));
					
					    // Everything between the backticks is part of the prompt text; editing it changes runtime behavior.
					    return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte. Du erhältst eine vorgefilterte Auswahl relevanter Tools und Konzepte und sollst daraus 1-3 optimale Empfehlungen für ein spezifisches Problem erstellen.
					
					VERFÜGBARE TOOLS/METHODEN (VORGEFILTERT):
					${JSON.stringify(toolsList, null, 2)}
					
					VERFÜGBARE KONZEPTE (VORGEFILTERT):
					${JSON.stringify(conceptsList, null, 2)}
					
					WICHTIGE REGELN:
					1. Analysiere das spezifische Problem/die Anforderung sorgfältig
					2. Empfehle 1-3 Methoden/Tools, sortiert nach Eignung (beste Empfehlung zuerst)
					3. Gib detaillierte Erklärungen, WARUM und WIE jede Methode/Tool das Problem löst
					4. Berücksichtige praktische Aspekte: Skill Level, Plattformen, Verfügbarkeit
					5. Deutsche Antworten für deutsche Anfragen, English for English queries
					6. Gib konkrete Anwendungshinweise, nicht nur allgemeine Beschreibungen
					7. Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software
					8. Erwähne sowohl Stärken als auch Schwächen/Limitationen
					9. Schlage alternative Ansätze vor, wenn sinnvoll
					10. Gib grundsätzliche Hinweise, WIE die Methode/Tool konkret eingesetzt wird
					11. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
					12. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
					
					ENHANCED CONTEXTUAL ANALYSIS:
					13. Analysiere das Problem detailliert und identifiziere technische Anforderungen, Herausforderungen und Erfolgsfaktoren
					14. Entwickle einen strategischen Lösungsansatz basierend auf dem spezifischen Problem
					15. Identifiziere wichtige Voraussetzungen oder Warnungen für die Anwendung
					
					USER QUERY: "${userQuery}"
					
					ANTWORT-FORMAT (strict JSON):
					{
					  "problem_analysis": "Detaillierte Analyse des Problems: Erkannte technische Anforderungen, Herausforderungen, benötigte Fähigkeiten und Erfolgsfaktoren",
					  "investigation_approach": "Strategischer Lösungsansatz für dieses spezifische Problem: Herangehensweise, Prioritäten, optimale Anwendungsreihenfolge",
					  "critical_considerations": "Wichtige Voraussetzungen, potentielle Fallstricke oder Warnungen für die Anwendung der empfohlenen Lösungen",
					  "recommended_tools": [
					    {
					      "name": "EXAKTER Name aus der Tools-Database",
					      "rank": 1,
					      "suitability_score": "high|medium|low",
					      "detailed_explanation": "Detaillierte Erklärung, warum dieses Tool/diese Methode das spezifische Problem löst - mit Bezug zu den erkannten Anforderungen",
					      "implementation_approach": "Konkrete Schritte/Ansatz zur Anwendung für dieses spezifische Problem",
					      "pros": ["Spezifische Vorteile für diesen Anwendungsfall", "Weitere Vorteile"],
					      "cons": ["Potentielle Nachteile oder Limitationen", "Weitere Einschränkungen"],
					      "alternatives": "Alternative Ansätze oder ergänzende Tools/Methoden, falls relevant"
					    }
					  ],
					  "background_knowledge": [
					    {
					      "concept_name": "EXAKTER Name aus der Konzepte-Database",
					      "relevance": "Warum dieses Konzept für die empfohlenen Tools/das Problem relevant ist, und für welche der empfohlenen Methoden/Tools."
					    }
					  ],
					  "additional_considerations": "Wichtige Überlegungen, Voraussetzungen oder Warnungen"
					}
					
					Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Runs the two-stage recommendation pipeline for one user query.
					   *
					   * Stage 1 narrows the dataset (through the embeddings path when the
					   * embeddings service reports it is enabled, otherwise through
					   * `processWithoutEmbeddings`). Stage 2 sends the analyzer prompt to the
					   * analyzer model, parses its JSON answer and removes every recommended tool
					   * or concept whose name is not part of the filtered data.
					   *
					   * @param userQuery - The user's query text.
					   * @param mode - Forwarded unchanged to the stage helpers and the prompt builder.
					   * @returns The validated recommendation together with processing statistics.
					   * @throws Error `Invalid JSON response from analyzer AI` when the answer is
					   *   not a JSON object; any other failure is logged and rethrown unchanged.
					   */
					  async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {
					    const startTime = Date.now();
					    console.log(`[AI PIPELINE] Starting ${mode} query processing`);
					
					    try {
					      // Load full dataset
					      const toolsData = await getCompressedToolsDataForAI();
					
					      let filteredData: any;
					      let processingStats: any = {
					        embeddingsUsed: false,
					        candidatesFromEmbeddings: 0,
					        finalSelectedItems: 0,
					        processingTimeMs: 0
					      };
					
					      // Stage 1: Filter candidates (embeddings or selector AI)
					      if (embeddingsService.isEnabled()) {
					        const result = await this.processWithEmbeddings(userQuery, toolsData, mode);
					        filteredData = result.filteredData;
					        processingStats = { ...processingStats, ...result.stats };
					      } else {
					        const result = await this.processWithoutEmbeddings(userQuery, toolsData, mode);
					        filteredData = result.filteredData;
					        processingStats = { ...processingStats, ...result.stats };
					      }
					
					      // Stage 2: Generate detailed analysis with analyzer AI
					      console.log('[AI PIPELINE] Stage 2: Generating detailed analysis');
					      const analyzerPrompt = this.createAnalyzerPrompt(filteredData, userQuery, mode);
					
					      const messages = [
					        { role: 'user', content: analyzerPrompt }
					      ];
					
					      const analysisResponse = await this.callAI(this.analyzerConfig, messages, 3500);
					
					      // Parse the response
					      let recommendation: any;
					      try {
					        // Trim first so a fence preceded by whitespace is still matched, and
					        // accept both "```json" and bare "```" opening fences.
					        const cleanedContent = analysisResponse
					          .trim()
					          .replace(/^```(?:json)?\s*/i, '')
					          .replace(/\s*```$/, '')
					          .trim();
					        recommendation = JSON.parse(cleanedContent);
					      } catch (error) {
					        console.error('[AI PIPELINE] Failed to parse analysis response:', analysisResponse);
					        throw new Error('Invalid JSON response from analyzer AI');
					      }
					
					      // Valid JSON such as `null`, a number or an array is still not a usable answer.
					      if (recommendation === null || typeof recommendation !== 'object' || Array.isArray(recommendation)) {
					        console.error('[AI PIPELINE] Failed to parse analysis response:', analysisResponse);
					        throw new Error('Invalid JSON response from analyzer AI');
					      }
					
					      // Validate tool/concept names exist in filtered data
					      const validToolNames = new Set(filteredData.tools.map((t: any) => t.name));
					      const validConceptNames = new Set(filteredData.concepts.map((c: any) => c.name));
					
					      if (recommendation.recommended_tools) {
					        // A non-array value is treated as "no recommendations" instead of crashing on .filter.
					        const proposedTools: any[] = Array.isArray(recommendation.recommended_tools)
					          ? recommendation.recommended_tools
					          : [];
					        recommendation.recommended_tools = proposedTools.filter((tool: any) => {
					          if (!validToolNames.has(tool?.name)) {
					            console.warn(`[AI PIPELINE] Analyzer recommended unknown tool: ${tool?.name}`);
					            return false;
					          }
					          return true;
					        });
					      }
					
					      if (recommendation.background_knowledge) {
					        const proposedConcepts: any[] = Array.isArray(recommendation.background_knowledge)
					          ? recommendation.background_knowledge
					          : [];
					        recommendation.background_knowledge = proposedConcepts.filter((concept: any) => {
					          if (!validConceptNames.has(concept?.concept_name)) {
					            console.warn(`[AI PIPELINE] Analyzer referenced unknown concept: ${concept?.concept_name}`);
					            return false;
					          }
					          return true;
					        });
					      }
					
					      processingStats.finalSelectedItems = (recommendation.recommended_tools?.length || 0) +
					                                          (recommendation.background_knowledge?.length || 0);
					      processingStats.processingTimeMs = Date.now() - startTime;
					
					      console.log(`[AI PIPELINE] Completed in ${processingStats.processingTimeMs}ms`);
					      console.log(`[AI PIPELINE] Final recommendations: ${recommendation.recommended_tools?.length || 0} tools, ${recommendation.background_knowledge?.length || 0} concepts`);
					
					      return {
					        recommendation,
					        processingStats
					      };
					
					    } catch (error) {
					      console.error('[AI PIPELINE] Processing failed:', error);
					      throw error;
					    }
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Single module-level pipeline instance, exported below together with the
					// AnalysisResult type.
					const aiPipeline = new AIProcessingPipeline();
					
					export { aiPipeline, type AnalysisResult };
 | 
				
			||||||
@ -21,7 +21,7 @@ const ToolSchema = z.object({
 | 
				
			|||||||
  accessType: z.string().optional().nullable(),
 | 
					  accessType: z.string().optional().nullable(),
 | 
				
			||||||
  'domain-agnostic-software': z.array(z.string()).optional().nullable(),
 | 
					  'domain-agnostic-software': z.array(z.string()).optional().nullable(),
 | 
				
			||||||
  related_concepts: z.array(z.string()).optional().nullable().default([]),
 | 
					  related_concepts: z.array(z.string()).optional().nullable().default([]),
 | 
				
			||||||
  related_software: z.array(z.string()).optional().nullable().default([]), // Added this line
 | 
					  related_software: z.array(z.string()).optional().nullable().default([]),
 | 
				
			||||||
});
 | 
					});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const ToolsDataSchema = z.object({
 | 
					const ToolsDataSchema = z.object({
 | 
				
			||||||
@ -67,6 +67,7 @@ let cachedData: ToolsData | null = null;
 | 
				
			|||||||
let cachedRandomizedData: ToolsData | null = null;
 | 
					let cachedRandomizedData: ToolsData | null = null;
 | 
				
			||||||
let cachedCompressedData: CompressedToolsData | null = null;
 | 
					let cachedCompressedData: CompressedToolsData | null = null;
 | 
				
			||||||
let lastRandomizationDate: string | null = null;
 | 
					let lastRandomizationDate: string | null = null;
 | 
				
			||||||
 | 
					let dataVersion: string | null = null; // Add version tracking for embeddings
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function seededRandom(seed: number): () => number {
 | 
					function seededRandom(seed: number): () => number {
 | 
				
			||||||
  let x = Math.sin(seed) * 10000;
 | 
					  let x = Math.sin(seed) * 10000;
 | 
				
			||||||
@ -91,6 +92,18 @@ function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
 | 
				
			|||||||
  return shuffled;
 | 
					  return shuffled;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Generates a short, deterministic version string for `data`.
					 *
					 * The value is serialized with object keys sorted at every nesting level, so
					 * the result does not depend on property order, and then folded into a
					 * 32-bit rolling hash rendered in base 36.
					 *
					 * An array passed as the second argument of `JSON.stringify` is a property
					 * allowlist that applies at every depth, so using the top-level keys there
					 * would drop nested fields and make the version blind to content changes.
					 * Keys are therefore sorted by rebuilding the value instead.
					 *
					 * @param data - Any JSON-serializable value.
					 * @returns Base-36 rendering of the absolute 32-bit hash.
					 */
					function generateDataVersion(data: any): string {
					  // Rebuild plain objects with sorted keys; arrays keep their order.
					  const canonicalize = (value: unknown): unknown => {
					    if (Array.isArray(value)) {
					      return value.map(canonicalize);
					    }
					    if (value !== null && typeof value === 'object') {
					      // Leave objects with their own JSON form (e.g. Date) to JSON.stringify.
					      if (typeof (value as { toJSON?: unknown }).toJSON === 'function') {
					        return value;
					      }
					      const source = value as Record<string, unknown>;
					      const sorted: Record<string, unknown> = {};
					      for (const key of Object.keys(source).sort()) {
					        sorted[key] = canonicalize(source[key]);
					      }
					      return sorted;
					    }
					    return value;
					  };
					
					  // JSON.stringify yields undefined for non-serializable roots; hash those as "".
					  const str = JSON.stringify(canonicalize(data)) ?? '';
					  let hash = 0;
					  for (let i = 0; i < str.length; i++) {
					    // "| 0" keeps the accumulator a signed 32-bit integer.
					    hash = (((hash << 5) - hash) + str.charCodeAt(i)) | 0;
					  }
					  return Math.abs(hash).toString(36);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
async function loadRawData(): Promise<ToolsData> {
 | 
					async function loadRawData(): Promise<ToolsData> {
 | 
				
			||||||
  if (!cachedData) {
 | 
					  if (!cachedData) {
 | 
				
			||||||
    const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
 | 
					    const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
 | 
				
			||||||
@ -99,6 +112,11 @@ async function loadRawData(): Promise<ToolsData> {
 | 
				
			|||||||
    
 | 
					    
 | 
				
			||||||
    try {
 | 
					    try {
 | 
				
			||||||
      cachedData = ToolsDataSchema.parse(rawData);
 | 
					      cachedData = ToolsDataSchema.parse(rawData);
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      // Generate data version for embeddings tracking
 | 
				
			||||||
 | 
					      dataVersion = generateDataVersion(cachedData);
 | 
				
			||||||
 | 
					      console.log(`[DATA SERVICE] Loaded data version: ${dataVersion}`);
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
    } catch (error) {
 | 
					    } catch (error) {
 | 
				
			||||||
      console.error('YAML validation failed:', error);
 | 
					      console.error('YAML validation failed:', error);
 | 
				
			||||||
      throw new Error('Invalid tools.yaml structure');
 | 
					      throw new Error('Invalid tools.yaml structure');
 | 
				
			||||||
@ -124,6 +142,7 @@ export async function getToolsData(): Promise<ToolsData> {
 | 
				
			|||||||
    
 | 
					    
 | 
				
			||||||
    lastRandomizationDate = today;
 | 
					    lastRandomizationDate = today;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
 | 
					    // Clear compressed cache when randomized data changes
 | 
				
			||||||
    cachedCompressedData = null;
 | 
					    cachedCompressedData = null;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
@ -156,14 +175,23 @@ export async function getCompressedToolsDataForAI(): Promise<CompressedToolsData
 | 
				
			|||||||
      'domain-agnostic-software': data['domain-agnostic-software']
 | 
					      'domain-agnostic-software': data['domain-agnostic-software']
 | 
				
			||||||
      // scenarios intentionally excluded from AI data
 | 
					      // scenarios intentionally excluded from AI data
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    console.log(`[DATA SERVICE] Generated compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  return cachedCompressedData;
 | 
					  return cachedCompressedData;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Returns the module-level data version string.
					 *
					 * @returns The version computed when the raw data was last loaded, or `null`
					 *   while no data has been loaded or after the cache has been cleared.
					 */
					export function getDataVersion(): string | null {
					  return dataVersion;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 * Resets every module-level cache (raw, randomized and compressed data, the
 * randomization date and the data version) and logs that the cache was cleared.
 */
export function clearCache(): void {
  cachedData = null;
  cachedRandomizedData = null;
  cachedCompressedData = null;
  lastRandomizationDate = null;
  dataVersion = null;
  
  console.log('[DATA SERVICE] Cache cleared');
}
 | 
				
			||||||
							
								
								
									
										259
									
								
								src/utils/embeddings.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										259
									
								
								src/utils/embeddings.ts
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,259 @@
 | 
				
			|||||||
 | 
					// src/utils/embeddings.ts
 | 
				
			||||||
 | 
					import { createHash } from 'crypto';
					import { promises as fs } from 'fs';
					import path from 'path';
					import { getCompressedToolsDataForAI } from './dataService.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** One stored embedding for a single tool or concept. */
					interface EmbeddingData {
					  /** Item type and name joined by "_", with non-alphanumeric name characters replaced by "_". */
					  id: string;
					  /** Whether the entry describes a tool or a concept. */
					  type: 'tool' | 'concept';
					  /** Item name as it appears in the tools data. */
					  name: string;
					  /** The lower-cased text that was sent to the embeddings API for this item. */
					  content: string;
					  /** Embedding vector returned by the embeddings API. */
					  embedding: number[];
					  /** Attributes copied from the source item. */
					  metadata: {
					    domains?: string[];
					    phases?: string[];
					    tags?: string[];
					    skillLevel?: string;
					    type?: string;
					  };
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** Shape of the embeddings file persisted on disk. */
					interface EmbeddingsDatabase {
					  /** Hash of the tools data the embeddings were generated from; compared on startup to decide whether to reuse them. */
					  version: string;
					  /** `Date.now()` timestamp (milliseconds) taken when the file was written. */
					  lastUpdated: number;
					  /** All stored embeddings. */
					  embeddings: EmbeddingData[];
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class EmbeddingsService {
 | 
				
			||||||
 | 
					  private embeddings: EmbeddingData[] = [];
 | 
				
			||||||
 | 
					  private isInitialized = false;
 | 
				
			||||||
 | 
					  private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
 | 
				
			||||||
 | 
					  private readonly batchSize: number;
 | 
				
			||||||
 | 
					  private readonly batchDelay: number;
 | 
				
			||||||
 | 
					  private readonly enabled: boolean;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Reads the embeddings configuration from the environment.
					   *
					   * - `AI_EMBEDDINGS_ENABLED` must be exactly `'true'` to enable the service.
					   * - `AI_EMBEDDINGS_BATCH_SIZE` defaults to 20 and must be a positive integer.
					   * - `AI_EMBEDDINGS_BATCH_DELAY_MS` defaults to 1000 and must not be negative.
					   *
					   * Unparsable or out-of-range values fall back to the defaults: a batch size
					   * of 0 would keep the batching loop from ever advancing, and NaN would
					   * silently break batching and the delay.
					   */
					  constructor() {
					    const readInt = (raw: string | undefined, fallback: number, min: number): number => {
					      const parsed = Number.parseInt(raw || '', 10);
					      return Number.isFinite(parsed) && parsed >= min ? parsed : fallback;
					    };
					
					    this.enabled = process.env.AI_EMBEDDINGS_ENABLED === 'true';
					    this.batchSize = readInt(process.env.AI_EMBEDDINGS_BATCH_SIZE, 20, 1);
					    this.batchDelay = readInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS, 1000, 0);
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Prepares the in-memory embeddings.
					   *
					   * Does nothing (apart from logging) when the service is disabled. Otherwise
					   * it makes sure the data directory exists, hashes the current tools data and
					   * reuses the embeddings stored on disk when their version equals that hash;
					   * if not, new embeddings are generated. Failures are logged and leave the
					   * service uninitialized; they are not rethrown.
					   */
					  async initialize(): Promise<void> {
					    if (!this.enabled) {
					      console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
					      return;
					    }
					
					    try {
					      console.log('[EMBEDDINGS] Initializing embeddings system...');
					
					      // Create data directory if it doesn't exist
					      const dataDir = path.dirname(this.embeddingsPath);
					      await fs.mkdir(dataDir, { recursive: true });
					
					      const sourceData = await getCompressedToolsDataForAI();
					      const expectedVersion = this.hashData(sourceData);
					
					      // Regenerate unless the stored file matches the current data hash.
					      const stored = await this.loadEmbeddings();
					      if (!stored || stored.version !== expectedVersion) {
					        console.log('[EMBEDDINGS] Generating new embeddings...');
					        await this.generateEmbeddings(sourceData, expectedVersion);
					      } else {
					        console.log('[EMBEDDINGS] Using cached embeddings');
					        this.embeddings = stored.embeddings;
					      }
					
					      this.isInitialized = true;
					      console.log(`[EMBEDDINGS] Initialized with ${this.embeddings.length} embeddings`);
					    } catch (error) {
					      console.error('[EMBEDDINGS] Failed to initialize:', error);
					      this.isInitialized = false;
					    }
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Computes the cache version for `data` as the SHA-256 hex digest of its
					   * JSON serialization.
					   *
					   * A truncated base64 prefix only reflects the first few bytes of the JSON
					   * and stays the same when anything further in changes, so stale embeddings
					   * would keep being reused; the digest covers the whole payload. Files
					   * written with a differently computed version no longer match and are
					   * regenerated on the next initialization.
					   *
					   * @param data - JSON-serializable value to fingerprint.
					   * @returns 64-character lowercase hex digest.
					   */
					  private hashData(data: any): string {
					    // JSON.stringify yields undefined for non-serializable roots; hash those as "".
					    const serialized = JSON.stringify(data) ?? '';
					    return createHash('sha256').update(serialized).digest('hex');
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Reads the embeddings file from disk.
					   *
					   * A missing file is the normal first-run case and is logged as such. Any
					   * other read failure, invalid JSON, or content without a string `version`
					   * and an `embeddings` array is logged as a warning. In all of these cases
					   * `null` is returned so the caller regenerates the embeddings.
					   *
					   * @returns The stored database, or `null` when nothing usable is on disk.
					   */
					  private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> {
					    let raw: string;
					    try {
					      raw = await fs.readFile(this.embeddingsPath, 'utf8');
					    } catch (error) {
					      const code = typeof error === 'object' && error !== null
					        ? (error as { code?: unknown }).code
					        : undefined;
					      if (code === 'ENOENT') {
					        console.log('[EMBEDDINGS] No existing embeddings found');
					      } else {
					        console.warn('[EMBEDDINGS] Could not read existing embeddings:', error);
					      }
					      return null;
					    }
					
					    try {
					      const parsed: unknown = JSON.parse(raw);
					      const candidate = parsed as Partial<EmbeddingsDatabase> | null;
					      if (
					        candidate !== null &&
					        typeof candidate === 'object' &&
					        typeof candidate.version === 'string' &&
					        Array.isArray(candidate.embeddings)
					      ) {
					        return candidate as EmbeddingsDatabase;
					      }
					      console.warn('[EMBEDDINGS] Existing embeddings file has an unexpected structure, ignoring it');
					    } catch (error) {
					      console.warn('[EMBEDDINGS] Existing embeddings file is not valid JSON, ignoring it:', error);
					    }
					    return null;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Writes the in-memory embeddings to the embeddings file as indented JSON.
					   *
					   * @param version - Version string stored alongside the embeddings.
					   */
					  private async saveEmbeddings(version: string): Promise<void> {
					    const snapshot: EmbeddingsDatabase = {
					      version,
					      lastUpdated: Date.now(),
					      embeddings: this.embeddings
					    };
					    const serialized = JSON.stringify(snapshot, null, 2);
					
					    await fs.writeFile(this.embeddingsPath, serialized);
					    console.log(`[EMBEDDINGS] Saved ${this.embeddings.length} embeddings to disk`);
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Builds the text that is embedded for an item: name, description, tags,
					   * domains and phases, with empty parts removed, joined by single spaces and
					   * lower-cased.
					   *
					   * @param item - Tool or concept record.
					   * @returns The lower-cased, space-separated content string.
					   */
					  private createContentString(item: any): string {
					    const fragments: any[] = [];
					    fragments.push(item.name, item.description || '');
					    fragments.push(...(item.tags || []));
					    fragments.push(...(item.domains || []));
					    fragments.push(...(item.phases || []));
					
					    const joined = fragments.filter(Boolean).join(' ');
					    return joined.toLowerCase();
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Requests embeddings for a batch of texts from the configured endpoint.
					   *
					   * Endpoint, API key and model come from `AI_EMBEDDINGS_ENDPOINT`,
					   * `AI_EMBEDDINGS_API_KEY` and `AI_EMBEDDINGS_MODEL`. The response is expected
					   * to carry one `{ embedding: number[] }` entry per input under `data`; the
					   * caller pairs results with inputs by position, so a missing, malformed or
					   * differently sized result is rejected here instead of misaligning items.
					   *
					   * @param contents - Texts to embed; an empty batch returns `[]` without a request.
					   * @returns One embedding vector per input, in response order.
					   * @throws Error when configuration is missing, the HTTP status is not OK, or
					   *   the response does not contain exactly one embedding array per input.
					   */
					  private async generateEmbeddingsBatch(contents: string[]): Promise<number[][]> {
					    const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
					    const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
					    const model = process.env.AI_EMBEDDINGS_MODEL;
					
					    if (!endpoint || !apiKey || !model) {
					      throw new Error('Missing embeddings API configuration');
					    }
					
					    if (contents.length === 0) {
					      return [];
					    }
					
					    const response = await fetch(endpoint, {
					      method: 'POST',
					      headers: {
					        'Content-Type': 'application/json',
					        'Authorization': `Bearer ${apiKey}`
					      },
					      body: JSON.stringify({
					        model,
					        input: contents
					      })
					    });
					
					    if (!response.ok) {
					      const error = await response.text();
					      throw new Error(`Embeddings API error: ${response.status} - ${error}`);
					    }
					
					    const data: any = await response.json();
					    const rows: unknown = data?.data;
					
					    if (!Array.isArray(rows) || rows.length !== contents.length) {
					      const received = Array.isArray(rows) ? String(rows.length) : 'no';
					      throw new Error(`Embeddings API returned ${received} embeddings for ${contents.length} inputs`);
					    }
					
					    return rows.map((item: any, position: number) => {
					      if (!Array.isArray(item?.embedding)) {
					        throw new Error(`Embeddings API response entry ${position} has no embedding array`);
					      }
					      return item.embedding as number[];
					    });
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Builds embeddings for every tool and concept in `toolsData` and persists them.
					   *
					   * Each item is turned into a content string via `createContentString`, the
					   * strings are sent to `generateEmbeddingsBatch` in chunks of `this.batchSize`,
					   * and the results are appended to `this.embeddings` (which is reset first).
					   * After the last batch, `saveEmbeddings(version)` is called.
					   *
					   * @param toolsData - Object with `tools` and `concepts` arrays; every entry needs a string `name`.
					   * @param version - Passed through unchanged to `saveEmbeddings`.
					   * @throws Re-throws any batch failure after logging it, including a response
					   *   whose embedding count does not match the number of inputs in the batch.
					   */
					  private async generateEmbeddings(toolsData: any, version: string): Promise<void> {
					    const allItems = [
					      ...toolsData.tools.map((tool: any) => ({ ...tool, type: 'tool' })),
					      ...toolsData.concepts.map((concept: any) => ({ ...concept, type: 'concept' }))
					    ];
					
					    const contents = allItems.map(item => this.createContentString(item));
					    this.embeddings = [];
					
					    const totalBatches = Math.ceil(contents.length / this.batchSize);
					
					    // Process in batches to respect rate limits
					    for (let i = 0; i < contents.length; i += this.batchSize) {
					      const batch = contents.slice(i, i + this.batchSize);
					      const batchItems = allItems.slice(i, i + this.batchSize);
					      const batchNumber = Math.ceil((i + 1) / this.batchSize);
					
					      console.log(`[EMBEDDINGS] Processing batch ${batchNumber} of ${totalBatches}`);
					
					      try {
					        const embeddings = await this.generateEmbeddingsBatch(batch);
					
					        // Results are paired with items by position, so a count mismatch would
					        // either drop items silently or index past the end of batchItems.
					        if (embeddings.length !== batch.length) {
					          throw new Error(`Embeddings API returned ${embeddings.length} embeddings for ${batch.length} inputs`);
					        }
					
					        embeddings.forEach((embedding, index) => {
					          const item = batchItems[index];
					          this.embeddings.push({
					            // Non-alphanumeric characters in the name are replaced with underscores
					            id: `${item.type}_${item.name.replace(/[^a-zA-Z0-9]/g, '_')}`,
					            type: item.type,
					            name: item.name,
					            content: batch[index],
					            embedding,
					            metadata: {
					              domains: item.domains,
					              phases: item.phases,
					              tags: item.tags,
					              skillLevel: item.skillLevel,
					              type: item.type
					            }
					          });
					        });
					
					        // Rate limiting delay between batches (skipped after the final batch)
					        if (i + this.batchSize < contents.length) {
					          await new Promise(resolve => setTimeout(resolve, this.batchDelay));
					        }
					      } catch (error) {
					        console.error(`[EMBEDDINGS] Failed to process batch ${batchNumber}:`, error);
					        throw error;
					      }
					    }
					
					    await this.saveEmbeddings(version);
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Computes the cosine similarity of two numeric vectors.
					   *
					   * @param a - First vector.
					   * @param b - Second vector.
					   * @returns `dot(a, b) / (|a| * |b|)`, or `0` when the vectors differ in
					   *   length or either has zero magnitude. In those cases the ratio is not
					   *   defined, and the unguarded formula would produce `NaN` or compare
					   *   only part of one vector.
					   */
					  private cosineSimilarity(a: number[], b: number[]): number {
					    // Vectors of different dimensionality are not comparable
					    if (a.length !== b.length) {
					      return 0;
					    }
					
					    let dotProduct = 0;
					    let normA = 0;
					    let normB = 0;
					
					    for (let i = 0; i < a.length; i++) {
					      dotProduct += a[i] * b[i];
					      normA += a[i] * a[i];
					      normB += b[i] * b[i];
					    }
					
					    const denominator = Math.sqrt(normA) * Math.sqrt(normB);
					
					    // Avoid 0 / 0 = NaN for empty or all-zero vectors
					    return denominator === 0 ? 0 : dotProduct / denominator;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Ranks the stored embeddings against a free-text query.
					   *
					   * The lower-cased query is embedded through `generateEmbeddingsBatch`, every
					   * stored entry is scored with `cosineSimilarity`, and entries scoring at
					   * least `threshold` are returned best-first, capped at `maxResults`.
					   * Each returned object is a shallow copy of the stored entry with an
					   * added `similarity` field.
					   *
					   * @param query - Text to search for.
					   * @param maxResults - Upper bound on the number of entries returned.
					   * @param threshold - Minimum similarity an entry must reach to be included.
					   * @returns The matching entries, or an empty array when the service is
					   *   disabled, not initialized, has no embeddings, or any step throws
					   *   (the error is logged, not propagated).
					   */
					  async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<EmbeddingData[]> {
					    const ready = this.enabled && this.isInitialized && this.embeddings.length !== 0;
					    if (!ready) {
					      return [];
					    }
					
					    try {
					      // Embed the query as a one-element batch
					      const queryVectors = await this.generateEmbeddingsBatch([query.toLowerCase()]);
					      const queryVector = queryVectors[0];
					
					      // Score each stored entry, keeping only those that reach the threshold
					      const matches: Array<EmbeddingData & { similarity: number }> = [];
					      for (const entry of this.embeddings) {
					        const similarity = this.cosineSimilarity(queryVector, entry.embedding);
					        if (similarity >= threshold) {
					          matches.push({ ...entry, similarity });
					        }
					      }
					
					      // Highest similarity first, then cap the result size
					      matches.sort((left, right) => right.similarity - left.similarity);
					      return matches.slice(0, maxResults);
					    } catch (error) {
					      console.error('[EMBEDDINGS] Failed to find similar items:', error);
					      return [];
					    }
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /** Reports whether the service is both enabled and initialized. */
					  isEnabled(): boolean {
					    return this.enabled ? this.isInitialized : false;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Snapshot of the service state: the `enabled` flag, the initialization
					   * flag, and the number of embeddings currently held in memory.
					   */
					  getStats(): { enabled: boolean; initialized: boolean; count: number } {
					    const { enabled, isInitialized: initialized } = this;
					    const count = this.embeddings.length;
					    return { enabled, initialized, count };
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Module-level instance, exported below
					const embeddingsService = new EmbeddingsService();
					
					export { embeddingsService, type EmbeddingData };
					
					// Start initialization at import time, but only where no `window` exists
					// and NODE_ENV is not 'test'. Failures are logged rather than thrown.
					if (typeof window === 'undefined') {
					  if (process.env.NODE_ENV !== 'test') {
					    embeddingsService.initialize().catch(reason => {
					      console.error('[EMBEDDINGS] Auto-initialization failed:', reason);
					    });
					  }
					}
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user