progress
This commit is contained in:
		
							parent
							
								
									78779fc8da
								
							
						
					
					
						commit
						895c476476
					
				
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							
							
								
								
									
										117075
									
								
								data/embeddings.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										117075
									
								
								data/embeddings.json
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										22
									
								
								src/pages/api/ai/embeddings.status.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								src/pages/api/ai/embeddings.status.ts
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,22 @@
 | 
			
		||||
// src/pages/api/ai/embeddings-status.ts
 | 
			
		||||
import type { APIRoute } from 'astro';
 | 
			
		||||
import { embeddingsService } from '../../../utils/embeddings.js';
 | 
			
		||||
import { apiResponse, apiServerError } from '../../../utils/api.js';
 | 
			
		||||
 | 
			
		||||
export const prerender = false;
 | 
			
		||||
 | 
			
		||||
export const GET: APIRoute = async () => {
 | 
			
		||||
  try {
 | 
			
		||||
    const stats = embeddingsService.getStats();
 | 
			
		||||
    
 | 
			
		||||
    return apiResponse.success({
 | 
			
		||||
      embeddings: stats,
 | 
			
		||||
      timestamp: new Date().toISOString(),
 | 
			
		||||
      status: stats.enabled && stats.initialized ? 'ready' : 
 | 
			
		||||
             stats.enabled && !stats.initialized ? 'initializing' : 'disabled'
 | 
			
		||||
    });
 | 
			
		||||
  } catch (error) {
 | 
			
		||||
    console.error('Embeddings status error:', error);
 | 
			
		||||
    return apiServerError.internal('Failed to get embeddings status');
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
@ -14,7 +14,11 @@ function getEnv(key: string): string {
 | 
			
		||||
  return value;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const AI_MODEL = getEnv('AI_MODEL');
 | 
			
		||||
// Use the analyzer AI for smart prompting (smaller, faster model)
 | 
			
		||||
const AI_ENDPOINT = getEnv('AI_ANALYZER_ENDPOINT');
 | 
			
		||||
const AI_API_KEY = getEnv('AI_ANALYZER_API_KEY');
 | 
			
		||||
const AI_MODEL = getEnv('AI_ANALYZER_MODEL');
 | 
			
		||||
 | 
			
		||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
 | 
			
		||||
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
 | 
			
		||||
const RATE_LIMIT_MAX = 5; // 5 enhancement requests per minute per user
 | 
			
		||||
@ -59,29 +63,38 @@ function cleanupExpiredRateLimits() {
 | 
			
		||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
 | 
			
		||||
 | 
			
		||||
function createEnhancementPrompt(input: string): string {
 | 
			
		||||
  return `
 | 
			
		||||
Du bist eine KI für digitale Forensik. Der Nutzer beschreibt ein forensisches Szenario. Analysiere die Eingabe.
 | 
			
		||||
  return `Du bist eine KI für digitale Forensik-Anfragen. Der Nutzer beschreibt ein forensisches Szenario oder Problem. Analysiere die Eingabe auf Vollständigkeit und Klarheit.
 | 
			
		||||
 | 
			
		||||
Wenn die Beschreibung unvollständig oder vage ist, stelle bis zu drei präzise Rückfragen im JSON-Array-Format, um wichtige Details zu klären (z. B. Vorfalltyp, System, Ziel, Datenquellen, Zeit, Beteiligte, rechtlicher Rahmen).
 | 
			
		||||
ANALYSIERE DIESE KATEGORIEN:
 | 
			
		||||
1. **Vorfalltyp**: Was ist passiert? (Malware, Datendiebstahl, Compliance-Verstoß, etc.)
 | 
			
		||||
2. **Betroffene Systeme**: Welche Technologien/Plattformen? (Windows, Linux, Mobile, Cloud, etc.)
 | 
			
		||||
3. **Verfügbare Datenquellen**: Was kann untersucht werden? (Logs, Images, Memory Dumps, etc.)
 | 
			
		||||
4. **Untersuchungsziel**: Was soll erreicht werden? (IOCs finden, Timeline erstellen, etc.)
 | 
			
		||||
5. **Zeitrahmen & Dringlichkeit**: Wann ist etwas passiert? Wie dringend?
 | 
			
		||||
6. **Ressourcen & Constraints**: Budget, Skills, Tools, rechtliche Aspekte
 | 
			
		||||
7. **Beweisziele**: Dokumentation, Gerichtsverfahren, interne Aufklärung?
 | 
			
		||||
 | 
			
		||||
Wenn die Eingabe bereits klar, spezifisch und vollständig ist, gib stattdessen nur eine leere Liste [] zurück.
 | 
			
		||||
WENN die Beschreibung vollständig und spezifisch ist: Gib eine leere Liste [] zurück.
 | 
			
		||||
 | 
			
		||||
Antwortformat strikt:
 | 
			
		||||
WENN wichtige Details fehlen: Formuliere 2-3 präzise Fragen, die die kritischsten Lücken schließen. Fokussiere auf Details, die die Tool-/Methoden-Auswahl stark beeinflussen.
 | 
			
		||||
 | 
			
		||||
\`\`\`json
 | 
			
		||||
FRAGE-QUALITÄT:
 | 
			
		||||
- Spezifisch, nicht allgemein (❌ "Mehr Details?" ✅ "Welche Betriebssysteme sind betroffen?")
 | 
			
		||||
- Handlungsrelevant (❌ "Wann passierte das?" ✅ "Haben Sie Logs aus der Vorfallzeit verfügbar?")
 | 
			
		||||
- Priorisiert nach Wichtigkeit für die forensische Analyse
 | 
			
		||||
 | 
			
		||||
ANTWORTFORMAT (NUR JSON):
 | 
			
		||||
[
 | 
			
		||||
  "Frage 1?",
 | 
			
		||||
  "Frage 2?",
 | 
			
		||||
  "Frage 3?"
 | 
			
		||||
  "Spezifische Frage 1?",
 | 
			
		||||
  "Spezifische Frage 2?",
 | 
			
		||||
  "Spezifische Frage 3?"
 | 
			
		||||
]
 | 
			
		||||
\`\`\`
 | 
			
		||||
 | 
			
		||||
Nutzer-Eingabe:
 | 
			
		||||
NUTZER-EINGABE:
 | 
			
		||||
${input}
 | 
			
		||||
  `.trim();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
  try {
 | 
			
		||||
    const authResult = await withAPIAuth(request, 'ai');
 | 
			
		||||
@ -98,12 +111,12 @@ export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
    const body = await request.json();
 | 
			
		||||
    const { input } = body;
 | 
			
		||||
 | 
			
		||||
    if (!input || typeof input !== 'string' || input.length < 20) {
 | 
			
		||||
      return apiError.badRequest('Input too short for enhancement');
 | 
			
		||||
    if (!input || typeof input !== 'string' || input.length < 40) {
 | 
			
		||||
      return apiError.badRequest('Input too short for enhancement (minimum 40 characters)');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const sanitizedInput = sanitizeInput(input);
 | 
			
		||||
    if (sanitizedInput.length < 20) {
 | 
			
		||||
    if (sanitizedInput.length < 40) {
 | 
			
		||||
      return apiError.badRequest('Input too short after sanitization');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -111,11 +124,11 @@ export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
    const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
 | 
			
		||||
    
 | 
			
		||||
    const aiResponse = await enqueueApiCall(() =>
 | 
			
		||||
      fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
 | 
			
		||||
      fetch(`${AI_ENDPOINT}/v1/chat/completions`, {
 | 
			
		||||
        method: 'POST',
 | 
			
		||||
        headers: {
 | 
			
		||||
          'Content-Type': 'application/json',
 | 
			
		||||
          'Authorization': `Bearer ${process.env.AI_API_KEY}`
 | 
			
		||||
          'Authorization': `Bearer ${AI_API_KEY}`
 | 
			
		||||
        },
 | 
			
		||||
        body: JSON.stringify({
 | 
			
		||||
          model: AI_MODEL,
 | 
			
		||||
@ -125,7 +138,7 @@ export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
              content: systemPrompt
 | 
			
		||||
            }
 | 
			
		||||
          ],
 | 
			
		||||
          max_tokens: 200,
 | 
			
		||||
          max_tokens: 300,
 | 
			
		||||
          temperature: 0.7
 | 
			
		||||
        })
 | 
			
		||||
      }), taskId);
 | 
			
		||||
@ -150,22 +163,26 @@ export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
        .trim();
 | 
			
		||||
      questions = JSON.parse(cleanedContent);
 | 
			
		||||
      
 | 
			
		||||
      if (!Array.isArray(questions) || questions.length === 0) {
 | 
			
		||||
        throw new Error('Invalid questions format');
 | 
			
		||||
      if (!Array.isArray(questions)) {
 | 
			
		||||
        throw new Error('Response is not an array');
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      // Validate and clean questions
 | 
			
		||||
      // Enhanced validation and cleaning
 | 
			
		||||
      questions = questions
 | 
			
		||||
        .filter(q => typeof q === 'string' && q.length > 5 && q.length < 120)
 | 
			
		||||
        .slice(0, 3);
 | 
			
		||||
        .filter(q => typeof q === 'string' && q.length > 10 && q.length < 150) // More reasonable length limits
 | 
			
		||||
        .filter(q => q.includes('?')) // Must be a question
 | 
			
		||||
        .map(q => q.trim())
 | 
			
		||||
        .slice(0, 3); // Max 3 questions
 | 
			
		||||
        
 | 
			
		||||
      // If no valid questions, return empty array (means input is complete)
 | 
			
		||||
      if (questions.length === 0) {
 | 
			
		||||
        throw new Error('No valid questions found');
 | 
			
		||||
        questions = [];
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.error('Failed to parse enhancement response:', aiContent);
 | 
			
		||||
      return apiServerError.unavailable('Invalid enhancement response format');
 | 
			
		||||
      // If parsing fails, assume input is complete enough
 | 
			
		||||
      questions = [];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    console.log(`[AI Enhancement] User: ${userId}, Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
 | 
			
		||||
@ -173,7 +190,8 @@ export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
    return new Response(JSON.stringify({
 | 
			
		||||
      success: true,
 | 
			
		||||
      questions,
 | 
			
		||||
      taskId
 | 
			
		||||
      taskId,
 | 
			
		||||
      inputComplete: questions.length === 0 // Flag to indicate if input seems complete
 | 
			
		||||
    }), {
 | 
			
		||||
      status: 200,
 | 
			
		||||
      headers: { 'Content-Type': 'application/json' }
 | 
			
		||||
 | 
			
		||||
@ -1,21 +1,12 @@
 | 
			
		||||
// src/pages/api/ai/query.ts
 | 
			
		||||
import type { APIRoute } from 'astro';
 | 
			
		||||
import { withAPIAuth } from '../../../utils/auth.js';
 | 
			
		||||
import { getCompressedToolsDataForAI } from '../../../utils/dataService.js';
 | 
			
		||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
 | 
			
		||||
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';   
 | 
			
		||||
import { aiPipeline } from '../../../utils/aiPipeline.js';
 | 
			
		||||
 | 
			
		||||
export const prerender = false;
 | 
			
		||||
 | 
			
		||||
function getEnv(key: string): string {
 | 
			
		||||
  const value = process.env[key];
 | 
			
		||||
  if (!value) {
 | 
			
		||||
    throw new Error(`Missing environment variable: ${key}`);
 | 
			
		||||
  }
 | 
			
		||||
  return value;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const AI_MODEL = getEnv('AI_MODEL');
 | 
			
		||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
 | 
			
		||||
const RATE_LIMIT_WINDOW = 60 * 1000; 
 | 
			
		||||
const RATE_LIMIT_MAX = 10; 
 | 
			
		||||
@ -33,13 +24,6 @@ function sanitizeInput(input: string): string {
 | 
			
		||||
  return sanitized;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function stripMarkdownJson(content: string): string {
 | 
			
		||||
  return content
 | 
			
		||||
    .replace(/^```json\s*/i, '')
 | 
			
		||||
    .replace(/\s*```\s*$/, '')
 | 
			
		||||
    .trim();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function checkRateLimit(userId: string): boolean {
 | 
			
		||||
  const now = Date.now();
 | 
			
		||||
  const userLimit = rateLimitStore.get(userId);
 | 
			
		||||
@ -68,209 +52,6 @@ function cleanupExpiredRateLimits() {
 | 
			
		||||
 | 
			
		||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
 | 
			
		||||
 | 
			
		||||
async function loadToolsDatabase() {
 | 
			
		||||
  try {
 | 
			
		||||
    return await getCompressedToolsDataForAI();
 | 
			
		||||
  } catch (error) {
 | 
			
		||||
    console.error('Failed to load tools database:', error);
 | 
			
		||||
    throw new Error('Database unavailable');
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function createWorkflowSystemPrompt(toolsData: any): string {
 | 
			
		||||
  const toolsList = toolsData.tools.map((tool: any) => ({
 | 
			
		||||
    name: tool.name,
 | 
			
		||||
    description: tool.description,
 | 
			
		||||
    domains: tool.domains,
 | 
			
		||||
    phases: tool.phases,
 | 
			
		||||
    domainAgnostic: tool['domain-agnostic-software'],
 | 
			
		||||
    platforms: tool.platforms,
 | 
			
		||||
    skillLevel: tool.skillLevel,
 | 
			
		||||
    license: tool.license,
 | 
			
		||||
    tags: tool.tags,
 | 
			
		||||
    related_concepts: tool.related_concepts || []
 | 
			
		||||
  }));
 | 
			
		||||
 | 
			
		||||
  const conceptsList = toolsData.concepts.map((concept: any) => ({
 | 
			
		||||
    name: concept.name,
 | 
			
		||||
    description: concept.description,
 | 
			
		||||
    domains: concept.domains,
 | 
			
		||||
    phases: concept.phases,
 | 
			
		||||
    skillLevel: concept.skillLevel,
 | 
			
		||||
    tags: concept.tags
 | 
			
		||||
  }));
 | 
			
		||||
 | 
			
		||||
  const regularPhases = toolsData.phases || [];
 | 
			
		||||
  
 | 
			
		||||
  const domainAgnosticSoftware = toolsData['domain-agnostic-software'] || [];
 | 
			
		||||
  
 | 
			
		||||
  const allPhaseItems = [
 | 
			
		||||
    ...regularPhases,
 | 
			
		||||
    ...domainAgnosticSoftware
 | 
			
		||||
  ];
 | 
			
		||||
  
 | 
			
		||||
  const phasesDescription = allPhaseItems.map((phase: any) => 
 | 
			
		||||
    `- ${phase.id}: ${phase.name}`
 | 
			
		||||
  ).join('\n');
 | 
			
		||||
 | 
			
		||||
  const domainsDescription = toolsData.domains.map((domain: any) => 
 | 
			
		||||
    `- ${domain.id}: ${domain.name}`
 | 
			
		||||
  ).join('\n');
 | 
			
		||||
 | 
			
		||||
  const phaseDescriptions = regularPhases.map((phase: any) => 
 | 
			
		||||
    `- ${phase.name}: ${phase.description || 'Tools/Methods for this phase'}`
 | 
			
		||||
  ).join('\n');
 | 
			
		||||
 | 
			
		||||
  const domainAgnosticDescriptions = domainAgnosticSoftware.map((section: any) => 
 | 
			
		||||
    `- ${section.name}: ${section.description || 'Cross-cutting software and platforms'}`
 | 
			
		||||
  ).join('\n');
 | 
			
		||||
 | 
			
		||||
  const validPhases = [
 | 
			
		||||
    ...regularPhases.map((p: any) => p.id),
 | 
			
		||||
    ...domainAgnosticSoftware.map((s: any) => s.id)
 | 
			
		||||
  ].join('|');
 | 
			
		||||
 | 
			
		||||
  return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte, der Ermittlern bei der Auswahl von Software und Methoden hilft.
 | 
			
		||||
 | 
			
		||||
VERFÜGBARE TOOLS/METHODEN:
 | 
			
		||||
${JSON.stringify(toolsList, null, 2)}
 | 
			
		||||
 | 
			
		||||
VERFÜGBARE HINTERGRUNDWISSEN-KONZEPTE:
 | 
			
		||||
${JSON.stringify(conceptsList, null, 2)}
 | 
			
		||||
 | 
			
		||||
UNTERSUCHUNGSPHASEN (NIST Framework):
 | 
			
		||||
${phasesDescription}
 | 
			
		||||
 | 
			
		||||
FORENSISCHE DOMÄNEN:
 | 
			
		||||
${domainsDescription}
 | 
			
		||||
 | 
			
		||||
WICHTIGE REGELN:
 | 
			
		||||
1. Pro Phase 2-3 Tools/Methoden empfehlen (immer mindestens 2 wenn verfügbar)
 | 
			
		||||
2. Tools/Methoden können in MEHREREN Phasen empfohlen werden wenn sinnvoll - versuche ein Tool/Methode für jede Phase zu empfehlen, selbst wenn die Priorität "low" ist.
 | 
			
		||||
3. Für Reporting-Phase: Visualisierungs- und Dokumentationssoftware einschließen
 | 
			
		||||
4. Gib stets dem spezieller für den Fall geeigneten Werkzeug den Vorzug.
 | 
			
		||||
5. Deutsche Antworten für deutsche Anfragen, English for English queries
 | 
			
		||||
6. Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software.
 | 
			
		||||
7. Bevorzuge alles, was nicht proprietär ist (license != "Proprietary"), aber erkenne an, wenn proprietäre Software besser geeignet ist.
 | 
			
		||||
8. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
 | 
			
		||||
9. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
 | 
			
		||||
 | 
			
		||||
ENHANCED CONTEXTUAL ANALYSIS:
 | 
			
		||||
10. Analysiere das Szenario detailliert und identifiziere Schlüsselelemente, Bedrohungen und forensische Herausforderungen
 | 
			
		||||
11. Entwickle einen strategischen Untersuchungsansatz basierend auf dem spezifischen Szenario
 | 
			
		||||
12. Identifiziere zeitkritische oder besonders wichtige Faktoren für diesen Fall
 | 
			
		||||
 | 
			
		||||
SOFTWARE/METHODEN-AUSWAHL NACH PHASE:
 | 
			
		||||
${phaseDescriptions}
 | 
			
		||||
 | 
			
		||||
DOMÄNENAGNOSTISCHE SOFTWARE/METHODEN:
 | 
			
		||||
${domainAgnosticDescriptions}
 | 
			
		||||
 | 
			
		||||
ANTWORT-FORMAT (strict JSON):
 | 
			
		||||
{
 | 
			
		||||
  "scenario_analysis": "Detaillierte Analyse des Szenarios: Erkannte Schlüsselelemente, Art des Vorfalls, betroffene Systeme, potentielle Bedrohungen und forensische Herausforderungen",
 | 
			
		||||
  "investigation_approach": "Strategischer Untersuchungsansatz für dieses spezifische Szenario: Prioritäten, Reihenfolge der Phasen, besondere Überlegungen",
 | 
			
		||||
  "critical_considerations": "Zeitkritische Faktoren, wichtige Sicherheitsaspekte oder besondere Vorsichtsmaßnahmen für diesen Fall",
 | 
			
		||||
  "recommended_tools": [
 | 
			
		||||
    {
 | 
			
		||||
      "name": "EXAKTER Name aus der Tools-Database",
 | 
			
		||||
      "priority": "high|medium|low", 
 | 
			
		||||
      "phase": "${validPhases}",
 | 
			
		||||
      "justification": "Warum diese Methode für diese Phase und dieses spezifische Szenario geeignet ist - mit Bezug zu den erkannten Schlüsselelementen"
 | 
			
		||||
    }
 | 
			
		||||
  ],
 | 
			
		||||
  "workflow_suggestion": "Vorgeschlagener Untersuchungsablauf mit konkreten Schritten für dieses Szenario",
 | 
			
		||||
  "background_knowledge": [
 | 
			
		||||
    {
 | 
			
		||||
      "concept_name": "EXAKTER Name aus der Konzepte-Database",
 | 
			
		||||
      "relevance": "Warum dieses Konzept für das Szenario relevant ist, und bei welchen der empfohlenen Methoden/Tools."
 | 
			
		||||
    }
 | 
			
		||||
  ],
 | 
			
		||||
  "additional_notes": "Wichtige Überlegungen und Hinweise"
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function createToolSystemPrompt(toolsData: any): string {
 | 
			
		||||
  const toolsList = toolsData.tools.map((tool: any) => ({
 | 
			
		||||
    name: tool.name,
 | 
			
		||||
    description: tool.description,
 | 
			
		||||
    domains: tool.domains,
 | 
			
		||||
    phases: tool.phases,
 | 
			
		||||
    platforms: tool.platforms,
 | 
			
		||||
    skillLevel: tool.skillLevel,
 | 
			
		||||
    license: tool.license,
 | 
			
		||||
    tags: tool.tags,
 | 
			
		||||
    url: tool.url,
 | 
			
		||||
    projectUrl: tool.projectUrl,
 | 
			
		||||
    related_concepts: tool.related_concepts || []
 | 
			
		||||
  }));
 | 
			
		||||
 | 
			
		||||
  const conceptsList = toolsData.concepts.map((concept: any) => ({
 | 
			
		||||
    name: concept.name,
 | 
			
		||||
    description: concept.description,
 | 
			
		||||
    domains: concept.domains,
 | 
			
		||||
    phases: concept.phases,
 | 
			
		||||
    skillLevel: concept.skillLevel,
 | 
			
		||||
    tags: concept.tags
 | 
			
		||||
  }));
 | 
			
		||||
 | 
			
		||||
  return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte, der bei der Auswahl spezifischer Software/Methoden für konkrete Probleme hilft.
 | 
			
		||||
 | 
			
		||||
VERFÜGBARE TOOLS/METHODEN:
 | 
			
		||||
${JSON.stringify(toolsList, null, 2)}
 | 
			
		||||
 | 
			
		||||
VERFÜGBARE HINTERGRUNDWISSEN-KONZEPTE:
 | 
			
		||||
${JSON.stringify(conceptsList, null, 2)}
 | 
			
		||||
 | 
			
		||||
WICHTIGE REGELN:
 | 
			
		||||
1. Analysiere das spezifische Problem/die Anforderung sorgfältig
 | 
			
		||||
2. Empfehle 1-3 Methoden/Tools, sortiert nach Eignung (beste Empfehlung zuerst)
 | 
			
		||||
3. Gib detaillierte Erklärungen, WARUM und WIE jede Methode/Tool das Problem löst
 | 
			
		||||
4. Berücksichtige praktische Aspekte: Skill Level, Plattformen, Verfügbarkeit
 | 
			
		||||
5. Deutsche Antworten für deutsche Anfragen, English for English queries
 | 
			
		||||
6. Gib konkrete Anwendungshinweise, nicht nur allgemeine Beschreibungen - Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software.
 | 
			
		||||
7. Erwähne sowohl Stärken als auch Schwächen/Limitationen
 | 
			
		||||
8. Schlage alternative Ansätze vor, wenn sinnvoll
 | 
			
		||||
9. Gib grundsätzliche Hinweise, WIE die Methode/Tool konkret eingesetzt wird
 | 
			
		||||
10. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
 | 
			
		||||
11. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
 | 
			
		||||
 | 
			
		||||
ENHANCED CONTEXTUAL ANALYSIS:
 | 
			
		||||
12. Analysiere das Problem detailliert und identifiziere technische Anforderungen, Herausforderungen und Erfolgsfaktoren
 | 
			
		||||
13. Entwickle einen strategischen Lösungsansatz basierend auf dem spezifischen Problem
 | 
			
		||||
14. Identifiziere wichtige Voraussetzungen oder Warnungen für die Anwendung
 | 
			
		||||
 | 
			
		||||
ANTWORT-FORMAT (strict JSON):
 | 
			
		||||
{
 | 
			
		||||
  "problem_analysis": "Detaillierte Analyse des Problems: Erkannte technische Anforderungen, Herausforderungen, benötigte Fähigkeiten und Erfolgsfaktoren",
 | 
			
		||||
  "investigation_approach": "Strategischer Lösungsansatz für dieses spezifische Problem: Herangehensweise, Prioritäten, optimale Anwendungsreihenfolge",
 | 
			
		||||
  "critical_considerations": "Wichtige Voraussetzungen, potentielle Fallstricke oder Warnungen für die Anwendung der empfohlenen Lösungen",
 | 
			
		||||
  "recommended_tools": [
 | 
			
		||||
    {
 | 
			
		||||
      "name": "EXAKTER Name aus der Tools-Database",
 | 
			
		||||
      "rank": 1,
 | 
			
		||||
      "suitability_score": "high|medium|low",
 | 
			
		||||
      "detailed_explanation": "Detaillierte Erklärung, warum dieses Tool/diese Methode das spezifische Problem löst - mit Bezug zu den erkannten Anforderungen",
 | 
			
		||||
      "implementation_approach": "Konkrete Schritte/Ansatz zur Anwendung für dieses spezifische Problem",
 | 
			
		||||
      "pros": ["Spezifische Vorteile für diesen Anwendungsfall", "Weitere Vorteile"],
 | 
			
		||||
      "cons": ["Potentielle Nachteile oder Limitationen", "Weitere Einschränkungen"],
 | 
			
		||||
      "alternatives": "Alternative Ansätze oder ergänzende Tools/Methoden, falls relevant"
 | 
			
		||||
    }
 | 
			
		||||
  ],
 | 
			
		||||
  "background_knowledge": [
 | 
			
		||||
    {
 | 
			
		||||
      "concept_name": "EXAKTER Name aus der Konzepte-Database",
 | 
			
		||||
      "relevance": "Warum dieses Konzept für die empfohlenen Tools/das Problem relevant ist, und für welche der empfohlenen Methoden/Tools."
 | 
			
		||||
    }
 | 
			
		||||
  ],
 | 
			
		||||
  "additional_considerations": "Wichtige Überlegungen, Voraussetzungen oder Warnungen"
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
  try {
 | 
			
		||||
    const authResult = await withAPIAuth(request, 'ai');
 | 
			
		||||
@ -287,7 +68,6 @@ export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
    const body = await request.json();
 | 
			
		||||
    const { query, mode = 'workflow', taskId: clientTaskId } = body;
 | 
			
		||||
 | 
			
		||||
    // ADD THIS DEBUG LOGGING
 | 
			
		||||
    console.log(`[AI API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
 | 
			
		||||
 | 
			
		||||
    if (!query || typeof query !== 'string') {
 | 
			
		||||
@ -306,128 +86,31 @@ export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
      return apiError.badRequest('Invalid input detected');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const toolsData = await loadToolsDatabase();
 | 
			
		||||
 | 
			
		||||
    const systemPrompt = mode === 'workflow' 
 | 
			
		||||
      ? createWorkflowSystemPrompt(toolsData)
 | 
			
		||||
      : createToolSystemPrompt(toolsData);
 | 
			
		||||
    
 | 
			
		||||
    const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
 | 
			
		||||
    
 | 
			
		||||
    console.log(`[AI API] About to enqueue task ${taskId}`);
 | 
			
		||||
    
 | 
			
		||||
    
 | 
			
		||||
    const aiResponse = await enqueueApiCall(() =>
 | 
			
		||||
      fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
 | 
			
		||||
        method: 'POST',
 | 
			
		||||
        headers: {
 | 
			
		||||
          'Content-Type': 'application/json',
 | 
			
		||||
          'Authorization': `Bearer ${process.env.AI_API_KEY}`
 | 
			
		||||
        },
 | 
			
		||||
        body: JSON.stringify({
 | 
			
		||||
          model: AI_MODEL,
 | 
			
		||||
          messages: [
 | 
			
		||||
            {
 | 
			
		||||
              role: 'system',
 | 
			
		||||
              content: systemPrompt
 | 
			
		||||
            },
 | 
			
		||||
            {
 | 
			
		||||
              role: 'user',
 | 
			
		||||
              content: sanitizedQuery
 | 
			
		||||
            }
 | 
			
		||||
          ],
 | 
			
		||||
          max_tokens: 3500,
 | 
			
		||||
          temperature: 0.3
 | 
			
		||||
        })
 | 
			
		||||
      })
 | 
			
		||||
    // Use the new AI pipeline instead of direct API calls
 | 
			
		||||
    const result = await enqueueApiCall(() => 
 | 
			
		||||
      aiPipeline.processQuery(sanitizedQuery, mode)
 | 
			
		||||
    , taskId);
 | 
			
		||||
 | 
			
		||||
    if (!aiResponse.ok) {
 | 
			
		||||
      console.error('AI API error:', await aiResponse.text());
 | 
			
		||||
      return apiServerError.unavailable('AI service unavailable');
 | 
			
		||||
    if (!result || !result.recommendation) {
 | 
			
		||||
      return apiServerError.unavailable('No response from AI pipeline');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const aiData = await aiResponse.json();
 | 
			
		||||
    const aiContent = aiData.choices?.[0]?.message?.content;
 | 
			
		||||
 | 
			
		||||
    if (!aiContent) {
 | 
			
		||||
      return apiServerError.unavailable('No response from AI');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let recommendation;
 | 
			
		||||
    try {
 | 
			
		||||
      const cleanedContent = stripMarkdownJson(aiContent);
 | 
			
		||||
      recommendation = JSON.parse(cleanedContent);
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.error('Failed to parse AI response:', aiContent);
 | 
			
		||||
      return apiServerError.unavailable('Invalid AI response format');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const validToolNames = new Set(toolsData.tools.map((t: any) => t.name));
 | 
			
		||||
    const validConceptNames = new Set(toolsData.concepts.map((c: any) => c.name));
 | 
			
		||||
    
 | 
			
		||||
    let validatedRecommendation;
 | 
			
		||||
 | 
			
		||||
    if (mode === 'workflow') {
 | 
			
		||||
      validatedRecommendation = {
 | 
			
		||||
        ...recommendation,
 | 
			
		||||
        // Ensure all new fields are included with fallbacks
 | 
			
		||||
        scenario_analysis: recommendation.scenario_analysis || recommendation.problem_analysis || '',
 | 
			
		||||
        investigation_approach: recommendation.investigation_approach || '',
 | 
			
		||||
        critical_considerations: recommendation.critical_considerations || '',
 | 
			
		||||
        recommended_tools: recommendation.recommended_tools?.filter((tool: any) => {
 | 
			
		||||
          if (!validToolNames.has(tool.name)) {
 | 
			
		||||
            console.warn(`AI recommended unknown tool: ${tool.name}`);
 | 
			
		||||
            return false;
 | 
			
		||||
          }
 | 
			
		||||
          return true;
 | 
			
		||||
        }) || [],
 | 
			
		||||
        background_knowledge: recommendation.background_knowledge?.filter((concept: any) => {
 | 
			
		||||
          if (!validConceptNames.has(concept.concept_name)) {
 | 
			
		||||
            console.warn(`AI referenced unknown concept: ${concept.concept_name}`);
 | 
			
		||||
            return false;
 | 
			
		||||
          }
 | 
			
		||||
          return true;
 | 
			
		||||
        }) || []
 | 
			
		||||
      };
 | 
			
		||||
    } else {
 | 
			
		||||
      validatedRecommendation = {
 | 
			
		||||
        ...recommendation,
 | 
			
		||||
        // Ensure all new fields are included with fallbacks
 | 
			
		||||
        problem_analysis: recommendation.problem_analysis || recommendation.scenario_analysis || '',
 | 
			
		||||
        investigation_approach: recommendation.investigation_approach || '',
 | 
			
		||||
        critical_considerations: recommendation.critical_considerations || '',
 | 
			
		||||
        recommended_tools: recommendation.recommended_tools?.filter((tool: any) => {
 | 
			
		||||
          if (!validToolNames.has(tool.name)) {
 | 
			
		||||
            console.warn(`AI recommended unknown tool: ${tool.name}`);
 | 
			
		||||
            return false;
 | 
			
		||||
          }
 | 
			
		||||
          return true;
 | 
			
		||||
        }).map((tool: any, index: number) => ({
 | 
			
		||||
          ...tool,
 | 
			
		||||
          rank: tool.rank || (index + 1),
 | 
			
		||||
          suitability_score: tool.suitability_score || 'medium',
 | 
			
		||||
          pros: Array.isArray(tool.pros) ? tool.pros : [],
 | 
			
		||||
          cons: Array.isArray(tool.cons) ? tool.cons : []
 | 
			
		||||
        })) || [],
 | 
			
		||||
        background_knowledge: recommendation.background_knowledge?.filter((concept: any) => {
 | 
			
		||||
          if (!validConceptNames.has(concept.concept_name)) {
 | 
			
		||||
            console.warn(`AI referenced unknown concept: ${concept.concept_name}`);
 | 
			
		||||
            return false;
 | 
			
		||||
          }
 | 
			
		||||
          return true;
 | 
			
		||||
        }) || []
 | 
			
		||||
      };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    console.log(`[AI Query] Mode: ${mode}, User: ${userId}, Query length: ${sanitizedQuery.length}, Tools: ${validatedRecommendation.recommended_tools.length}, Concepts: ${validatedRecommendation.background_knowledge?.length || 0}`);
 | 
			
		||||
    // Add processing statistics to the response for debugging/monitoring
 | 
			
		||||
    console.log(`[AI Query] Mode: ${mode}, User: ${userId}, Query length: ${sanitizedQuery.length}`);
 | 
			
		||||
    console.log(`[AI Query] Processing stats:`, result.processingStats);
 | 
			
		||||
    console.log(`[AI Query] Tools: ${result.recommendation.recommended_tools?.length || 0}, Concepts: ${result.recommendation.background_knowledge?.length || 0}`);
 | 
			
		||||
 | 
			
		||||
    return new Response(JSON.stringify({
 | 
			
		||||
      success: true,
 | 
			
		||||
      mode,
 | 
			
		||||
      taskId,
 | 
			
		||||
      recommendation: validatedRecommendation,
 | 
			
		||||
      query: sanitizedQuery
 | 
			
		||||
      recommendation: result.recommendation,
 | 
			
		||||
      query: sanitizedQuery,
 | 
			
		||||
      processingStats: result.processingStats // Include stats for monitoring
 | 
			
		||||
    }), {
 | 
			
		||||
      status: 200,
 | 
			
		||||
      headers: { 'Content-Type': 'application/json' }
 | 
			
		||||
@ -435,6 +118,16 @@ export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
 | 
			
		||||
  } catch (error) {
 | 
			
		||||
    console.error('AI query error:', error);
 | 
			
		||||
    
 | 
			
		||||
    // Provide more specific error messages based on error type
 | 
			
		||||
    if (error.message.includes('embeddings')) {
 | 
			
		||||
      return apiServerError.unavailable('Embeddings service error - falling back to basic processing');
 | 
			
		||||
    } else if (error.message.includes('selector')) {
 | 
			
		||||
      return apiServerError.unavailable('AI selector service error');
 | 
			
		||||
    } else if (error.message.includes('analyzer')) {
 | 
			
		||||
      return apiServerError.unavailable('AI analyzer service error');
 | 
			
		||||
    } else {
 | 
			
		||||
      return apiServerError.internal('Internal server error');
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
							
								
								
									
										521
									
								
								src/utils/aiPipeline.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										521
									
								
								src/utils/aiPipeline.ts
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,521 @@
 | 
			
		||||
// src/utils/aiPipeline.ts
 | 
			
		||||
import { getCompressedToolsDataForAI } from './dataService.js';
 | 
			
		||||
import { embeddingsService, type EmbeddingData } from './embeddings.js';
 | 
			
		||||
 | 
			
		||||
// Connection settings for one OpenAI-compatible chat-completions endpoint.
interface AIConfig {
  endpoint: string; // base URL; '/v1/chat/completions' is appended by callAI
  apiKey: string;   // sent as a Bearer token
  model: string;    // model identifier passed in the request body
}

// Output of the stage-1 "selector" model: which tool/concept names to keep,
// plus the model's stated reasoning (logged for debugging only).
interface SelectionResult {
  selectedTools: string[];
  selectedConcepts: string[];
  reasoning: string;
}

// Final pipeline output: the analyzer's recommendation (free-form JSON from
// the model, hence `any`) plus bookkeeping stats for logging/monitoring.
interface AnalysisResult {
  recommendation: any;
  processingStats: {
    embeddingsUsed: boolean;          // true when the embeddings pre-filter ran
    candidatesFromEmbeddings: number; // items returned by the similarity search
    finalSelectedItems: number;       // tools + concepts surviving validation
    processingTimeMs: number;         // wall-clock duration of processQuery
  };
}
 | 
			
		||||
 | 
			
		||||
// Two-stage pipeline: a "selector" model (or the embeddings index) narrows the
// candidate tool/concept set, then an "analyzer" model produces the detailed
// recommendation from the filtered data.
class AIProcessingPipeline {
  private selectorConfig: AIConfig; // endpoint/key/model for stage-1 selection
  private analyzerConfig: AIConfig; // endpoint/key/model for stage-2 analysis
  private maxSelectedItems: number;    // cap on tools+concepts forwarded to the analyzer
  private embeddingCandidates: number; // how many similar items to request from embeddings
  private similarityThreshold: number; // minimum similarity score to keep a candidate

  // Reads all required AI_* environment variables eagerly; throws on a missing
  // one so misconfiguration fails at startup rather than at first request.
  constructor() {
    this.selectorConfig = {
      endpoint: this.getEnv('AI_SELECTOR_ENDPOINT'),
      apiKey: this.getEnv('AI_SELECTOR_API_KEY'),
      model: this.getEnv('AI_SELECTOR_MODEL')
    };

    this.analyzerConfig = {
      endpoint: this.getEnv('AI_ANALYZER_ENDPOINT'),
      apiKey: this.getEnv('AI_ANALYZER_API_KEY'),
      model: this.getEnv('AI_ANALYZER_MODEL')
    };

    // Tuning knobs with sane defaults. NOTE(review): a non-numeric env value
    // yields NaN here — presumably deployment validates these; confirm.
    this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '15', 10);
    this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '30', 10);
    this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.3');
  }

  // Fetch a required environment variable; empty string counts as missing.
  private getEnv(key: string): string {
    const value = process.env[key];
    if (!value) {
      throw new Error(`Missing environment variable: ${key}`);
    }
    return value;
  }
 | 
			
		||||
 | 
			
		||||
  private async callAI(config: AIConfig, messages: any[], maxTokens: number = 1000): Promise<string> {
 | 
			
		||||
    const response = await fetch(`${config.endpoint}/v1/chat/completions`, {
 | 
			
		||||
      method: 'POST',
 | 
			
		||||
      headers: {
 | 
			
		||||
        'Content-Type': 'application/json',
 | 
			
		||||
        'Authorization': `Bearer ${config.apiKey}`
 | 
			
		||||
      },
 | 
			
		||||
      body: JSON.stringify({
 | 
			
		||||
        model: config.model,
 | 
			
		||||
        messages,
 | 
			
		||||
        max_tokens: maxTokens,
 | 
			
		||||
        temperature: 0.3
 | 
			
		||||
      })
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    if (!response.ok) {
 | 
			
		||||
      const errorText = await response.text();
 | 
			
		||||
      throw new Error(`AI API error (${config.model}): ${response.status} - ${errorText}`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const data = await response.json();
 | 
			
		||||
    const content = data.choices?.[0]?.message?.content;
 | 
			
		||||
    
 | 
			
		||||
    if (!content) {
 | 
			
		||||
      throw new Error(`No response from AI model: ${config.model}`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return content;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Build the stage-1 selection prompt: a compact listing of every tool and
   * concept (descriptions truncated to 200 chars) plus the user query and
   * mode-specific instructions, asking the model for a strict-JSON selection.
   */
  private createSelectorPrompt(toolsData: any, userQuery: string, mode: string): string {
    // Truncate long descriptions; append '...' ONLY when text was actually cut.
    // (Previously '...' was appended unconditionally, falsely marking short
    // descriptions as truncated.)
    const brief = (text: string): string =>
      text.length > 200 ? text.slice(0, 200) + '...' : text;

    const toolsList = toolsData.tools.map((tool: any) => ({
      name: tool.name,
      type: tool.type,
      description: brief(tool.description),
      domains: tool.domains,
      phases: tool.phases,
      tags: tool.tags?.slice(0, 5) || [], // at most 5 tags to keep the prompt small
      skillLevel: tool.skillLevel
    }));

    const conceptsList = toolsData.concepts.map((concept: any) => ({
      name: concept.name,
      type: 'concept',
      description: brief(concept.description),
      domains: concept.domains,
      phases: concept.phases,
      tags: concept.tags?.slice(0, 5) || []
    }));

    const modeInstruction = mode === 'workflow' 
      ? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases.'
      : 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem.';

    return `You are a DFIR expert tasked with selecting the most relevant tools and concepts for a user query.

${modeInstruction}

AVAILABLE TOOLS:
${JSON.stringify(toolsList, null, 2)}

AVAILABLE CONCEPTS:
${JSON.stringify(conceptsList, null, 2)}

USER QUERY: "${userQuery}"

Select the most relevant items (max ${this.maxSelectedItems} total). For workflow mode, prioritize breadth across phases. For tool mode, prioritize specificity and direct relevance.

Respond with ONLY this JSON format:
{
  "selectedTools": ["Tool Name 1", "Tool Name 2", ...],
  "selectedConcepts": ["Concept Name 1", "Concept Name 2", ...],
  "reasoning": "Brief explanation of selection criteria and approach"
}`;
  }
 | 
			
		||||
 | 
			
		||||
  private async selectRelevantItems(toolsData: any, userQuery: string, mode: string): Promise<SelectionResult> {
 | 
			
		||||
    const prompt = this.createSelectorPrompt(toolsData, userQuery, mode);
 | 
			
		||||
    
 | 
			
		||||
    const messages = [
 | 
			
		||||
      { role: 'user', content: prompt }
 | 
			
		||||
    ];
 | 
			
		||||
 | 
			
		||||
    const response = await this.callAI(this.selectorConfig, messages, 1500);
 | 
			
		||||
    
 | 
			
		||||
    try {
 | 
			
		||||
      const cleaned = response.replace(/^```json\s*/i, '').replace(/\s*```\s*$/g, '').trim();
 | 
			
		||||
      const result = JSON.parse(cleaned);
 | 
			
		||||
      
 | 
			
		||||
      // Validate the structure
 | 
			
		||||
      if (!Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
 | 
			
		||||
        throw new Error('Invalid selection result structure');
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      // Limit selections
 | 
			
		||||
      const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
 | 
			
		||||
      if (totalSelected > this.maxSelectedItems) {
 | 
			
		||||
        console.warn(`[AI PIPELINE] Selection exceeded limit (${totalSelected}), truncating`);
 | 
			
		||||
        result.selectedTools = result.selectedTools.slice(0, Math.floor(this.maxSelectedItems * 0.8));
 | 
			
		||||
        result.selectedConcepts = result.selectedConcepts.slice(0, Math.ceil(this.maxSelectedItems * 0.2));
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      return result;
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.error('[AI PIPELINE] Failed to parse selector response:', response);
 | 
			
		||||
      throw new Error('Invalid JSON response from selector AI');
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Reduce the full dataset to only the tools/concepts named in the selector's
   * result. Domains, phases and domain-agnostic software pass through intact.
   */
  private filterDataBySelection(toolsData: any, selection: SelectionResult): any {
    const keepTool = new Set(selection.selectedTools);
    const keepConcept = new Set(selection.selectedConcepts);

    const tools = toolsData.tools.filter((tool: any) => keepTool.has(tool.name));
    const concepts = toolsData.concepts.filter((concept: any) => keepConcept.has(concept.name));

    return {
      tools,
      concepts,
      domains: toolsData.domains,
      phases: toolsData.phases,
      'domain-agnostic-software': toolsData['domain-agnostic-software']
    };
  }
 | 
			
		||||
 | 
			
		||||
  private async processWithEmbeddings(userQuery: string, toolsData: any, mode: string): Promise<{ filteredData: any; stats: any }> {
 | 
			
		||||
    console.log('[AI PIPELINE] Using embeddings for initial filtering');
 | 
			
		||||
    
 | 
			
		||||
    const similarItems = await embeddingsService.findSimilar(
 | 
			
		||||
      userQuery, 
 | 
			
		||||
      this.embeddingCandidates, 
 | 
			
		||||
      this.similarityThreshold
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    if (similarItems.length === 0) {
 | 
			
		||||
      console.log('[AI PIPELINE] No similar items found with embeddings, using full dataset');
 | 
			
		||||
      return {
 | 
			
		||||
        filteredData: toolsData,
 | 
			
		||||
        stats: { embeddingsUsed: true, candidatesFromEmbeddings: 0, fallbackToFull: true }
 | 
			
		||||
      };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Create filtered dataset from embedding results
 | 
			
		||||
    const similarToolNames = new Set();
 | 
			
		||||
    const similarConceptNames = new Set();
 | 
			
		||||
 | 
			
		||||
    similarItems.forEach(item => {
 | 
			
		||||
      if (item.type === 'tool') {
 | 
			
		||||
        similarToolNames.add(item.name);
 | 
			
		||||
      } else if (item.type === 'concept') {
 | 
			
		||||
        similarConceptNames.add(item.name);
 | 
			
		||||
      }
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    const embeddingFilteredData = {
 | 
			
		||||
      tools: toolsData.tools.filter((tool: any) => similarToolNames.has(tool.name)),
 | 
			
		||||
      concepts: toolsData.concepts.filter((concept: any) => similarConceptNames.has(concept.name)),
 | 
			
		||||
      domains: toolsData.domains,
 | 
			
		||||
      phases: toolsData.phases,
 | 
			
		||||
      'domain-agnostic-software': toolsData['domain-agnostic-software']
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    console.log(`[AI PIPELINE] Embeddings filtered to ${embeddingFilteredData.tools.length} tools, ${embeddingFilteredData.concepts.length} concepts`);
 | 
			
		||||
 | 
			
		||||
    return {
 | 
			
		||||
      filteredData: embeddingFilteredData,
 | 
			
		||||
      stats: { embeddingsUsed: true, candidatesFromEmbeddings: similarItems.length }
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async processWithoutEmbeddings(userQuery: string, toolsData: any, mode: string): Promise<{ filteredData: any; stats: any }> {
 | 
			
		||||
    console.log('[AI PIPELINE] Processing without embeddings - using selector AI');
 | 
			
		||||
    
 | 
			
		||||
    const selection = await this.selectRelevantItems(toolsData, userQuery, mode);
 | 
			
		||||
    const filteredData = this.filterDataBySelection(toolsData, selection);
 | 
			
		||||
    
 | 
			
		||||
    console.log(`[AI PIPELINE] Selector chose ${selection.selectedTools.length} tools, ${selection.selectedConcepts.length} concepts`);
 | 
			
		||||
    console.log(`[AI PIPELINE] Selection reasoning: ${selection.reasoning}`);
 | 
			
		||||
 | 
			
		||||
    return {
 | 
			
		||||
      filteredData,
 | 
			
		||||
      stats: { embeddingsUsed: false, candidatesFromEmbeddings: 0, selectorReasoning: selection.reasoning }
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Dispatch to the mode-specific stage-2 prompt builder (workflow vs. tool),
  // passing along the already-filtered dataset.
  private createAnalyzerPrompt(filteredData: any, userQuery: string, mode: string): string {
    return mode === 'workflow'
      ? this.createWorkflowAnalyzerPrompt(filteredData, userQuery)
      : this.createToolAnalyzerPrompt(filteredData, userQuery);
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Build the stage-2 prompt for WORKFLOW mode: full (untruncated) tool and
   * concept records, the phase and domain catalogs, and German-language rules
   * instructing the analyzer to return a phase-organized plan as strict JSON.
   * The template literal below is runtime output sent to the model — do not
   * reword it casually.
   */
  private createWorkflowAnalyzerPrompt(toolsData: any, userQuery: string): string {
    // Full tool records (unlike the selector prompt, descriptions are NOT truncated).
    const toolsList = toolsData.tools.map((tool: any) => ({
      name: tool.name,
      description: tool.description,
      domains: tool.domains,
      phases: tool.phases,
      domainAgnostic: tool['domain-agnostic-software'],
      platforms: tool.platforms,
      skillLevel: tool.skillLevel,
      license: tool.license,
      tags: tool.tags,
      related_concepts: tool.related_concepts || []
    }));

    const conceptsList = toolsData.concepts.map((concept: any) => ({
      name: concept.name,
      description: concept.description,
      domains: concept.domains,
      phases: concept.phases,
      skillLevel: concept.skillLevel,
      tags: concept.tags
    }));

    // Phases and domain-agnostic software entries are merged into one list:
    // both are valid values for a recommendation's "phase" field.
    const regularPhases = toolsData.phases || [];
    const domainAgnosticSoftware = toolsData['domain-agnostic-software'] || [];
    const allPhaseItems = [...regularPhases, ...domainAgnosticSoftware];

    const phasesDescription = allPhaseItems.map((phase: any) => 
      `- ${phase.id}: ${phase.name}`
    ).join('\n');

    const domainsDescription = toolsData.domains.map((domain: any) => 
      `- ${domain.id}: ${domain.name}`
    ).join('\n');

    // Pipe-separated id list embedded in the JSON schema below as the set of
    // allowed "phase" values.
    const validPhases = [...regularPhases.map((p: any) => p.id), ...domainAgnosticSoftware.map((s: any) => s.id)].join('|');

    return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte. Du erhältst eine vorgefilterte Auswahl relevanter Tools und Konzepte und sollst daraus eine optimale Empfehlung erstellen.

VERFÜGBARE TOOLS/METHODEN (VORGEFILTERT):
${JSON.stringify(toolsList, null, 2)}

VERFÜGBARE KONZEPTE (VORGEFILTERT):
${JSON.stringify(conceptsList, null, 2)}

UNTERSUCHUNGSPHASEN:
${phasesDescription}

FORENSISCHE DOMÄNEN:
${domainsDescription}

WICHTIGE REGELN:
1. Pro Phase 2-3 Tools/Methoden empfehlen (immer mindestens 2 wenn verfügbar)
2. Tools/Methoden können in MEHREREN Phasen empfohlen werden wenn sinnvoll
3. Für Reporting-Phase: Visualisierungs- und Dokumentationssoftware einschließen
4. Gib stets dem spezieller für den Fall geeigneten Werkzeug den Vorzug
5. Deutsche Antworten für deutsche Anfragen, English for English queries
6. Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software
7. Bevorzuge alles, was nicht proprietär ist (license != "Proprietary"), aber erkenne an, wenn proprietäre Software besser geeignet ist
8. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
9. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis

ENHANCED CONTEXTUAL ANALYSIS:
10. Analysiere das Szenario detailliert und identifiziere Schlüsselelemente, Bedrohungen und forensische Herausforderungen
11. Entwickle einen strategischen Untersuchungsansatz basierend auf dem spezifischen Szenario
12. Identifiziere zeitkritische oder besonders wichtige Faktoren für diesen Fall

USER QUERY: "${userQuery}"

ANTWORT-FORMAT (strict JSON):
{
  "scenario_analysis": "Detaillierte Analyse des Szenarios: Erkannte Schlüsselelemente, Art des Vorfalls, betroffene Systeme, potentielle Bedrohungen und forensische Herausforderungen",
  "investigation_approach": "Strategischer Untersuchungsansatz für dieses spezifische Szenario: Prioritäten, Reihenfolge der Phasen, besondere Überlegungen",
  "critical_considerations": "Zeitkritische Faktoren, wichtige Sicherheitsaspekte oder besondere Vorsichtsmaßnahmen für diesen Fall",
  "recommended_tools": [
    {
      "name": "EXAKTER Name aus der Tools-Database",
      "priority": "high|medium|low", 
      "phase": "${validPhases}",
      "justification": "Warum diese Methode für diese Phase und dieses spezifische Szenario geeignet ist - mit Bezug zu den erkannten Schlüsselelementen"
    }
  ],
  "workflow_suggestion": "Vorgeschlagener Untersuchungsablauf mit konkreten Schritten für dieses Szenario",
  "background_knowledge": [
    {
      "concept_name": "EXAKTER Name aus der Konzepte-Database",
      "relevance": "Warum dieses Konzept für das Szenario relevant ist, und bei welchen der empfohlenen Methoden/Tools."
    }
  ],
  "additional_notes": "Wichtige Überlegungen und Hinweise"
}

Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Build the stage-2 prompt for TOOL mode: full tool and concept records plus
   * German-language rules instructing the analyzer to return 1-3 ranked tool
   * recommendations as strict JSON. The template literal below is runtime
   * output sent to the model — do not reword it casually.
   */
  private createToolAnalyzerPrompt(toolsData: any, userQuery: string): string {
    // Full tool records; tool mode additionally exposes url/projectUrl so the
    // analyzer can reference where to obtain each tool.
    const toolsList = toolsData.tools.map((tool: any) => ({
      name: tool.name,
      description: tool.description,
      domains: tool.domains,
      phases: tool.phases,
      platforms: tool.platforms,
      skillLevel: tool.skillLevel,
      license: tool.license,
      tags: tool.tags,
      url: tool.url,
      projectUrl: tool.projectUrl,
      related_concepts: tool.related_concepts || []
    }));

    const conceptsList = toolsData.concepts.map((concept: any) => ({
      name: concept.name,
      description: concept.description,
      domains: concept.domains,
      phases: concept.phases,
      skillLevel: concept.skillLevel,
      tags: concept.tags
    }));

    return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte. Du erhältst eine vorgefilterte Auswahl relevanter Tools und Konzepte und sollst daraus 1-3 optimale Empfehlungen für ein spezifisches Problem erstellen.

VERFÜGBARE TOOLS/METHODEN (VORGEFILTERT):
${JSON.stringify(toolsList, null, 2)}

VERFÜGBARE KONZEPTE (VORGEFILTERT):
${JSON.stringify(conceptsList, null, 2)}

WICHTIGE REGELN:
1. Analysiere das spezifische Problem/die Anforderung sorgfältig
2. Empfehle 1-3 Methoden/Tools, sortiert nach Eignung (beste Empfehlung zuerst)
3. Gib detaillierte Erklärungen, WARUM und WIE jede Methode/Tool das Problem löst
4. Berücksichtige praktische Aspekte: Skill Level, Plattformen, Verfügbarkeit
5. Deutsche Antworten für deutsche Anfragen, English for English queries
6. Gib konkrete Anwendungshinweise, nicht nur allgemeine Beschreibungen
7. Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software
8. Erwähne sowohl Stärken als auch Schwächen/Limitationen
9. Schlage alternative Ansätze vor, wenn sinnvoll
10. Gib grundsätzliche Hinweise, WIE die Methode/Tool konkret eingesetzt wird
11. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
12. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis

ENHANCED CONTEXTUAL ANALYSIS:
13. Analysiere das Problem detailliert und identifiziere technische Anforderungen, Herausforderungen und Erfolgsfaktoren
14. Entwickle einen strategischen Lösungsansatz basierend auf dem spezifischen Problem
15. Identifiziere wichtige Voraussetzungen oder Warnungen für die Anwendung

USER QUERY: "${userQuery}"

ANTWORT-FORMAT (strict JSON):
{
  "problem_analysis": "Detaillierte Analyse des Problems: Erkannte technische Anforderungen, Herausforderungen, benötigte Fähigkeiten und Erfolgsfaktoren",
  "investigation_approach": "Strategischer Lösungsansatz für dieses spezifische Problem: Herangehensweise, Prioritäten, optimale Anwendungsreihenfolge",
  "critical_considerations": "Wichtige Voraussetzungen, potentielle Fallstricke oder Warnungen für die Anwendung der empfohlenen Lösungen",
  "recommended_tools": [
    {
      "name": "EXAKTER Name aus der Tools-Database",
      "rank": 1,
      "suitability_score": "high|medium|low",
      "detailed_explanation": "Detaillierte Erklärung, warum dieses Tool/diese Methode das spezifische Problem löst - mit Bezug zu den erkannten Anforderungen",
      "implementation_approach": "Konkrete Schritte/Ansatz zur Anwendung für dieses spezifische Problem",
      "pros": ["Spezifische Vorteile für diesen Anwendungsfall", "Weitere Vorteile"],
      "cons": ["Potentielle Nachteile oder Limitationen", "Weitere Einschränkungen"],
      "alternatives": "Alternative Ansätze oder ergänzende Tools/Methoden, falls relevant"
    }
  ],
  "background_knowledge": [
    {
      "concept_name": "EXAKTER Name aus der Konzepte-Database",
      "relevance": "Warum dieses Konzept für die empfohlenen Tools/das Problem relevant ist, und für welche der empfohlenen Methoden/Tools."
    }
  ],
  "additional_considerations": "Wichtige Überlegungen, Voraussetzungen oder Warnungen"
}

Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Run the full two-stage pipeline for one user query.
   *
   * Stage 1 narrows the dataset — via the embeddings index when available,
   * otherwise via the selector AI. Stage 2 sends the narrowed dataset to the
   * analyzer AI and parses its strict-JSON recommendation. Recommended tools
   * and concepts that the analyzer hallucinated (names not present in the
   * filtered dataset) are dropped with a warning before returning.
   *
   * @param userQuery sanitized free-text query
   * @param mode      'workflow' or tool mode (any other value)
   * @returns recommendation plus processing statistics
   * @throws Error from data loading, either AI call, or JSON parsing; callers
   *         map the message content to specific HTTP error responses
   */
  async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {
    const startTime = Date.now();
    console.log(`[AI PIPELINE] Starting ${mode} query processing`);

    try {
      // Load full dataset
      const toolsData = await getCompressedToolsDataForAI();
      
      let filteredData: any;
      let processingStats: any = {
        embeddingsUsed: false,
        candidatesFromEmbeddings: 0,
        finalSelectedItems: 0,
        processingTimeMs: 0
      };

      // Stage 1: Filter candidates (embeddings or selector AI)
      if (embeddingsService.isEnabled()) {
        const result = await this.processWithEmbeddings(userQuery, toolsData, mode);
        filteredData = result.filteredData;
        processingStats = { ...processingStats, ...result.stats };
      } else {
        const result = await this.processWithoutEmbeddings(userQuery, toolsData, mode);
        filteredData = result.filteredData;
        processingStats = { ...processingStats, ...result.stats };
      }

      // Stage 2: Generate detailed analysis with analyzer AI
      console.log('[AI PIPELINE] Stage 2: Generating detailed analysis');
      const analyzerPrompt = this.createAnalyzerPrompt(filteredData, userQuery, mode);
      
      const messages = [
        { role: 'user', content: analyzerPrompt }
      ];

      const analysisResponse = await this.callAI(this.analyzerConfig, messages, 3500);
      
      // Parse the response (strip an optional ```json ... ``` fence first)
      let recommendation;
      try {
        const cleanedContent = analysisResponse.replace(/^```json\s*/i, '').replace(/\s*```\s*$/g, '').trim();
        recommendation = JSON.parse(cleanedContent);
      } catch (error) {
        console.error('[AI PIPELINE] Failed to parse analysis response:', analysisResponse);
        throw new Error('Invalid JSON response from analyzer AI');
      }

      // Validate tool/concept names exist in filtered data — the analyzer may
      // invent names; anything unknown is dropped rather than shown to users.
      const validToolNames = new Set(filteredData.tools.map((t: any) => t.name));
      const validConceptNames = new Set(filteredData.concepts.map((c: any) => c.name));
      
      if (recommendation.recommended_tools) {
        recommendation.recommended_tools = recommendation.recommended_tools.filter((tool: any) => {
          if (!validToolNames.has(tool.name)) {
            console.warn(`[AI PIPELINE] Analyzer recommended unknown tool: ${tool.name}`);
            return false;
          }
          return true;
        });
      }

      if (recommendation.background_knowledge) {
        recommendation.background_knowledge = recommendation.background_knowledge.filter((concept: any) => {
          if (!validConceptNames.has(concept.concept_name)) {
            console.warn(`[AI PIPELINE] Analyzer referenced unknown concept: ${concept.concept_name}`);
            return false;
          }
          return true;
        });
      }

      // Final bookkeeping for monitoring.
      processingStats.finalSelectedItems = (recommendation.recommended_tools?.length || 0) + 
                                          (recommendation.background_knowledge?.length || 0);
      processingStats.processingTimeMs = Date.now() - startTime;

      console.log(`[AI PIPELINE] Completed in ${processingStats.processingTimeMs}ms`);
      console.log(`[AI PIPELINE] Final recommendations: ${recommendation.recommended_tools?.length || 0} tools, ${recommendation.background_knowledge?.length || 0} concepts`);

      return {
        recommendation,
        processingStats
      };

    } catch (error) {
      // Log with context, then rethrow so the API layer chooses the response.
      console.error('[AI PIPELINE] Processing failed:', error);
      throw error;
    }
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Global singleton instance — the pipeline is stateless per-request, so one
// shared instance is safe; env vars are read (and validated) once at import.
const aiPipeline = new AIProcessingPipeline();

export { aiPipeline, type AnalysisResult };
 | 
			
		||||
@ -21,7 +21,7 @@ const ToolSchema = z.object({
 | 
			
		||||
  accessType: z.string().optional().nullable(),
 | 
			
		||||
  'domain-agnostic-software': z.array(z.string()).optional().nullable(),
 | 
			
		||||
  related_concepts: z.array(z.string()).optional().nullable().default([]),
 | 
			
		||||
  related_software: z.array(z.string()).optional().nullable().default([]), // Added this line
 | 
			
		||||
  related_software: z.array(z.string()).optional().nullable().default([]),
 | 
			
		||||
});
 | 
			
		||||
 | 
			
		||||
const ToolsDataSchema = z.object({
 | 
			
		||||
@ -67,6 +67,7 @@ let cachedData: ToolsData | null = null;
 | 
			
		||||
let cachedRandomizedData: ToolsData | null = null;
 | 
			
		||||
let cachedCompressedData: CompressedToolsData | null = null;
 | 
			
		||||
let lastRandomizationDate: string | null = null;
 | 
			
		||||
let dataVersion: string | null = null; // Add version tracking for embeddings
 | 
			
		||||
 | 
			
		||||
function seededRandom(seed: number): () => number {
 | 
			
		||||
  let x = Math.sin(seed) * 10000;
 | 
			
		||||
@ -91,6 +92,18 @@ function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
 | 
			
		||||
  return shuffled;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Generate a simple hash of the data for version tracking
 | 
			
		||||
function generateDataVersion(data: any): string {
 | 
			
		||||
  const str = JSON.stringify(data, Object.keys(data).sort());
 | 
			
		||||
  let hash = 0;
 | 
			
		||||
  for (let i = 0; i < str.length; i++) {
 | 
			
		||||
    const char = str.charCodeAt(i);
 | 
			
		||||
    hash = ((hash << 5) - hash) + char;
 | 
			
		||||
    hash = hash & hash; // Convert to 32-bit integer
 | 
			
		||||
  }
 | 
			
		||||
  return Math.abs(hash).toString(36);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
async function loadRawData(): Promise<ToolsData> {
 | 
			
		||||
  if (!cachedData) {
 | 
			
		||||
    const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
 | 
			
		||||
@ -99,6 +112,11 @@ async function loadRawData(): Promise<ToolsData> {
 | 
			
		||||
    
 | 
			
		||||
    try {
 | 
			
		||||
      cachedData = ToolsDataSchema.parse(rawData);
 | 
			
		||||
      
 | 
			
		||||
      // Generate data version for embeddings tracking
 | 
			
		||||
      dataVersion = generateDataVersion(cachedData);
 | 
			
		||||
      console.log(`[DATA SERVICE] Loaded data version: ${dataVersion}`);
 | 
			
		||||
      
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.error('YAML validation failed:', error);
 | 
			
		||||
      throw new Error('Invalid tools.yaml structure');
 | 
			
		||||
@ -124,6 +142,7 @@ export async function getToolsData(): Promise<ToolsData> {
 | 
			
		||||
    
 | 
			
		||||
    lastRandomizationDate = today;
 | 
			
		||||
    
 | 
			
		||||
    // Clear compressed cache when randomized data changes
 | 
			
		||||
    cachedCompressedData = null;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
@ -156,14 +175,23 @@ export async function getCompressedToolsDataForAI(): Promise<CompressedToolsData
 | 
			
		||||
      'domain-agnostic-software': data['domain-agnostic-software']
 | 
			
		||||
      // scenarios intentionally excluded from AI data
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    console.log(`[DATA SERVICE] Generated compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  return cachedCompressedData;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Returns the hash of the currently loaded tools data, or null if
 * loadRawData() has not populated it yet. Declared for embeddings
 * version tracking (per the module-level variable's comment).
 */
export function getDataVersion(): string | null {
  return dataVersion;
}
 | 
			
		||||
 | 
			
		||||
export function clearCache(): void {
 | 
			
		||||
  cachedData = null;
 | 
			
		||||
  cachedRandomizedData = null;
 | 
			
		||||
  cachedCompressedData = null;
 | 
			
		||||
  lastRandomizationDate = null;
 | 
			
		||||
  dataVersion = null;
 | 
			
		||||
  
 | 
			
		||||
  console.log('[DATA SERVICE] Cache cleared');
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										259
									
								
								src/utils/embeddings.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										259
									
								
								src/utils/embeddings.ts
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,259 @@
 | 
			
		||||
// src/utils/embeddings.ts
 | 
			
		||||
import { promises as fs } from 'fs';
 | 
			
		||||
import path from 'path';
 | 
			
		||||
import { getCompressedToolsDataForAI } from './dataService.js';
 | 
			
		||||
 | 
			
		||||
/** One embedded item (a tool or a concept) stored in the vector index. */
interface EmbeddingData {
  id: string;                // stable id: `${type}_${name with non-alphanumerics replaced by '_'}`
  type: 'tool' | 'concept';  // which source collection the item came from
  name: string;              // original item name
  content: string;           // lowercased text blob the vector was computed from
  embedding: number[];       // vector returned by the embeddings API
  metadata: {
    // Optional attributes copied straight from the source item.
    domains?: string[];
    phases?: string[];
    tags?: string[];
    skillLevel?: string;
    type?: string;
  };
}
 | 
			
		||||
 | 
			
		||||
/** On-disk JSON shape persisted at data/embeddings.json. */
interface EmbeddingsDatabase {
  version: string;      // hash of the source data these embeddings were built from
  lastUpdated: number;  // epoch milliseconds of the last save
  embeddings: EmbeddingData[];
}
 | 
			
		||||
 | 
			
		||||
class EmbeddingsService {
 | 
			
		||||
  private embeddings: EmbeddingData[] = [];
 | 
			
		||||
  private isInitialized = false;
 | 
			
		||||
  private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
 | 
			
		||||
  private readonly batchSize: number;
 | 
			
		||||
  private readonly batchDelay: number;
 | 
			
		||||
  private readonly enabled: boolean;
 | 
			
		||||
 | 
			
		||||
  constructor() {
 | 
			
		||||
    this.enabled = process.env.AI_EMBEDDINGS_ENABLED === 'true';
 | 
			
		||||
    this.batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20', 10);
 | 
			
		||||
    this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  async initialize(): Promise<void> {
 | 
			
		||||
    if (!this.enabled) {
 | 
			
		||||
      console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    try {
 | 
			
		||||
      console.log('[EMBEDDINGS] Initializing embeddings system...');
 | 
			
		||||
      
 | 
			
		||||
      // Create data directory if it doesn't exist
 | 
			
		||||
      await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true });
 | 
			
		||||
      
 | 
			
		||||
      const toolsData = await getCompressedToolsDataForAI();
 | 
			
		||||
      const currentDataHash = this.hashData(toolsData);
 | 
			
		||||
      
 | 
			
		||||
      // Try to load existing embeddings
 | 
			
		||||
      const existingEmbeddings = await this.loadEmbeddings();
 | 
			
		||||
      
 | 
			
		||||
      if (existingEmbeddings && existingEmbeddings.version === currentDataHash) {
 | 
			
		||||
        console.log('[EMBEDDINGS] Using cached embeddings');
 | 
			
		||||
        this.embeddings = existingEmbeddings.embeddings;
 | 
			
		||||
      } else {
 | 
			
		||||
        console.log('[EMBEDDINGS] Generating new embeddings...');
 | 
			
		||||
        await this.generateEmbeddings(toolsData, currentDataHash);
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      this.isInitialized = true;
 | 
			
		||||
      console.log(`[EMBEDDINGS] Initialized with ${this.embeddings.length} embeddings`);
 | 
			
		||||
      
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.error('[EMBEDDINGS] Failed to initialize:', error);
 | 
			
		||||
      this.isInitialized = false;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private hashData(data: any): string {
 | 
			
		||||
    return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> {
 | 
			
		||||
    try {
 | 
			
		||||
      const data = await fs.readFile(this.embeddingsPath, 'utf8');
 | 
			
		||||
      return JSON.parse(data);
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.log('[EMBEDDINGS] No existing embeddings found');
 | 
			
		||||
      return null;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async saveEmbeddings(version: string): Promise<void> {
 | 
			
		||||
    const database: EmbeddingsDatabase = {
 | 
			
		||||
      version,
 | 
			
		||||
      lastUpdated: Date.now(),
 | 
			
		||||
      embeddings: this.embeddings
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    await fs.writeFile(this.embeddingsPath, JSON.stringify(database, null, 2));
 | 
			
		||||
    console.log(`[EMBEDDINGS] Saved ${this.embeddings.length} embeddings to disk`);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private createContentString(item: any): string {
 | 
			
		||||
    const parts = [
 | 
			
		||||
      item.name,
 | 
			
		||||
      item.description || '',
 | 
			
		||||
      ...(item.tags || []),
 | 
			
		||||
      ...(item.domains || []),
 | 
			
		||||
      ...(item.phases || [])
 | 
			
		||||
    ];
 | 
			
		||||
    
 | 
			
		||||
    return parts.filter(Boolean).join(' ').toLowerCase();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async generateEmbeddingsBatch(contents: string[]): Promise<number[][]> {
 | 
			
		||||
    const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
 | 
			
		||||
    const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
 | 
			
		||||
    const model = process.env.AI_EMBEDDINGS_MODEL;
 | 
			
		||||
 | 
			
		||||
    if (!endpoint || !apiKey || !model) {
 | 
			
		||||
      throw new Error('Missing embeddings API configuration');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const response = await fetch(endpoint, {
 | 
			
		||||
      method: 'POST',
 | 
			
		||||
      headers: {
 | 
			
		||||
        'Content-Type': 'application/json',
 | 
			
		||||
        'Authorization': `Bearer ${apiKey}`
 | 
			
		||||
      },
 | 
			
		||||
      body: JSON.stringify({
 | 
			
		||||
        model,
 | 
			
		||||
        input: contents
 | 
			
		||||
      })
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    if (!response.ok) {
 | 
			
		||||
      const error = await response.text();
 | 
			
		||||
      throw new Error(`Embeddings API error: ${response.status} - ${error}`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const data = await response.json();
 | 
			
		||||
    return data.data.map((item: any) => item.embedding);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async generateEmbeddings(toolsData: any, version: string): Promise<void> {
 | 
			
		||||
    const allItems = [
 | 
			
		||||
      ...toolsData.tools.map((tool: any) => ({ ...tool, type: 'tool' })),
 | 
			
		||||
      ...toolsData.concepts.map((concept: any) => ({ ...concept, type: 'concept' }))
 | 
			
		||||
    ];
 | 
			
		||||
 | 
			
		||||
    const contents = allItems.map(item => this.createContentString(item));
 | 
			
		||||
    this.embeddings = [];
 | 
			
		||||
 | 
			
		||||
    // Process in batches to respect rate limits
 | 
			
		||||
    for (let i = 0; i < contents.length; i += this.batchSize) {
 | 
			
		||||
      const batch = contents.slice(i, i + this.batchSize);
 | 
			
		||||
      const batchItems = allItems.slice(i, i + this.batchSize);
 | 
			
		||||
      
 | 
			
		||||
      console.log(`[EMBEDDINGS] Processing batch ${Math.ceil((i + 1) / this.batchSize)} of ${Math.ceil(contents.length / this.batchSize)}`);
 | 
			
		||||
      
 | 
			
		||||
      try {
 | 
			
		||||
        const embeddings = await this.generateEmbeddingsBatch(batch);
 | 
			
		||||
        
 | 
			
		||||
        embeddings.forEach((embedding, index) => {
 | 
			
		||||
          const item = batchItems[index];
 | 
			
		||||
          this.embeddings.push({
 | 
			
		||||
            id: `${item.type}_${item.name.replace(/[^a-zA-Z0-9]/g, '_')}`,
 | 
			
		||||
            type: item.type,
 | 
			
		||||
            name: item.name,
 | 
			
		||||
            content: batch[index],
 | 
			
		||||
            embedding,
 | 
			
		||||
            metadata: {
 | 
			
		||||
              domains: item.domains,
 | 
			
		||||
              phases: item.phases,
 | 
			
		||||
              tags: item.tags,
 | 
			
		||||
              skillLevel: item.skillLevel,
 | 
			
		||||
              type: item.type
 | 
			
		||||
            }
 | 
			
		||||
          });
 | 
			
		||||
        });
 | 
			
		||||
        
 | 
			
		||||
        // Rate limiting delay between batches
 | 
			
		||||
        if (i + this.batchSize < contents.length) {
 | 
			
		||||
          await new Promise(resolve => setTimeout(resolve, this.batchDelay));
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
      } catch (error) {
 | 
			
		||||
        console.error(`[EMBEDDINGS] Failed to process batch ${Math.ceil((i + 1) / this.batchSize)}:`, error);
 | 
			
		||||
        throw error;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    await this.saveEmbeddings(version);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private cosineSimilarity(a: number[], b: number[]): number {
 | 
			
		||||
    let dotProduct = 0;
 | 
			
		||||
    let normA = 0;
 | 
			
		||||
    let normB = 0;
 | 
			
		||||
    
 | 
			
		||||
    for (let i = 0; i < a.length; i++) {
 | 
			
		||||
      dotProduct += a[i] * b[i];
 | 
			
		||||
      normA += a[i] * a[i];
 | 
			
		||||
      normB += b[i] * b[i];
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<EmbeddingData[]> {
 | 
			
		||||
    if (!this.enabled || !this.isInitialized || this.embeddings.length === 0) {
 | 
			
		||||
      return [];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    try {
 | 
			
		||||
      // Generate embedding for query
 | 
			
		||||
      const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
 | 
			
		||||
      const queryEmbedding = queryEmbeddings[0];
 | 
			
		||||
 | 
			
		||||
      // Calculate similarities
 | 
			
		||||
      const similarities = this.embeddings.map(item => ({
 | 
			
		||||
        ...item,
 | 
			
		||||
        similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
 | 
			
		||||
      }));
 | 
			
		||||
 | 
			
		||||
      // Filter by threshold and sort by similarity
 | 
			
		||||
      return similarities
 | 
			
		||||
        .filter(item => item.similarity >= threshold)
 | 
			
		||||
        .sort((a, b) => b.similarity - a.similarity)
 | 
			
		||||
        .slice(0, maxResults);
 | 
			
		||||
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.error('[EMBEDDINGS] Failed to find similar items:', error);
 | 
			
		||||
      return [];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  isEnabled(): boolean {
 | 
			
		||||
    return this.enabled && this.isInitialized;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  getStats(): { enabled: boolean; initialized: boolean; count: number } {
 | 
			
		||||
    return {
 | 
			
		||||
      enabled: this.enabled,
 | 
			
		||||
      initialized: this.isInitialized,
 | 
			
		||||
      count: this.embeddings.length
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Global instance
 | 
			
		||||
const embeddingsService = new EmbeddingsService();
 | 
			
		||||
 | 
			
		||||
export { embeddingsService, type EmbeddingData };
 | 
			
		||||
 | 
			
		||||
// Auto-initialize on import in server environment
 | 
			
		||||
if (typeof window === 'undefined' && process.env.NODE_ENV !== 'test') {
 | 
			
		||||
  embeddingsService.initialize().catch(error => {
 | 
			
		||||
    console.error('[EMBEDDINGS] Auto-initialization failed:', error);
 | 
			
		||||
  });
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user