try work with embeddings
This commit is contained in:
parent
f2423b2158
commit
5f190fbf02
@ -226,6 +226,66 @@ tools:
|
|||||||
- TheHive 5
|
- TheHive 5
|
||||||
- Cortex
|
- Cortex
|
||||||
- OpenCTI
|
- OpenCTI
|
||||||
|
- name: DFIR-IRIS
|
||||||
|
icon: 🌺
|
||||||
|
type: software
|
||||||
|
description: >-
|
||||||
|
Collaborative Incident Response Management Platform für strukturierte
|
||||||
|
DFIR-Case-Organisation. Zentralisiert alle Aspekte einer Untersuchung:
|
||||||
|
Assets, IOCs, Tasks, Timeline, Evidence-Tracking. Multi-User-Environment
|
||||||
|
mit granularen Permissions für verschiedene Analysten-Rollen. Besonders
|
||||||
|
wertvoll: Case-Templates standardisieren Workflows, automatische IOC-
|
||||||
|
Enrichment via MISP/OpenCTI, integrierte Timeline-Visualisierung,
|
||||||
|
Evidence-Chain-of-Custody-Tracking. Plugin-System erweitert für Custom-
|
||||||
|
Integrations. RESTful API für Tool-Orchestrierung. Dashboard zeigt Case-
|
||||||
|
Status und Team-Workload. Notes-System dokumentiert Findings strukturiert.
|
||||||
|
Reporting-Engine generiert Executive-Summaries. Die Web-basierte
|
||||||
|
Architektur skaliert von kleinen Teams bis Enterprise-SOCs. Docker-
|
||||||
|
Deployment vereinfacht Installation. Besonders stark bei komplexen,
|
||||||
|
langwierigen Ermittlungen mit mehreren Beteiligten. Open-Source
|
||||||
|
Alternative zu kommerziellen Case-Management-Systemen.
|
||||||
|
domains:
|
||||||
|
- incident-response
|
||||||
|
- static-investigations
|
||||||
|
- malware-analysis
|
||||||
|
- fraud-investigation
|
||||||
|
- network-forensics
|
||||||
|
- mobile-forensics
|
||||||
|
- cloud-forensics
|
||||||
|
phases:
|
||||||
|
- data-collection
|
||||||
|
- examination
|
||||||
|
- analysis
|
||||||
|
- reporting
|
||||||
|
platforms:
|
||||||
|
- Web
|
||||||
|
related_software:
|
||||||
|
- TheHive 5
|
||||||
|
- MISP
|
||||||
|
- OpenCTI
|
||||||
|
domain-agnostic-software:
|
||||||
|
- collaboration-general
|
||||||
|
skillLevel: intermediate
|
||||||
|
accessType: server-based
|
||||||
|
url: https://dfir-iris.org/
|
||||||
|
projectUrl: ''
|
||||||
|
license: LGPL-3.0
|
||||||
|
knowledgebase: false
|
||||||
|
tags:
|
||||||
|
- web-interface
|
||||||
|
- case-management
|
||||||
|
- collaboration
|
||||||
|
- multi-user-support
|
||||||
|
- api
|
||||||
|
- workflow
|
||||||
|
- timeline-view
|
||||||
|
- ioc-tracking
|
||||||
|
- evidence-management
|
||||||
|
- reporting
|
||||||
|
- plugin-support
|
||||||
|
- docker-ready
|
||||||
|
related_concepts:
|
||||||
|
- Digital Evidence Chain of Custody
|
||||||
- name: Timesketch
|
- name: Timesketch
|
||||||
icon: ⏱️
|
icon: ⏱️
|
||||||
type: software
|
type: software
|
||||||
|
18
src/pages/api/ai/embeddings-status.ts
Normal file
18
src/pages/api/ai/embeddings-status.ts
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
import type { APIRoute } from 'astro';
|
||||||
|
import { isEmbeddingsEnabled } from '../../../utils/embeddingsService.js';
|
||||||
|
import { apiResponse } from '../../../utils/api.js';
|
||||||
|
|
||||||
|
export const prerender = false;
|
||||||
|
|
||||||
|
export const GET: APIRoute = async () => {
|
||||||
|
const embeddingsEnabled = isEmbeddingsEnabled();
|
||||||
|
|
||||||
|
return apiResponse.success({
|
||||||
|
embeddingsEnabled,
|
||||||
|
endpointConfigured: !!process.env.AI_EMBEDDINGS_ENDPOINT,
|
||||||
|
apiKeyConfigured: !!process.env.AI_EMBEDDINGS_API_KEY,
|
||||||
|
model: process.env.AI_EMBEDDINGS_MODEL || 'mistral-embed',
|
||||||
|
maxCandidates: parseInt(process.env.AI_EMBEDDING_CANDIDATES || '30'),
|
||||||
|
similarityThreshold: parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.3')
|
||||||
|
});
|
||||||
|
};
|
@ -1,23 +1,15 @@
|
|||||||
// src/pages/api/ai/enhance-input.ts
|
|
||||||
import type { APIRoute } from 'astro';
|
import type { APIRoute } from 'astro';
|
||||||
import { withAPIAuth } from '../../../utils/auth.js';
|
import { withAPIAuth } from '../../../utils/auth.js';
|
||||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
||||||
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';
|
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';
|
||||||
|
import { callAI, getAnalyzerConfig } from '../../../utils/aiService.js';
|
||||||
|
import { createEnhancementPrompt } from '../../../utils/aiPrompts.js';
|
||||||
|
|
||||||
export const prerender = false;
|
export const prerender = false;
|
||||||
|
|
||||||
function getEnv(key: string): string {
|
|
||||||
const value = process.env[key];
|
|
||||||
if (!value) {
|
|
||||||
throw new Error(`Missing environment variable: ${key}`);
|
|
||||||
}
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
const AI_MODEL = getEnv('AI_MODEL');
|
|
||||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
|
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
|
||||||
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
|
const RATE_LIMIT_WINDOW = 60 * 1000;
|
||||||
const RATE_LIMIT_MAX = 5; // 5 enhancement requests per minute per user
|
const RATE_LIMIT_MAX = 5;
|
||||||
|
|
||||||
function sanitizeInput(input: string): string {
|
function sanitizeInput(input: string): string {
|
||||||
return input
|
return input
|
||||||
@ -26,7 +18,7 @@ function sanitizeInput(input: string): string {
|
|||||||
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
|
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
|
||||||
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
|
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
|
||||||
.trim()
|
.trim()
|
||||||
.slice(0, 1000); // Shorter limit for enhancement
|
.slice(0, 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
function checkRateLimit(userId: string): boolean {
|
function checkRateLimit(userId: string): boolean {
|
||||||
@ -55,33 +47,8 @@ function cleanupExpiredRateLimits() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clean up expired limits every 5 minutes
|
|
||||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
||||||
|
|
||||||
function createEnhancementPrompt(input: string): string {
|
|
||||||
return `
|
|
||||||
Du bist eine KI für digitale Forensik. Der Nutzer beschreibt ein forensisches Szenario. Analysiere die Eingabe.
|
|
||||||
|
|
||||||
Wenn die Beschreibung unvollständig oder vage ist, stelle bis zu drei präzise Rückfragen im JSON-Array-Format, um wichtige Details zu klären (z. B. Vorfalltyp, System, Ziel, Datenquellen, Zeit, Beteiligte, rechtlicher Rahmen).
|
|
||||||
|
|
||||||
Wenn die Eingabe bereits klar, spezifisch und vollständig ist, gib stattdessen nur eine leere Liste [] zurück.
|
|
||||||
|
|
||||||
Antwortformat strikt:
|
|
||||||
|
|
||||||
\`\`\`json
|
|
||||||
[
|
|
||||||
"Frage 1?",
|
|
||||||
"Frage 2?",
|
|
||||||
"Frage 3?"
|
|
||||||
]
|
|
||||||
\`\`\`
|
|
||||||
|
|
||||||
Nutzer-Eingabe:
|
|
||||||
${input}
|
|
||||||
`.trim();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
export const POST: APIRoute = async ({ request }) => {
|
export const POST: APIRoute = async ({ request }) => {
|
||||||
try {
|
try {
|
||||||
const authResult = await withAPIAuth(request, 'ai');
|
const authResult = await withAPIAuth(request, 'ai');
|
||||||
@ -110,38 +77,31 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
const systemPrompt = createEnhancementPrompt(sanitizedInput);
|
const systemPrompt = createEnhancementPrompt(sanitizedInput);
|
||||||
const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
|
const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
|
||||||
|
|
||||||
|
console.log(`[ENHANCEMENT] Starting for user ${userId}, input length: ${sanitizedInput.length}`);
|
||||||
|
|
||||||
|
const analyzerConfig = getAnalyzerConfig();
|
||||||
|
|
||||||
|
let aiContent;
|
||||||
|
try {
|
||||||
const aiResponse = await enqueueApiCall(() =>
|
const aiResponse = await enqueueApiCall(() =>
|
||||||
fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
|
callAI(analyzerConfig, [
|
||||||
method: 'POST',
|
{ role: 'user', content: systemPrompt }
|
||||||
headers: {
|
], 200, 0.1), taskId); // Lower temperature for consistent JSON
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'Authorization': `Bearer ${process.env.AI_API_KEY}`
|
|
||||||
},
|
|
||||||
body: JSON.stringify({
|
|
||||||
model: AI_MODEL,
|
|
||||||
messages: [
|
|
||||||
{
|
|
||||||
role: 'user',
|
|
||||||
content: systemPrompt
|
|
||||||
}
|
|
||||||
],
|
|
||||||
max_tokens: 200,
|
|
||||||
temperature: 0.7
|
|
||||||
})
|
|
||||||
}), taskId);
|
|
||||||
|
|
||||||
if (!aiResponse.ok) {
|
aiContent = aiResponse.choices?.[0]?.message?.content;
|
||||||
console.error('AI enhancement error:', await aiResponse.text());
|
|
||||||
return apiServerError.unavailable('Enhancement service unavailable');
|
|
||||||
}
|
|
||||||
|
|
||||||
const aiData = await aiResponse.json();
|
|
||||||
const aiContent = aiData.choices?.[0]?.message?.content;
|
|
||||||
|
|
||||||
if (!aiContent) {
|
if (!aiContent) {
|
||||||
|
console.error(`[ENHANCEMENT] No AI response content for task ${taskId}`);
|
||||||
return apiServerError.unavailable('No enhancement response');
|
return apiServerError.unavailable('No enhancement response');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
console.log(`[ENHANCEMENT] Received AI response for task ${taskId}, length: ${aiContent.length}`);
|
||||||
|
|
||||||
|
} catch (apiError) {
|
||||||
|
console.error(`[ENHANCEMENT] AI API call failed for task ${taskId}:`, apiError);
|
||||||
|
return apiServerError.unavailable('AI service temporarily unavailable');
|
||||||
|
}
|
||||||
|
|
||||||
let questions;
|
let questions;
|
||||||
try {
|
try {
|
||||||
const cleanedContent = aiContent
|
const cleanedContent = aiContent
|
||||||
@ -151,36 +111,34 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
questions = JSON.parse(cleanedContent);
|
questions = JSON.parse(cleanedContent);
|
||||||
|
|
||||||
if (!Array.isArray(questions) || questions.length === 0) {
|
if (!Array.isArray(questions) || questions.length === 0) {
|
||||||
throw new Error('Invalid questions format');
|
console.log(`[ENHANCEMENT] No questions generated for task ${taskId} - input likely complete`);
|
||||||
}
|
questions = [];
|
||||||
|
} else {
|
||||||
// Validate and clean questions
|
|
||||||
questions = questions
|
questions = questions
|
||||||
.filter(q => typeof q === 'string' && q.length > 5 && q.length < 120)
|
.filter(q => typeof q === 'string' && q.length > 5 && q.length < 120)
|
||||||
.slice(0, 3);
|
.slice(0, 3);
|
||||||
|
|
||||||
if (questions.length === 0) {
|
|
||||||
throw new Error('No valid questions found');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (error) {
|
} catch (parseError) {
|
||||||
console.error('Failed to parse enhancement response:', aiContent);
|
console.error(`[ENHANCEMENT] Failed to parse AI response for task ${taskId}:`, parseError);
|
||||||
|
console.error(`[ENHANCEMENT] Raw AI content:`, aiContent);
|
||||||
return apiServerError.unavailable('Invalid enhancement response format');
|
return apiServerError.unavailable('Invalid enhancement response format');
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`[AI Enhancement] User: ${userId}, Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
|
console.log(`[ENHANCEMENT] Completed for user ${userId}: ${questions.length} questions, input length: ${sanitizedInput.length}`);
|
||||||
|
|
||||||
return new Response(JSON.stringify({
|
return new Response(JSON.stringify({
|
||||||
success: true,
|
success: true,
|
||||||
questions,
|
questions,
|
||||||
taskId
|
taskId,
|
||||||
|
hasQuestions: questions.length > 0
|
||||||
}), {
|
}), {
|
||||||
status: 200,
|
status: 200,
|
||||||
headers: { 'Content-Type': 'application/json' }
|
headers: { 'Content-Type': 'application/json' }
|
||||||
});
|
});
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Enhancement error:', error);
|
console.error('[ENHANCEMENT] Unexpected error:', error);
|
||||||
return apiServerError.internal('Enhancement processing failed');
|
return apiServerError.internal('Enhancement processing failed');
|
||||||
}
|
}
|
||||||
};
|
};
|
@ -1,29 +1,22 @@
|
|||||||
// src/pages/api/ai/query.ts
|
|
||||||
import type { APIRoute } from 'astro';
|
import type { APIRoute } from 'astro';
|
||||||
import { withAPIAuth } from '../../../utils/auth.js';
|
import { withAPIAuth } from '../../../utils/auth.js';
|
||||||
import { getCompressedToolsDataForAI } from '../../../utils/dataService.js';
|
import { getCompressedToolsDataForAI } from '../../../utils/dataService.js';
|
||||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
||||||
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';
|
import { enqueueChainedApiCall } from '../../../utils/rateLimitedQueue.js';
|
||||||
|
import { callAI, getSelectorConfig, getAnalyzerConfig } from '../../../utils/aiService.js';
|
||||||
|
import { createSelectorPrompt, createWorkflowSystemPrompt, createToolSystemPrompt, createJsonConversionPrompt } from '../../../utils/aiPrompts.js';
|
||||||
|
import { isEmbeddingsEnabled, generateToolEmbeddings, findSimilarItems, type ToolEmbedding } from '../../../utils/embeddingsService.js';
|
||||||
|
|
||||||
export const prerender = false;
|
export const prerender = false;
|
||||||
|
|
||||||
function getEnv(key: string): string {
|
|
||||||
const value = process.env[key];
|
|
||||||
if (!value) {
|
|
||||||
throw new Error(`Missing environment variable: ${key}`);
|
|
||||||
}
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
const AI_MODEL = getEnv('AI_MODEL');
|
|
||||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
|
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
|
||||||
const RATE_LIMIT_WINDOW = 60 * 1000;
|
const RATE_LIMIT_WINDOW = 60 * 1000;
|
||||||
const RATE_LIMIT_MAX = 10;
|
const RATE_LIMIT_MAX = 10;
|
||||||
|
|
||||||
function sanitizeInput(input: string): string {
|
function sanitizeInput(input: string): string {
|
||||||
let sanitized = input
|
let sanitized = input
|
||||||
.replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]') // Remove code blocks
|
.replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
|
||||||
.replace(/\<\/?[^>]+(>|$)/g, '') // Remove HTML tags
|
.replace(/\<\/?[^>]+(>|$)/g, '')
|
||||||
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
|
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
|
||||||
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
|
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
|
||||||
.trim();
|
.trim();
|
||||||
@ -33,11 +26,34 @@ function sanitizeInput(input: string): string {
|
|||||||
return sanitized;
|
return sanitized;
|
||||||
}
|
}
|
||||||
|
|
||||||
function stripMarkdownJson(content: string): string {
|
function extractJsonFromResponse(content: string): string {
|
||||||
return content
|
// First try simple markdown removal
|
||||||
|
let cleaned = content
|
||||||
.replace(/^```json\s*/i, '')
|
.replace(/^```json\s*/i, '')
|
||||||
.replace(/\s*```\s*$/, '')
|
.replace(/\s*```\s*$/i, '')
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
|
// If it looks like JSON already, return it
|
||||||
|
if (cleaned.startsWith('{') && cleaned.endsWith('}')) {
|
||||||
|
return cleaned;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to find JSON block in the response
|
||||||
|
const jsonMatch = content.match(/```json\s*(\{[\s\S]*?\})\s*```/i);
|
||||||
|
if (jsonMatch) {
|
||||||
|
return jsonMatch[1].trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to find any JSON-like structure
|
||||||
|
const jsonStart = content.indexOf('{');
|
||||||
|
const jsonEnd = content.lastIndexOf('}');
|
||||||
|
|
||||||
|
if (jsonStart >= 0 && jsonEnd > jsonStart) {
|
||||||
|
return content.substring(jsonStart, jsonEnd + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no JSON found, return original (will likely fail parsing)
|
||||||
|
return cleaned;
|
||||||
}
|
}
|
||||||
|
|
||||||
function checkRateLimit(userId: string): boolean {
|
function checkRateLimit(userId: string): boolean {
|
||||||
@ -77,301 +93,266 @@ async function loadToolsDatabase() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function createWorkflowSystemPrompt(toolsData: any): string {
|
function simpleTextBasedSelection(query: string, allItems: any[]): any[] {
|
||||||
const toolsList = toolsData.tools.map((tool: any) => ({
|
const maxCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '30');
|
||||||
name: tool.name,
|
const queryLower = query.toLowerCase();
|
||||||
description: tool.description,
|
const queryWords = queryLower.split(/\s+/).filter(word => word.length > 2);
|
||||||
domains: tool.domains,
|
|
||||||
phases: tool.phases,
|
|
||||||
domainAgnostic: tool['domain-agnostic-software'],
|
|
||||||
platforms: tool.platforms,
|
|
||||||
skillLevel: tool.skillLevel,
|
|
||||||
license: tool.license,
|
|
||||||
tags: tool.tags,
|
|
||||||
related_concepts: tool.related_concepts || []
|
|
||||||
}));
|
|
||||||
|
|
||||||
const conceptsList = toolsData.concepts.map((concept: any) => ({
|
console.log(`[FALLBACK] Using text-based selection with ${queryWords.length} query words`);
|
||||||
name: concept.name,
|
|
||||||
description: concept.description,
|
|
||||||
domains: concept.domains,
|
|
||||||
phases: concept.phases,
|
|
||||||
skillLevel: concept.skillLevel,
|
|
||||||
tags: concept.tags
|
|
||||||
}));
|
|
||||||
|
|
||||||
const regularPhases = toolsData.phases || [];
|
const scoredItems = allItems.map(item => {
|
||||||
|
let score = 0;
|
||||||
|
const searchText = `${item.name} ${item.description} ${(item.tags || []).join(' ')} ${(item.domains || []).join(' ')} ${(item.phases || []).join(' ')}`.toLowerCase();
|
||||||
|
|
||||||
const domainAgnosticSoftware = toolsData['domain-agnostic-software'] || [];
|
// Exact name match gets highest score
|
||||||
|
if (item.name.toLowerCase().includes(queryLower)) {
|
||||||
const allPhaseItems = [
|
score += 100;
|
||||||
...regularPhases,
|
|
||||||
...domainAgnosticSoftware
|
|
||||||
];
|
|
||||||
|
|
||||||
const phasesDescription = allPhaseItems.map((phase: any) =>
|
|
||||||
`- ${phase.id}: ${phase.name}`
|
|
||||||
).join('\n');
|
|
||||||
|
|
||||||
const domainsDescription = toolsData.domains.map((domain: any) =>
|
|
||||||
`- ${domain.id}: ${domain.name}`
|
|
||||||
).join('\n');
|
|
||||||
|
|
||||||
const phaseDescriptions = regularPhases.map((phase: any) =>
|
|
||||||
`- ${phase.name}: ${phase.description || 'Tools/Methods for this phase'}`
|
|
||||||
).join('\n');
|
|
||||||
|
|
||||||
const domainAgnosticDescriptions = domainAgnosticSoftware.map((section: any) =>
|
|
||||||
`- ${section.name}: ${section.description || 'Cross-cutting software and platforms'}`
|
|
||||||
).join('\n');
|
|
||||||
|
|
||||||
const validPhases = [
|
|
||||||
...regularPhases.map((p: any) => p.id),
|
|
||||||
...domainAgnosticSoftware.map((s: any) => s.id)
|
|
||||||
].join('|');
|
|
||||||
|
|
||||||
return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte, der Ermittlern bei der Auswahl von Software und Methoden hilft.
|
|
||||||
|
|
||||||
VERFÜGBARE TOOLS/METHODEN:
|
|
||||||
${JSON.stringify(toolsList, null, 2)}
|
|
||||||
|
|
||||||
VERFÜGBARE HINTERGRUNDWISSEN-KONZEPTE:
|
|
||||||
${JSON.stringify(conceptsList, null, 2)}
|
|
||||||
|
|
||||||
UNTERSUCHUNGSPHASEN (NIST Framework):
|
|
||||||
${phasesDescription}
|
|
||||||
|
|
||||||
FORENSISCHE DOMÄNEN:
|
|
||||||
${domainsDescription}
|
|
||||||
|
|
||||||
WICHTIGE REGELN:
|
|
||||||
1. Pro Phase 2-3 Tools/Methoden empfehlen (immer mindestens 2 wenn verfügbar)
|
|
||||||
2. Tools/Methoden können in MEHREREN Phasen empfohlen werden wenn sinnvoll - versuche ein Tool/Methode für jede Phase zu empfehlen, selbst wenn die Priorität "low" ist.
|
|
||||||
3. Für Reporting-Phase: Visualisierungs- und Dokumentationssoftware einschließen
|
|
||||||
4. Gib stets dem spezieller für den Fall geeigneten Werkzeug den Vorzug.
|
|
||||||
5. Deutsche Antworten für deutsche Anfragen, English for English queries
|
|
||||||
6. Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software.
|
|
||||||
7. Bevorzuge alles, was nicht proprietär ist (license != "Proprietary"), aber erkenne an, wenn proprietäre Software besser geeignet ist.
|
|
||||||
8. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
|
|
||||||
9. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
|
|
||||||
|
|
||||||
ENHANCED CONTEXTUAL ANALYSIS:
|
|
||||||
10. Analysiere das Szenario detailliert und identifiziere Schlüsselelemente, Bedrohungen und forensische Herausforderungen
|
|
||||||
11. Entwickle einen strategischen Untersuchungsansatz basierend auf dem spezifischen Szenario
|
|
||||||
12. Identifiziere zeitkritische oder besonders wichtige Faktoren für diesen Fall
|
|
||||||
|
|
||||||
SOFTWARE/METHODEN-AUSWAHL NACH PHASE:
|
|
||||||
${phaseDescriptions}
|
|
||||||
|
|
||||||
DOMÄNENAGNOSTISCHE SOFTWARE/METHODEN:
|
|
||||||
${domainAgnosticDescriptions}
|
|
||||||
|
|
||||||
ANTWORT-FORMAT (strict JSON):
|
|
||||||
{
|
|
||||||
"scenario_analysis": "Detaillierte Analyse des Szenarios: Erkannte Schlüsselelemente, Art des Vorfalls, betroffene Systeme, potentielle Bedrohungen und forensische Herausforderungen",
|
|
||||||
"investigation_approach": "Strategischer Untersuchungsansatz für dieses spezifische Szenario: Prioritäten, Reihenfolge der Phasen, besondere Überlegungen",
|
|
||||||
"critical_considerations": "Zeitkritische Faktoren, wichtige Sicherheitsaspekte oder besondere Vorsichtsmaßnahmen für diesen Fall",
|
|
||||||
"recommended_tools": [
|
|
||||||
{
|
|
||||||
"name": "EXAKTER Name aus der Tools-Database",
|
|
||||||
"priority": "high|medium|low",
|
|
||||||
"phase": "${validPhases}",
|
|
||||||
"justification": "Warum diese Methode für diese Phase und dieses spezifische Szenario geeignet ist - mit Bezug zu den erkannten Schlüsselelementen"
|
|
||||||
}
|
}
|
||||||
],
|
|
||||||
"workflow_suggestion": "Vorgeschlagener Untersuchungsablauf mit konkreten Schritten für dieses Szenario",
|
// Description match
|
||||||
"background_knowledge": [
|
if (item.description.toLowerCase().includes(queryLower)) {
|
||||||
{
|
score += 50;
|
||||||
"concept_name": "EXAKTER Name aus der Konzepte-Database",
|
|
||||||
"relevance": "Warum dieses Konzept für das Szenario relevant ist, und bei welchen der empfohlenen Methoden/Tools."
|
|
||||||
}
|
}
|
||||||
],
|
|
||||||
"additional_notes": "Wichtige Überlegungen und Hinweise"
|
// Word matches
|
||||||
|
queryWords.forEach(word => {
|
||||||
|
if (searchText.includes(word)) {
|
||||||
|
score += 10;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Tag exact matches get bonus
|
||||||
|
(item.tags || []).forEach((tag: string) => {
|
||||||
|
if (queryLower.includes(tag.toLowerCase()) || tag.toLowerCase().includes(queryLower)) {
|
||||||
|
score += 25;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
return { item, score };
|
||||||
|
});
|
||||||
|
|
||||||
|
const selected = scoredItems
|
||||||
|
.filter(({ score }) => score > 0)
|
||||||
|
.sort((a, b) => b.score - a.score)
|
||||||
|
.slice(0, maxCandidates)
|
||||||
|
.map(({ item }) => item);
|
||||||
|
|
||||||
|
console.log(`[FALLBACK] Selected ${selected.length} items from ${allItems.length} total`);
|
||||||
|
return selected;
|
||||||
}
|
}
|
||||||
|
|
||||||
Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
|
async function selectRelevantItemsWithEmbeddings(query: string, mode: string) {
|
||||||
}
|
const fullToolsData = await loadToolsDatabase();
|
||||||
|
let candidates;
|
||||||
|
let embeddingCandidates = 0;
|
||||||
|
let selectionMethod = 'text-based';
|
||||||
|
|
||||||
function createToolSystemPrompt(toolsData: any): string {
|
const allItems = [...fullToolsData.tools, ...fullToolsData.concepts];
|
||||||
const toolsList = toolsData.tools.map((tool: any) => ({
|
|
||||||
name: tool.name,
|
|
||||||
description: tool.description,
|
|
||||||
domains: tool.domains,
|
|
||||||
phases: tool.phases,
|
|
||||||
platforms: tool.platforms,
|
|
||||||
skillLevel: tool.skillLevel,
|
|
||||||
license: tool.license,
|
|
||||||
tags: tool.tags,
|
|
||||||
url: tool.url,
|
|
||||||
projectUrl: tool.projectUrl,
|
|
||||||
related_concepts: tool.related_concepts || []
|
|
||||||
}));
|
|
||||||
|
|
||||||
const conceptsList = toolsData.concepts.map((concept: any) => ({
|
if (isEmbeddingsEnabled()) {
|
||||||
name: concept.name,
|
|
||||||
description: concept.description,
|
|
||||||
domains: concept.domains,
|
|
||||||
phases: concept.phases,
|
|
||||||
skillLevel: concept.skillLevel,
|
|
||||||
tags: concept.tags
|
|
||||||
}));
|
|
||||||
|
|
||||||
return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte, der bei der Auswahl spezifischer Software/Methoden für konkrete Probleme hilft.
|
|
||||||
|
|
||||||
VERFÜGBARE TOOLS/METHODEN:
|
|
||||||
${JSON.stringify(toolsList, null, 2)}
|
|
||||||
|
|
||||||
VERFÜGBARE HINTERGRUNDWISSEN-KONZEPTE:
|
|
||||||
${JSON.stringify(conceptsList, null, 2)}
|
|
||||||
|
|
||||||
WICHTIGE REGELN:
|
|
||||||
1. Analysiere das spezifische Problem/die Anforderung sorgfältig
|
|
||||||
2. Empfehle 1-3 Methoden/Tools, sortiert nach Eignung (beste Empfehlung zuerst)
|
|
||||||
3. Gib detaillierte Erklärungen, WARUM und WIE jede Methode/Tool das Problem löst
|
|
||||||
4. Berücksichtige praktische Aspekte: Skill Level, Plattformen, Verfügbarkeit
|
|
||||||
5. Deutsche Antworten für deutsche Anfragen, English for English queries
|
|
||||||
6. Gib konkrete Anwendungshinweise, nicht nur allgemeine Beschreibungen - Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software.
|
|
||||||
7. Erwähne sowohl Stärken als auch Schwächen/Limitationen
|
|
||||||
8. Schlage alternative Ansätze vor, wenn sinnvoll
|
|
||||||
9. Gib grundsätzliche Hinweise, WIE die Methode/Tool konkret eingesetzt wird
|
|
||||||
10. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
|
|
||||||
11. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
|
|
||||||
|
|
||||||
ENHANCED CONTEXTUAL ANALYSIS:
|
|
||||||
12. Analysiere das Problem detailliert und identifiziere technische Anforderungen, Herausforderungen und Erfolgsfaktoren
|
|
||||||
13. Entwickle einen strategischen Lösungsansatz basierend auf dem spezifischen Problem
|
|
||||||
14. Identifiziere wichtige Voraussetzungen oder Warnungen für die Anwendung
|
|
||||||
|
|
||||||
ANTWORT-FORMAT (strict JSON):
|
|
||||||
{
|
|
||||||
"problem_analysis": "Detaillierte Analyse des Problems: Erkannte technische Anforderungen, Herausforderungen, benötigte Fähigkeiten und Erfolgsfaktoren",
|
|
||||||
"investigation_approach": "Strategischer Lösungsansatz für dieses spezifische Problem: Herangehensweise, Prioritäten, optimale Anwendungsreihenfolge",
|
|
||||||
"critical_considerations": "Wichtige Voraussetzungen, potentielle Fallstricke oder Warnungen für die Anwendung der empfohlenen Lösungen",
|
|
||||||
"recommended_tools": [
|
|
||||||
{
|
|
||||||
"name": "EXAKTER Name aus der Tools-Database",
|
|
||||||
"rank": 1,
|
|
||||||
"suitability_score": "high|medium|low",
|
|
||||||
"detailed_explanation": "Detaillierte Erklärung, warum dieses Tool/diese Methode das spezifische Problem löst - mit Bezug zu den erkannten Anforderungen",
|
|
||||||
"implementation_approach": "Konkrete Schritte/Ansatz zur Anwendung für dieses spezifische Problem",
|
|
||||||
"pros": ["Spezifische Vorteile für diesen Anwendungsfall", "Weitere Vorteile"],
|
|
||||||
"cons": ["Potentielle Nachteile oder Limitationen", "Weitere Einschränkungen"],
|
|
||||||
"alternatives": "Alternative Ansätze oder ergänzende Tools/Methoden, falls relevant"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"background_knowledge": [
|
|
||||||
{
|
|
||||||
"concept_name": "EXAKTER Name aus der Konzepte-Database",
|
|
||||||
"relevance": "Warum dieses Konzept für die empfohlenen Tools/das Problem relevant ist, und für welche der empfohlenen Methoden/Tools."
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"additional_considerations": "Wichtige Überlegungen, Voraussetzungen oder Warnungen"
|
|
||||||
}
|
|
||||||
|
|
||||||
Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
|
|
||||||
}
|
|
||||||
|
|
||||||
export const POST: APIRoute = async ({ request }) => {
|
|
||||||
try {
|
try {
|
||||||
const authResult = await withAPIAuth(request, 'ai');
|
console.log(`[SELECTION] Attempting embeddings-based selection for ${allItems.length} items`);
|
||||||
if (!authResult.authenticated) {
|
const toolEmbeddings = await generateToolEmbeddings(fullToolsData);
|
||||||
return createAuthErrorResponse();
|
const similarItems = await findSimilarItems(query, toolEmbeddings);
|
||||||
|
candidates = similarItems.map(item => item.tool);
|
||||||
|
embeddingCandidates = candidates.length;
|
||||||
|
selectionMethod = 'embeddings';
|
||||||
|
console.log(`[SELECTION] Embeddings selection successful: ${candidates.length} candidates`);
|
||||||
|
} catch (error) {
|
||||||
|
console.warn('[SELECTION] Embeddings failed, using text-based fallback:', error.message);
|
||||||
|
candidates = simpleTextBasedSelection(query, allItems);
|
||||||
|
selectionMethod = 'text-fallback';
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.log('[SELECTION] Embeddings disabled, using text-based selection');
|
||||||
|
candidates = simpleTextBasedSelection(query, allItems);
|
||||||
}
|
}
|
||||||
|
|
||||||
const userId = authResult.userId;
|
// Safety check - ensure we have some candidates
|
||||||
|
if (candidates.length === 0) {
|
||||||
if (!checkRateLimit(userId)) {
|
console.warn('[SELECTION] No candidates found, using top items from full dataset');
|
||||||
return apiError.rateLimit('Rate limit exceeded');
|
candidates = allItems.slice(0, parseInt(process.env.AI_EMBEDDING_CANDIDATES || '30'));
|
||||||
|
selectionMethod = 'emergency-fallback';
|
||||||
}
|
}
|
||||||
|
|
||||||
const body = await request.json();
|
console.log(`[SELECTION] Using ${selectionMethod} selection: ${candidates.length} candidates for AI selector`);
|
||||||
const { query, mode = 'workflow', taskId: clientTaskId } = body;
|
|
||||||
|
|
||||||
// ADD THIS DEBUG LOGGING
|
const selectorConfig = getSelectorConfig();
|
||||||
console.log(`[AI API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
|
|
||||||
|
|
||||||
if (!query || typeof query !== 'string') {
|
try {
|
||||||
console.log(`[AI API] Invalid query for task ${clientTaskId}`);
|
const prompt = createSelectorPrompt(candidates, query, mode);
|
||||||
return apiError.badRequest('Query required');
|
|
||||||
|
console.log(`[SELECTION] Sending ${candidates.length} candidates to AI selector (prompt length: ${prompt.length})`);
|
||||||
|
|
||||||
|
const response = await callAI(selectorConfig, [
|
||||||
|
{ role: 'user', content: prompt }
|
||||||
|
], 1500, 0.1); // Lower temperature for consistent JSON
|
||||||
|
|
||||||
|
const content = response.choices?.[0]?.message?.content;
|
||||||
|
if (!content) {
|
||||||
|
console.error('[SELECTION] AI response structure:', JSON.stringify(response, null, 2));
|
||||||
|
throw new Error('No selection response from AI');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!['workflow', 'tool'].includes(mode)) {
|
console.log(`[SELECTION] AI response received, length: ${content.length}`);
|
||||||
console.log(`[AI API] Invalid mode for task ${clientTaskId}: ${mode}`);
|
console.log(`[SELECTION] AI response preview:`, content.slice(0, 200) + '...');
|
||||||
return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
|
|
||||||
|
let selection;
|
||||||
|
try {
|
||||||
|
const cleanedContent = extractJsonFromResponse(content);
|
||||||
|
console.log(`[SELECTION] Extracted JSON preview:`, cleanedContent.slice(0, 200) + '...');
|
||||||
|
selection = JSON.parse(cleanedContent);
|
||||||
|
} catch (parseError) {
|
||||||
|
console.error('[SELECTION] JSON parsing failed:', parseError);
|
||||||
|
console.error('[SELECTION] Raw AI content:', content);
|
||||||
|
console.error('[SELECTION] Cleaned content:', extractJsonFromResponse(content));
|
||||||
|
throw new Error(`JSON parsing failed: ${parseError.message}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
const sanitizedQuery = sanitizeInput(query);
|
const selectedTools = candidates.filter(item =>
|
||||||
if (sanitizedQuery.includes('[FILTERED]')) {
|
item.type !== 'concept' && (selection.selected_tools || []).includes(item.name)
|
||||||
console.log(`[AI API] Filtered input detected for task ${clientTaskId}`);
|
);
|
||||||
return apiError.badRequest('Invalid input detected');
|
const selectedConcepts = candidates.filter(item =>
|
||||||
|
item.type === 'concept' && (selection.selected_concepts || []).includes(item.name)
|
||||||
|
);
|
||||||
|
|
||||||
|
console.log(`[SELECTION] Final selection: ${selectedTools.length} tools, ${selectedConcepts.length} concepts from ${candidates.length} candidates`);
|
||||||
|
|
||||||
|
// Ensure we have at least some tools selected
|
||||||
|
if (selectedTools.length === 0 && selectedConcepts.length === 0) {
|
||||||
|
console.warn('[SELECTION] No items selected by AI, using fallback selection');
|
||||||
|
const fallbackTools = candidates.filter(item => item.type !== 'concept').slice(0, 10);
|
||||||
|
const fallbackConcepts = candidates.filter(item => item.type === 'concept').slice(0, 3);
|
||||||
|
return {
|
||||||
|
selectedItems: {
|
||||||
|
tools: fallbackTools,
|
||||||
|
concepts: fallbackConcepts,
|
||||||
|
domains: fullToolsData.domains,
|
||||||
|
phases: fullToolsData.phases,
|
||||||
|
...Object.keys(fullToolsData).reduce((acc, key) => {
|
||||||
|
if (!['tools', 'concepts', 'domains', 'phases'].includes(key)) {
|
||||||
|
acc[key] = fullToolsData[key];
|
||||||
}
|
}
|
||||||
|
return acc;
|
||||||
const toolsData = await loadToolsDatabase();
|
}, {} as any)
|
||||||
|
|
||||||
const systemPrompt = mode === 'workflow'
|
|
||||||
? createWorkflowSystemPrompt(toolsData)
|
|
||||||
: createToolSystemPrompt(toolsData);
|
|
||||||
|
|
||||||
const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
|
|
||||||
|
|
||||||
console.log(`[AI API] About to enqueue task ${taskId}`);
|
|
||||||
|
|
||||||
|
|
||||||
const aiResponse = await enqueueApiCall(() =>
|
|
||||||
fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
|
|
||||||
method: 'POST',
|
|
||||||
headers: {
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'Authorization': `Bearer ${process.env.AI_API_KEY}`
|
|
||||||
},
|
},
|
||||||
body: JSON.stringify({
|
reasoning: 'Fallback selection used due to AI selection failure',
|
||||||
model: AI_MODEL,
|
candidateCount: candidates.length,
|
||||||
messages: [
|
embeddingCandidates,
|
||||||
{
|
selectionMethod: selectionMethod + '-fallback'
|
||||||
role: 'system',
|
};
|
||||||
content: systemPrompt
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
selectedItems: {
|
||||||
|
tools: selectedTools,
|
||||||
|
concepts: selectedConcepts,
|
||||||
|
domains: fullToolsData.domains,
|
||||||
|
phases: fullToolsData.phases,
|
||||||
|
...Object.keys(fullToolsData).reduce((acc, key) => {
|
||||||
|
if (!['tools', 'concepts', 'domains', 'phases'].includes(key)) {
|
||||||
|
acc[key] = fullToolsData[key];
|
||||||
|
}
|
||||||
|
return acc;
|
||||||
|
}, {} as any)
|
||||||
},
|
},
|
||||||
{
|
reasoning: selection.reasoning,
|
||||||
role: 'user',
|
candidateCount: candidates.length,
|
||||||
content: sanitizedQuery
|
embeddingCandidates,
|
||||||
|
selectionMethod
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[SELECTION] AI selector failed:', error);
|
||||||
|
console.error('[SELECTION] Selector model:', selectorConfig.model);
|
||||||
|
console.error('[SELECTION] Query length:', query.length);
|
||||||
|
console.error('[SELECTION] Candidate count:', candidates.length);
|
||||||
|
throw new Error(`Selection failed: ${error.message}`);
|
||||||
}
|
}
|
||||||
],
|
}
|
||||||
max_tokens: 3500,
|
|
||||||
temperature: 0.3
|
|
||||||
})
|
|
||||||
})
|
|
||||||
, taskId);
|
|
||||||
|
|
||||||
if (!aiResponse.ok) {
|
async function analyzeWithSelectedItems(selectionResult: any, query: string, mode: string) {
|
||||||
console.error('AI API error:', await aiResponse.text());
|
const analyzerConfig = getAnalyzerConfig();
|
||||||
return apiServerError.unavailable('AI service unavailable');
|
const prompt = mode === 'workflow'
|
||||||
|
? createWorkflowSystemPrompt(selectionResult.selectedItems)
|
||||||
|
: createToolSystemPrompt(selectionResult.selectedItems);
|
||||||
|
|
||||||
|
console.log(`[ANALYSIS] Starting ${mode} analysis with ${selectionResult.selectedItems.tools.length} tools and ${selectionResult.selectedItems.concepts.length} concepts`);
|
||||||
|
console.log(`[ANALYSIS] Using model: ${analyzerConfig.model}`);
|
||||||
|
console.log(`[ANALYSIS] System prompt length: ${prompt.length}`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await callAI(analyzerConfig, [
|
||||||
|
{ role: 'system', content: prompt },
|
||||||
|
{ role: 'user', content: query }
|
||||||
|
], 3500, 0.1); // Lower temperature for more consistent JSON output
|
||||||
|
|
||||||
|
const content = response.choices?.[0]?.message?.content;
|
||||||
|
if (!content) {
|
||||||
|
console.error('[ANALYSIS] No content in AI response:', JSON.stringify(response, null, 2));
|
||||||
|
throw new Error('No analysis response from AI');
|
||||||
}
|
}
|
||||||
|
|
||||||
const aiData = await aiResponse.json();
|
console.log(`[ANALYSIS] Received AI response, length: ${content.length}`);
|
||||||
const aiContent = aiData.choices?.[0]?.message?.content;
|
console.log(`[ANALYSIS] Response preview:`, content.slice(0, 200) + '...');
|
||||||
|
|
||||||
if (!aiContent) {
|
|
||||||
return apiServerError.unavailable('No response from AI');
|
|
||||||
}
|
|
||||||
|
|
||||||
let recommendation;
|
let recommendation;
|
||||||
try {
|
try {
|
||||||
const cleanedContent = stripMarkdownJson(aiContent);
|
const cleanedContent = extractJsonFromResponse(content);
|
||||||
|
console.log(`[ANALYSIS] Extracted JSON preview:`, cleanedContent.slice(0, 200) + '...');
|
||||||
recommendation = JSON.parse(cleanedContent);
|
recommendation = JSON.parse(cleanedContent);
|
||||||
} catch (error) {
|
} catch (parseError) {
|
||||||
console.error('Failed to parse AI response:', aiContent);
|
console.warn('[ANALYSIS] JSON parsing failed, attempting prose-to-JSON conversion...');
|
||||||
return apiServerError.unavailable('Invalid AI response format');
|
|
||||||
|
// Check if response looks like prose (doesn't start with {)
|
||||||
|
const trimmedContent = content.trim();
|
||||||
|
if (!trimmedContent.startsWith('{')) {
|
||||||
|
console.log('[ANALYSIS] Response is prose, converting to JSON...');
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Use a simpler model for conversion to save costs
|
||||||
|
const conversionConfig = getSelectorConfig();
|
||||||
|
const conversionPrompt = createJsonConversionPrompt(content, query, mode);
|
||||||
|
|
||||||
|
console.log(`[ANALYSIS] Sending prose conversion request (length: ${conversionPrompt.length})`);
|
||||||
|
|
||||||
|
const conversionResponse = await callAI(conversionConfig, [
|
||||||
|
{ role: 'user', content: conversionPrompt }
|
||||||
|
], 2000, 0.1); // Lower temperature for more consistent JSON
|
||||||
|
|
||||||
|
const convertedContent = conversionResponse.choices?.[0]?.message?.content;
|
||||||
|
if (!convertedContent) {
|
||||||
|
throw new Error('No conversion response');
|
||||||
}
|
}
|
||||||
|
|
||||||
const validToolNames = new Set(toolsData.tools.map((t: any) => t.name));
|
console.log(`[ANALYSIS] Conversion response length: ${convertedContent.length}`);
|
||||||
const validConceptNames = new Set(toolsData.concepts.map((c: any) => c.name));
|
console.log(`[ANALYSIS] Conversion preview:`, convertedContent.slice(0, 200) + '...');
|
||||||
|
|
||||||
|
const finalJsonContent = extractJsonFromResponse(convertedContent);
|
||||||
|
recommendation = JSON.parse(finalJsonContent);
|
||||||
|
|
||||||
|
console.log('[ANALYSIS] ✅ Successfully converted prose to JSON');
|
||||||
|
|
||||||
|
} catch (conversionError) {
|
||||||
|
console.error('[ANALYSIS] Prose-to-JSON conversion failed:', conversionError);
|
||||||
|
console.error('[ANALYSIS] Original prose response:', content);
|
||||||
|
throw new Error(`Both direct JSON parsing and prose conversion failed: ${parseError.message}`);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.error('[ANALYSIS] Response looks like JSON but parsing failed:', parseError);
|
||||||
|
console.error('[ANALYSIS] Raw content:', content);
|
||||||
|
console.error('[ANALYSIS] Cleaned content:', extractJsonFromResponse(content));
|
||||||
|
throw new Error(`JSON parsing failed: ${parseError.message}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const validToolNames = new Set(selectionResult.selectedItems.tools.map((t: any) => t.name));
|
||||||
|
const validConceptNames = new Set(selectionResult.selectedItems.concepts.map((c: any) => c.name));
|
||||||
|
|
||||||
let validatedRecommendation;
|
let validatedRecommendation;
|
||||||
|
|
||||||
if (mode === 'workflow') {
|
if (mode === 'workflow') {
|
||||||
validatedRecommendation = {
|
validatedRecommendation = {
|
||||||
...recommendation,
|
...recommendation,
|
||||||
// Ensure all new fields are included with fallbacks
|
|
||||||
scenario_analysis: recommendation.scenario_analysis || recommendation.problem_analysis || '',
|
scenario_analysis: recommendation.scenario_analysis || recommendation.problem_analysis || '',
|
||||||
investigation_approach: recommendation.investigation_approach || '',
|
investigation_approach: recommendation.investigation_approach || '',
|
||||||
critical_considerations: recommendation.critical_considerations || '',
|
critical_considerations: recommendation.critical_considerations || '',
|
||||||
@ -393,7 +374,6 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
} else {
|
} else {
|
||||||
validatedRecommendation = {
|
validatedRecommendation = {
|
||||||
...recommendation,
|
...recommendation,
|
||||||
// Ensure all new fields are included with fallbacks
|
|
||||||
problem_analysis: recommendation.problem_analysis || recommendation.scenario_analysis || '',
|
problem_analysis: recommendation.problem_analysis || recommendation.scenario_analysis || '',
|
||||||
investigation_approach: recommendation.investigation_approach || '',
|
investigation_approach: recommendation.investigation_approach || '',
|
||||||
critical_considerations: recommendation.critical_considerations || '',
|
critical_considerations: recommendation.critical_considerations || '',
|
||||||
@ -420,14 +400,71 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`[AI Query] Mode: ${mode}, User: ${userId}, Query length: ${sanitizedQuery.length}, Tools: ${validatedRecommendation.recommended_tools.length}, Concepts: ${validatedRecommendation.background_knowledge?.length || 0}`);
|
console.log(`[ANALYSIS] Completed successfully: ${validatedRecommendation.recommended_tools.length} final recommendations`);
|
||||||
|
return validatedRecommendation;
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[ANALYSIS] Failed:', error);
|
||||||
|
throw new Error(`Analysis failed: ${error.message}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export const POST: APIRoute = async ({ request }) => {
|
||||||
|
try {
|
||||||
|
const authResult = await withAPIAuth(request, 'ai');
|
||||||
|
if (!authResult.authenticated) {
|
||||||
|
return createAuthErrorResponse();
|
||||||
|
}
|
||||||
|
|
||||||
|
const userId = authResult.userId;
|
||||||
|
|
||||||
|
if (!checkRateLimit(userId)) {
|
||||||
|
return apiError.rateLimit('Rate limit exceeded');
|
||||||
|
}
|
||||||
|
|
||||||
|
const body = await request.json();
|
||||||
|
const { query, mode = 'workflow', taskId: clientTaskId } = body;
|
||||||
|
|
||||||
|
console.log(`[AI API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
|
||||||
|
|
||||||
|
if (!query || typeof query !== 'string') {
|
||||||
|
console.log(`[AI API] Invalid query for task ${clientTaskId}`);
|
||||||
|
return apiError.badRequest('Query required');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!['workflow', 'tool'].includes(mode)) {
|
||||||
|
console.log(`[AI API] Invalid mode for task ${clientTaskId}: ${mode}`);
|
||||||
|
return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
|
||||||
|
}
|
||||||
|
|
||||||
|
const sanitizedQuery = sanitizeInput(query);
|
||||||
|
if (sanitizedQuery.includes('[FILTERED]')) {
|
||||||
|
console.log(`[AI API] Filtered input detected for task ${clientTaskId}`);
|
||||||
|
return apiError.badRequest('Invalid input detected');
|
||||||
|
}
|
||||||
|
|
||||||
|
const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
|
||||||
|
|
||||||
|
console.log(`[AI API] Starting AI processing for task ${taskId}`);
|
||||||
|
|
||||||
|
const result = await enqueueChainedApiCall(
|
||||||
|
async () => selectRelevantItemsWithEmbeddings(sanitizedQuery, mode),
|
||||||
|
(selectionResult: any) => async () => analyzeWithSelectedItems(selectionResult, sanitizedQuery, mode),
|
||||||
|
taskId
|
||||||
|
);
|
||||||
|
|
||||||
|
console.log(`[AI Query] Success - Mode: ${mode}, User: ${userId}, Method: ${result.stage1Result.selectionMethod}, Tools: ${result.finalResult.recommended_tools.length}, Concepts: ${result.finalResult.background_knowledge?.length || 0}`);
|
||||||
|
|
||||||
return new Response(JSON.stringify({
|
return new Response(JSON.stringify({
|
||||||
success: true,
|
success: true,
|
||||||
mode,
|
mode,
|
||||||
taskId,
|
taskId,
|
||||||
recommendation: validatedRecommendation,
|
recommendation: result.finalResult,
|
||||||
query: sanitizedQuery
|
query: sanitizedQuery,
|
||||||
|
selectedItems: result.stage1Result.candidateCount,
|
||||||
|
embeddingCandidates: result.stage1Result.embeddingCandidates,
|
||||||
|
selectionMethod: result.stage1Result.selectionMethod,
|
||||||
|
selectionReasoning: result.stage1Result.reasoning,
|
||||||
|
embeddingsEnabled: isEmbeddingsEnabled()
|
||||||
}), {
|
}), {
|
||||||
status: 200,
|
status: 200,
|
||||||
headers: { 'Content-Type': 'application/json' }
|
headers: { 'Content-Type': 'application/json' }
|
||||||
|
221
src/utils/aiPrompts.ts
Normal file
221
src/utils/aiPrompts.ts
Normal file
@ -0,0 +1,221 @@
|
|||||||
|
export function createSelectorPrompt(candidateItems: any[], query: string, mode: string): string {
|
||||||
|
const maxItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '15');
|
||||||
|
|
||||||
|
// Create a simpler representation of items for selection
|
||||||
|
const simplifiedItems = candidateItems.map(item => ({
|
||||||
|
name: item.name,
|
||||||
|
type: item.type,
|
||||||
|
description: item.description?.slice(0, 150) + (item.description?.length > 150 ? '...' : ''),
|
||||||
|
tags: (item.tags || []).slice(0, 5),
|
||||||
|
domains: item.domains || [],
|
||||||
|
phases: item.phases || []
|
||||||
|
}));
|
||||||
|
|
||||||
|
return `RESPOND ONLY IN JSON FORMAT. NO EXPLANATIONS. NO MARKDOWN. ONLY JSON.
|
||||||
|
|
||||||
|
You are a DFIR expert selecting relevant tools and concepts.
|
||||||
|
|
||||||
|
QUERY: "${query}"
|
||||||
|
MODE: ${mode}
|
||||||
|
|
||||||
|
From these ${candidateItems.length} candidates, select the ${maxItems} most relevant:
|
||||||
|
|
||||||
|
${JSON.stringify(simplifiedItems, null, 2)}
|
||||||
|
|
||||||
|
OUTPUT FORMAT (COPY EXACTLY):
|
||||||
|
{"selected_tools":["name1","name2"],"selected_concepts":["concept1"],"reasoning":"brief explanation"}
|
||||||
|
|
||||||
|
CRITICAL: Output ONLY the JSON object above. No other text.`;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function createWorkflowSystemPrompt(selectedData: any): string {
|
||||||
|
const toolsList = selectedData.tools.map((tool: any) => ({
|
||||||
|
name: tool.name,
|
||||||
|
description: tool.description?.slice(0, 200),
|
||||||
|
domains: tool.domains,
|
||||||
|
phases: tool.phases,
|
||||||
|
platforms: tool.platforms,
|
||||||
|
skillLevel: tool.skillLevel,
|
||||||
|
license: tool.license,
|
||||||
|
tags: tool.tags?.slice(0, 5),
|
||||||
|
related_concepts: tool.related_concepts || []
|
||||||
|
}));
|
||||||
|
|
||||||
|
const conceptsList = selectedData.concepts.map((concept: any) => ({
|
||||||
|
name: concept.name,
|
||||||
|
description: concept.description?.slice(0, 200),
|
||||||
|
domains: concept.domains,
|
||||||
|
phases: concept.phases,
|
||||||
|
skillLevel: concept.skillLevel,
|
||||||
|
tags: concept.tags?.slice(0, 5)
|
||||||
|
}));
|
||||||
|
|
||||||
|
const validPhases = [
|
||||||
|
...selectedData.phases.map((p: any) => p.id),
|
||||||
|
...Object.keys(selectedData).filter(key =>
|
||||||
|
!['tools', 'concepts', 'domains', 'phases'].includes(key)
|
||||||
|
).map(key => selectedData[key]).flat().filter(Boolean).map((s: any) => s.id)
|
||||||
|
].join('|');
|
||||||
|
|
||||||
|
return `RESPOND ONLY IN VALID JSON FORMAT. NO EXPLANATIONS BEFORE OR AFTER JSON.
|
||||||
|
|
||||||
|
You are a DFIR expert providing workflow recommendations.
|
||||||
|
|
||||||
|
AVAILABLE TOOLS: ${JSON.stringify(toolsList)}
|
||||||
|
AVAILABLE CONCEPTS: ${JSON.stringify(conceptsList)}
|
||||||
|
|
||||||
|
RULES:
|
||||||
|
- Recommend 2-3 tools per phase
|
||||||
|
- Use exact tool names from the database
|
||||||
|
- Include relevant concepts for background knowledge
|
||||||
|
- German responses for German queries
|
||||||
|
|
||||||
|
REQUIRED OUTPUT FORMAT:
|
||||||
|
{
|
||||||
|
"scenario_analysis": "Analysis text in German",
|
||||||
|
"investigation_approach": "Approach text in German",
|
||||||
|
"critical_considerations": "Considerations text in German",
|
||||||
|
"recommended_tools": [
|
||||||
|
{
|
||||||
|
"name": "EXACT_TOOL_NAME",
|
||||||
|
"priority": "high|medium|low",
|
||||||
|
"phase": "${validPhases}",
|
||||||
|
"justification": "Why this tool fits"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"workflow_suggestion": "Workflow text in German",
|
||||||
|
"background_knowledge": [
|
||||||
|
{
|
||||||
|
"concept_name": "EXACT_CONCEPT_NAME",
|
||||||
|
"relevance": "Why relevant"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"additional_notes": "Important notes"
|
||||||
|
}
|
||||||
|
|
||||||
|
CRITICAL: Output ONLY the JSON object. No markdown. No explanations outside JSON.`;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function createToolSystemPrompt(selectedData: any): string {
|
||||||
|
const toolsList = selectedData.tools.map((tool: any) => ({
|
||||||
|
name: tool.name,
|
||||||
|
description: tool.description?.slice(0, 200),
|
||||||
|
domains: tool.domains,
|
||||||
|
phases: tool.phases,
|
||||||
|
platforms: tool.platforms,
|
||||||
|
skillLevel: tool.skillLevel,
|
||||||
|
license: tool.license,
|
||||||
|
tags: tool.tags?.slice(0, 5),
|
||||||
|
related_concepts: tool.related_concepts || []
|
||||||
|
}));
|
||||||
|
|
||||||
|
const conceptsList = selectedData.concepts.map((concept: any) => ({
|
||||||
|
name: concept.name,
|
||||||
|
description: concept.description?.slice(0, 200),
|
||||||
|
domains: concept.domains,
|
||||||
|
phases: concept.phases,
|
||||||
|
skillLevel: concept.skillLevel,
|
||||||
|
tags: concept.tags?.slice(0, 5)
|
||||||
|
}));
|
||||||
|
|
||||||
|
return `RESPOND ONLY IN VALID JSON FORMAT. NO EXPLANATIONS BEFORE OR AFTER JSON.
|
||||||
|
|
||||||
|
You are a DFIR expert providing specific tool recommendations.
|
||||||
|
|
||||||
|
AVAILABLE TOOLS: ${JSON.stringify(toolsList)}
|
||||||
|
AVAILABLE CONCEPTS: ${JSON.stringify(conceptsList)}
|
||||||
|
|
||||||
|
RULES:
|
||||||
|
- Recommend 1-3 tools ranked by suitability
|
||||||
|
- Use exact tool names from the database
|
||||||
|
- German responses for German queries
|
||||||
|
|
||||||
|
REQUIRED OUTPUT FORMAT:
|
||||||
|
{
|
||||||
|
"problem_analysis": "Problem analysis in German",
|
||||||
|
"investigation_approach": "Solution approach in German",
|
||||||
|
"critical_considerations": "Important prerequisites in German",
|
||||||
|
"recommended_tools": [
|
||||||
|
{
|
||||||
|
"name": "EXACT_TOOL_NAME",
|
||||||
|
"rank": 1,
|
||||||
|
"suitability_score": "high|medium|low",
|
||||||
|
"detailed_explanation": "Why this tool solves the problem",
|
||||||
|
"implementation_approach": "How to apply it",
|
||||||
|
"pros": ["advantage1", "advantage2"],
|
||||||
|
"cons": ["limitation1", "limitation2"],
|
||||||
|
"alternatives": "Alternative approaches"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"background_knowledge": [
|
||||||
|
{
|
||||||
|
"concept_name": "EXACT_CONCEPT_NAME",
|
||||||
|
"relevance": "Why relevant for this problem"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"additional_considerations": "Important considerations"
|
||||||
|
}
|
||||||
|
|
||||||
|
CRITICAL: Output ONLY the JSON object. No markdown. No explanations outside JSON.`;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function createJsonConversionPrompt(proseResponse: string, originalQuery: string, mode: string): string {
|
||||||
|
return `Convert this prose response to JSON format.
|
||||||
|
|
||||||
|
ORIGINAL QUERY: "${originalQuery}"
|
||||||
|
MODE: ${mode}
|
||||||
|
|
||||||
|
PROSE RESPONSE TO CONVERT:
|
||||||
|
${proseResponse}
|
||||||
|
|
||||||
|
OUTPUT REQUIRED FORMAT FOR ${mode.toUpperCase()}:
|
||||||
|
${mode === 'workflow' ? `{
|
||||||
|
"scenario_analysis": "extracted analysis text",
|
||||||
|
"investigation_approach": "extracted approach text",
|
||||||
|
"critical_considerations": "extracted considerations",
|
||||||
|
"recommended_tools": [
|
||||||
|
{
|
||||||
|
"name": "tool name from response",
|
||||||
|
"priority": "high|medium|low",
|
||||||
|
"phase": "phase_id",
|
||||||
|
"justification": "extracted reasoning"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"workflow_suggestion": "extracted workflow",
|
||||||
|
"background_knowledge": [],
|
||||||
|
"additional_notes": "extracted notes"
|
||||||
|
}` : `{
|
||||||
|
"problem_analysis": "extracted analysis",
|
||||||
|
"investigation_approach": "extracted approach",
|
||||||
|
"critical_considerations": "extracted considerations",
|
||||||
|
"recommended_tools": [
|
||||||
|
{
|
||||||
|
"name": "tool name",
|
||||||
|
"rank": 1,
|
||||||
|
"suitability_score": "high|medium|low",
|
||||||
|
"detailed_explanation": "extracted explanation",
|
||||||
|
"implementation_approach": "extracted approach",
|
||||||
|
"pros": ["extracted pros"],
|
||||||
|
"cons": ["extracted cons"],
|
||||||
|
"alternatives": "extracted alternatives"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"background_knowledge": [],
|
||||||
|
"additional_considerations": "extracted considerations"
|
||||||
|
}`}
|
||||||
|
|
||||||
|
CRITICAL: Extract the information from the prose and format as JSON. Output ONLY the JSON object.`;
|
||||||
|
}
|
||||||
|
|
||||||
|
export function createEnhancementPrompt(input: string): string {
|
||||||
|
return `Analyze this forensic scenario input and provide questions if incomplete.
|
||||||
|
|
||||||
|
If incomplete: return JSON array of 1-3 questions
|
||||||
|
If complete: return empty array []
|
||||||
|
|
||||||
|
INPUT: ${input}
|
||||||
|
|
||||||
|
FORMAT: ["question1?", "question2?"] or []
|
||||||
|
|
||||||
|
CRITICAL: Output ONLY the JSON array.`;
|
||||||
|
}
|
106
src/utils/aiService.ts
Normal file
106
src/utils/aiService.ts
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
interface AIConfig {
|
||||||
|
model: string;
|
||||||
|
endpoint: string;
|
||||||
|
apiKey: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
function getSelectorConfig(): AIConfig {
|
||||||
|
return {
|
||||||
|
model: process.env.AI_SELECTOR_MODEL || process.env.AI_MODEL || 'claude-sonnet-4-20250514',
|
||||||
|
endpoint: (process.env.AI_SELECTOR_ENDPOINT || process.env.AI_API_ENDPOINT!) + '/v1/messages',
|
||||||
|
apiKey: process.env.AI_SELECTOR_API_KEY || process.env.AI_API_KEY!
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function getAnalyzerConfig(): AIConfig {
|
||||||
|
return {
|
||||||
|
model: process.env.AI_ANALYZER_MODEL || process.env.AI_MODEL || 'claude-sonnet-4-20250514',
|
||||||
|
endpoint: (process.env.AI_ANALYZER_ENDPOINT || process.env.AI_API_ENDPOINT!) + '/v1/messages',
|
||||||
|
apiKey: process.env.AI_ANALYZER_API_KEY || process.env.AI_API_KEY!
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function callAI(config: AIConfig, messages: any[], maxTokens: number = 1000, temperature: number = 0.3) {
|
||||||
|
const requestBody = {
|
||||||
|
model: config.model,
|
||||||
|
max_tokens: maxTokens,
|
||||||
|
temperature,
|
||||||
|
messages
|
||||||
|
};
|
||||||
|
|
||||||
|
console.log(`[AI API] Calling ${config.model} with ${messages.length} messages, max_tokens: ${maxTokens}`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await fetch(config.endpoint, {
|
||||||
|
method: "POST",
|
||||||
|
headers: {
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Authorization": `Bearer ${config.apiKey}`
|
||||||
|
},
|
||||||
|
body: JSON.stringify(requestBody)
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
let errorDetails = `${response.status} ${response.statusText}`;
|
||||||
|
try {
|
||||||
|
const errorBody = await response.text();
|
||||||
|
console.error(`[AI API] Error response body:`, errorBody);
|
||||||
|
errorDetails += ` - ${errorBody}`;
|
||||||
|
} catch {
|
||||||
|
console.error(`[AI API] Could not read error response body`);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.error(`[AI API] Request failed:`, {
|
||||||
|
endpoint: config.endpoint,
|
||||||
|
model: config.model,
|
||||||
|
messageCount: messages.length,
|
||||||
|
maxTokens,
|
||||||
|
status: response.status,
|
||||||
|
hasApiKey: !!config.apiKey
|
||||||
|
});
|
||||||
|
|
||||||
|
throw new Error(`AI API error: ${response.status} - ${response.statusText}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const result = await response.json();
|
||||||
|
|
||||||
|
// Handle different API response formats
|
||||||
|
let content: string | null = null;
|
||||||
|
|
||||||
|
// Mistral API format
|
||||||
|
if (result.content && Array.isArray(result.content) && result.content[0]?.text) {
|
||||||
|
content = result.content[0].text;
|
||||||
|
console.log(`[AI API] Success (Mistral format) - Response length: ${content.length} chars`);
|
||||||
|
return {
|
||||||
|
choices: [{
|
||||||
|
message: {
|
||||||
|
content: content
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Anthropic/OpenAI format
|
||||||
|
if (result.choices && result.choices[0] && result.choices[0].message) {
|
||||||
|
content = result.choices[0].message.content;
|
||||||
|
console.log(`[AI API] Success (OpenAI/Anthropic format) - Response length: ${content?.length || 0} chars`);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If neither format matches, log the structure and fail
|
||||||
|
console.error(`[AI API] Unexpected response structure:`, JSON.stringify(result, null, 2));
|
||||||
|
throw new Error('Unexpected AI API response structure');
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
if (error.message?.includes('AI API error:')) {
|
||||||
|
// Re-throw API errors as-is
|
||||||
|
throw error;
|
||||||
|
} else {
|
||||||
|
// Network or other errors
|
||||||
|
console.error(`[AI API] Network/request error:`, error);
|
||||||
|
throw new Error(`AI API request failed: ${error.message}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
export { getSelectorConfig, getAnalyzerConfig };
|
215
src/utils/embeddingsService.ts
Normal file
215
src/utils/embeddingsService.ts
Normal file
@ -0,0 +1,215 @@
|
|||||||
|
interface EmbeddingConfig {
|
||||||
|
endpoint: string;
|
||||||
|
apiKey: string;
|
||||||
|
model: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
export interface ToolEmbedding {
|
||||||
|
id: string;
|
||||||
|
name: string;
|
||||||
|
type: 'tool' | 'concept';
|
||||||
|
embedding: number[];
|
||||||
|
content: string;
|
||||||
|
tool: any;
|
||||||
|
similarity?: number;
|
||||||
|
}
|
||||||
|
|
||||||
|
function getEmbeddingConfig(): EmbeddingConfig {
|
||||||
|
return {
|
||||||
|
endpoint: process.env.AI_EMBEDDINGS_ENDPOINT || 'https://api.mistral.ai/v1/embeddings',
|
||||||
|
apiKey: process.env.AI_EMBEDDINGS_API_KEY!,
|
||||||
|
model: process.env.AI_EMBEDDINGS_MODEL || 'mistral-embed'
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export function isEmbeddingsEnabled(): boolean {
|
||||||
|
return process.env.AI_EMBEDDINGS_ENABLED === 'true';
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function generateEmbedding(text: string): Promise<number[]> {
|
||||||
|
const config = getEmbeddingConfig();
|
||||||
|
|
||||||
|
const response = await fetch(config.endpoint, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': `Bearer ${config.apiKey}`
|
||||||
|
},
|
||||||
|
body: JSON.stringify({
|
||||||
|
model: config.model,
|
||||||
|
input: [text]
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
let errorDetails = `${response.status} ${response.statusText}`;
|
||||||
|
try {
|
||||||
|
const errorBody = await response.text();
|
||||||
|
errorDetails += ` - ${errorBody}`;
|
||||||
|
} catch {
|
||||||
|
// If we can't read the error body, use basic error info
|
||||||
|
}
|
||||||
|
console.error('Embeddings API error details:', errorDetails);
|
||||||
|
throw new Error(`Embeddings API error: ${response.status}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = await response.json();
|
||||||
|
return data.data[0].embedding;
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function generateBatchEmbeddings(texts: string[]): Promise<number[][]> {
|
||||||
|
const config = getEmbeddingConfig();
|
||||||
|
const batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20');
|
||||||
|
const delay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000');
|
||||||
|
|
||||||
|
console.log(`[EMBEDDINGS] Processing ${texts.length} items in batches of ${batchSize}`);
|
||||||
|
|
||||||
|
const results: number[][] = [];
|
||||||
|
|
||||||
|
for (let i = 0; i < texts.length; i += batchSize) {
|
||||||
|
const batch = texts.slice(i, i + batchSize);
|
||||||
|
const batchNum = Math.floor(i/batchSize) + 1;
|
||||||
|
const totalBatches = Math.ceil(texts.length/batchSize);
|
||||||
|
|
||||||
|
console.log(`[EMBEDDINGS] Processing batch ${batchNum}/${totalBatches} (${batch.length} items)`);
|
||||||
|
|
||||||
|
// Log batch content lengths for debugging
|
||||||
|
const batchLengths = batch.map(text => text.length);
|
||||||
|
const batchTotalLength = batchLengths.reduce((a, b) => a + b, 0);
|
||||||
|
console.log(`[EMBEDDINGS] Batch ${batchNum} total chars: ${batchTotalLength}, avg per item: ${Math.round(batchTotalLength / batch.length)}`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await fetch(config.endpoint, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': `Bearer ${config.apiKey}`
|
||||||
|
},
|
||||||
|
body: JSON.stringify({
|
||||||
|
model: config.model,
|
||||||
|
input: batch
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
let errorDetails = `${response.status} ${response.statusText}`;
|
||||||
|
try {
|
||||||
|
const errorBody = await response.text();
|
||||||
|
errorDetails += ` - ${errorBody}`;
|
||||||
|
} catch {
|
||||||
|
// If we can't read the error body, use basic error info
|
||||||
|
}
|
||||||
|
console.error(`Batch embeddings API error (batch ${i/batchSize + 1}):`, errorDetails);
|
||||||
|
throw new Error(`Batch embeddings API error: ${response.status}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = await response.json();
|
||||||
|
const batchEmbeddings = data.data.map((item: any) => item.embedding);
|
||||||
|
results.push(...batchEmbeddings);
|
||||||
|
|
||||||
|
// Add delay between batches to avoid rate limiting
|
||||||
|
if (i + batchSize < texts.length && delay > 0) {
|
||||||
|
await new Promise(resolve => setTimeout(resolve, delay));
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`Failed to process batch ${i/batchSize + 1}:`, error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[EMBEDDINGS] Successfully processed all ${results.length} embeddings`);
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
function cosineSimilarity(a: number[], b: number[]): number {
|
||||||
|
if (a.length !== b.length) return 0;
|
||||||
|
|
||||||
|
let dotProduct = 0;
|
||||||
|
let normA = 0;
|
||||||
|
let normB = 0;
|
||||||
|
|
||||||
|
for (let i = 0; i < a.length; i++) {
|
||||||
|
dotProduct += a[i] * b[i];
|
||||||
|
normA += a[i] * a[i];
|
||||||
|
normB += b[i] * b[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
|
||||||
|
}
|
||||||
|
|
||||||
|
export function createEmbeddingContent(item: any): string {
|
||||||
|
const maxDescriptionLength = 200;
|
||||||
|
const maxTags = 5;
|
||||||
|
const maxTotalLength = 400;
|
||||||
|
|
||||||
|
// Truncate description if too long
|
||||||
|
const description = item.description?.slice(0, maxDescriptionLength) || '';
|
||||||
|
|
||||||
|
// Limit tags
|
||||||
|
const tags = (item.tags || []).slice(0, maxTags).join(', ');
|
||||||
|
|
||||||
|
let content: string;
|
||||||
|
|
||||||
|
if (item.type === 'concept') {
|
||||||
|
content = `${item.name}: ${description}`;
|
||||||
|
if (tags) content += ` | Tags: ${tags}`;
|
||||||
|
if (item.domains?.length) content += ` | Domains: ${item.domains.slice(0, 3).join(', ')}`;
|
||||||
|
} else {
|
||||||
|
content = `${item.name}: ${description}`;
|
||||||
|
if (tags) content += ` | Tags: ${tags}`;
|
||||||
|
if (item.platforms?.length) content += ` | Platforms: ${item.platforms.slice(0, 3).join(', ')}`;
|
||||||
|
if (item.skillLevel) content += ` | Skill: ${item.skillLevel}`;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure total length doesn't exceed limit
|
||||||
|
if (content.length > maxTotalLength) {
|
||||||
|
content = content.slice(0, maxTotalLength - 3) + '...';
|
||||||
|
}
|
||||||
|
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function generateToolEmbeddings(toolsData: any): Promise<ToolEmbedding[]> {
|
||||||
|
const allItems = [
|
||||||
|
...toolsData.tools.map((tool: any) => ({ ...tool, type: 'tool' })),
|
||||||
|
...toolsData.concepts.map((concept: any) => ({ ...concept, type: 'concept' }))
|
||||||
|
];
|
||||||
|
|
||||||
|
console.log(`[EMBEDDINGS] Creating content for ${allItems.length} items`);
|
||||||
|
const contents = allItems.map(createEmbeddingContent);
|
||||||
|
|
||||||
|
// Log content length statistics
|
||||||
|
const lengths = contents.map(c => c.length);
|
||||||
|
const avgLength = lengths.reduce((a, b) => a + b, 0) / lengths.length;
|
||||||
|
const maxLength = Math.max(...lengths);
|
||||||
|
console.log(`[EMBEDDINGS] Content lengths - avg: ${Math.round(avgLength)}, max: ${maxLength}`);
|
||||||
|
|
||||||
|
const embeddings = await generateBatchEmbeddings(contents);
|
||||||
|
|
||||||
|
return allItems.map((item, index) => ({
|
||||||
|
id: `${item.type}_${item.name}`,
|
||||||
|
name: item.name,
|
||||||
|
type: item.type,
|
||||||
|
embedding: embeddings[index],
|
||||||
|
content: contents[index],
|
||||||
|
tool: item
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
export async function findSimilarItems(query: string, toolEmbeddings: ToolEmbedding[]): Promise<ToolEmbedding[]> {
|
||||||
|
const maxCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '30');
|
||||||
|
const threshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.3');
|
||||||
|
|
||||||
|
const queryEmbedding = await generateEmbedding(query);
|
||||||
|
|
||||||
|
const similarities = toolEmbeddings.map(item => ({
|
||||||
|
...item,
|
||||||
|
similarity: cosineSimilarity(queryEmbedding, item.embedding)
|
||||||
|
}));
|
||||||
|
|
||||||
|
return similarities
|
||||||
|
.filter(item => item.similarity >= threshold)
|
||||||
|
.sort((a, b) => b.similarity - a.similarity)
|
||||||
|
.slice(0, maxCandidates);
|
||||||
|
}
|
@ -1,5 +1,3 @@
|
|||||||
// src/utils/rateLimitedQueue.ts
|
|
||||||
|
|
||||||
import dotenv from "dotenv";
|
import dotenv from "dotenv";
|
||||||
|
|
||||||
dotenv.config();
|
dotenv.config();
|
||||||
@ -25,6 +23,11 @@ export interface QueueStatus {
|
|||||||
taskStatus?: string;
|
taskStatus?: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Result of a two-stage chained task: the intermediate output of stage 1
// plus the final output of stage 2 (produced by addChained / enqueueChainedApiCall).
interface ChainedTaskResult<T> {
  stage1Result: any; // untyped: stage 1's shape varies per caller
  finalResult: T;
}
|
||||||
|
|
||||||
class RateLimitedQueue {
|
class RateLimitedQueue {
|
||||||
private tasks: QueuedTask[] = [];
|
private tasks: QueuedTask[] = [];
|
||||||
private isProcessing = false;
|
private isProcessing = false;
|
||||||
@ -60,6 +63,39 @@ class RateLimitedQueue {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
addChained<T>(
|
||||||
|
stage1Task: Task<any>,
|
||||||
|
stage2TaskBuilder: (stage1Result: any) => Task<T>,
|
||||||
|
taskId?: string
|
||||||
|
): Promise<ChainedTaskResult<T>> {
|
||||||
|
const id = taskId || this.generateTaskId();
|
||||||
|
|
||||||
|
return new Promise<ChainedTaskResult<T>>((resolve, reject) => {
|
||||||
|
const chainedTask: QueuedTask = {
|
||||||
|
id,
|
||||||
|
task: async () => {
|
||||||
|
try {
|
||||||
|
const stage1Result = await stage1Task();
|
||||||
|
const stage2Task = stage2TaskBuilder(stage1Result);
|
||||||
|
const finalResult = await stage2Task();
|
||||||
|
|
||||||
|
const result = { stage1Result, finalResult };
|
||||||
|
resolve(result);
|
||||||
|
return result;
|
||||||
|
} catch (err) {
|
||||||
|
reject(err);
|
||||||
|
throw err;
|
||||||
|
}
|
||||||
|
},
|
||||||
|
addedAt: Date.now(),
|
||||||
|
status: 'queued'
|
||||||
|
};
|
||||||
|
|
||||||
|
this.tasks.push(chainedTask);
|
||||||
|
setTimeout(() => this.processQueue(), 100);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
getStatus(taskId?: string): QueueStatus {
|
getStatus(taskId?: string): QueueStatus {
|
||||||
const queuedTasks = this.tasks.filter(t => t.status === 'queued');
|
const queuedTasks = this.tasks.filter(t => t.status === 'queued');
|
||||||
const processingTasks = this.tasks.filter(t => t.status === 'processing');
|
const processingTasks = this.tasks.filter(t => t.status === 'processing');
|
||||||
@ -103,7 +139,6 @@ class RateLimitedQueue {
|
|||||||
const processingOffset = processingTasks.length > 0 ? 1 : 0;
|
const processingOffset = processingTasks.length > 0 ? 1 : 0;
|
||||||
status.currentPosition = processingOffset + positionInQueue + 1;
|
status.currentPosition = processingOffset + positionInQueue + 1;
|
||||||
}
|
}
|
||||||
} else if (task.status === 'completed' || task.status === 'failed') {
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const taskTimestamp = taskId.match(/ai_(\d+)_/)?.[1];
|
const taskTimestamp = taskId.match(/ai_(\d+)_/)?.[1];
|
||||||
@ -152,16 +187,13 @@ class RateLimitedQueue {
|
|||||||
this.currentlyProcessingTaskId = nextTask.id;
|
this.currentlyProcessingTaskId = nextTask.id;
|
||||||
this.lastProcessedAt = Date.now();
|
this.lastProcessedAt = Date.now();
|
||||||
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await nextTask.task();
|
await nextTask.task();
|
||||||
nextTask.status = 'completed';
|
nextTask.status = 'completed';
|
||||||
nextTask.completedAt = Date.now();
|
nextTask.completedAt = Date.now();
|
||||||
console.log(`[QUEUE] Task ${nextTask.id} completed`);
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
nextTask.status = 'failed';
|
nextTask.status = 'failed';
|
||||||
nextTask.completedAt = Date.now();
|
nextTask.completedAt = Date.now();
|
||||||
console.error(`[QUEUE] Task ${nextTask.id} failed:`, error);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
this.currentlyProcessingTaskId = null;
|
this.currentlyProcessingTaskId = null;
|
||||||
@ -169,20 +201,17 @@ class RateLimitedQueue {
|
|||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
const index = this.tasks.findIndex(t => t.id === nextTask.id);
|
const index = this.tasks.findIndex(t => t.id === nextTask.id);
|
||||||
if (index >= 0) {
|
if (index >= 0) {
|
||||||
console.log(`[QUEUE] Removing completed task ${nextTask.id}`);
|
|
||||||
this.tasks.splice(index, 1);
|
this.tasks.splice(index, 1);
|
||||||
}
|
}
|
||||||
}, 10000);
|
}, 10000);
|
||||||
|
|
||||||
const hasMoreQueued = this.tasks.some(t => t.status === 'queued');
|
const hasMoreQueued = this.tasks.some(t => t.status === 'queued');
|
||||||
if (hasMoreQueued) {
|
if (hasMoreQueued) {
|
||||||
console.log(`[QUEUE] Waiting ${this.delayMs}ms before next task`);
|
|
||||||
await new Promise((r) => setTimeout(r, this.delayMs));
|
await new Promise((r) => setTimeout(r, this.delayMs));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
this.isProcessing = false;
|
this.isProcessing = false;
|
||||||
console.log(`[QUEUE] Queue processing finished`);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -197,6 +226,14 @@ export function enqueueApiCall<T>(task: Task<T>, taskId?: string): Promise<T> {
|
|||||||
return queue.add(task, taskId);
|
return queue.add(task, taskId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Enqueues a dependent pair of API calls on the shared rate-limited queue.
 * Stage 1 runs first; its result is passed to `stage2TaskBuilder`, whose
 * task then runs immediately after, within the same queue slot.
 *
 * @param stage1Task - first task to execute
 * @param stage2TaskBuilder - builds the second task from stage 1's result
 * @param taskId - optional explicit id; auto-generated when omitted
 * @returns both stage results once stage 2 finishes
 */
export function enqueueChainedApiCall<T>(
  stage1Task: Task<any>,
  stage2TaskBuilder: (stage1Result: any) => Task<T>,
  taskId?: string
): Promise<ChainedTaskResult<T>> {
  return queue.addChained(stage1Task, stage2TaskBuilder, taskId);
}
|
||||||
|
|
||||||
/**
 * Reports the current state of the shared rate-limited queue, optionally
 * scoped to a single task.
 *
 * @param taskId - id of the task to look up; omit for overall queue status
 * @returns status as computed by RateLimitedQueue.getStatus
 */
export function getQueueStatus(taskId?: string): QueueStatus {
  return queue.getStatus(taskId);
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user