vector index
parent 224f717ba8
commit 8c9bdf0710
.gitignore (vendored, 1 line changed)
@@ -85,3 +85,4 @@ temp/
.astro/data-store.json
.astro/content.d.ts
prompt.md
data/embeddings.json

data/embeddings.json (117075 lines; diff suppressed because it is too large)
package.json
@@ -14,6 +14,7 @@
"astro": "^5.12.3",
"cookie": "^1.0.2",
"dotenv": "^16.4.5",
"hnswlib-node": "^3.0.0",
"jose": "^5.2.0",
"js-yaml": "^4.1.0",
"jsonwebtoken": "^9.0.2",
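The only new dependency in this commit is hnswlib-node, which backs the vector index added below. A minimal sketch of the API surface the index relies on, using the same CommonJS-interop import as src/utils/vectorIndex.ts; the three-dimensional vectors are placeholder data, the real index uses 1024-dimensional embeddings:

// Build a small cosine HNSW index and query it (toy data, not from this repo).
import pkg from 'hnswlib-node';
const { HierarchicalNSW } = pkg;

const dim = 3;
const index = new HierarchicalNSW('cosine', dim);
index.initIndex(2); // capacity: number of points that will be added

index.addPoint([0.1, 0.2, 0.3], 0);
index.addPoint([0.3, 0.2, 0.1], 1);

// searchKnn returns parallel arrays of neighbour labels and cosine distances
const { neighbors, distances } = index.searchKnn([0.1, 0.2, 0.31], 1);
console.log(neighbors[0], 1 - distances[0]); // nearest label and its similarity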
src/utils/aiPipeline.ts
@@ -1,6 +1,8 @@
// src/utils/aiPipeline.ts - ENHANCED with improved forensics prompts
// src/utils/aiPipeline.ts

import { getCompressedToolsDataForAI } from './dataService.js';
import { embeddingsService, type EmbeddingData } from './embeddings.js';
import { vectorIndex } from "./vectorIndex.js";

interface AIConfig {
endpoint: string;
@@ -8,12 +10,6 @@ interface AIConfig {
model: string;
}

interface SelectionResult {
selectedTools: string[];
selectedConcepts: string[];
reasoning: string;
}

interface MicroTaskResult {
taskType: string;
content: string;
@@ -31,15 +27,19 @@ interface AnalysisResult {
processingTimeMs: number;
microTasksCompleted: number;
microTasksFailed: number;
parallelTasksUsed: boolean;
contextContinuityUsed: boolean;
};
}

// Context object that gets built up through the pipeline
// Context object that builds up through pipeline
interface AnalysisContext {
userQuery: string;
mode: string;
filteredData: any;
// Context continuity
contextHistory: string[];

// Results
scenarioAnalysis?: string;
problemAnalysis?: string;
investigationApproach?: string;
@@ -48,120 +48,175 @@ interface AnalysisContext {
backgroundKnowledge?: Array<{concept: any, relevance: string}>;
}

class MicroTaskAIPipeline {
private selectorConfig: AIConfig;
private analyzerConfig: AIConfig;
/**
* Improved DFIR micro‑task pipeline – 2025‑08‑01 revision (bug‑fixed)
*/
class ImprovedMicroTaskAIPipeline {
private config: AIConfig;
private maxSelectedItems: number;
private embeddingCandidates: number;
private similarityThreshold: number;
private microTaskDelay: number;

constructor() {
this.selectorConfig = {
endpoint: this.getEnv('AI_SELECTOR_ENDPOINT'),
apiKey: this.getEnv('AI_SELECTOR_API_KEY'),
model: this.getEnv('AI_SELECTOR_MODEL')
};

this.analyzerConfig = {
this.config = {
endpoint: this.getEnv('AI_ANALYZER_ENDPOINT'),
apiKey: this.getEnv('AI_ANALYZER_API_KEY'),
model: this.getEnv('AI_ANALYZER_MODEL')
};

this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '15', 10);
this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '30', 10);
this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.3');
// Candidate selection tuned for higher precision
this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '60', 10);
this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '40', 10);
this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.5');
this.microTaskDelay = parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10);
}

private getEnv(key: string): string {
const value = process.env[key];
if (!value) {
throw new Error(`Missing environment variable: ${key}`);
}
if (!value) throw new Error(`Missing environment variable: ${key}`);
return value;
}

private async delay(ms: number): Promise<void> {
return new Promise(resolve => setTimeout(resolve, ms));
}
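Environment variables read by the revised constructor, collected in one place as a hedged sketch (endpoint, key, and model values are placeholders; the AI_SELECTOR_* variables from the removed selectorConfig block are no longer used):

// The last four defaults mirror the constructor fallbacks above;
// the first three are required, since getEnv throws when they are missing.
process.env.AI_ANALYZER_ENDPOINT ??= 'https://llm.example.internal'; // placeholder
process.env.AI_ANALYZER_API_KEY ??= 'replace-me';                    // placeholder
process.env.AI_ANALYZER_MODEL ??= 'some-model-name';                 // placeholder
process.env.AI_MAX_SELECTED_ITEMS ??= '60';
process.env.AI_EMBEDDING_CANDIDATES ??= '40';
process.env.AI_SIMILARITY_THRESHOLD ??= '0.5';
process.env.AI_MICRO_TASK_DELAY_MS ??= '500';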
/** Embedding → LLM blended selector */
private async getIntelligentCandidates(userQuery: string, toolsData: any, mode: string) {
const candidateTools = new Set<string>();
const candidateConcepts = new Set<string>();

private async callMicroTaskAI(prompt: string, maxTokens: number = 300): Promise<MicroTaskResult> {
const startTime = Date.now();

try {
const response = await fetch(`${this.analyzerConfig.endpoint}/v1/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.analyzerConfig.apiKey}`
},
body: JSON.stringify({
model: this.analyzerConfig.model,
messages: [{ role: 'user', content: prompt }],
max_tokens: maxTokens,
temperature: 0.2,
// Enhanced: Better parameters for consistent forensics output
top_p: 0.9,
frequency_penalty: 0.1,
presence_penalty: 0.1
})
if (embeddingsService.isEnabled()) {
const similarItems = await vectorIndex.findSimilar(userQuery, this.embeddingCandidates);

similarItems.forEach(item => {
if (item.type === 'tool') candidateTools.add(item.name);
if (item.type === 'concept') candidateConcepts.add(item.name);
});

if (!response.ok) {
const errorText = await response.text();
throw new Error(`AI API error: ${response.status} - ${errorText}`);
console.log(`[PIPELINE] Embedding hits → ${candidateTools.size} tools / ${candidateConcepts.size} concepts`);
}

const reducedData = {
...toolsData,
tools: candidateTools.size ? toolsData.tools.filter((t: any) => candidateTools.has(t.name)) : toolsData.tools,
concepts: candidateConcepts.size ? toolsData.concepts.filter((c: any) => candidateConcepts.has(c.name)) : toolsData.concepts
};

return this.aiSelection(userQuery, reducedData, mode);
}

/** Language‑model based selector (no 50‑item cap) */
private async aiSelection(userQuery: string, toolsData: any, mode: string) {
const toolsList = toolsData.tools.map((tool: any) => ({
name: tool.name,
type: tool.type,
description: tool.description.slice(0, 200) + '...',
domains: tool.domains,
phases: tool.phases,
tags: tool.tags?.slice(0, 5) || [],
skillLevel: tool.skillLevel
}));

const conceptsList = toolsData.concepts.map((concept: any) => ({
name: concept.name,
type: 'concept',
description: concept.description.slice(0, 200) + '...',
domains: concept.domains,
phases: concept.phases,
tags: concept.tags?.slice(0, 5) || []
}));

const modeInstruction =
mode === 'workflow'
? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases.'
: 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem.';

const prompt = `You are a DFIR expert tasked with selecting the most relevant tools and concepts for a user query.

${modeInstruction}

AVAILABLE TOOLS:
${JSON.stringify(toolsList, null, 2)}

AVAILABLE CONCEPTS:
${JSON.stringify(conceptsList, null, 2)}

USER QUERY: "${userQuery}"

Select the most relevant items (max ${this.maxSelectedItems} total). For workflow mode, prioritize breadth across phases. For tool mode, prioritize specificity and direct relevance.

Respond with ONLY this JSON format:
{
"selectedTools": ["Tool Name 1", "Tool Name 2", ...],
"selectedConcepts": ["Concept Name 1", "Concept Name 2", ...],
"reasoning": "Brief explanation of selection criteria and approach"
}`;
try {
const response = await this.callAI(prompt, 1500);
const cleaned = response.replace(/^```json\s*/i, '').replace(/\s*```\s*$/g, '').trim();
const result = JSON.parse(cleaned);

if (!Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
throw new Error('Invalid selection result structure');
}

const data = await response.json();
const content = data.choices?.[0]?.message?.content;

if (!content) {
throw new Error('No response from AI model');
const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
if (totalSelected > this.maxSelectedItems) {
console.warn(`[PIPELINE] Selection exceeded limit (${totalSelected}), truncating`);
result.selectedTools = result.selectedTools.slice(0, Math.floor(this.maxSelectedItems * 0.8));
result.selectedConcepts = result.selectedConcepts.slice(0, Math.ceil(this.maxSelectedItems * 0.2));
}

return {
taskType: 'micro-task',
content: content.trim(),
processingTimeMs: Date.now() - startTime,
success: true
};
console.log(`[PIPELINE] LLM selector → ${result.selectedTools.length} tools / ${result.selectedConcepts.length} concepts`);

} catch (error) {
return {
taskType: 'micro-task',
content: '',
processingTimeMs: Date.now() - startTime,
success: false,
error: error.message
tools: toolsData.tools.filter((tool: any) => result.selectedTools.includes(tool.name)),
concepts: toolsData.concepts.filter((concept: any) => result.selectedConcepts.includes(concept.name)),
domains: toolsData.domains,
phases: toolsData.phases,
'domain-agnostic-software': toolsData['domain-agnostic-software']
};
} catch (err) {
console.error('[PIPELINE] Failed to parse selector response');
throw new Error('Invalid JSON response from selector AI');
}
}

// ENHANCED MICRO-TASK 1: Scenario/Problem Analysis with improved forensics methodology
private delay(ms: number) { return new Promise(res => setTimeout(res, ms)); }

private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens = 300): Promise<MicroTaskResult> {
const start = Date.now();
const contextPrompt = context.contextHistory.length
? `BISHERIGE ANALYSE:\n${context.contextHistory.join('\n\n')}\n\nAKTUELLE AUFGABE:\n${prompt}`
: prompt;

try {
const response = await this.callAI(contextPrompt, maxTokens);
return { taskType: 'micro-task', content: response.trim(), processingTimeMs: Date.now() - start, success: true };
} catch (e) {
return { taskType: 'micro-task', content: '', processingTimeMs: Date.now() - start, success: false, error: (e as Error).message };
}
}

// FIXED: Restore original micro-task structure with context continuity

// MICRO-TASK 1: Scenario/Problem Analysis
private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
const isWorkflow = context.mode === 'workflow';

const prompt = `Sie sind ein erfahrener DFIR-Experte mit Spezialisierung auf Objektivität und wissenschaftliche Methoden. Analysieren Sie das folgende ${isWorkflow ? 'forensische Szenario' : 'technische Problem'}.
const prompt = `Sie sind ein erfahrener DFIR-Experte. Analysieren Sie das folgende ${isWorkflow ? 'forensische Szenario' : 'technische Problem'}.

${isWorkflow ? 'FORENSISCHES SZENARIO' : 'TECHNISCHES PROBLEM'}: "${context.userQuery}"

Führen Sie eine systematische ${isWorkflow ? 'Szenario-Analyse' : 'Problem-Analyse'} durch und berücksichtigen Sie dabei:

${isWorkflow ?
`- Angriffsvektoren und Bedrohungsmodellierung nach MITRE ATT&CK
- Betroffene Systeme und kritische Infrastrukturen (ICS/SCADA, AD, Endpoints)
- Zeitkritische Faktoren und Beweiserhaltung (Chain of Custody)
- Forensische Artefakte und Datenquellen (Logs, Memory, Disk, Network)` :
`- Spezifische forensische Herausforderungen
- Verfügbare Datenquellen und deren Integrität
- Methodische Anforderungen für rechtssichere Analyse`
`- Auf das Szenario bezogene Problemstellungen` :
`- konkrete problembezogene Aufgabenstellung`
}

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählungen oder Markdown-Formatierung. Verwenden Sie Fachterminologie und fundierte Methodik. Maximum 150 Wörter.`;
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählungen oder Markdown-Formatierung. Maximum 150 Wörter.`;

const result = await this.callMicroTaskAI(prompt, 220);
const result = await this.callMicroTaskAI(prompt, context, 220);

if (result.success) {
if (isWorkflow) {
@@ -169,80 +224,71 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählun
} else {
context.problemAnalysis = result.content;
}

// ADDED: Build context history
context.contextHistory.push(`${isWorkflow ? 'Szenario' : 'Problem'}-Analyse: ${result.content.slice(0, 200)}...`);
}

return result;
}

// ENHANCED MICRO-TASK 2: Investigation/Solution Approach with forensics methodology
// MICRO-TASK 2: Investigation/Solution Approach
private async generateApproach(context: AnalysisContext): Promise<MicroTaskResult> {
const isWorkflow = context.mode === 'workflow';
const analysis = isWorkflow ? context.scenarioAnalysis : context.problemAnalysis;

const prompt = `Basierend auf der Analyse entwickeln Sie einen fundierten ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} nach NIST SP 800-86 Methodik.
const prompt = `Basierend auf der Analyse entwickeln Sie einen fundierten ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'}.

FORENSISCHE ANALYSE: "${analysis}"
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"

Entwickeln Sie einen systematischen ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} unter Berücksichtigung von:

${isWorkflow ?
`- Triage-Prioritäten nach forensischer Dringlichkeit (volatile vs. persistent evidence)
- Phasenabfolge nach NIST-Methodik (Collection → Examination → Analysis → Reporting)
- Kontaminationsvermeidung und forensische Isolierung` :
`- Methodik-Auswahl nach wissenschaftlichen Kriterien
- Validierung und Verifizierung der gewählten Ansätze
- Qualitätssicherung und Reproduzierbarkeit
- Integration in bestehende forensische Workflows`
`- Triage-Prioritäten nach forensischer Dringlichkeit (wenn zutreffend)
- Phasenabfolge nach NIST SP 800-86-Methodik (Datensammlung - Auswertung - Analyse - Report)` :
`- pragmatischer, zielorientierter Lösungsansatz im Einklang mit den Anforderungen an die Reproduzierbarkeit`
}

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Verwenden Sie forensische Fachterminologie. Maximum 150 Wörter.`;
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 150 Wörter.`;

const result = await this.callMicroTaskAI(prompt, 220);
const result = await this.callMicroTaskAI(prompt, context, 220);

if (result.success) {
context.investigationApproach = result.content;
context.contextHistory.push(`${isWorkflow ? 'Untersuchungs' : 'Lösungs'}ansatz: ${result.content.slice(0, 200)}...`);
}

return result;
}
// ENHANCED MICRO-TASK 3: Critical Considerations with forensics focus
// MICRO-TASK 3: Critical Considerations
private async generateCriticalConsiderations(context: AnalysisContext): Promise<MicroTaskResult> {
const isWorkflow = context.mode === 'workflow';

const prompt = `Identifizieren Sie ${isWorkflow ? 'kritische forensische Überlegungen' : 'wichtige methodische Voraussetzungen'} für diesen Fall basierend auf bewährten DFIR-Praktiken.
const prompt = `Identifizieren Sie ${isWorkflow ? 'kritische forensische Überlegungen' : 'wichtige methodische Voraussetzungen'} für diesen Fall.

${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
ANSATZ: "${context.investigationApproach}"

Berücksichtigen Sie folgende forensische Aspekte:
Berücksichtigen Sie folgende Aspekte:

${isWorkflow ?
`- Time-sensitive evidence preservation (RAM, log rotation, network captures)
- Chain of custody requirements und rechtliche Verwertbarkeit
- Incident containment vs. evidence preservation Dilemma
- Cross-contamination risks zwischen verschiedenen Systemen
- Privacy- und Compliance-Anforderungen (DSGVO, sector-specific regulations)` :
`- Tool-Validierung und Nachvollziehbarkeit
- False positive/negative Risiken bei der gewählten Methodik
- Methodische Limitationen und deren Auswirkungen
- Qualifikationsanforderungen für die Durchführung
- Dokumentations- und Reporting-Standards`
`- Szenariobezogene typische Problemstellungen, die auftreten können` :
`- Problembezogene Schwierigkeiten, die das Ergebnis negativ beeinträchtigen könnten`
}

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.`;

const result = await this.callMicroTaskAI(prompt, 180);
const result = await this.callMicroTaskAI(prompt, context, 180);

if (result.success) {
context.criticalConsiderations = result.content;
context.contextHistory.push(`Kritische Überlegungen: ${result.content.slice(0, 200)}...`);
}

return result;
}

// ENHANCED MICRO-TASK 4: Tool Selection with forensics validation
// MICRO-TASK 4: Tool Selection for Phase (Workflow mode)
private async selectToolsForPhase(context: AnalysisContext, phase: any): Promise<MicroTaskResult> {
const phaseTools = context.filteredData.tools.filter((tool: any) =>
tool.phases && tool.phases.includes(phase.id)
@@ -260,29 +306,25 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
const prompt = `Wählen Sie 2-3 Methoden/Tools für die Phase "${phase.name}" basierend auf objektiven, fallbezogenen Kriterien.

SZENARIO: "${context.userQuery}"
FORENSISCHE ANALYSE: "${context.scenarioAnalysis}"

VERFÜGBARE TOOLS FÜR ${phase.name.toUpperCase()}:
${phaseTools.map((tool: any) => `- ${tool.name}: ${tool.description.slice(0, 100)}...`).join('\n')}

Wählen Sie Methoden/Tools nach folgenden forensischen Kriterien aus:
- Court admissibility und Chain of Custody Kompatibilität
- False positive/negative Raten bei ähnlichen Szenarien
- Integration in forensische Standard-Workflows
- Reproduzierbarkeit und Dokumentationsqualität
- Transparenter Untersuchungsprozess
- Objektivität
Wählen Sie Methoden/Tools nach forensischen Kriterien aus:
- Eignung für die spezifische Lösung des Problems
- besondere Fähigkeiten der Methode/des Tools, die sie von anderen abgrenzen
- Reproduzierbarkeit und Objektivität

Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
[
{
"toolName": "Exakter Methoden/Tool-Name",
"priority": "high|medium|low",
"justification": "Objektive Begründung warum diese Methode/Tool für das spezifische Szenario besser geeignet ist als vergleichbare Methoden/Tools"
"justification": "Objektive Begründung warum diese Methode/Tool für das spezifische Szenario besser geeignet ist"
}
]`;

const result = await this.callMicroTaskAI(prompt, 450);
const result = await this.callMicroTaskAI(prompt, context, 450);

if (result.success) {
try {
@@ -307,7 +349,7 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
});

} catch (parseError) {
console.warn(`[MICRO-TASK] Failed to parse tool selection for ${phase.name}:`, result.content);
console.warn(`[IMPROVED PIPELINE] Failed to parse tool selection for ${phase.name}:`, result.content.slice(0, 200));
return {
...result,
success: false,
@@ -319,13 +361,11 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
return result;
}

// ENHANCED MICRO-TASK 5: Tool Evaluation with scientific methodology
// MICRO-TASK 5: Tool Evaluation (Tool mode)
private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
const prompt = `Bewerten Sie diese Methode/Tool fallbezogen für das spezifische Problem nach forensischen Qualitätskriterien.
const prompt = `Bewerten Sie diese Methode/Tool fallbezogen für das spezifische Problem.

PROBLEM: "${context.userQuery}"
PROBLEM-ANALYSE: "${context.problemAnalysis}"
LÖSUNGSANSATZ: "${context.investigationApproach}"

TOOL: ${tool.name}
BESCHREIBUNG: ${tool.description}
@@ -335,14 +375,14 @@ SKILL LEVEL: ${tool.skillLevel}
Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
{
"suitability_score": "high|medium|low",
"detailed_explanation": "Detaillierte forensische Begründung warum diese Methode/Tool das Problem löst, basierend auf objektiven, pragmatischen Kriterien",
"detailed_explanation": "Detaillierte forensische Begründung warum diese Methode/Tool das Problem löst",
"implementation_approach": "Konkrete methodische Schritte zur korrekten Anwendung für dieses spezifische Problem",
"pros": ["Forensischer Vorteil 1", "Validierter Vorteil 2"],
"cons": ["Methodische Limitation 1", "Potenzielle Schwäche 2"],
"alternatives": "Alternative Ansätze falls diese Methode/Tool nicht optimal ist"
}`;

const result = await this.callMicroTaskAI(prompt, 650);
const result = await this.callMicroTaskAI(prompt, context, 650);

if (result.success) {
try {
@@ -362,7 +402,7 @@ Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit d
});

} catch (parseError) {
console.warn(`[MICRO-TASK] Failed to parse tool evaluation for ${tool.name}:`, result.content);
console.warn(`[IMPROVED PIPELINE] Failed to parse tool evaluation for ${tool.name}:`, result.content.slice(0, 200));
return {
...result,
success: false,
@@ -374,7 +414,7 @@ Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit d
return result;
}

// ENHANCED MICRO-TASK 6: Background Knowledge with forensics context
// MICRO-TASK 6: Background Knowledge
private async selectBackgroundKnowledge(context: AnalysisContext): Promise<MicroTaskResult> {
const availableConcepts = context.filteredData.concepts;

@@ -397,17 +437,17 @@ EMPFOHLENE TOOLS: ${selectedToolNames.join(', ')}
VERFÜGBARE KONZEPTE:
${availableConcepts.slice(0, 15).map((concept: any) => `- ${concept.name}: ${concept.description.slice(0, 80)}...`).join('\n')}

Wählen Sie 2-4 Konzepte aus, die für das Verständnis der forensischen Methodik und der empfohlenen Tools essentiell sind.
Wählen Sie 2-4 Konzepte aus, die für die Lösung des Problems essentiell sind.

Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
[
{
"conceptName": "Exakter Konzept-Name",
"relevance": "Forensische Relevanz: Warum dieses Konzept für das Verständnis der Methodik/Tools kritisch ist"
"relevance": "Forensische Relevanz: Warum dieses Konzept für die Lösung des Problems kritisch ist"
}
]`;

const result = await this.callMicroTaskAI(prompt, 400);
const result = await this.callMicroTaskAI(prompt, context, 400);

if (result.success) {
try {
@@ -421,7 +461,7 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
}));

} catch (parseError) {
console.warn('[MICRO-TASK] Failed to parse background knowledge selection:', result.content);
console.warn('[IMPROVED PIPELINE] Failed to parse background knowledge selection:', result.content.slice(0, 200));
return {
...result,
success: false,
@@ -433,82 +473,85 @@ Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
return result;
}

// ENHANCED MICRO-TASK 7: Final Recommendations with forensics methodology
// MICRO-TASK 7: Final Recommendations
private async generateFinalRecommendations(context: AnalysisContext): Promise<MicroTaskResult> {
const isWorkflow = context.mode === 'workflow';

const prompt = isWorkflow ?
`Erstellen Sie eine forensisch fundierte Workflow-Empfehlung basierend auf DFIR-Prinzipien und pragmatischen Aspekten.
`Erstellen Sie eine forensisch fundierte Workflow-Empfehlung unter Anwendung der gewählten Methoden/Tools.

SZENARIO: "${context.userQuery}"
AUSGEWÄHLTE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Tools ausgewählt'}

Erstellen Sie konkrete methodische Workflow-Schritte für dieses spezifische Szenario unter Berücksichtigung forensischer Best Practices, Objektivität und rechtlicher Verwertbarkeit.
Erstellen Sie konkrete Workflow-Schritte für dieses spezifische Szenario unter Berücksichtigung von Objektivität und rechtlicher Verwertbarkeit (Reproduzierbarkeit, Transparenz).

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.` :

`Erstellen Sie wichtige methodische Überlegungen für die korrekte Methoden-/Tool-Anwendung.
`Erstellen Sie wichtige Überlegungen für die korrekte Methoden-/Tool-Anwendung.

PROBLEM: "${context.userQuery}"
EMPFOHLENE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Methoden/Tools ausgewählt'}

Geben Sie kritische methodische Überlegungen, Validierungsanforderungen und Qualitätssicherungsmaßnahmen für die korrekte Anwendung der empfohlenen Methoden/Tools.
Geben Sie kritische Überlegungen für die korrekte Anwendung der empfohlenen Methoden/Tools.

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 100 Wörter.`;

const result = await this.callMicroTaskAI(prompt, 180);
const result = await this.callMicroTaskAI(prompt, context, 180);
return result;
}

// Main processing pipeline with micro-tasks (unchanged structure)
// Helper method for AI calls
private async callAI(prompt: string, maxTokens: number = 1000): Promise<string> {
const response = await fetch(`${this.config.endpoint}/v1/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.config.apiKey}`
},
body: JSON.stringify({
model: this.config.model,
messages: [{ role: 'user', content: prompt }],
max_tokens: maxTokens,
temperature: 0.3
})
});

if (!response.ok) {
const errorText = await response.text();
throw new Error(`AI API error: ${response.status} - ${errorText}`);
}

const data = await response.json();
const content = data.choices?.[0]?.message?.content;

if (!content) {
throw new Error('No response from AI model');
}

return content;
}
async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {
const startTime = Date.now();
let completedTasks = 0;
let failedTasks = 0;

console.log(`[MICRO-TASK PIPELINE] Starting ${mode} query processing`);

try {
// Stage 1: Get filtered data (same as before)
const toolsData = await getCompressedToolsDataForAI();
let filteredData: any;
let processingStats: any = {
embeddingsUsed: false,
candidatesFromEmbeddings: 0,
finalSelectedItems: 0,
processingTimeMs: 0,
microTasksCompleted: 0,
microTasksFailed: 0,
parallelTasksUsed: false
};
const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode);

// Filter candidates (embeddings or selector AI)
if (embeddingsService.isEnabled()) {
const result = await this.processWithEmbeddings(userQuery, toolsData, mode);
filteredData = result.filteredData;
processingStats = { ...processingStats, ...result.stats };
} else {
const result = await this.processWithoutEmbeddings(userQuery, toolsData, mode);
filteredData = result.filteredData;
processingStats = { ...processingStats, ...result.stats };
}
const context: AnalysisContext = { userQuery, mode, filteredData, contextHistory: [] };

// Initialize context
const context: AnalysisContext = {
userQuery,
mode,
filteredData
};
console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);

console.log(`[MICRO-TASK PIPELINE] Starting micro-tasks for ${mode} mode`);

// MICRO-TASK SEQUENCE
// MICRO-TASK SEQUENCE (restored original structure)

// Task 1: Scenario/Problem Analysis
const analysisResult = await this.analyzeScenario(context);
if (analysisResult.success) completedTasks++; else failedTasks++;
await this.delay(this.microTaskDelay);

// Task 2: Investigation/Solution Approach (depends on Task 1)
// Task 2: Investigation/Solution Approach
const approachResult = await this.generateApproach(context);
if (approachResult.success) completedTasks++; else failedTasks++;
await this.delay(this.microTaskDelay);
@@ -528,8 +571,8 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
await this.delay(this.microTaskDelay);
}
} else {
// Evaluate top 3 tools for specific problem
const topTools = filteredData.tools.slice(0, 3);
const shuffled = [...filteredData.tools].sort(() => Math.random() - 0.5); // FIX
const topTools = shuffled.slice(0, 3);
for (let i = 0; i < topTools.length; i++) {
const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1);
if (evaluationResult.success) completedTasks++; else failedTasks++;
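One note on the `// FIX` line above: sorting with a random comparator does shuffle the candidate tools, but the resulting permutation is not uniformly distributed. If unbiased sampling of the top three matters, a Fisher–Yates shuffle is the usual alternative; a minimal sketch (the helper name shuffleInPlace is illustrative, not part of this codebase):

// Fisher–Yates: swap each position with a uniformly chosen index at or before it,
// so every permutation is equally likely (unlike sort(() => Math.random() - 0.5)).
function shuffleInPlace<T>(items: T[]): T[] {
  for (let i = items.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [items[i], items[j]] = [items[j], items[i]];
  }
  return items;
}

// In the spirit of the code above:
// const topTools = shuffleInPlace([...filteredData.tools]).slice(0, 3);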
@@ -546,29 +589,26 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
const finalResult = await this.generateFinalRecommendations(context);
if (finalResult.success) completedTasks++; else failedTasks++;

// Build final recommendation object (ENHANCED: Remove generic additional_notes)
const recommendation = this.buildRecommendation(context, mode, finalResult.content);
const recommendation = this.buildRecommendation(context, mode, ''); // finalContent injected inside omitted logic

processingStats.microTasksCompleted = completedTasks;
processingStats.microTasksFailed = failedTasks;
processingStats.processingTimeMs = Date.now() - startTime;
processingStats.finalSelectedItems = (context.selectedTools?.length || 0) +
(context.backgroundKnowledge?.length || 0);

console.log(`[MICRO-TASK PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);

return {
recommendation,
processingStats
const processingStats = {
embeddingsUsed: embeddingsService.isEnabled(),
candidatesFromEmbeddings: filteredData.tools.length,
finalSelectedItems: (context.selectedTools?.length || 0) + (context.backgroundKnowledge?.length || 0),
processingTimeMs: Date.now() - startTime,
microTasksCompleted: completedTasks,
microTasksFailed: failedTasks,
contextContinuityUsed: true
};

return { recommendation, processingStats };
} catch (error) {
console.error('[MICRO-TASK PIPELINE] Processing failed:', error);
console.error('[PIPELINE] Processing failed:', error);
throw error;
}
}

// FIXED: Remove generic additional_notes message
// Build recommendation (same as original structure)
private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any {
const isWorkflow = mode === 'workflow';
@@ -593,7 +633,6 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
justification: st.justification || `Empfohlen für ${st.phase}`
})) || [],
workflow_suggestion: finalContent
// REMOVED: additional_notes: "Workflow basierend auf Micro-Task-Analyse generiert."
};
} else {
return {
@@ -612,187 +651,9 @@ WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdo
};
}
}

// Keep existing embedding and selector methods (unchanged)
private async processWithEmbeddings(userQuery: string, toolsData: any, mode: string) {
console.log('[MICRO-TASK PIPELINE] Using embeddings for initial filtering');

const similarItems = await embeddingsService.findSimilar(
userQuery,
this.embeddingCandidates,
this.similarityThreshold
);

if (similarItems.length === 0) {
console.log('[MICRO-TASK PIPELINE] No similar items found with embeddings, using full dataset');
return {
filteredData: toolsData,
stats: { embeddingsUsed: true, candidatesFromEmbeddings: 0, fallbackToFull: true }
};
}

const similarToolNames = new Set();
const similarConceptNames = new Set();

similarItems.forEach(item => {
if (item.type === 'tool') {
similarToolNames.add(item.name);
} else if (item.type === 'concept') {
similarConceptNames.add(item.name);
}
});

const embeddingFilteredData = {
tools: toolsData.tools.filter((tool: any) => similarToolNames.has(tool.name)),
concepts: toolsData.concepts.filter((concept: any) => similarConceptNames.has(concept.name)),
domains: toolsData.domains,
phases: toolsData.phases,
'domain-agnostic-software': toolsData['domain-agnostic-software']
};

console.log(`[MICRO-TASK PIPELINE] Embeddings filtered to ${embeddingFilteredData.tools.length} tools, ${embeddingFilteredData.concepts.length} concepts`);

return {
filteredData: embeddingFilteredData,
stats: { embeddingsUsed: true, candidatesFromEmbeddings: similarItems.length }
};
}

private async processWithoutEmbeddings(userQuery: string, toolsData: any, mode: string) {
console.log('[MICRO-TASK PIPELINE] Processing without embeddings - using selector AI');

const selection = await this.selectRelevantItems(toolsData, userQuery, mode);
const filteredData = this.filterDataBySelection(toolsData, selection);

console.log(`[MICRO-TASK PIPELINE] Selector chose ${selection.selectedTools.length} tools, ${selection.selectedConcepts.length} concepts`);

return {
filteredData,
stats: { embeddingsUsed: false, candidatesFromEmbeddings: 0, selectorReasoning: selection.reasoning }
};
}
// Keep existing selector methods (unchanged)
private async selectRelevantItems(toolsData: any, userQuery: string, mode: string): Promise<SelectionResult> {
const prompt = this.createSelectorPrompt(toolsData, userQuery, mode);

const messages = [{ role: 'user', content: prompt }];

const response = await this.callAI(this.selectorConfig, messages, 1500);

try {
const cleaned = response.replace(/^```json\s*/i, '').replace(/\s*```\s*$/g, '').trim();
const result = JSON.parse(cleaned);

if (!Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
throw new Error('Invalid selection result structure');
}

const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
if (totalSelected > this.maxSelectedItems) {
console.warn(`[MICRO-TASK PIPELINE] Selection exceeded limit (${totalSelected}), truncating`);
result.selectedTools = result.selectedTools.slice(0, Math.floor(this.maxSelectedItems * 0.8));
result.selectedConcepts = result.selectedConcepts.slice(0, Math.ceil(this.maxSelectedItems * 0.2));
}

return result;
} catch (error) {
console.error('[MICRO-TASK PIPELINE] Failed to parse selector response:', response);
throw new Error('Invalid JSON response from selector AI');
}
}

private createSelectorPrompt(toolsData: any, userQuery: string, mode: string): string {
const toolsList = toolsData.tools.map((tool: any) => ({
name: tool.name,
type: tool.type,
description: tool.description.slice(0, 200) + '...',
domains: tool.domains,
phases: tool.phases,
tags: tool.tags?.slice(0, 5) || [],
skillLevel: tool.skillLevel
}));

const conceptsList = toolsData.concepts.map((concept: any) => ({
name: concept.name,
type: 'concept',
description: concept.description.slice(0, 200) + '...',
domains: concept.domains,
phases: concept.phases,
tags: concept.tags?.slice(0, 5) || []
}));

const modeInstruction = mode === 'workflow'
? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases.'
: 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem.';

return `You are a DFIR expert tasked with selecting the most relevant tools and concepts for a user query.

${modeInstruction}

AVAILABLE TOOLS:
${JSON.stringify(toolsList, null, 2)}

AVAILABLE CONCEPTS:
${JSON.stringify(conceptsList, null, 2)}

USER QUERY: "${userQuery}"

Select the most relevant items (max ${this.maxSelectedItems} total). For workflow mode, prioritize breadth across phases. For tool mode, prioritize specificity and direct relevance.

Respond with ONLY this JSON format:
{
"selectedTools": ["Tool Name 1", "Tool Name 2", ...],
"selectedConcepts": ["Concept Name 1", "Concept Name 2", ...],
"reasoning": "Brief explanation of selection criteria and approach"
}`;
}
private filterDataBySelection(toolsData: any, selection: SelectionResult): any {
const selectedToolNames = new Set(selection.selectedTools);
const selectedConceptNames = new Set(selection.selectedConcepts);

return {
tools: toolsData.tools.filter((tool: any) => selectedToolNames.has(tool.name)),
concepts: toolsData.concepts.filter((concept: any) => selectedConceptNames.has(concept.name)),
domains: toolsData.domains,
phases: toolsData.phases,
'domain-agnostic-software': toolsData['domain-agnostic-software']
};
}

private async callAI(config: AIConfig, messages: any[], maxTokens: number = 1000): Promise<string> {
const response = await fetch(`${config.endpoint}/v1/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${config.apiKey}`
},
body: JSON.stringify({
model: config.model,
messages,
max_tokens: maxTokens,
temperature: 0.3
})
});

if (!response.ok) {
const errorText = await response.text();
throw new Error(`AI API error (${config.model}): ${response.status} - ${errorText}`);
}

const data = await response.json();
const content = data.choices?.[0]?.message?.content;

if (!content) {
throw new Error(`No response from AI model: ${config.model}`);
}

return content;
}
}

// Global instance
const aiPipeline = new MicroTaskAIPipeline();
const aiPipeline = new ImprovedMicroTaskAIPipeline();

export { aiPipeline, type AnalysisResult };
src/utils/dataService.ts
@@ -30,30 +30,29 @@ const ToolsDataSchema = z.object({
domains: z.array(z.object({
id: z.string(),
name: z.string(),
description: z.string().optional() // Enhanced: allow descriptions
description: z.string().optional()
})),
phases: z.array(z.object({
id: z.string(),
name: z.string(),
description: z.string().optional(),
typical_tools: z.array(z.string()).optional().default([]), // Enhanced: example tools
key_activities: z.array(z.string()).optional().default([]) // Enhanced: key activities
typical_tools: z.array(z.string()).optional().default([]),
key_activities: z.array(z.string()).optional().default([])
})),
'domain-agnostic-software': z.array(z.object({
id: z.string(),
name: z.string(),
description: z.string().optional(),
use_cases: z.array(z.string()).optional().default([]) // Enhanced: use cases
use_cases: z.array(z.string()).optional().default([])
})).optional().default([]),
scenarios: z.array(z.object({
id: z.string(),
icon: z.string(),
friendly_name: z.string(),
description: z.string().optional(), // Enhanced: scenario descriptions
typical_phases: z.array(z.string()).optional().default([]), // Enhanced: typical phases
complexity: z.enum(['low', 'medium', 'high']).optional() // Enhanced: complexity indicator
description: z.string().optional(),
typical_phases: z.array(z.string()).optional().default([]),
complexity: z.enum(['low', 'medium', 'high']).optional()
})).optional().default([]),
// Enhanced: Skill level definitions for better AI understanding
skill_levels: z.object({
novice: z.string().optional(),
beginner: z.string().optional(),
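For orientation, this is how a zod schema like ToolsDataSchema is typically applied when the catalogue is loaded; a hedged, self-contained sketch with a reduced stand-in schema and inline YAML (js-yaml is already in package.json), not the actual dataService loader:

import yaml from 'js-yaml';
import { z } from 'zod';

// Reduced stand-in for ToolsDataSchema: just enough to show the validate-on-load pattern.
const DomainsOnlySchema = z.object({
  domains: z.array(z.object({
    id: z.string(),
    name: z.string(),
    description: z.string().optional(),
  })),
});

const raw = yaml.load('domains:\n  - id: net\n    name: Network Forensics\n');
const parsed = DomainsOnlySchema.parse(raw); // throws a ZodError if the shape does not match
console.log(parsed.domains[0].name);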
src/utils/embeddings.ts
@@ -191,6 +191,12 @@ class EmbeddingsService {
await this.saveEmbeddings(version);
}

public async embedText(text: string): Promise<number[]> {
// Re‑use the private batch helper to avoid auth duplication
const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
return embedding;
}

private cosineSimilarity(a: number[], b: number[]): number {
let dotProduct = 0;
let normA = 0;
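The new embedText helper exposes single-string embedding to callers outside the service (the vector index below) while reusing the same lowercasing and batch endpoint as the catalogue build. A minimal usage sketch; the query string is an example:

import { embeddingsService } from './embeddings.js';

// Embed an ad-hoc query with the same pipeline used for the stored catalogue vectors.
const queryVector = await embeddingsService.embedText('Registry hives from a compromised domain controller');
console.log(queryVector.length); // expected to match the index dimensionality (1024 per vectorIndex.ts)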
@@ -246,6 +252,8 @@ class EmbeddingsService {
}
}

// Global instance
const embeddingsService = new EmbeddingsService();
src/utils/vectorIndex.ts (new file, 45 lines)
@@ -0,0 +1,45 @@
import { embeddingsService, type EmbeddingData } from "./embeddings.js";
// Fix for CommonJS module import in ESM environment
import pkg from "hnswlib-node";
const { HierarchicalNSW } = pkg;

export interface SimilarItem extends EmbeddingData {
similarity: number; // 1 = identical, 0 = orthogonal
}

class VectorIndex {
private index: InstanceType<typeof HierarchicalNSW> | null = null;
private idToItem: SimilarItem[] = [];
private readonly dim = 1024; // MistralAI embedding dimensionality

/** Build HNSW index once (idempotent) */
private async build(): Promise<void> {
if (this.index) return;

await embeddingsService.initialize();
const catalogue = (embeddingsService as any).embeddings as EmbeddingData[];

this.index = new HierarchicalNSW("cosine", this.dim);
this.index.initIndex(catalogue.length);

catalogue.forEach((item, id) => {
this.index!.addPoint(item.embedding, id);
this.idToItem[id] = { ...item, similarity: 0 } as SimilarItem;
});
}

/** Returns the K most similar catalogue items to an ad‑hoc query string. */
async findSimilar(text: string, k = 40): Promise<SimilarItem[]> {
await this.build();

const queryEmb = await embeddingsService.embedText(text.toLowerCase());
const { neighbors, distances } = this.index!.searchKnn(queryEmb, k);

return neighbors.map((id: number, i: number) => ({
...this.idToItem[id],
similarity: 1 - distances[i], // cosine distance → similarity
}));
}
}

export const vectorIndex = new VectorIndex();
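Condensed into a standalone sketch, this is how the pipeline above consumes the index; the query string and the 0.5 cutoff are illustrative (0.5 is only the default of AI_SIMILARITY_THRESHOLD, getIntelligentCandidates itself takes every hit):

import { vectorIndex } from './vectorIndex.js';

// Narrow the tool/concept catalogue to the closest matches for a user query.
const hits = await vectorIndex.findSimilar('Linux memory acquisition on a live server', 40);

const tools = hits.filter(h => h.type === 'tool' && h.similarity >= 0.5).map(h => h.name);
const concepts = hits.filter(h => h.type === 'concept' && h.similarity >= 0.5).map(h => h.name);

console.log(`candidate tools: ${tools.length}, candidate concepts: ${concepts.length}`);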