main #11

Merged
mstoeck3 merged 66 commits from main into forensic-ai 2025-08-11 12:02:56 +00:00
2 changed files with 208 additions and 0 deletions
Showing only changes of commit 3c6fb568d6 - Show all commits

View File

@ -189,6 +189,47 @@ ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB DER JSON-S
]`; ]`;
}, },
/**
 * Builds the German-language prompt that asks the model to pick the best
 * one or two tools (and optionally concepts) for an underrepresented phase.
 *
 * @param originalQuery - The user's original request, embedded verbatim in the prompt.
 * @param phase - Phase object; `name` and (optionally) `description` are read.
 * @param candidateTools - Tool candidates; `name`, `type`, `description` and `skillLevel` are read.
 * @param candidateConcepts - Concept candidates; `name` and `description` are read.
 *   The concept section is omitted entirely when this array is empty.
 * @returns The complete prompt text, ending with the expected JSON answer schema.
 */
generatePhaseCompletionPrompt(
  originalQuery: string,
  phase: any,
  candidateTools: any[],
  candidateConcepts: any[]
): string {
  // Candidates without a description must not crash prompt generation:
  // a missing value is treated as an empty string. For string descriptions
  // the output is unchanged (first 120 characters followed by "...").
  const excerpt = (text: unknown): string => `${String(text ?? '').slice(0, 120)}...`;

  return `Du bist ein DFIR-Experte. Die Phase "${phase.name}" ist in der aktuellen Analyse unterrepräsentiert.
ORIGINAL ANFRAGE: "${originalQuery}"
PHASE ZU VERVOLLSTÄNDIGEN: ${phase.name} - ${phase.description || ''}
Wähle 1-2 BESTE Tools aus den gefundenen Kandidaten, die diese Phase optimal ergänzen:
VERFÜGBARE TOOLS (${candidateTools.length}):
${candidateTools.map((tool: any) => `
- ${tool.name} (${tool.type})
Beschreibung: ${excerpt(tool.description)}
Skill Level: ${tool.skillLevel}
`).join('')}
${candidateConcepts.length > 0 ? `
VERFÜGBARE KONZEPTE (${candidateConcepts.length}):
${candidateConcepts.map((concept: any) => `
- ${concept.name}
Beschreibung: ${excerpt(concept.description)}
`).join('')}
` : ''}
AUSWAHLREGELN:
1. Wähle Tools, die die ${phase.name}-Phase der ursprünglichen Anfrage optimal ergänzen
2. Priorisiere Tools, die zur Gesamtlösung beitragen
3. Maximal 2 Tools für diese Phase
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
{
"selectedTools": ["ToolName1", "ToolName2"],
"selectedConcepts": ["ConceptName1"],
"reasoning": "Kurze Begründung der Auswahl für ${phase.name}"
}`;
},
finalRecommendations: (isWorkflow: boolean, userQuery: string, selectedToolNames: string[]) => { finalRecommendations: (isWorkflow: boolean, userQuery: string, selectedToolNames: string[]) => {
const focus = isWorkflow ? const focus = isWorkflow ?
'Workflow-Schritte, Best Practices, Objektivität' : 'Workflow-Schritte, Best Practices, Objektivität' :
@ -213,6 +254,7 @@ export function getPrompt(key: 'phaseToolSelection', userQuery: string, phase: a
export function getPrompt(key: 'toolEvaluation', userQuery: string, tool: any, rank: number, taskRelevance: number): string; export function getPrompt(key: 'toolEvaluation', userQuery: string, tool: any, rank: number, taskRelevance: number): string;
export function getPrompt(key: 'backgroundKnowledgeSelection', userQuery: string, mode: string, selectedToolNames: string[], availableConcepts: any[]): string; export function getPrompt(key: 'backgroundKnowledgeSelection', userQuery: string, mode: string, selectedToolNames: string[], availableConcepts: any[]): string;
export function getPrompt(key: 'finalRecommendations', isWorkflow: boolean, userQuery: string, selectedToolNames: string[]): string; export function getPrompt(key: 'finalRecommendations', isWorkflow: boolean, userQuery: string, selectedToolNames: string[]): string;
export function getPrompt(key: 'generatePhaseCompletionPrompt', originalQuery: string, phase: any, candidateTools: any[], candidateConcepts: any[]): string;
export function getPrompt(promptKey: keyof typeof AI_PROMPTS, ...args: any[]): string { export function getPrompt(promptKey: keyof typeof AI_PROMPTS, ...args: any[]): string {
try { try {
const promptFunction = AI_PROMPTS[promptKey]; const promptFunction = AI_PROMPTS[promptKey];

View File

@ -1096,6 +1096,168 @@ class ImprovedMicroTaskAIPipeline {
return result; return result;
} }
/**
 * Looks at how many selected tools each phase currently has and runs a
 * phase-specific completion step for every phase that has at most one.
 *
 * @param context - Analysis context; `selectedTools` (each entry's `phase`) is read here.
 * @param toolsData - Tool catalogue; `phases` is read here and the whole object is forwarded.
 * @param originalQuery - The user's original request, forwarded to the completion step.
 * @returns Resolves once every underrepresented phase has been processed.
 */
private async completeUnderrepresentedPhases(
  context: AnalysisContext,
  toolsData: any,
  originalQuery: string
): Promise<void> {
  const allPhases = toolsData.phases || [];

  // Tally the currently selected tools by the phase they were chosen for.
  const toolsPerPhase = new Map<string, number>();
  for (const entry of context.selectedTools ?? []) {
    const soFar = toolsPerPhase.get(entry.phase) || 0;
    toolsPerPhase.set(entry.phase, soFar + 1);
  }
  const coverageOf = (phase: any): number => toolsPerPhase.get(phase.id) || 0;

  console.log(`[AI PIPELINE] Phase coverage analysis:`);
  for (const phase of allPhases) {
    console.log(`[AI PIPELINE] ${phase.id}: ${coverageOf(phase)} tools`);
  }

  // Semantic search queries keyed by phase id.
  const semanticQueriesByPhase: Record<string, string> = {
    'data-collection': 'forensic data acquisition imaging memory disk capture evidence collection',
    'examination': 'forensic analysis parsing extraction artifact examination file system',
    'analysis': 'forensic correlation timeline analysis pattern detection investigation',
    'reporting': 'forensic report documentation case management collaboration presentation findings'
  };

  // A phase with no tool is missing; a phase with exactly one is underrepresented.
  const needsCompletion = allPhases.filter((phase: any) => coverageOf(phase) < 2);

  if (needsCompletion.length === 0) {
    console.log(`[AI PIPELINE] All phases adequately represented, no completion needed`);
    return;
  }

  const phaseIds = needsCompletion.map((p: any) => p.id).join(', ');
  console.log(`[AI PIPELINE] Underrepresented phases: ${phaseIds}`);

  // Phases are handled one after another, with the micro-task delay after each.
  for (const phase of needsCompletion) {
    await this.completePhaseWithSemanticSearch(context, phase, semanticQueriesByPhase, toolsData, originalQuery);
    await this.delay(this.microTaskDelay);
  }
}
/**
 * Tries to add up to two tools to a single underrepresented phase.
 *
 * Steps: run a semantic search with a phase-specific query, keep the hits
 * that are tools of this phase and not yet in `context.seenToolNames`, let
 * the model choose among them, add the chosen tools to the selection and
 * write an audit entry. Any error raised inside the search/selection steps
 * is logged and recorded as a `phase-completion-failed` audit entry instead
 * of being rethrown.
 *
 * NOTE(review): concepts are offered to the model in the prompt, but the
 * `selectedConcepts` part of its answer is not used here — confirm intent.
 *
 * @param context - Analysis context that receives the added tools and audit entries.
 * @param phase - Phase object; `id` and `name` are read.
 * @param phaseQueryTemplates - Semantic search query per phase id.
 * @param toolsData - Tool catalogue; `tools` and `concepts` are read.
 * @param originalQuery - The user's original request, passed into the prompt and audit data.
 */
private async completePhaseWithSemanticSearch(
  context: AnalysisContext,
  phase: any,
  phaseQueryTemplates: Record<string, string>,
  toolsData: any,
  originalQuery: string
): Promise<void> {
  const phaseStart = Date.now();

  // Fall back to a generic query for phases without a template. This runs
  // outside the try block, so a phase without a name must not throw here.
  const phaseQuery =
    phaseQueryTemplates[phase.id] ||
    `forensic ${String(phase.name ?? phase.id ?? '').toLowerCase()} tools methods`;

  console.log(`[AI PIPELINE] Completing phase ${phase.id} with query: "${phaseQuery}"`);

  try {
    const phaseResults = await embeddingsService.findSimilar(
      phaseQuery,
      20, // Smaller set for phase completion
      0.2 // Lower threshold for more results
    );

    if (phaseResults.length === 0) {
      console.log(`[AI PIPELINE] No semantic results for phase ${phase.id}`);
      return;
    }

    // Name -> entry lookups; a catalogue without tools/concepts is treated as empty.
    const toolsMap = new Map<string, any>((toolsData.tools ?? []).map((tool: any) => [tool.name, tool]));
    const conceptsMap = new Map<string, any>((toolsData.concepts ?? []).map((concept: any) => [concept.name, concept]));

    // Keep only hits that are tools of this phase and not already chosen.
    const phaseTools = phaseResults
      .filter(result => result.type === 'tool')
      .map(result => toolsMap.get(result.name))
      .filter((tool): tool is any =>
        tool !== undefined &&
        tool.phases &&
        tool.phases.includes(phase.id) &&
        !context.seenToolNames.has(tool.name)
      )
      .slice(0, 5); // Top 5 candidates for this phase

    const phaseConcepts = phaseResults
      .filter(result => result.type === 'concept')
      .map(result => conceptsMap.get(result.name))
      .filter((concept): concept is any => concept !== undefined)
      .slice(0, 2); // Top 2 concepts

    console.log(`[AI PIPELINE] Phase ${phase.id} semantic search found: ${phaseTools.length} tools, ${phaseConcepts.length} concepts`);

    if (phaseTools.length === 0) {
      console.log(`[AI PIPELINE] No suitable tools found for phase ${phase.id} after filtering`);
      return;
    }

    const prompt = AI_PROMPTS.generatePhaseCompletionPrompt(originalQuery, phase, phaseTools, phaseConcepts);
    const response = await this.callAI(prompt, 800);
    const selection = this.safeParseJSON(response, { selectedTools: [], selectedConcepts: [] });

    // The model's JSON is untrusted: accept `selectedTools` only if it is an array.
    const requestedNames: unknown[] = Array.isArray(selection?.selectedTools)
      ? selection.selectedTools
      : [];

    // Resolve names against the offered candidates only, skip repeats so a
    // tool is never added twice, and stop at the two-tool limit.
    const validTools: any[] = [];
    for (const name of requestedNames) {
      if (validTools.length === 2) break;
      const match = phaseTools.find((candidate: any) => candidate.name === name);
      if (match !== undefined && !validTools.includes(match)) {
        validTools.push(match);
      }
    }

    for (const tool of validTools) {
      console.log(`[AI PIPELINE] Adding phase completion tool: ${tool.name} for ${phase.id}`);
      this.addToolToSelection(
        context,
        tool,
        phase.id,
        'medium', // Phase completion tools get medium priority
        `Hinzugefügt zur Vervollständigung der ${phase.name}-Phase`,
        75, // Good relevance for phase-specific search
        ['Via phasenspezifische semantische Suche hinzugefügt']
      );
    }

    // Audit the phase completion
    this.addAuditEntry(context, 'validation', 'phase-completion',
      {
        phase: phase.id,
        phaseQuery,
        candidatesFound: phaseTools.length,
        originalQuery: originalQuery.slice(0, 100) + '...'
      },
      {
        toolsAdded: validTools.length,
        addedTools: validTools.map((t: any) => t.name),
        semanticResults: phaseResults.length
      },
      validTools.length > 0 ? 80 : 40,
      phaseStart,
      {
        phaseCompletion: true,
        semanticSearch: true,
        originalQueryBias: true
      }
    );
  } catch (error) {
    // The caught value may not be an Error instance; narrow before reading `.message`.
    const errorMessage = error instanceof Error ? error.message : String(error);
    console.error(`[AI PIPELINE] Phase completion failed for ${phase.id}:`, error);
    this.addAuditEntry(context, 'validation', 'phase-completion-failed',
      { phase: phase.id, phaseQuery },
      { error: errorMessage },
      10,
      phaseStart,
      { phaseCompletion: true, failed: true }
    );
  }
}
private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> { private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
const existingSelection = context.selectedTools?.find(st => st.tool.name === tool.name); const existingSelection = context.selectedTools?.find(st => st.tool.name === tool.name);
const taskRelevance = existingSelection?.taskRelevance || 70; const taskRelevance = existingSelection?.taskRelevance || 70;
@ -1373,6 +1535,10 @@ class ImprovedMicroTaskAIPipeline {
if (toolSelectionResult.success) completeTasks++; else failedTasks++; if (toolSelectionResult.success) completeTasks++; else failedTasks++;
await this.delay(this.microTaskDelay); await this.delay(this.microTaskDelay);
} }
console.log('[AI PIPELINE] Checking for underrepresented phases...');
await this.completeUnderrepresentedPhases(context, toolsData, userQuery);
} else { } else {
const topTools = filteredData.tools.slice(0, 3); const topTools = filteredData.tools.slice(0, 3);
for (let i = 0; i < topTools.length; i++) { for (let i = 0; i < topTools.length; i++) {