airefactor #19

Merged
mstoeck3 merged 25 commits from airefactor into main 2025-08-17 22:59:31 +00:00
7 changed files with 149 additions and 167 deletions
Showing only changes of commit 2cb25d1dd6 - Show all commits

View File

@ -60,7 +60,7 @@ FORENSIC_AUDIT_MAX_ENTRIES=50
# === AI SEMANTIC SEARCH ===
# Enable semantic search (highly recommended for better results)
AI_EMBEDDINGS_ENABLED=true
REMOVE_AI_EMBEDDINGS_ENABLED=true
AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
AI_EMBEDDINGS_API_KEY=your-embeddings-api-key-here
AI_EMBEDDINGS_MODEL=mistral-embed
@ -122,8 +122,8 @@ AI_EMBEDDINGS_BATCH_SIZE=10
AI_EMBEDDINGS_BATCH_DELAY_MS=1000
# === Context Management ===
AI_MAX_CONTEXT_TOKENS=4000
AI_MAX_PROMPT_TOKENS=2500
REMOVE_AI_MAX_CONTEXT_TOKENS=4000
REMOVE_AI_MAX_PROMPT_TOKENS=2500
# === Confidence Scoring ===
CONFIDENCE_SEMANTIC_WEIGHT=0.5

View File

@ -2,17 +2,15 @@
export const AI_PROMPTS = {
toolSelection: (mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number) => {
toolSelection: (mode: string, userQuery: string, maxSelectedItems: number) => {
const modeInstruction = mode === 'workflow'
? 'Workflow mit 15-25 Items über alle Phasen. PFLICHT: Mindestens 40% Methoden, Rest Tools/Konzepte.'
: 'Spezifische Lösung mit 4-10 Items. PFLICHT: Mindestens 30% Methoden wenn verfügbar.';
return `Du bist ein DFIR-Experte. Wähle die BESTEN Items aus dem vorgefilterten Set.
AUSWAHLMETHODE: ${selectionMethod}
${selectionMethod === 'embeddings_candidates' ?
'✓ Semantisch relevante Items bereits vorgefiltert\n✓ Wähle die BESTEN für die konkrete Aufgabe' :
'✓ Vollständige Datenbank verfügbar\n✓ Wähle die relevantesten Items'}
AUSWAHLMETHODE:
'✓ Semantisch relevante Items bereits vorgefiltert\n✓ Wähle die BESTEN für die konkrete Aufgabe'}
${modeInstruction}
@ -309,7 +307,7 @@ Antwort: Fließtext ohne Listen, max ${isWorkflow ? '100' : '80'} Wörter.`;
}
} as const;
export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number): string;
export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, maxSelectedItems: number): string;
export function getPrompt(key: 'toolSelectionWithData', basePrompt: string, toolsToSend: any[], conceptsToSend: any[]): string;
export function getPrompt(key: 'scenarioAnalysis', isWorkflow: boolean, userQuery: string): string;
export function getPrompt(key: 'investigationApproach', isWorkflow: boolean, userQuery: string): string;

View File

@ -11,7 +11,7 @@ import 'dotenv/config';
interface PipelineConfig {
microTaskDelay: number;
maxContextTokens: number;
//maxContextTokens: number;
maxPromptTokens: number;
taskRelevanceModeration: {
maxInitialScore: number;
@ -36,7 +36,7 @@ interface MicroTaskResult {
interface AnalysisResult {
recommendation: any;
processingStats: {
embeddingsUsed: boolean;
//embeddingsUsed: boolean;
candidatesFromEmbeddings: number;
finalSelectedItems: number;
processingTimeMs: number;
@ -57,7 +57,7 @@ interface PipelineContext {
mode: string;
filteredData: any;
contextHistory: string[];
maxContextLength: number;
//maxContextLength: number;
currentContextLength: number;
scenarioAnalysis?: string;
problemAnalysis?: string;
@ -91,7 +91,7 @@ class AIPipeline {
constructor() {
this.config = {
microTaskDelay: parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10),
maxContextTokens: parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10),
//maxContextTokens: parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10),
maxPromptTokens: parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10),
taskRelevanceModeration: {
maxInitialScore: 85,
@ -123,7 +123,7 @@ class AIPipeline {
mode,
filteredData: {},
contextHistory: [],
maxContextLength: this.config.maxContextTokens,
//maxContextLength: this.config.maxContextTokens,
currentContextLength: 0,
seenToolNames: new Set<string>(),
embeddingsSimilarities: new Map<string, number>(),
@ -138,20 +138,20 @@ class AIPipeline {
const selectionConfidence = this.calculateToolSelectionConfidence(
candidateData.tools.length,
toolsData.tools.length,
candidateData.selectionMethod,
//candidateData.selectionMethod,
candidateData.concepts.length
);
auditService.addToolSelection(
candidateData.tools.map(t => t.name),
toolsData.tools.map(t => t.name),
candidateData.selectionMethod,
//candidateData.selectionMethod,
selectionConfidence,
candidateSelectionStart,
{
embeddingsUsed: embeddingsService.isEnabled(),
//embeddingsUsed: embeddingsService.isEnabled(),
totalCandidatesFound: candidateData.tools.length + candidateData.concepts.length,
selectionMethod: candidateData.selectionMethod,
//selectionMethod: candidateData.selectionMethod,
reductionRatio: candidateData.tools.length / toolsData.tools.length
}
);
@ -201,7 +201,7 @@ class AIPipeline {
const recommendation = this.buildRecommendation(context, mode, finalResult.content);
const processingStats = {
embeddingsUsed: embeddingsService.isEnabled(),
//embeddingsUsed: embeddingsService.isEnabled(),
candidatesFromEmbeddings: candidateData.tools.length,
finalSelectedItems: (context.selectedTools?.length || 0) + (context.backgroundKnowledge?.length || 0),
processingTimeMs: Date.now() - startTime,
@ -213,7 +213,7 @@ class AIPipeline {
aiModel: aiConfig.model,
toolsDataHash,
temperature: 0.3,
maxTokensUsed: 2500
maxTokensUsed: 32768
};
console.log('[AI-PIPELINE] Pipeline completed successfully:', {
@ -292,7 +292,7 @@ class AIPipeline {
private calculateToolSelectionConfidence(
selectedCount: number,
totalCount: number,
method: string,
//method: string,
conceptsCount: number
): number {
let confidence = 50;
@ -307,9 +307,9 @@ class AIPipeline {
confidence -= 15;
}
if (method.includes('embeddings')) {
confidence += 15;
}
//if (method.includes('embeddings')) {
//confidence += 15;
//}
if (conceptsCount > 0) {
confidence += 10;
@ -1280,10 +1280,12 @@ class AIPipeline {
context.contextHistory.push(newEntry);
context.currentContextLength += entryTokens;
while (context.currentContextLength > this.config.maxContextTokens && context.contextHistory.length > 1) {
/*while (context.currentContextLength > this.config.maxContextTokens && context.contextHistory.length > 1) {
const removed = context.contextHistory.shift()!;
context.currentContextLength -= aiService.estimateTokens(removed);
}*/
const removed = context.contextHistory.shift()!;
context.currentContextLength -= aiService.estimateTokens(removed);
}
}
private addToolToSelection(

View File

@ -34,7 +34,7 @@ class AIService {
};
this.defaultOptions = {
maxTokens: 1500,
maxTokens: 32768,
temperature: 0.3,
timeout: 30000
};

View File

@ -26,7 +26,7 @@ export interface AuditEntry {
completionTokens?: number;
toolsDataHash?: string;
embeddingsUsed?: boolean;
selectionMethod?: string;
//selectionMethod?: string;
microTaskType?: string;
confidenceFactors?: string[];
reasoning?: string;
@ -146,7 +146,7 @@ class AuditService {
addToolSelection(
selectedTools: string[],
availableTools: string[],
selectionMethod: string,
//selectionMethod: string,
confidence: number,
startTime: number,
metadata: Record<string, any> = {}
@ -154,17 +154,22 @@ class AuditService {
const calculatedConfidence = this.calculateSelectionConfidence(
selectedTools,
availableTools,
selectionMethod,
//selectionMethod,
metadata
);
const decisionBasis =
metadata.embeddingsUsed || metadata.similarityScores
? 'semantic-search'
: (metadata.aiPrompt || metadata.microTaskType ? 'ai-analysis' : 'rule-based');
this.addEntry(
'tool-selection',
'selection-decision',
{
availableTools: availableTools.slice(0, 10),
totalAvailable: availableTools.length,
selectionMethod: selectionMethod
//selectionMethod: selectionMethod
},
{
selectedTools: selectedTools,
@ -174,10 +179,11 @@ class AuditService {
startTime,
{
...metadata,
selectionMethod,
//selectionMethod,
availableToolsCount: availableTools.length,
selectedToolsCount: selectedTools.length,
decisionBasis: selectionMethod.includes('embeddings') ? 'semantic-search' : 'ai-analysis'
//decisionBasis: selectionMethod.includes('embeddings') ? 'semantic-search' : 'ai-analysis'
decisionBasis
}
);
}
@ -282,7 +288,7 @@ class AuditService {
private calculateSelectionConfidence(
selectedTools: string[],
availableTools: string[],
selectionMethod: string,
//selectionMethod: string,
metadata: Record<string, any>
): number {
let confidence = 50;
@ -297,9 +303,9 @@ class AuditService {
confidence -= 20;
}
if (selectionMethod.includes('embeddings')) {
/*if (selectionMethod.includes('embeddings')) {
confidence += 15;
}
}*/
if (selectedTools.length >= 5 && selectedTools.length <= 25) {
confidence += 10;
@ -589,7 +595,8 @@ class AuditService {
}
private inferDecisionBasis(metadata: Record<string, any>): string {
if (metadata.embeddingsUsed || metadata.selectionMethod?.includes('embeddings')) return 'semantic-search';
if (metadata.embeddingsUsed) return 'semantic-search';
//if (metadata.embeddingsUsed || metadata.selectionMethod?.includes('embeddings')) return 'semantic-search';
if (metadata.aiPrompt || metadata.microTaskType) return 'ai-analysis';
if (metadata.semanticQuery && metadata.aiReasoningUsed) return 'hybrid';
return 'rule-based';

View File

@ -31,7 +31,7 @@ interface EmbeddingsDatabase {
}
interface EmbeddingsConfig {
enabled: boolean;
//enabled: boolean;
endpoint?: string;
apiKey?: string;
model?: string;
@ -49,14 +49,14 @@ class EmbeddingsService {
constructor() {
this.config = this.loadConfig();
console.log('[EMBEDDINGS-SERVICE] Initialized:', {
enabled: this.config.enabled,
//enabled: this.config.enabled,
hasEndpoint: !!this.config.endpoint,
hasModel: !!this.config.model
});
}
private loadConfig(): EmbeddingsConfig {
const enabled = process.env.AI_EMBEDDINGS_ENABLED === 'true';
//const enabled = process.env.AI_EMBEDDINGS_ENABLED === 'true';
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
const model = process.env.AI_EMBEDDINGS_MODEL;
@ -64,7 +64,7 @@ class EmbeddingsService {
const batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
return {
enabled,
//enabled,
endpoint,
apiKey,
model,
@ -92,10 +92,10 @@ class EmbeddingsService {
try {
console.log('[EMBEDDINGS-SERVICE] Starting initialization');
if (!this.config.enabled) {
/*if (!this.config.enabled) {
console.log('[EMBEDDINGS-SERVICE] Service disabled via configuration');
return;
}
}*/
await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true });
@ -263,7 +263,7 @@ class EmbeddingsService {
}
async embedText(text: string): Promise<number[]> {
if (!this.isEnabled() || !this.isInitialized) {
if (!this.isInitialized) {
throw new Error('Embeddings service not available');
}
@ -272,9 +272,9 @@ class EmbeddingsService {
}
async waitForInitialization(): Promise<void> {
if (!this.config.enabled) {
/*if (!this.config.enabled) {
return Promise.resolve();
}
}*/
if (this.isInitialized) {
return Promise.resolve();
@ -303,10 +303,10 @@ class EmbeddingsService {
}
async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<SimilarityResult[]> {
if (!this.config.enabled) {
/*if (!this.config.enabled) {
console.log('[EMBEDDINGS-SERVICE] Service disabled, returning empty results');
return [];
}
}*/
if (!this.isInitialized || this.embeddings.length === 0) {
console.log('[EMBEDDINGS-SERVICE] Not initialized or no embeddings available');
@ -349,16 +349,24 @@ class EmbeddingsService {
}
}
isEnabled(): boolean {
/*isEnabled(): boolean {
return this.config.enabled;
}
}*/
getStats(): { enabled: boolean; initialized: boolean; count: number } {
/*getStats(): { enabled: boolean; initialized: boolean; count: number } {
return {
enabled: this.config.enabled,
initialized: this.isInitialized,
count: this.embeddings.length
};
}*/
/**
 * Reports a snapshot of the embeddings store.
 *
 * The result no longer carries an `enabled` flag; only the two fields
 * below are returned.
 *
 * @returns `initialized` — the current value of `this.isInitialized`;
 *          `count` — the number of entries currently in `this.embeddings`.
 */
getStats(): {initialized: boolean; count: number } {
  const initialized = this.isInitialized;
  const count = this.embeddings.length;
  return { initialized, count };
}
getConfig(): EmbeddingsConfig {

View File

@ -38,7 +38,7 @@ export interface SelectionContext {
export interface ToolSelectionResult {
selectedTools: any[];
selectedConcepts: any[];
selectionMethod: string;
//selectionMethod: string;
confidence: number;
}
@ -84,13 +84,13 @@ class ToolSelector {
domains: any[];
phases: any[];
'domain-agnostic-software': any[];
selectionMethod: string;
//selectionMethod: string;
}> {
console.log('[TOOL-SELECTOR] Getting intelligent candidates for query');
let candidateTools: any[] = [];
let candidateConcepts: any[] = [];
let selectionMethod = 'unknown';
//let selectionMethod = 'unknown';
context.embeddingsSimilarities.clear();
@ -100,7 +100,6 @@ class ToolSelector {
console.error('[TOOL-SELECTOR] Embeddings initialization failed:', error);
}
if (embeddingsService.isEnabled()) {
console.log('[TOOL-SELECTOR] Using embeddings for candidate selection');
const similarItems = await embeddingsService.findSimilar(
@ -134,28 +133,23 @@ class ToolSelector {
if (similarTools.length >= this.config.embeddingsMinTools && reductionRatio <= this.config.embeddingsMaxReductionRatio) {
candidateTools = similarTools;
candidateConcepts = similarConcepts;
selectionMethod = 'embeddings_candidates';
//selectionMethod = 'embeddings_candidates';
console.log('[TOOL-SELECTOR] Using embeddings filtering:', totalAvailableTools, '→', similarTools.length, 'tools');
} else {
console.log('[TOOL-SELECTOR] Embeddings filtering insufficient, using full dataset');
candidateTools = toolsData.tools;
candidateConcepts = toolsData.concepts;
selectionMethod = 'full_dataset';
}
} else {
console.log('[TOOL-SELECTOR] Embeddings disabled, using full dataset');
candidateTools = toolsData.tools;
candidateConcepts = toolsData.concepts;
selectionMethod = 'full_dataset';
//selectionMethod = 'full_dataset';
}
const selection = await this.performAISelection(
userQuery,
candidateTools,
candidateConcepts,
mode,
selectionMethod,
//selectionMethod,
context
);
@ -165,7 +159,7 @@ class ToolSelector {
domains: toolsData.domains,
phases: toolsData.phases,
'domain-agnostic-software': toolsData['domain-agnostic-software'],
selectionMethod
//selectionMethod
};
}
@ -174,7 +168,6 @@ class ToolSelector {
candidateTools: any[],
candidateConcepts: any[],
mode: string,
selectionMethod: string,
context: SelectionContext
): Promise<ToolSelectionResult> {
console.log('[TOOL-SELECTOR] Performing AI selection');
@ -188,53 +181,29 @@ class ToolSelector {
const softwareWithFullData = candidateSoftware.map(this.createToolData);
const conceptsWithFullData = candidateConcepts.map(this.createConceptData);
let toolsToSend: any[];
let conceptsToSend: any[];
// Unified selection limits (method-agnostic)
const maxTools = Math.min(this.config.embeddingSelectionLimit, this.config.noEmbeddingsToolLimit);
const maxConcepts = Math.min(this.config.embeddingConceptsLimit, this.config.noEmbeddingsConceptLimit);
const methodLimit = Math.ceil(maxTools * this.config.methodSelectionRatio);
const softwareLimit = Math.floor(maxTools * this.config.softwareSelectionRatio);
if (selectionMethod === 'embeddings_candidates') {
const totalLimit = this.config.embeddingSelectionLimit;
const methodLimit = Math.ceil(totalLimit * this.config.methodSelectionRatio);
const softwareLimit = Math.floor(totalLimit * this.config.softwareSelectionRatio);
toolsToSend = [
// Build tool list to send
const toolsToSend: any[] = [
...methodsWithFullData.slice(0, methodLimit),
...softwareWithFullData.slice(0, softwareLimit)
];
const remainingCapacity = totalLimit - toolsToSend.length;
if (remainingCapacity > 0) {
if (methodsWithFullData.length > methodLimit) {
toolsToSend.push(...methodsWithFullData.slice(methodLimit, methodLimit + remainingCapacity));
} else if (softwareWithFullData.length > softwareLimit) {
toolsToSend.push(...softwareWithFullData.slice(softwareLimit, softwareLimit + remainingCapacity));
}
}
conceptsToSend = conceptsWithFullData.slice(0, this.config.embeddingConceptsLimit);
} else {
const maxTools = this.config.noEmbeddingsToolLimit;
const maxConcepts = this.config.noEmbeddingsConceptLimit;
const methodLimit = Math.ceil(maxTools * 0.4);
const softwareLimit = Math.floor(maxTools * 0.5);
toolsToSend = [
...methodsWithFullData.slice(0, methodLimit),
...softwareWithFullData.slice(0, softwareLimit)
...softwareWithFullData.slice(0, softwareLimit),
];
const remainingCapacity = maxTools - toolsToSend.length;
if (remainingCapacity > 0) {
if (methodsWithFullData.length > methodLimit) {
toolsToSend.push(...methodsWithFullData.slice(methodLimit, methodLimit + remainingCapacity));
} else if (softwareWithFullData.length > softwareLimit) {
toolsToSend.push(...softwareWithFullData.slice(softwareLimit, softwareLimit + remainingCapacity));
}
// Fill remainder from whichever bucket still has items
const extraMethods = methodsWithFullData.slice(methodLimit, methodLimit + remainingCapacity);
const extraSoftware = softwareWithFullData.slice(softwareLimit, softwareLimit + (remainingCapacity - extraMethods.length));
toolsToSend.push(...extraMethods, ...extraSoftware);
}
conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);
}
const conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);
const basePrompt = getPrompt('toolSelection', mode, userQuery, selectionMethod, this.config.maxSelectedItems);
const basePrompt = getPrompt('toolSelection', mode, userQuery, this.config.maxSelectedItems);
const prompt = getPrompt('toolSelectionWithData', basePrompt, toolsToSend, conceptsToSend);
aiService.validatePromptLength(prompt);
@ -246,7 +215,7 @@ class ToolSelector {
);
try {
const response = await aiService.callAI(prompt, { maxTokens: 2500 });
const response = await aiService.callAI(prompt, { maxTokens: 32768 });
const result = JSONParser.safeParseJSON(response.content, null);
if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
@ -275,21 +244,19 @@ class ToolSelector {
console.log('[TOOL-SELECTOR] AI selected:', selectedMethods.length, 'methods,', selectedSoftware.length, 'software,', selectedConcepts.length, 'concepts');
const confidence = confidenceScoring.calculateSelectionConfidence(result, candidateTools.length + candidateConcepts.length);
return {
selectedTools,
selectedConcepts,
selectionMethod,
confidence
};
const confidence = confidenceScoring.calculateSelectionConfidence(
result,
candidateTools.length + candidateConcepts.length
);
return { selectedTools, selectedConcepts, confidence };
} catch (error) {
console.error('[TOOL-SELECTOR] AI selection failed:', error);
throw error;
}
}
async selectToolsForPhase(
userQuery: string,
phase: any,