remove some env vars

This commit is contained in:
overcuriousity 2025-08-17 18:17:33 +02:00
parent bcd92af8a0
commit 2cb25d1dd6
7 changed files with 149 additions and 167 deletions

View File

@ -60,7 +60,7 @@ FORENSIC_AUDIT_MAX_ENTRIES=50
# === AI SEMANTIC SEARCH === # === AI SEMANTIC SEARCH ===
# Enable semantic search (highly recommended for better results) # Enable semantic search (highly recommended for better results)
AI_EMBEDDINGS_ENABLED=true REMOVE_AI_EMBEDDINGS_ENABLED=true
AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
AI_EMBEDDINGS_API_KEY=your-embeddings-api-key-here AI_EMBEDDINGS_API_KEY=your-embeddings-api-key-here
AI_EMBEDDINGS_MODEL=mistral-embed AI_EMBEDDINGS_MODEL=mistral-embed
@ -122,8 +122,8 @@ AI_EMBEDDINGS_BATCH_SIZE=10
AI_EMBEDDINGS_BATCH_DELAY_MS=1000 AI_EMBEDDINGS_BATCH_DELAY_MS=1000
# === Context Management === # === Context Management ===
AI_MAX_CONTEXT_TOKENS=4000 REMOVE_AI_MAX_CONTEXT_TOKENS=4000
AI_MAX_PROMPT_TOKENS=2500 REMOVE_AI_MAX_PROMPT_TOKENS=2500
# === Confidence Scoring === # === Confidence Scoring ===
CONFIDENCE_SEMANTIC_WEIGHT=0.5 CONFIDENCE_SEMANTIC_WEIGHT=0.5

View File

@ -2,17 +2,15 @@
export const AI_PROMPTS = { export const AI_PROMPTS = {
toolSelection: (mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number) => { toolSelection: (mode: string, userQuery: string, maxSelectedItems: number) => {
const modeInstruction = mode === 'workflow' const modeInstruction = mode === 'workflow'
? 'Workflow mit 15-25 Items über alle Phasen. PFLICHT: Mindestens 40% Methoden, Rest Tools/Konzepte.' ? 'Workflow mit 15-25 Items über alle Phasen. PFLICHT: Mindestens 40% Methoden, Rest Tools/Konzepte.'
: 'Spezifische Lösung mit 4-10 Items. PFLICHT: Mindestens 30% Methoden wenn verfügbar.'; : 'Spezifische Lösung mit 4-10 Items. PFLICHT: Mindestens 30% Methoden wenn verfügbar.';
return `Du bist ein DFIR-Experte. Wähle die BESTEN Items aus dem vorgefilterten Set. return `Du bist ein DFIR-Experte. Wähle die BESTEN Items aus dem vorgefilterten Set.
AUSWAHLMETHODE: ${selectionMethod} AUSWAHLMETHODE:
${selectionMethod === 'embeddings_candidates' ? '✓ Semantisch relevante Items bereits vorgefiltert\n✓ Wähle die BESTEN für die konkrete Aufgabe'}
'✓ Semantisch relevante Items bereits vorgefiltert\n✓ Wähle die BESTEN für die konkrete Aufgabe' :
'✓ Vollständige Datenbank verfügbar\n✓ Wähle die relevantesten Items'}
${modeInstruction} ${modeInstruction}
@ -309,7 +307,7 @@ Antwort: Fließtext ohne Listen, max ${isWorkflow ? '100' : '80'} Wörter.`;
} }
} as const; } as const;
export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number): string; export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, maxSelectedItems: number): string;
export function getPrompt(key: 'toolSelectionWithData', basePrompt: string, toolsToSend: any[], conceptsToSend: any[]): string; export function getPrompt(key: 'toolSelectionWithData', basePrompt: string, toolsToSend: any[], conceptsToSend: any[]): string;
export function getPrompt(key: 'scenarioAnalysis', isWorkflow: boolean, userQuery: string): string; export function getPrompt(key: 'scenarioAnalysis', isWorkflow: boolean, userQuery: string): string;
export function getPrompt(key: 'investigationApproach', isWorkflow: boolean, userQuery: string): string; export function getPrompt(key: 'investigationApproach', isWorkflow: boolean, userQuery: string): string;

View File

@ -11,7 +11,7 @@ import 'dotenv/config';
interface PipelineConfig { interface PipelineConfig {
microTaskDelay: number; microTaskDelay: number;
maxContextTokens: number; //maxContextTokens: number;
maxPromptTokens: number; maxPromptTokens: number;
taskRelevanceModeration: { taskRelevanceModeration: {
maxInitialScore: number; maxInitialScore: number;
@ -36,7 +36,7 @@ interface MicroTaskResult {
interface AnalysisResult { interface AnalysisResult {
recommendation: any; recommendation: any;
processingStats: { processingStats: {
embeddingsUsed: boolean; //embeddingsUsed: boolean;
candidatesFromEmbeddings: number; candidatesFromEmbeddings: number;
finalSelectedItems: number; finalSelectedItems: number;
processingTimeMs: number; processingTimeMs: number;
@ -57,7 +57,7 @@ interface PipelineContext {
mode: string; mode: string;
filteredData: any; filteredData: any;
contextHistory: string[]; contextHistory: string[];
maxContextLength: number; //maxContextLength: number;
currentContextLength: number; currentContextLength: number;
scenarioAnalysis?: string; scenarioAnalysis?: string;
problemAnalysis?: string; problemAnalysis?: string;
@ -91,7 +91,7 @@ class AIPipeline {
constructor() { constructor() {
this.config = { this.config = {
microTaskDelay: parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10), microTaskDelay: parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10),
maxContextTokens: parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10), //maxContextTokens: parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10),
maxPromptTokens: parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10), maxPromptTokens: parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10),
taskRelevanceModeration: { taskRelevanceModeration: {
maxInitialScore: 85, maxInitialScore: 85,
@ -123,7 +123,7 @@ class AIPipeline {
mode, mode,
filteredData: {}, filteredData: {},
contextHistory: [], contextHistory: [],
maxContextLength: this.config.maxContextTokens, //maxContextLength: this.config.maxContextTokens,
currentContextLength: 0, currentContextLength: 0,
seenToolNames: new Set<string>(), seenToolNames: new Set<string>(),
embeddingsSimilarities: new Map<string, number>(), embeddingsSimilarities: new Map<string, number>(),
@ -138,20 +138,20 @@ class AIPipeline {
const selectionConfidence = this.calculateToolSelectionConfidence( const selectionConfidence = this.calculateToolSelectionConfidence(
candidateData.tools.length, candidateData.tools.length,
toolsData.tools.length, toolsData.tools.length,
candidateData.selectionMethod, //candidateData.selectionMethod,
candidateData.concepts.length candidateData.concepts.length
); );
auditService.addToolSelection( auditService.addToolSelection(
candidateData.tools.map(t => t.name), candidateData.tools.map(t => t.name),
toolsData.tools.map(t => t.name), toolsData.tools.map(t => t.name),
candidateData.selectionMethod, //candidateData.selectionMethod,
selectionConfidence, selectionConfidence,
candidateSelectionStart, candidateSelectionStart,
{ {
embeddingsUsed: embeddingsService.isEnabled(), //embeddingsUsed: embeddingsService.isEnabled(),
totalCandidatesFound: candidateData.tools.length + candidateData.concepts.length, totalCandidatesFound: candidateData.tools.length + candidateData.concepts.length,
selectionMethod: candidateData.selectionMethod, //selectionMethod: candidateData.selectionMethod,
reductionRatio: candidateData.tools.length / toolsData.tools.length reductionRatio: candidateData.tools.length / toolsData.tools.length
} }
); );
@ -201,7 +201,7 @@ class AIPipeline {
const recommendation = this.buildRecommendation(context, mode, finalResult.content); const recommendation = this.buildRecommendation(context, mode, finalResult.content);
const processingStats = { const processingStats = {
embeddingsUsed: embeddingsService.isEnabled(), //embeddingsUsed: embeddingsService.isEnabled(),
candidatesFromEmbeddings: candidateData.tools.length, candidatesFromEmbeddings: candidateData.tools.length,
finalSelectedItems: (context.selectedTools?.length || 0) + (context.backgroundKnowledge?.length || 0), finalSelectedItems: (context.selectedTools?.length || 0) + (context.backgroundKnowledge?.length || 0),
processingTimeMs: Date.now() - startTime, processingTimeMs: Date.now() - startTime,
@ -213,7 +213,7 @@ class AIPipeline {
aiModel: aiConfig.model, aiModel: aiConfig.model,
toolsDataHash, toolsDataHash,
temperature: 0.3, temperature: 0.3,
maxTokensUsed: 2500 maxTokensUsed: 32768
}; };
console.log('[AI-PIPELINE] Pipeline completed successfully:', { console.log('[AI-PIPELINE] Pipeline completed successfully:', {
@ -292,7 +292,7 @@ class AIPipeline {
private calculateToolSelectionConfidence( private calculateToolSelectionConfidence(
selectedCount: number, selectedCount: number,
totalCount: number, totalCount: number,
method: string, //method: string,
conceptsCount: number conceptsCount: number
): number { ): number {
let confidence = 50; let confidence = 50;
@ -307,9 +307,9 @@ class AIPipeline {
confidence -= 15; confidence -= 15;
} }
if (method.includes('embeddings')) { //if (method.includes('embeddings')) {
confidence += 15; //confidence += 15;
} //}
if (conceptsCount > 0) { if (conceptsCount > 0) {
confidence += 10; confidence += 10;
@ -1280,10 +1280,12 @@ class AIPipeline {
context.contextHistory.push(newEntry); context.contextHistory.push(newEntry);
context.currentContextLength += entryTokens; context.currentContextLength += entryTokens;
while (context.currentContextLength > this.config.maxContextTokens && context.contextHistory.length > 1) { /*while (context.currentContextLength > this.config.maxContextTokens && context.contextHistory.length > 1) {
const removed = context.contextHistory.shift()!; const removed = context.contextHistory.shift()!;
context.currentContextLength -= aiService.estimateTokens(removed); context.currentContextLength -= aiService.estimateTokens(removed);
} }*/
const removed = context.contextHistory.shift()!;
context.currentContextLength -= aiService.estimateTokens(removed);
} }
private addToolToSelection( private addToolToSelection(

View File

@ -34,7 +34,7 @@ class AIService {
}; };
this.defaultOptions = { this.defaultOptions = {
maxTokens: 1500, maxTokens: 32768,
temperature: 0.3, temperature: 0.3,
timeout: 30000 timeout: 30000
}; };

View File

@ -26,7 +26,7 @@ export interface AuditEntry {
completionTokens?: number; completionTokens?: number;
toolsDataHash?: string; toolsDataHash?: string;
embeddingsUsed?: boolean; embeddingsUsed?: boolean;
selectionMethod?: string; //selectionMethod?: string;
microTaskType?: string; microTaskType?: string;
confidenceFactors?: string[]; confidenceFactors?: string[];
reasoning?: string; reasoning?: string;
@ -146,7 +146,7 @@ class AuditService {
addToolSelection( addToolSelection(
selectedTools: string[], selectedTools: string[],
availableTools: string[], availableTools: string[],
selectionMethod: string, //selectionMethod: string,
confidence: number, confidence: number,
startTime: number, startTime: number,
metadata: Record<string, any> = {} metadata: Record<string, any> = {}
@ -154,17 +154,22 @@ class AuditService {
const calculatedConfidence = this.calculateSelectionConfidence( const calculatedConfidence = this.calculateSelectionConfidence(
selectedTools, selectedTools,
availableTools, availableTools,
selectionMethod, //selectionMethod,
metadata metadata
); );
const decisionBasis =
metadata.embeddingsUsed || metadata.similarityScores
? 'semantic-search'
: (metadata.aiPrompt || metadata.microTaskType ? 'ai-analysis' : 'rule-based');
this.addEntry( this.addEntry(
'tool-selection', 'tool-selection',
'selection-decision', 'selection-decision',
{ {
availableTools: availableTools.slice(0, 10), availableTools: availableTools.slice(0, 10),
totalAvailable: availableTools.length, totalAvailable: availableTools.length,
selectionMethod: selectionMethod //selectionMethod: selectionMethod
}, },
{ {
selectedTools: selectedTools, selectedTools: selectedTools,
@ -174,10 +179,11 @@ class AuditService {
startTime, startTime,
{ {
...metadata, ...metadata,
selectionMethod, //selectionMethod,
availableToolsCount: availableTools.length, availableToolsCount: availableTools.length,
selectedToolsCount: selectedTools.length, selectedToolsCount: selectedTools.length,
decisionBasis: selectionMethod.includes('embeddings') ? 'semantic-search' : 'ai-analysis' //decisionBasis: selectionMethod.includes('embeddings') ? 'semantic-search' : 'ai-analysis'
decisionBasis
} }
); );
} }
@ -282,7 +288,7 @@ class AuditService {
private calculateSelectionConfidence( private calculateSelectionConfidence(
selectedTools: string[], selectedTools: string[],
availableTools: string[], availableTools: string[],
selectionMethod: string, //selectionMethod: string,
metadata: Record<string, any> metadata: Record<string, any>
): number { ): number {
let confidence = 50; let confidence = 50;
@ -297,9 +303,9 @@ class AuditService {
confidence -= 20; confidence -= 20;
} }
if (selectionMethod.includes('embeddings')) { /*if (selectionMethod.includes('embeddings')) {
confidence += 15; confidence += 15;
} }*/
if (selectedTools.length >= 5 && selectedTools.length <= 25) { if (selectedTools.length >= 5 && selectedTools.length <= 25) {
confidence += 10; confidence += 10;
@ -589,7 +595,8 @@ class AuditService {
} }
private inferDecisionBasis(metadata: Record<string, any>): string { private inferDecisionBasis(metadata: Record<string, any>): string {
if (metadata.embeddingsUsed || metadata.selectionMethod?.includes('embeddings')) return 'semantic-search'; if (metadata.embeddingsUsed) return 'semantic-search';
//if (metadata.embeddingsUsed || metadata.selectionMethod?.includes('embeddings')) return 'semantic-search';
if (metadata.aiPrompt || metadata.microTaskType) return 'ai-analysis'; if (metadata.aiPrompt || metadata.microTaskType) return 'ai-analysis';
if (metadata.semanticQuery && metadata.aiReasoningUsed) return 'hybrid'; if (metadata.semanticQuery && metadata.aiReasoningUsed) return 'hybrid';
return 'rule-based'; return 'rule-based';

View File

@ -31,7 +31,7 @@ interface EmbeddingsDatabase {
} }
interface EmbeddingsConfig { interface EmbeddingsConfig {
enabled: boolean; //enabled: boolean;
endpoint?: string; endpoint?: string;
apiKey?: string; apiKey?: string;
model?: string; model?: string;
@ -49,14 +49,14 @@ class EmbeddingsService {
constructor() { constructor() {
this.config = this.loadConfig(); this.config = this.loadConfig();
console.log('[EMBEDDINGS-SERVICE] Initialized:', { console.log('[EMBEDDINGS-SERVICE] Initialized:', {
enabled: this.config.enabled, //enabled: this.config.enabled,
hasEndpoint: !!this.config.endpoint, hasEndpoint: !!this.config.endpoint,
hasModel: !!this.config.model hasModel: !!this.config.model
}); });
} }
private loadConfig(): EmbeddingsConfig { private loadConfig(): EmbeddingsConfig {
const enabled = process.env.AI_EMBEDDINGS_ENABLED === 'true'; //const enabled = process.env.AI_EMBEDDINGS_ENABLED === 'true';
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT; const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
const apiKey = process.env.AI_EMBEDDINGS_API_KEY; const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
const model = process.env.AI_EMBEDDINGS_MODEL; const model = process.env.AI_EMBEDDINGS_MODEL;
@ -64,7 +64,7 @@ class EmbeddingsService {
const batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10); const batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
return { return {
enabled, //enabled,
endpoint, endpoint,
apiKey, apiKey,
model, model,
@ -92,10 +92,10 @@ class EmbeddingsService {
try { try {
console.log('[EMBEDDINGS-SERVICE] Starting initialization'); console.log('[EMBEDDINGS-SERVICE] Starting initialization');
if (!this.config.enabled) { /*if (!this.config.enabled) {
console.log('[EMBEDDINGS-SERVICE] Service disabled via configuration'); console.log('[EMBEDDINGS-SERVICE] Service disabled via configuration');
return; return;
} }*/
await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true }); await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true });
@ -263,7 +263,7 @@ class EmbeddingsService {
} }
async embedText(text: string): Promise<number[]> { async embedText(text: string): Promise<number[]> {
if (!this.isEnabled() || !this.isInitialized) { if (!this.isInitialized) {
throw new Error('Embeddings service not available'); throw new Error('Embeddings service not available');
} }
@ -272,9 +272,9 @@ class EmbeddingsService {
} }
async waitForInitialization(): Promise<void> { async waitForInitialization(): Promise<void> {
if (!this.config.enabled) { /*if (!this.config.enabled) {
return Promise.resolve(); return Promise.resolve();
} }*/
if (this.isInitialized) { if (this.isInitialized) {
return Promise.resolve(); return Promise.resolve();
@ -303,10 +303,10 @@ class EmbeddingsService {
} }
async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<SimilarityResult[]> { async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<SimilarityResult[]> {
if (!this.config.enabled) { /*if (!this.config.enabled) {
console.log('[EMBEDDINGS-SERVICE] Service disabled, returning empty results'); console.log('[EMBEDDINGS-SERVICE] Service disabled, returning empty results');
return []; return [];
} }*/
if (!this.isInitialized || this.embeddings.length === 0) { if (!this.isInitialized || this.embeddings.length === 0) {
console.log('[EMBEDDINGS-SERVICE] Not initialized or no embeddings available'); console.log('[EMBEDDINGS-SERVICE] Not initialized or no embeddings available');
@ -349,16 +349,24 @@ class EmbeddingsService {
} }
} }
isEnabled(): boolean { /*isEnabled(): boolean {
return this.config.enabled; return this.config.enabled;
} }*/
getStats(): { enabled: boolean; initialized: boolean; count: number } { /*getStats(): { enabled: boolean; initialized: boolean; count: number } {
return { return {
enabled: this.config.enabled, enabled: this.config.enabled,
initialized: this.isInitialized, initialized: this.isInitialized,
count: this.embeddings.length count: this.embeddings.length
}; };
}*/
getStats(): {initialized: boolean; count: number } {
return {
//enabled: this.config.enabled,
initialized: this.isInitialized,
count: this.embeddings.length
};
} }
getConfig(): EmbeddingsConfig { getConfig(): EmbeddingsConfig {

View File

@ -38,7 +38,7 @@ export interface SelectionContext {
export interface ToolSelectionResult { export interface ToolSelectionResult {
selectedTools: any[]; selectedTools: any[];
selectedConcepts: any[]; selectedConcepts: any[];
selectionMethod: string; //selectionMethod: string;
confidence: number; confidence: number;
} }
@ -84,13 +84,13 @@ class ToolSelector {
domains: any[]; domains: any[];
phases: any[]; phases: any[];
'domain-agnostic-software': any[]; 'domain-agnostic-software': any[];
selectionMethod: string; //selectionMethod: string;
}> { }> {
console.log('[TOOL-SELECTOR] Getting intelligent candidates for query'); console.log('[TOOL-SELECTOR] Getting intelligent candidates for query');
let candidateTools: any[] = []; let candidateTools: any[] = [];
let candidateConcepts: any[] = []; let candidateConcepts: any[] = [];
let selectionMethod = 'unknown'; //let selectionMethod = 'unknown';
context.embeddingsSimilarities.clear(); context.embeddingsSimilarities.clear();
@ -100,62 +100,56 @@ class ToolSelector {
console.error('[TOOL-SELECTOR] Embeddings initialization failed:', error); console.error('[TOOL-SELECTOR] Embeddings initialization failed:', error);
} }
if (embeddingsService.isEnabled()) { console.log('[TOOL-SELECTOR] Using embeddings for candidate selection');
console.log('[TOOL-SELECTOR] Using embeddings for candidate selection');
const similarItems = await embeddingsService.findSimilar(
userQuery,
this.config.embeddingCandidates,
this.config.similarityThreshold
) as SimilarityResult[];
console.log('[TOOL-SELECTOR] Embeddings found', similarItems.length, 'similar items');
similarItems.forEach(item => {
context.embeddingsSimilarities.set(item.name, item.similarity);
});
const toolsMap = new Map(toolsData.tools.map((tool: any) => [tool.name, tool]));
const conceptsMap = new Map(toolsData.concepts.map((concept: any) => [concept.name, concept]));
const similarTools = similarItems
.filter((item: any) => item.type === 'tool')
.map((item: any) => toolsMap.get(item.name))
.filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null);
const similarConcepts = similarItems
.filter((item: any) => item.type === 'concept')
.map((item: any) => conceptsMap.get(item.name))
.filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null);
const totalAvailableTools = toolsData.tools.length;
const reductionRatio = similarTools.length / totalAvailableTools;
if (similarTools.length >= this.config.embeddingsMinTools && reductionRatio <= this.config.embeddingsMaxReductionRatio) {
candidateTools = similarTools;
candidateConcepts = similarConcepts;
//selectionMethod = 'embeddings_candidates';
const similarItems = await embeddingsService.findSimilar( console.log('[TOOL-SELECTOR] Using embeddings filtering:', totalAvailableTools, '→', similarTools.length, 'tools');
userQuery,
this.config.embeddingCandidates,
this.config.similarityThreshold
) as SimilarityResult[];
console.log('[TOOL-SELECTOR] Embeddings found', similarItems.length, 'similar items');
similarItems.forEach(item => {
context.embeddingsSimilarities.set(item.name, item.similarity);
});
const toolsMap = new Map(toolsData.tools.map((tool: any) => [tool.name, tool]));
const conceptsMap = new Map(toolsData.concepts.map((concept: any) => [concept.name, concept]));
const similarTools = similarItems
.filter((item: any) => item.type === 'tool')
.map((item: any) => toolsMap.get(item.name))
.filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null);
const similarConcepts = similarItems
.filter((item: any) => item.type === 'concept')
.map((item: any) => conceptsMap.get(item.name))
.filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null);
const totalAvailableTools = toolsData.tools.length;
const reductionRatio = similarTools.length / totalAvailableTools;
if (similarTools.length >= this.config.embeddingsMinTools && reductionRatio <= this.config.embeddingsMaxReductionRatio) {
candidateTools = similarTools;
candidateConcepts = similarConcepts;
selectionMethod = 'embeddings_candidates';
console.log('[TOOL-SELECTOR] Using embeddings filtering:', totalAvailableTools, '→', similarTools.length, 'tools');
} else {
console.log('[TOOL-SELECTOR] Embeddings filtering insufficient, using full dataset');
candidateTools = toolsData.tools;
candidateConcepts = toolsData.concepts;
selectionMethod = 'full_dataset';
}
} else { } else {
console.log('[TOOL-SELECTOR] Embeddings disabled, using full dataset'); console.log('[TOOL-SELECTOR] Embeddings filtering insufficient, using full dataset');
candidateTools = toolsData.tools; candidateTools = toolsData.tools;
candidateConcepts = toolsData.concepts; candidateConcepts = toolsData.concepts;
selectionMethod = 'full_dataset'; //selectionMethod = 'full_dataset';
} }
const selection = await this.performAISelection( const selection = await this.performAISelection(
userQuery, userQuery,
candidateTools, candidateTools,
candidateConcepts, candidateConcepts,
mode, mode,
selectionMethod, //selectionMethod,
context context
); );
@ -165,7 +159,7 @@ class ToolSelector {
domains: toolsData.domains, domains: toolsData.domains,
phases: toolsData.phases, phases: toolsData.phases,
'domain-agnostic-software': toolsData['domain-agnostic-software'], 'domain-agnostic-software': toolsData['domain-agnostic-software'],
selectionMethod //selectionMethod
}; };
} }
@ -174,81 +168,56 @@ class ToolSelector {
candidateTools: any[], candidateTools: any[],
candidateConcepts: any[], candidateConcepts: any[],
mode: string, mode: string,
selectionMethod: string,
context: SelectionContext context: SelectionContext
): Promise<ToolSelectionResult> { ): Promise<ToolSelectionResult> {
console.log('[TOOL-SELECTOR] Performing AI selection'); console.log('[TOOL-SELECTOR] Performing AI selection');
const candidateMethods = candidateTools.filter((tool: any) => tool && tool.type === 'method'); const candidateMethods = candidateTools.filter((tool: any) => tool && tool.type === 'method');
const candidateSoftware = candidateTools.filter((tool: any) => tool && tool.type === 'software'); const candidateSoftware = candidateTools.filter((tool: any) => tool && tool.type === 'software');
console.log('[TOOL-SELECTOR] Candidates:', candidateMethods.length, 'methods,', candidateSoftware.length, 'software,', candidateConcepts.length, 'concepts'); console.log('[TOOL-SELECTOR] Candidates:', candidateMethods.length, 'methods,', candidateSoftware.length, 'software,', candidateConcepts.length, 'concepts');
const methodsWithFullData = candidateMethods.map(this.createToolData); const methodsWithFullData = candidateMethods.map(this.createToolData);
const softwareWithFullData = candidateSoftware.map(this.createToolData); const softwareWithFullData = candidateSoftware.map(this.createToolData);
const conceptsWithFullData = candidateConcepts.map(this.createConceptData); const conceptsWithFullData = candidateConcepts.map(this.createConceptData);
let toolsToSend: any[]; // Unified selection limits (method-agnostic)
let conceptsToSend: any[]; const maxTools = Math.min(this.config.embeddingSelectionLimit, this.config.noEmbeddingsToolLimit);
const maxConcepts = Math.min(this.config.embeddingConceptsLimit, this.config.noEmbeddingsConceptLimit);
if (selectionMethod === 'embeddings_candidates') { const methodLimit = Math.ceil(maxTools * this.config.methodSelectionRatio);
const totalLimit = this.config.embeddingSelectionLimit; const softwareLimit = Math.floor(maxTools * this.config.softwareSelectionRatio);
const methodLimit = Math.ceil(totalLimit * this.config.methodSelectionRatio);
const softwareLimit = Math.floor(totalLimit * this.config.softwareSelectionRatio); // Build tool list to send
const toolsToSend: any[] = [
toolsToSend = [ ...methodsWithFullData.slice(0, methodLimit),
...methodsWithFullData.slice(0, methodLimit), ...softwareWithFullData.slice(0, softwareLimit),
...softwareWithFullData.slice(0, softwareLimit) ];
];
const remainingCapacity = maxTools - toolsToSend.length;
const remainingCapacity = totalLimit - toolsToSend.length; if (remainingCapacity > 0) {
if (remainingCapacity > 0) { // Fill remainder from whichever bucket still has items
if (methodsWithFullData.length > methodLimit) { const extraMethods = methodsWithFullData.slice(methodLimit, methodLimit + remainingCapacity);
toolsToSend.push(...methodsWithFullData.slice(methodLimit, methodLimit + remainingCapacity)); const extraSoftware = softwareWithFullData.slice(softwareLimit, softwareLimit + (remainingCapacity - extraMethods.length));
} else if (softwareWithFullData.length > softwareLimit) { toolsToSend.push(...extraMethods, ...extraSoftware);
toolsToSend.push(...softwareWithFullData.slice(softwareLimit, softwareLimit + remainingCapacity));
}
}
conceptsToSend = conceptsWithFullData.slice(0, this.config.embeddingConceptsLimit);
} else {
const maxTools = this.config.noEmbeddingsToolLimit;
const maxConcepts = this.config.noEmbeddingsConceptLimit;
const methodLimit = Math.ceil(maxTools * 0.4);
const softwareLimit = Math.floor(maxTools * 0.5);
toolsToSend = [
...methodsWithFullData.slice(0, methodLimit),
...softwareWithFullData.slice(0, softwareLimit)
];
const remainingCapacity = maxTools - toolsToSend.length;
if (remainingCapacity > 0) {
if (methodsWithFullData.length > methodLimit) {
toolsToSend.push(...methodsWithFullData.slice(methodLimit, methodLimit + remainingCapacity));
} else if (softwareWithFullData.length > softwareLimit) {
toolsToSend.push(...softwareWithFullData.slice(softwareLimit, softwareLimit + remainingCapacity));
}
}
conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);
} }
const basePrompt = getPrompt('toolSelection', mode, userQuery, selectionMethod, this.config.maxSelectedItems); const conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);
const basePrompt = getPrompt('toolSelection', mode, userQuery, this.config.maxSelectedItems);
const prompt = getPrompt('toolSelectionWithData', basePrompt, toolsToSend, conceptsToSend); const prompt = getPrompt('toolSelectionWithData', basePrompt, toolsToSend, conceptsToSend);
aiService.validatePromptLength(prompt); aiService.validatePromptLength(prompt);
console.log('[TOOL-SELECTOR] Sending to AI:', console.log('[TOOL-SELECTOR] Sending to AI:',
toolsToSend.filter((t: any) => t.type === 'method').length, 'methods,', toolsToSend.filter((t: any) => t.type === 'method').length, 'methods,',
toolsToSend.filter((t: any) => t.type === 'software').length, 'software,', toolsToSend.filter((t: any) => t.type === 'software').length, 'software,',
conceptsToSend.length, 'concepts' conceptsToSend.length, 'concepts'
); );
try { try {
const response = await aiService.callAI(prompt, { maxTokens: 2500 }); const response = await aiService.callAI(prompt, { maxTokens: 32768 });
const result = JSONParser.safeParseJSON(response.content, null); const result = JSONParser.safeParseJSON(response.content, null);
if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) { if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
console.error('[TOOL-SELECTOR] AI selection returned invalid structure'); console.error('[TOOL-SELECTOR] AI selection returned invalid structure');
throw new Error('AI selection failed to return valid tool and concept selection'); throw new Error('AI selection failed to return valid tool and concept selection');
@ -258,38 +227,36 @@ class ToolSelector {
if (totalSelected === 0) { if (totalSelected === 0) {
throw new Error('AI selection returned empty selection'); throw new Error('AI selection returned empty selection');
} }
const toolsMap = new Map(candidateTools.map((tool: any) => [tool.name, tool])); const toolsMap = new Map(candidateTools.map((tool: any) => [tool.name, tool]));
const conceptsMap = new Map(candidateConcepts.map((concept: any) => [concept.name, concept])); const conceptsMap = new Map(candidateConcepts.map((concept: any) => [concept.name, concept]));
const selectedTools = result.selectedTools const selectedTools = result.selectedTools
.map((name: string) => toolsMap.get(name)) .map((name: string) => toolsMap.get(name))
.filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null); .filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null);
const selectedConcepts = result.selectedConcepts const selectedConcepts = result.selectedConcepts
.map((name: string) => conceptsMap.get(name)) .map((name: string) => conceptsMap.get(name))
.filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null); .filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null);
const selectedMethods = selectedTools.filter((t: any) => t && t.type === 'method'); const selectedMethods = selectedTools.filter((t: any) => t && t.type === 'method');
const selectedSoftware = selectedTools.filter((t: any) => t && t.type === 'software'); const selectedSoftware = selectedTools.filter((t: any) => t && t.type === 'software');
console.log('[TOOL-SELECTOR] AI selected:', selectedMethods.length, 'methods,', selectedSoftware.length, 'software,', selectedConcepts.length, 'concepts'); console.log('[TOOL-SELECTOR] AI selected:', selectedMethods.length, 'methods,', selectedSoftware.length, 'software,', selectedConcepts.length, 'concepts');
const confidence = confidenceScoring.calculateSelectionConfidence(result, candidateTools.length + candidateConcepts.length); const confidence = confidenceScoring.calculateSelectionConfidence(
result,
return { candidateTools.length + candidateConcepts.length
selectedTools, );
selectedConcepts,
selectionMethod,
confidence
};
return { selectedTools, selectedConcepts, confidence };
} catch (error) { } catch (error) {
console.error('[TOOL-SELECTOR] AI selection failed:', error); console.error('[TOOL-SELECTOR] AI selection failed:', error);
throw error; throw error;
} }
} }
async selectToolsForPhase( async selectToolsForPhase(
userQuery: string, userQuery: string,
phase: any, phase: any,