forensic-pathways/src/utils/aiPipeline.ts
overcuriousity 8693cd87d4 improve AI
2025-08-01 22:29:38 +02:00


// src/utils/aiPipeline.ts - FIXED: Critical error corrections
import { getCompressedToolsDataForAI } from './dataService.js';
import { embeddingsService, type EmbeddingData } from './embeddings.js';
interface AIConfig {
endpoint: string;
apiKey: string;
model: string;
}
interface MicroTaskResult {
taskType: string;
content: string;
processingTimeMs: number;
success: boolean;
error?: string;
}
interface AnalysisResult {
recommendation: any;
processingStats: {
embeddingsUsed: boolean;
candidatesFromEmbeddings: number;
finalSelectedItems: number;
processingTimeMs: number;
microTasksCompleted: number;
microTasksFailed: number;
contextContinuityUsed: boolean;
};
}
interface AnalysisContext {
userQuery: string;
mode: string;
filteredData: any;
contextHistory: string[];
// FIXED: Add max context length tracking
maxContextLength: number;
currentContextLength: number;
scenarioAnalysis?: string;
problemAnalysis?: string;
investigationApproach?: string;
criticalConsiderations?: string;
selectedTools?: Array<{tool: any, phase: string, priority: string, justification?: string}>;
backgroundKnowledge?: Array<{concept: any, relevance: string}>;
// FIXED: Add seen tools tracking to prevent duplicates
seenToolNames: Set<string>;
}
class ImprovedMicroTaskAIPipeline {
private config: AIConfig;
private maxSelectedItems: number;
private embeddingCandidates: number;
private similarityThreshold: number;
private microTaskDelay: number;
// FIXED: Add proper token management
private maxContextTokens: number;
private maxPromptTokens: number;
constructor() {
this.config = {
endpoint: this.getEnv('AI_ANALYZER_ENDPOINT'),
apiKey: this.getEnv('AI_ANALYZER_API_KEY'),
model: this.getEnv('AI_ANALYZER_MODEL')
};
this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '60', 10);
this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '60', 10);
this.similarityThreshold = 0.3;
this.microTaskDelay = parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10);
// FIXED: Token management
this.maxContextTokens = parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10);
this.maxPromptTokens = parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10);
}
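// Illustrative environment configuration consumed by this constructor (values are placeholders,
// not taken from the repository; only the variable names are grounded in the code above):
//   AI_ANALYZER_ENDPOINT=https://llm.example.org      (hypothetical OpenAI-compatible endpoint)
//   AI_ANALYZER_API_KEY=sk-...                         (secret, placeholder)
//   AI_ANALYZER_MODEL=some-chat-model                  (hypothetical model id)
//   AI_MAX_SELECTED_ITEMS=60   AI_EMBEDDING_CANDIDATES=60   AI_MICRO_TASK_DELAY_MS=500
//   AI_MAX_CONTEXT_TOKENS=4000   AI_MAX_PROMPT_TOKENS=1500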
private getEnv(key: string): string {
const value = process.env[key];
if (!value) {
throw new Error(`Missing environment variable: ${key}`);
}
return value;
}
// FIXED: Estimate token count (rough approximation)
private estimateTokens(text: string): number {
return Math.ceil(text.length / 4); // Rough estimate: 4 chars per token
}
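// Worked example of the heuristic: "Chain of custody" has 16 characters -> ceil(16 / 4) = 4 estimated tokens.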
// FIXED: Manage context history with token limits
private addToContextHistory(context: AnalysisContext, newEntry: string): void {
const entryTokens = this.estimateTokens(newEntry);
// Add new entry
context.contextHistory.push(newEntry);
context.currentContextLength += entryTokens;
// Prune old entries if exceeding limits
while (context.currentContextLength > this.maxContextTokens && context.contextHistory.length > 1) {
const removed = context.contextHistory.shift()!;
context.currentContextLength -= this.estimateTokens(removed);
}
}
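// Pruning example (illustrative numbers): with maxContextTokens = 4000, appending an entry that
// pushes currentContextLength to 4200 shifts the oldest history entries off the front until the
// estimate drops back to <= 4000; at least one entry is always kept.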
// FIXED: Safe JSON parsing with validation
private safeParseJSON(jsonString: string, fallback: any = null): any {
try {
const cleaned = jsonString
.replace(/^```(?:json)?\s*/i, '')
.replace(/\s*```\s*$/g, '')
.trim();
const parsed = JSON.parse(cleaned);
return parsed;
} catch (error) {
console.warn('[AI PIPELINE] JSON parsing failed:', error instanceof Error ? error.message : String(error));
console.warn('[AI PIPELINE] Raw content:', jsonString.slice(0, 200));
return fallback;
}
}
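// Example: safeParseJSON('```json\n{"selectedTools": []}\n```', []) strips the Markdown fence and
// returns { selectedTools: [] }; output that still fails JSON.parse falls back to the supplied default.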
// FIXED: Add tool deduplication
private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string): boolean {
if (context.seenToolNames.has(tool.name)) {
console.log(`[AI PIPELINE] Skipping duplicate tool: ${tool.name}`);
return false;
}
context.seenToolNames.add(tool.name);
if (!context.selectedTools) context.selectedTools = [];
context.selectedTools.push({
tool,
phase,
priority,
justification
});
return true;
}
private async getIntelligentCandidates(userQuery: string, toolsData: any, mode: string) {
let candidateTools: any[] = [];
let candidateConcepts: any[] = [];
let selectionMethod = 'unknown';
if (embeddingsService.isEnabled()) {
const similarItems = await embeddingsService.findSimilar(
userQuery,
this.embeddingCandidates,
this.similarityThreshold
);
const toolNames = new Set<string>();
const conceptNames = new Set<string>();
similarItems.forEach(item => {
if (item.type === 'tool') toolNames.add(item.name);
if (item.type === 'concept') conceptNames.add(item.name);
});
console.log(`[IMPROVED PIPELINE] Embeddings found: ${toolNames.size} tools, ${conceptNames.size} concepts`);
// FIXED: Follow the expected flow - fetch the full records for the embeddings results
if (toolNames.size >= 15) { // Reasonable threshold for quality
candidateTools = toolsData.tools.filter((tool: any) => toolNames.has(tool.name));
candidateConcepts = toolsData.concepts.filter((concept: any) => conceptNames.has(concept.name));
selectionMethod = 'embeddings_candidates';
console.log(`[IMPROVED PIPELINE] Using embeddings candidates: ${candidateTools.length} tools`);
} else {
console.log(`[IMPROVED PIPELINE] Embeddings insufficient (${toolNames.size} < 15), using full dataset`);
candidateTools = toolsData.tools;
candidateConcepts = toolsData.concepts;
selectionMethod = 'full_dataset';
}
} else {
console.log(`[IMPROVED PIPELINE] Embeddings disabled, using full dataset`);
candidateTools = toolsData.tools;
candidateConcepts = toolsData.concepts;
selectionMethod = 'full_dataset';
}
// FIXED: The AI now analyzes the full data of the candidates
console.log(`[IMPROVED PIPELINE] AI will analyze FULL DATA of ${candidateTools.length} candidate tools`);
const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod);
return {
tools: finalSelection.selectedTools,
concepts: finalSelection.selectedConcepts,
domains: toolsData.domains,
phases: toolsData.phases,
'domain-agnostic-software': toolsData['domain-agnostic-software']
};
}
// FIXED: De-biased AI selection prompt
private async aiSelectionWithFullData(
userQuery: string,
candidateTools: any[],
candidateConcepts: any[],
mode: string,
selectionMethod: string
) {
const modeInstruction = mode === 'workflow'
? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases. Select 15-25 tools that cover the full investigation lifecycle.'
: 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem. Select 3-8 tools that are most relevant and effective.';
// FIXED: Give the AI the complete per-tool data (all fields); only the number of items embedded in the prompt below is capped
const toolsWithFullData = candidateTools.map((tool: any) => ({
name: tool.name,
type: tool.type,
description: tool.description,
domains: tool.domains,
phases: tool.phases,
platforms: tool.platforms || [],
tags: tool.tags || [],
skillLevel: tool.skillLevel,
license: tool.license,
accessType: tool.accessType,
projectUrl: tool.projectUrl,
knowledgebase: tool.knowledgebase,
related_concepts: tool.related_concepts || [],
related_software: tool.related_software || []
}));
const conceptsWithFullData = candidateConcepts.map((concept: any) => ({
name: concept.name,
type: 'concept',
description: concept.description,
domains: concept.domains,
phases: concept.phases,
tags: concept.tags || [],
skillLevel: concept.skillLevel,
related_concepts: concept.related_concepts || [],
related_software: concept.related_software || []
}));
const prompt = `You are a DFIR expert with access to the complete forensics tool database. You need to select the most relevant tools and concepts for this specific query.
SELECTION METHOD: ${selectionMethod}
${selectionMethod === 'embeddings_candidates' ?
'These tools were pre-filtered by vector similarity, so they are already relevant. Your job is to select the BEST ones from this relevant set.' :
'You have access to the full tool database. Select the most relevant tools for the query.'}
${modeInstruction}
USER QUERY: "${userQuery}"
CRITICAL SELECTION PRINCIPLES:
1. **CONTEXT OVER POPULARITY**: Don't default to "famous" tools like Volatility, Wireshark, or Autopsy just because they're well-known. Choose based on SPECIFIC scenario needs.
2. **METHODOLOGY vs SOFTWARE**:
- For RAPID/URGENT scenarios → Prioritize METHODS and rapid response approaches
- For TIME-CRITICAL incidents → Choose triage methods over deep analysis tools
- For COMPREHENSIVE analysis → Then consider detailed software tools
- METHODS (type: "method") are often better than SOFTWARE for procedural guidance
3. **SCENARIO-SPECIFIC LOGIC**:
- "Rapid/Quick/Urgent/Triage" scenarios → Rapid Incident Response and Triage METHOD > Volatility
- "Industrial/SCADA/ICS" scenarios → Specialized ICS tools > generic network tools
- "Mobile/Android/iOS" scenarios → Mobile-specific tools > desktop forensics tools
- "Memory analysis needed urgently" → Quick memory tools/methods > comprehensive Volatility analysis
4. **AVOID TOOL BIAS**:
- Volatility is NOT always the answer for memory analysis
- Wireshark is NOT always the answer for network analysis
- Autopsy is NOT always the answer for disk analysis
- Consider lighter, faster, more appropriate alternatives
AVAILABLE TOOLS (with complete data):
${JSON.stringify(toolsWithFullData.slice(0, 30), null, 2)}
AVAILABLE CONCEPTS (with complete data):
${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}
ANALYSIS INSTRUCTIONS:
1. Read the FULL description of each tool/concept
2. Consider ALL tags, platforms, related tools, and metadata
3. **MATCH URGENCY LEVEL**: Rapid scenarios need rapid methods, not deep analysis tools
4. **MATCH SPECIFICITY**: Specialized scenarios need specialized tools, not generic ones
5. **CONSIDER TYPE**: Methods provide procedural guidance, software provides technical capability
6. For SCADA/ICS queries: prioritize specialized ICS tools over generic network tools
7. For mobile queries: prioritize mobile-specific tools over desktop tools
8. For rapid/urgent queries: prioritize methodology and triage approaches
BIAS PREVENTION:
- If query mentions "rapid", "quick", "urgent", "triage" → Strongly favor METHODS over deep analysis SOFTWARE
- If query mentions specific technologies (SCADA, Android, etc.) → Strongly favor specialized tools
- Don't recommend Volatility unless deep memory analysis is specifically needed AND time allows
- Don't recommend generic tools when specialized ones are available
- Consider the SKILL LEVEL and TIME CONSTRAINTS implied by the query
Select the most relevant items (max ${this.maxSelectedItems} total).
Respond with ONLY this JSON format:
{
"selectedTools": ["Tool Name 1", "Tool Name 2", ...],
"selectedConcepts": ["Concept Name 1", "Concept Name 2", ...],
"reasoning": "Detailed explanation of why these specific tools were selected for this query, addressing why certain popular tools were NOT selected if they were inappropriate for the scenario context"
}`;
try {
const response = await this.callAI(prompt, 2500); // More tokens for bias prevention logic
const result = this.safeParseJSON(response, null);
if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
console.error('[IMPROVED PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
throw new Error('AI selection failed to return valid tool selection');
}
const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
if (totalSelected === 0) {
console.error('[IMPROVED PIPELINE] AI selection returned no tools');
throw new Error('AI selection returned empty selection');
}
console.log(`[IMPROVED PIPELINE] AI selected: ${result.selectedTools.length} tools, ${result.selectedConcepts.length} concepts`);
console.log(`[IMPROVED PIPELINE] AI reasoning: ${result.reasoning}`);
// Return the actual tool/concept objects
const selectedTools = candidateTools.filter(tool => result.selectedTools.includes(tool.name));
const selectedConcepts = candidateConcepts.filter(concept => result.selectedConcepts.includes(concept.name));
console.log(`[IMPROVED PIPELINE] Final selection: ${selectedTools.length} tools with bias prevention applied`);
return {
selectedTools,
selectedConcepts
};
} catch (error) {
console.error('[IMPROVED PIPELINE] AI selection failed:', error);
// Emergency fallback with bias awareness
console.log('[IMPROVED PIPELINE] Using emergency keyword-based selection');
return this.emergencyKeywordSelection(userQuery, candidateTools, candidateConcepts, mode);
}
}
private emergencyKeywordSelection(userQuery: string, candidateTools: any[], candidateConcepts: any[], mode: string) {
const queryLower = userQuery.toLowerCase();
const keywords = queryLower.split(/\s+/).filter(word => word.length > 3);
// Score tools based on keyword matches in full data
const scoredTools = candidateTools.map(tool => {
const toolText = (
tool.name + ' ' +
tool.description + ' ' +
(tool.tags || []).join(' ') + ' ' +
(tool.platforms || []).join(' ') + ' ' +
(tool.domains || []).join(' ')
).toLowerCase();
const score = keywords.reduce((acc, keyword) => {
return acc + (toolText.includes(keyword) ? 1 : 0);
}, 0);
return { tool, score };
}).filter(item => item.score > 0)
.sort((a, b) => b.score - a.score);
const maxTools = mode === 'workflow' ? 20 : 8;
const selectedTools = scoredTools.slice(0, maxTools).map(item => item.tool);
console.log(`[IMPROVED PIPELINE] Emergency selection: ${selectedTools.length} tools, keywords: ${keywords.slice(0, 5).join(', ')}`);
return {
selectedTools,
selectedConcepts: candidateConcepts.slice(0, 3)
};
}
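// Scoring example (illustrative): for the query "rapid android triage" the keywords are
// ["rapid", "android", "triage"] (only words longer than 3 characters survive the filter); a tool
// whose combined name, description, tags, platforms and domains text contains "android" and
// "triage" scores 2 and outranks a tool matching only one keyword.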
private async delay(ms: number): Promise<void> {
return new Promise(resolve => setTimeout(resolve, ms));
}
private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens: number = 300): Promise<MicroTaskResult> {
const startTime = Date.now();
// FIXED: Build context prompt with token management
let contextPrompt = prompt;
if (context.contextHistory.length > 0) {
const contextSection = `BISHERIGE ANALYSE:\n${context.contextHistory.join('\n\n')}\n\nAKTUELLE AUFGABE:\n`;
const combinedPrompt = contextSection + prompt;
// Check if combined prompt exceeds limits
if (this.estimateTokens(combinedPrompt) <= this.maxPromptTokens) {
contextPrompt = combinedPrompt;
} else {
console.warn('[AI PIPELINE] Context too long, using prompt only');
// Could implement smarter context truncation here
}
}
try {
const response = await this.callAI(contextPrompt, maxTokens);
return {
taskType: 'micro-task',
content: response.trim(),
processingTimeMs: Date.now() - startTime,
success: true
};
} catch (error) {
return {
taskType: 'micro-task',
content: '',
processingTimeMs: Date.now() - startTime,
success: false,
error: error instanceof Error ? error.message : String(error)
};
}
}
private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
const isWorkflow = context.mode === 'workflow';
const prompt = `Sie sind ein erfahrener DFIR-Experte. Analysieren Sie das folgende ${isWorkflow ? 'forensische Szenario' : 'technische Problem'}.
${isWorkflow ? 'FORENSISCHES SZENARIO' : 'TECHNISCHES PROBLEM'}: "${context.userQuery}"
Führen Sie eine systematische ${isWorkflow ? 'Szenario-Analyse' : 'Problem-Analyse'} durch und berücksichtigen Sie dabei:
${isWorkflow ?
`- Angriffsvektoren und Bedrohungsmodellierung nach MITRE ATT&CK
- Betroffene Systeme und kritische Infrastrukturen
- Zeitkritische Faktoren und Beweiserhaltung
- Forensische Artefakte und Datenquellen` :
`- Spezifische forensische Herausforderungen
- Verfügbare Datenquellen und deren Integrität
- Methodische Anforderungen für rechtssichere Analyse`
}
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählungen oder Markdown-Formatierung. Maximum 150 Wörter.`;
const result = await this.callMicroTaskAI(prompt, context, 220);
if (result.success) {
if (isWorkflow) {
context.scenarioAnalysis = result.content;
} else {
context.problemAnalysis = result.content;
}
// FIXED: Use new context management
this.addToContextHistory(context, `${isWorkflow ? 'Szenario' : 'Problem'}-Analyse: ${result.content.slice(0, 200)}...`);
}
return result;
}
private async generateApproach(context: AnalysisContext): Promise<MicroTaskResult> {
const isWorkflow = context.mode === 'workflow';
const prompt = `Basierend auf der Analyse entwickeln Sie einen fundierten ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} nach NIST SP 800-86 Methodik.
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
Entwickeln Sie einen systematischen ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} unter Berücksichtigung von:
${isWorkflow ?
`- Triage-Prioritäten nach forensischer Dringlichkeit
- Phasenabfolge nach NIST-Methodik
- Kontaminationsvermeidung und forensische Isolierung` :
`- Methodik-Auswahl nach wissenschaftlichen Kriterien
- Validierung und Verifizierung der gewählten Ansätze
- Integration in bestehende forensische Workflows`
}
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 150 Wörter.`;
const result = await this.callMicroTaskAI(prompt, context, 220);
if (result.success) {
context.investigationApproach = result.content;
this.addToContextHistory(context, `${isWorkflow ? 'Untersuchungs' : 'Lösungs'}ansatz: ${result.content.slice(0, 200)}...`);
}
return result;
}
private async generateCriticalConsiderations(context: AnalysisContext): Promise<MicroTaskResult> {
const isWorkflow = context.mode === 'workflow';
const prompt = `Identifizieren Sie ${isWorkflow ? 'kritische forensische Überlegungen' : 'wichtige methodische Voraussetzungen'} für diesen Fall.
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
Berücksichtigen Sie folgende forensische Aspekte:
${isWorkflow ?
`- Time-sensitive evidence preservation
- Chain of custody requirements und rechtliche Verwertbarkeit
- Incident containment vs. evidence preservation Dilemma
- Privacy- und Compliance-Anforderungen` :
`- Tool-Validierung und Nachvollziehbarkeit
- False positive/negative Risiken bei der gewählten Methodik
- Qualifikationsanforderungen für die Durchführung
- Dokumentations- und Reporting-Standards`
}
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.`;
const result = await this.callMicroTaskAI(prompt, context, 180);
if (result.success) {
context.criticalConsiderations = result.content;
this.addToContextHistory(context, `Kritische Überlegungen: ${result.content.slice(0, 200)}...`);
}
return result;
}
private async selectToolsForPhase(context: AnalysisContext, phase: any): Promise<MicroTaskResult> {
const phaseTools = context.filteredData.tools.filter((tool: any) =>
tool.phases && tool.phases.includes(phase.id)
);
if (phaseTools.length === 0) {
return {
taskType: 'tool-selection',
content: JSON.stringify([]),
processingTimeMs: 0,
success: true
};
}
const prompt = `Wählen Sie 2-3 Methoden/Tools für die Phase "${phase.name}" basierend auf objektiven, fallbezogenen Kriterien.
SZENARIO: "${context.userQuery}"
VERFÜGBARE TOOLS FÜR ${phase.name.toUpperCase()}:
${phaseTools.map((tool: any) => `- ${tool.name}: ${tool.description.slice(0, 100)}...`).join('\n')}
Wählen Sie Methoden/Tools nach forensischen Kriterien aus:
- Court admissibility und Chain of Custody Kompatibilität
- Integration in forensische Standard-Workflows
- Reproduzierbarkeit und Dokumentationsqualität
- Objektivität
Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
[
{
"toolName": "Exakter Methoden/Tool-Name",
"priority": "high|medium|low",
"justification": "Objektive Begründung warum diese Methode/Tool für das spezifische Szenario besser geeignet ist"
}
]`;
const result = await this.callMicroTaskAI(prompt, context, 450);
if (result.success) {
// FIXED: Safe JSON parsing with validation
const selections = this.safeParseJSON(result.content, []);
if (Array.isArray(selections)) {
const validSelections = selections.filter((sel: any) =>
sel.toolName && phaseTools.some((tool: any) => tool.name === sel.toolName)
);
validSelections.forEach((sel: any) => {
const tool = phaseTools.find((t: any) => t.name === sel.toolName);
if (tool) {
// FIXED: Use deduplication helper
this.addToolToSelection(context, tool, phase.id, sel.priority, sel.justification);
}
});
}
}
return result;
}
private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
const prompt = `Bewerten Sie diese Methode/Tool fallbezogen für das spezifische Problem nach forensischen Qualitätskriterien.
PROBLEM: "${context.userQuery}"
TOOL: ${tool.name}
BESCHREIBUNG: ${tool.description}
PLATTFORMEN: ${tool.platforms?.join(', ') || 'N/A'}
SKILL LEVEL: ${tool.skillLevel}
Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
{
"suitability_score": "high|medium|low",
"detailed_explanation": "Detaillierte forensische Begründung warum diese Methode/Tool das Problem löst",
"implementation_approach": "Konkrete methodische Schritte zur korrekten Anwendung für dieses spezifische Problem",
"pros": ["Forensischer Vorteil 1", "Validierter Vorteil 2"],
"cons": ["Methodische Limitation 1", "Potenzielle Schwäche 2"],
"alternatives": "Alternative Ansätze falls diese Methode/Tool nicht optimal ist"
}`;
const result = await this.callMicroTaskAI(prompt, context, 650);
if (result.success) {
// FIXED: Safe JSON parsing
const evaluation = this.safeParseJSON(result.content, {
suitability_score: 'medium',
detailed_explanation: 'Evaluation failed',
implementation_approach: '',
pros: [],
cons: [],
alternatives: ''
});
// FIXED: Use deduplication helper
this.addToolToSelection(context, {
...tool,
evaluation: {
...evaluation,
rank
}
}, 'evaluation', evaluation.suitability_score);
}
return result;
}
private async selectBackgroundKnowledge(context: AnalysisContext): Promise<MicroTaskResult> {
const availableConcepts = context.filteredData.concepts;
if (availableConcepts.length === 0) {
return {
taskType: 'background-knowledge',
content: JSON.stringify([]),
processingTimeMs: 0,
success: true
};
}
const selectedToolNames = context.selectedTools?.map(st => st.tool.name) || [];
const prompt = `Wählen Sie relevante forensische Konzepte für das Verständnis der empfohlenen Methodik.
${context.mode === 'workflow' ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
EMPFOHLENE TOOLS: ${selectedToolNames.join(', ')}
VERFÜGBARE KONZEPTE:
${availableConcepts.slice(0, 15).map((concept: any) => `- ${concept.name}: ${concept.description.slice(0, 80)}...`).join('\n')}
Wählen Sie 2-4 Konzepte aus, die für das Verständnis der forensischen Methodik essentiell sind.
Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
[
{
"conceptName": "Exakter Konzept-Name",
"relevance": "Forensische Relevanz: Warum dieses Konzept für das Verständnis der Methodik kritisch ist"
}
]`;
const result = await this.callMicroTaskAI(prompt, context, 400);
if (result.success) {
// FIXED: Safe JSON parsing
const selections = this.safeParseJSON(result.content, []);
if (Array.isArray(selections)) {
context.backgroundKnowledge = selections.filter((sel: any) =>
sel.conceptName && availableConcepts.some((concept: any) => concept.name === sel.conceptName)
).map((sel: any) => ({
concept: availableConcepts.find((c: any) => c.name === sel.conceptName),
relevance: sel.relevance
}));
}
}
return result;
}
private async generateFinalRecommendations(context: AnalysisContext): Promise<MicroTaskResult> {
const isWorkflow = context.mode === 'workflow';
const prompt = isWorkflow ?
`Erstellen Sie eine forensisch fundierte Workflow-Empfehlung basierend auf DFIR-Prinzipien.
SZENARIO: "${context.userQuery}"
AUSGEWÄHLTE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Tools ausgewählt'}
Erstellen Sie konkrete methodische Workflow-Schritte für dieses spezifische Szenario unter Berücksichtigung forensischer Best Practices, Objektivität und rechtlicher Verwertbarkeit.
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.` :
`Erstellen Sie wichtige methodische Überlegungen für die korrekte Methoden-/Tool-Anwendung.
PROBLEM: "${context.userQuery}"
EMPFOHLENE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Methoden/Tools ausgewählt'}
Geben Sie kritische methodische Überlegungen, Validierungsanforderungen und Qualitätssicherungsmaßnahmen für die korrekte Anwendung der empfohlenen Methoden/Tools.
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 100 Wörter.`;
const result = await this.callMicroTaskAI(prompt, context, 180);
return result;
}
private async callAI(prompt: string, maxTokens: number = 1000): Promise<string> {
const response = await fetch(`${this.config.endpoint}/v1/chat/completions`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${this.config.apiKey}`
},
body: JSON.stringify({
model: this.config.model,
messages: [{ role: 'user', content: prompt }],
max_tokens: maxTokens,
temperature: 0.3
})
});
if (!response.ok) {
const errorText = await response.text();
throw new Error(`AI API error: ${response.status} - ${errorText}`);
}
const data = await response.json();
const content = data.choices?.[0]?.message?.content;
if (!content) {
throw new Error('No response from AI model');
}
return content;
}
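// The configured endpoint is assumed to speak the OpenAI-compatible chat completions protocol
// (hence the /v1/chat/completions path); the part of the response consumed above looks roughly like:
//   { "choices": [ { "message": { "content": "<model answer>" } } ] }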
async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {
const startTime = Date.now();
let completedTasks = 0;
let failedTasks = 0;
console.log(`[IMPROVED PIPELINE] Starting ${mode} query processing with context continuity`);
try {
// Stage 1: Get intelligent candidates (embeddings + AI selection)
const toolsData = await getCompressedToolsDataForAI();
const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode);
// FIXED: Initialize context with proper state management
const context: AnalysisContext = {
userQuery,
mode,
filteredData,
contextHistory: [],
maxContextLength: this.maxContextTokens,
currentContextLength: 0,
seenToolNames: new Set<string>() // FIXED: Add deduplication tracking
};
console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
// MICRO-TASK SEQUENCE
// Task 1: Scenario/Problem Analysis
const analysisResult = await this.analyzeScenario(context);
if (analysisResult.success) completedTasks++; else failedTasks++;
await this.delay(this.microTaskDelay);
// Task 2: Investigation/Solution Approach
const approachResult = await this.generateApproach(context);
if (approachResult.success) completedTasks++; else failedTasks++;
await this.delay(this.microTaskDelay);
// Task 3: Critical Considerations
const considerationsResult = await this.generateCriticalConsiderations(context);
if (considerationsResult.success) completedTasks++; else failedTasks++;
await this.delay(this.microTaskDelay);
// Task 4: Tool Selection/Evaluation (mode-dependent)
if (mode === 'workflow') {
// Select tools for each phase
const phases = toolsData.phases || [];
for (const phase of phases) {
const toolSelectionResult = await this.selectToolsForPhase(context, phase);
if (toolSelectionResult.success) completedTasks++; else failedTasks++;
await this.delay(this.microTaskDelay);
}
} else {
// Evaluate top 3 tools for specific problem
const topTools = filteredData.tools.slice(0, 3);
for (let i = 0; i < topTools.length; i++) {
const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1);
if (evaluationResult.success) completedTasks++; else failedTasks++;
await this.delay(this.microTaskDelay);
}
}
// Task 5: Background Knowledge Selection
const knowledgeResult = await this.selectBackgroundKnowledge(context);
if (knowledgeResult.success) completedTasks++; else failedTasks++;
await this.delay(this.microTaskDelay);
// Task 6: Final Recommendations
const finalResult = await this.generateFinalRecommendations(context);
if (finalResult.success) completedTasks++; else failedTasks++;
// Build final recommendation
const recommendation = this.buildRecommendation(context, mode, finalResult.content);
const processingStats = {
embeddingsUsed: embeddingsService.isEnabled(),
candidatesFromEmbeddings: filteredData.tools.length, // note: count after AI selection, not the raw embedding candidate count
finalSelectedItems: (context.selectedTools?.length || 0) +
(context.backgroundKnowledge?.length || 0),
processingTimeMs: Date.now() - startTime,
microTasksCompleted: completedTasks,
microTasksFailed: failedTasks,
contextContinuityUsed: true
};
console.log(`[IMPROVED PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
console.log(`[IMPROVED PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);
return {
recommendation,
processingStats
};
} catch (error) {
console.error('[IMPROVED PIPELINE] Processing failed:', error);
throw error;
}
}
// Build recommendation (same structure but using fixed context)
private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any {
const isWorkflow = mode === 'workflow';
const base = {
[isWorkflow ? 'scenario_analysis' : 'problem_analysis']:
isWorkflow ? context.scenarioAnalysis : context.problemAnalysis,
investigation_approach: context.investigationApproach,
critical_considerations: context.criticalConsiderations,
background_knowledge: context.backgroundKnowledge?.map(bk => ({
concept_name: bk.concept.name,
relevance: bk.relevance
})) || []
};
if (isWorkflow) {
return {
...base,
recommended_tools: context.selectedTools?.map(st => ({
name: st.tool.name,
phase: st.phase,
priority: st.priority,
justification: st.justification || `Empfohlen für ${st.phase}`
})) || [],
workflow_suggestion: finalContent
};
} else {
return {
...base,
recommended_tools: context.selectedTools?.map(st => ({
name: st.tool.name,
rank: st.tool.evaluation?.rank || 1,
suitability_score: st.priority,
detailed_explanation: st.tool.evaluation?.detailed_explanation || '',
implementation_approach: st.tool.evaluation?.implementation_approach || '',
pros: st.tool.evaluation?.pros || [],
cons: st.tool.evaluation?.cons || [],
alternatives: st.tool.evaluation?.alternatives || ''
})) || [],
additional_considerations: finalContent
};
}
}
}
// Global instance
const aiPipeline = new ImprovedMicroTaskAIPipeline();
export { aiPipeline, type AnalysisResult };
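// Illustrative usage from a calling route (a sketch; the query text is made up):
//   const { recommendation, processingStats } = await aiPipeline.processQuery(
//     'Rapid triage of a compromised SCADA engineering workstation', 'workflow'
//   );
//   console.log(processingStats.microTasksCompleted, processingStats.microTasksFailed);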