Merge pull request 'embeddings-1' (#2) from embeddings-1 into main
Reviewed-on: #2
This commit is contained in:
		
						commit
						f329955c62
					
				
							
								
								
									
										84
									
								
								.env.example
									
									
									
									
									
								
							
							
						
						
									
										84
									
								
								.env.example
									
									
									
									
									
								
							@ -2,34 +2,74 @@
 | 
				
			|||||||
# ForensicPathways Environment Configuration
 | 
					# ForensicPathways Environment Configuration
 | 
				
			||||||
# ===========================================
 | 
					# ===========================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Authentication & OIDC (Required)
 | 
					# === Authentication Configuration ===
 | 
				
			||||||
AUTH_SECRET=change-this-to-a-strong-secret-key-in-production
 | 
					AUTHENTICATION_NECESSARY=false
 | 
				
			||||||
 | 
					AUTHENTICATION_NECESSARY_CONTRIBUTIONS=false
 | 
				
			||||||
 | 
					AUTHENTICATION_NECESSARY_AI=false
 | 
				
			||||||
 | 
					AUTH_SECRET=your-secret-key-change-in-production
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# OIDC Configuration (if authentication enabled)
 | 
				
			||||||
OIDC_ENDPOINT=https://your-oidc-provider.com
 | 
					OIDC_ENDPOINT=https://your-oidc-provider.com
 | 
				
			||||||
OIDC_CLIENT_ID=your-oidc-client-id
 | 
					OIDC_CLIENT_ID=your-client-id
 | 
				
			||||||
OIDC_CLIENT_SECRET=your-oidc-client-secret
 | 
					OIDC_CLIENT_SECRET=your-client-secret
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Auth Scopes - set to true in prod
 | 
					# ===================================================================
 | 
				
			||||||
AUTHENTICATION_NECESSARY_CONTRIBUTIONS=true
 | 
					# AI CONFIGURATION - Complete Reference for Improved Pipeline
 | 
				
			||||||
AUTHENTICATION_NECESSARY_AI=true
 | 
					# ===================================================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Application Configuration (Required)
 | 
					# === CORE AI ENDPOINTS & MODELS ===
 | 
				
			||||||
PUBLIC_BASE_URL=https://your-domain.com
 | 
					AI_API_ENDPOINT=https://llm.mikoshi.de
 | 
				
			||||||
NODE_ENV=production
 | 
					AI_API_KEY=sREDACTED3w
 | 
				
			||||||
 | 
					AI_MODEL='mistral/mistral-small-latest'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# AI Service Configuration (Required for AI features)
 | 
					# === IMPROVED PIPELINE: Use separate analyzer model (mistral-small is fine) ===
 | 
				
			||||||
AI_MODEL=mistral-large-latest
 | 
					AI_ANALYZER_ENDPOINT=https://llm.mikoshi.de
 | 
				
			||||||
AI_API_ENDPOINT=https://api.mistral.ai
 | 
					AI_ANALYZER_API_KEY=skREDACTEDw3w  
 | 
				
			||||||
AI_API_KEY=your-mistral-api-key
 | 
					AI_ANALYZER_MODEL='mistral/mistral-small-latest'
 | 
				
			||||||
AI_RATE_LIMIT_DELAY_MS=1000
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Git Integration (Required for contributions)
 | 
					# === EMBEDDINGS CONFIGURATION ===
 | 
				
			||||||
GIT_REPO_URL=https://git.cc24.dev/mstoeck3/forensic-pathways
 | 
					AI_EMBEDDINGS_ENABLED=true
 | 
				
			||||||
GIT_PROVIDER=gitea
 | 
					AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
 | 
				
			||||||
GIT_API_ENDPOINT=https://git.cc24.dev/api/v1
 | 
					AI_EMBEDDINGS_API_KEY=ZREDACTED3wL
 | 
				
			||||||
GIT_API_TOKEN=your-git-api-token
 | 
					AI_EMBEDDINGS_MODEL=mistral-embed
 | 
				
			||||||
 | 
					AI_EMBEDDINGS_BATCH_SIZE=20
 | 
				
			||||||
 | 
					AI_EMBEDDINGS_BATCH_DELAY_MS=1000
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# File Upload Configuration (Optional)
 | 
					# === PIPELINE: VectorIndex (HNSW) Configuration ===
 | 
				
			||||||
LOCAL_UPLOAD_PATH=./public/uploads
 | 
					AI_MAX_SELECTED_ITEMS=60                    # Tools visible to each micro-task 
 | 
				
			||||||
 | 
					AI_EMBEDDING_CANDIDATES=60                  # VectorIndex candidates (HNSW is more efficient)
 | 
				
			||||||
 | 
					AI_SIMILARITY_THRESHOLD=0.3                # Not used by VectorIndex (uses cosine distance internally)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# === MICRO-TASK CONFIGURATION ===
 | 
				
			||||||
 | 
					AI_MICRO_TASK_DELAY_MS=500                 # Delay between micro-tasks  
 | 
				
			||||||
 | 
					AI_MICRO_TASK_TIMEOUT_MS=25000             # Timeout per micro-task (increased for full context)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# === RATE LIMITING ===
 | 
				
			||||||
 | 
					AI_RATE_LIMIT_DELAY_MS=3000                # Main rate limit delay
 | 
				
			||||||
 | 
					AI_RATE_LIMIT_MAX_REQUESTS=6               # Main requests per minute (reduced - fewer but richer calls)
 | 
				
			||||||
 | 
					AI_MICRO_TASK_RATE_LIMIT=15                # Micro-task requests per minute (was 30)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# === QUEUE MANAGEMENT ===
 | 
				
			||||||
 | 
					AI_QUEUE_MAX_SIZE=50
 | 
				
			||||||
 | 
					AI_QUEUE_CLEANUP_INTERVAL_MS=300000
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# === PERFORMANCE & MONITORING ===
 | 
				
			||||||
 | 
					AI_MICRO_TASK_DEBUG=true
 | 
				
			||||||
 | 
					AI_PERFORMANCE_METRICS=true
 | 
				
			||||||
 | 
					AI_RESPONSE_CACHE_TTL_MS=3600000
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# ===================================================================
 | 
				
			||||||
 | 
					# LEGACY VARIABLES (still used but less important)
 | 
				
			||||||
 | 
					# ===================================================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
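					# These are still used by other parts of the system (duplicates of the values already set in the QUEUE MANAGEMENT and PERFORMANCE & MONITORING sections above; keep both in sync):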
 | 
				
			||||||
 | 
					AI_RESPONSE_CACHE_TTL_MS=3600000           # For caching responses
 | 
				
			||||||
 | 
					AI_QUEUE_MAX_SIZE=50                       # Queue management
 | 
				
			||||||
 | 
					AI_QUEUE_CLEANUP_INTERVAL_MS=300000       # Queue cleanup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# === Application Configuration ===
 | 
				
			||||||
 | 
					PUBLIC_BASE_URL=http://localhost:4321
 | 
				
			||||||
 | 
					NODE_ENV=development
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Nextcloud Integration (Optional)
 | 
					# Nextcloud Integration (Optional)
 | 
				
			||||||
NEXTCLOUD_ENDPOINT=https://your-nextcloud.com
 | 
					NEXTCLOUD_ENDPOINT=https://your-nextcloud.com
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@ -85,3 +85,4 @@ temp/
 | 
				
			|||||||
.astro/data-store.json
 | 
					.astro/data-store.json
 | 
				
			||||||
.astro/content.d.ts
 | 
					.astro/content.d.ts
 | 
				
			||||||
prompt.md
 | 
					prompt.md
 | 
				
			||||||
 | 
					data/embeddings.json
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										358
									
								
								RAG-Roadmap.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										358
									
								
								RAG-Roadmap.md
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,358 @@
 | 
				
			|||||||
 | 
					# Forensic-Grade RAG Implementation Roadmap
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Context & Current State Analysis
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You have access to a forensic tools recommendation system built with:
 | 
				
			||||||
 | 
					- **Embeddings-based retrieval** (src/utils/embeddings.ts)
 | 
				
			||||||
 | 
					- **Multi-stage AI pipeline** (src/utils/aiPipeline.ts) 
 | 
				
			||||||
 | 
					- **Micro-task processing** for detailed analysis
 | 
				
			||||||
 | 
					- **Rate limiting and queue management** (src/utils/rateLimitedQueue.ts)
 | 
				
			||||||
 | 
					- **YAML-based tool database** (src/data/tools.yaml)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					**Current Architecture**: Basic RAG (Retrieve → AI Selection → Micro-task Generation)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					**Target Architecture**: Forensic-Grade RAG with transparency, objectivity, and reproducibility
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Implementation Roadmap
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### PHASE 1: Configuration Externalization & AI Architecture Enhancement (Weeks 1-2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 1.1 Complete Configuration Externalization
 | 
				
			||||||
 | 
					**Objective**: Remove all hard-coded values from codebase (except AI prompts)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					**Tasks**:
 | 
				
			||||||
 | 
					1. **Create comprehensive configuration schema** in `src/config/`
 | 
				
			||||||
 | 
					   - `forensic-scoring.yaml` - All scoring criteria, weights, thresholds
 | 
				
			||||||
 | 
					   - `ai-models.yaml` - AI model configurations and routing
 | 
				
			||||||
 | 
					   - `system-parameters.yaml` - Rate limits, queue settings, processing parameters
 | 
				
			||||||
 | 
					   - `validation-criteria.yaml` - Expert validation rules, bias detection parameters
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. **Implement configuration loader** (`src/utils/configLoader.ts`)
 | 
				
			||||||
 | 
					   - Hot-reload capability for configuration changes
 | 
				
			||||||
 | 
					   - Environment-specific overrides (dev/staging/prod)
 | 
				
			||||||
 | 
					   - Configuration validation and schema enforcement
 | 
				
			||||||
 | 
					   - Default fallbacks for missing values
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					3. **Audit existing codebase** for hard-coded values:
 | 
				
			||||||
 | 
					   - Search for literal numbers, strings, arrays in TypeScript files
 | 
				
			||||||
 | 
					   - Extract to configuration files with meaningful names
 | 
				
			||||||
 | 
					   - Ensure all thresholds (similarity scores, rate limits, token counts) are configurable
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 1.2 Dual AI Model Architecture Implementation
 | 
				
			||||||
 | 
					**Objective**: Implement large + small model strategy for optimal cost/performance
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					**Tasks**:
 | 
				
			||||||
 | 
					1. **Extend environment configuration**:
 | 
				
			||||||
 | 
					   ```
 | 
				
			||||||
 | 
					   # Strategic Analysis Model (Large, Few Tokens)
 | 
				
			||||||
 | 
					   AI_STRATEGIC_ENDPOINT=
 | 
				
			||||||
 | 
					   AI_STRATEGIC_API_KEY=
 | 
				
			||||||
 | 
					   AI_STRATEGIC_MODEL=mistral-large-latest
 | 
				
			||||||
 | 
					   AI_STRATEGIC_MAX_TOKENS=500
 | 
				
			||||||
 | 
					   AI_STRATEGIC_CONTEXT_WINDOW=32000
 | 
				
			||||||
 | 
					   
 | 
				
			||||||
 | 
					   # Content Generation Model (Small, Many Tokens)  
 | 
				
			||||||
 | 
					   AI_CONTENT_ENDPOINT=
 | 
				
			||||||
 | 
					   AI_CONTENT_API_KEY=
 | 
				
			||||||
 | 
					   AI_CONTENT_MODEL=mistral-small-latest
 | 
				
			||||||
 | 
					   AI_CONTENT_MAX_TOKENS=2000
 | 
				
			||||||
 | 
					   AI_CONTENT_CONTEXT_WINDOW=8000
 | 
				
			||||||
 | 
					   ```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. **Create AI router** (`src/utils/aiRouter.ts`):
 | 
				
			||||||
 | 
					   - Route different task types to appropriate models
 | 
				
			||||||
 | 
					   - **Strategic tasks** → Large model: tool selection, bias analysis, methodology decisions
 | 
				
			||||||
 | 
					   - **Content tasks** → Small model: descriptions, explanations, micro-task outputs
 | 
				
			||||||
 | 
					   - Automatic fallback logic if primary model fails
 | 
				
			||||||
 | 
					   - Usage tracking and cost optimization
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					3. **Update aiPipeline.ts**:
 | 
				
			||||||
 | 
					   - Replace single `callAI()` method with task-specific methods
 | 
				
			||||||
 | 
					   - Implement intelligent routing based on task complexity
 | 
				
			||||||
 | 
					   - Add token estimation for optimal model selection
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### PHASE 2: Evidence-Based Scoring Framework (Weeks 3-5)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 2.1 Forensic Scoring Engine Implementation
 | 
				
			||||||
 | 
					**Objective**: Replace subjective AI selection with objective, measurable criteria
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					**Tasks**:
 | 
				
			||||||
 | 
					1. **Create scoring framework** (`src/scoring/ForensicScorer.ts`):
 | 
				
			||||||
 | 
					   ```typescript
 | 
				
			||||||
 | 
					   interface ScoringCriterion {
 | 
				
			||||||
 | 
					     name: string;
 | 
				
			||||||
 | 
					     weight: number;
 | 
				
			||||||
 | 
					     methodology: string;
 | 
				
			||||||
 | 
					     dataSources: string[];
 | 
				
			||||||
 | 
					     calculator: (tool: Tool, scenario: Scenario) => Promise<CriterionScore>;
 | 
				
			||||||
 | 
					   }
 | 
				
			||||||
 | 
					   
 | 
				
			||||||
 | 
					   interface CriterionScore {
 | 
				
			||||||
 | 
					     value: number;           // 0-100
 | 
				
			||||||
 | 
					     confidence: number;      // 0-100  
 | 
				
			||||||
 | 
					     evidence: Evidence[];
 | 
				
			||||||
 | 
					     lastUpdated: Date;
 | 
				
			||||||
 | 
					   }
 | 
				
			||||||
 | 
					   ```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. **Implement core scoring criteria**:
 | 
				
			||||||
 | 
					   - **Court Admissibility Scorer**: Based on legal precedent database
 | 
				
			||||||
 | 
					   - **Scientific Validity Scorer**: Based on peer-reviewed research citations
 | 
				
			||||||
 | 
					   - **Methodology Alignment Scorer**: NIST SP 800-86 compliance assessment
 | 
				
			||||||
 | 
					   - **Expert Consensus Scorer**: Practitioner survey data integration
 | 
				
			||||||
 | 
					   - **Error Rate Scorer**: Known false positive/negative rates
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					3. **Build evidence provenance system**:
 | 
				
			||||||
 | 
					   - Track source of every score component
 | 
				
			||||||
 | 
					   - Maintain citation database for all claims
 | 
				
			||||||
 | 
					   - Version control for scoring methodologies
 | 
				
			||||||
 | 
					   - Automatic staleness detection for outdated evidence
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 2.2 Deterministic Core Implementation  
 | 
				
			||||||
 | 
					**Objective**: Ensure reproducible results for identical inputs
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					**Tasks**:
 | 
				
			||||||
 | 
					1. **Implement deterministic pipeline** (`src/analysis/DeterministicAnalyzer.ts`):
 | 
				
			||||||
 | 
					   - Rule-based scenario classification (SCADA/Mobile/Network/etc.)
 | 
				
			||||||
 | 
					   - Mathematical scoring combination (weighted averages, not AI decisions)
 | 
				
			||||||
 | 
					   - Consistent tool ranking algorithms
 | 
				
			||||||
 | 
					   - Reproducibility validation tests
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. **Add AI enhancement layer**:
 | 
				
			||||||
 | 
					   - AI provides explanations, NOT decisions
 | 
				
			||||||
 | 
					   - AI generates workflow descriptions based on deterministic selections
 | 
				
			||||||
 | 
					   - AI creates contextual advice around objective tool choices
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### PHASE 3: Transparency & Audit Trail System (Weeks 4-6)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 3.1 Complete Audit Trail Implementation
 | 
				
			||||||
 | 
					**Objective**: Track every decision with forensic-grade documentation
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					**Tasks**:
 | 
				
			||||||
 | 
					1. **Create audit framework** (`src/audit/AuditTrail.ts`):
 | 
				
			||||||
 | 
					   ```typescript
 | 
				
			||||||
 | 
					   interface ForensicAuditTrail {
 | 
				
			||||||
 | 
					     queryId: string;
 | 
				
			||||||
 | 
					     userQuery: string;
 | 
				
			||||||
 | 
					     processingSteps: AuditStep[];
 | 
				
			||||||
 | 
					     finalRecommendation: RecommendationWithEvidence;
 | 
				
			||||||
 | 
					     reproducibilityHash: string;
 | 
				
			||||||
 | 
					     validationStatus: ValidationStatus;
 | 
				
			||||||
 | 
					   }
 | 
				
			||||||
 | 
					   
 | 
				
			||||||
 | 
					   interface AuditStep {
 | 
				
			||||||
 | 
					     stepName: string;
 | 
				
			||||||
 | 
					     input: any;
 | 
				
			||||||
 | 
					     methodology: string;
 | 
				
			||||||
 | 
					     output: any;
 | 
				
			||||||
 | 
					     evidence: Evidence[];
 | 
				
			||||||
 | 
					     confidence: number;
 | 
				
			||||||
 | 
					     processingTime: number;
 | 
				
			||||||
 | 
					     modelUsed?: string;
 | 
				
			||||||
 | 
					   }
 | 
				
			||||||
 | 
					   ```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. **Implement evidence citation system**:
 | 
				
			||||||
 | 
					   - Automatic citation generation for all claims
 | 
				
			||||||
 | 
					   - Link to source standards (NIST, ISO, RFC)
 | 
				
			||||||
 | 
					   - Reference scientific papers for methodology choices
 | 
				
			||||||
 | 
					   - Track expert validation contributors
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					3. **Build explanation generator**:
 | 
				
			||||||
 | 
					   - Human-readable reasoning for every recommendation
 | 
				
			||||||
 | 
					   - "Why this tool" and "Why not alternatives" explanations
 | 
				
			||||||
 | 
					   - Confidence level communication
 | 
				
			||||||
 | 
					   - Uncertainty quantification
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 3.2 Bias Detection & Mitigation System
 | 
				
			||||||
 | 
					**Objective**: Actively detect and correct recommendation biases
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					**Tasks**:
 | 
				
			||||||
 | 
					1. **Implement bias detection** (`src/bias/BiasDetector.ts`):
 | 
				
			||||||
 | 
					   - **Popularity bias**: Over-recommendation of well-known tools
 | 
				
			||||||
 | 
					   - **Availability bias**: Preference for easily accessible tools
 | 
				
			||||||
 | 
					   - **Recency bias**: Over-weighting of newest tools
 | 
				
			||||||
 | 
					   - **Cultural bias**: Platform or methodology preferences
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. **Create mitigation strategies**:
 | 
				
			||||||
 | 
					   - Automatic bias adjustment algorithms
 | 
				
			||||||
 | 
					   - Diversity requirements for recommendations
 | 
				
			||||||
 | 
					   - Fairness metrics across tool categories
 | 
				
			||||||
 | 
					   - Bias reporting in audit trails
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### PHASE 4: Expert Validation & Learning System (Weeks 6-8)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 4.1 Expert Review Integration
 | 
				
			||||||
 | 
					**Objective**: Enable forensic experts to validate and improve recommendations
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					**Tasks**:
 | 
				
			||||||
 | 
					1. **Build expert validation interface** (`src/validation/ExpertReview.ts`):
 | 
				
			||||||
 | 
					   - Structured feedback collection from forensic practitioners
 | 
				
			||||||
 | 
					   - Agreement/disagreement tracking with detailed reasoning
 | 
				
			||||||
 | 
					   - Expert consensus building over time
 | 
				
			||||||
 | 
					   - Minority opinion preservation
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. **Implement validation loop**:
 | 
				
			||||||
 | 
					   - Flag recommendations requiring expert review
 | 
				
			||||||
 | 
					   - Track expert validation rates and patterns
 | 
				
			||||||
 | 
					   - Update scoring based on real-world feedback
 | 
				
			||||||
 | 
					   - Methodology improvement based on expert input
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 4.2 Real-World Case Learning
 | 
				
			||||||
 | 
					**Objective**: Learn from actual forensic investigations
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					**Tasks**:
 | 
				
			||||||
 | 
					1. **Create case study integration** (`src/learning/CaseStudyLearner.ts`):
 | 
				
			||||||
 | 
					   - Anonymous case outcome tracking
 | 
				
			||||||
 | 
					   - Tool effectiveness measurement in real scenarios
 | 
				
			||||||
 | 
					   - Methodology success/failure analysis
 | 
				
			||||||
 | 
					   - Continuous improvement based on field results
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. **Implement feedback loops**:
 | 
				
			||||||
 | 
					   - Post-case recommendation validation
 | 
				
			||||||
 | 
					   - Tool performance tracking in actual investigations
 | 
				
			||||||
 | 
					   - Methodology refinement based on outcomes
 | 
				
			||||||
 | 
					   - Success rate improvement over time
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### PHASE 5: Advanced Features & Scientific Rigor (Weeks 7-10)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 5.1 Confidence & Uncertainty Quantification
 | 
				
			||||||
 | 
					**Objective**: Provide scientific confidence levels for all recommendations
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					**Tasks**:
 | 
				
			||||||
 | 
					1. **Implement uncertainty quantification** (`src/uncertainty/ConfidenceCalculator.ts`):
 | 
				
			||||||
 | 
					   - Statistical confidence intervals for scores
 | 
				
			||||||
 | 
					   - Uncertainty propagation through scoring pipeline
 | 
				
			||||||
 | 
					   - Risk assessment for recommendation reliability
 | 
				
			||||||
 | 
					   - Alternative recommendation ranking
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. **Add fallback recommendation system**:
 | 
				
			||||||
 | 
					   - Multiple ranked alternatives for each recommendation
 | 
				
			||||||
 | 
					   - Contingency planning for tool failures
 | 
				
			||||||
 | 
					   - Risk-based recommendation portfolios
 | 
				
			||||||
 | 
					   - Sensitivity analysis for critical decisions
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 5.2 Reproducibility Testing Framework
 | 
				
			||||||
 | 
					**Objective**: Ensure consistent results across time and implementations
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					**Tasks**:
 | 
				
			||||||
 | 
					1. **Build reproducibility testing** (`src/testing/ReproducibilityTester.ts`):
 | 
				
			||||||
 | 
					   - Automated consistency validation
 | 
				
			||||||
 | 
					   - Inter-rater reliability testing
 | 
				
			||||||
 | 
					   - Cross-temporal stability analysis
 | 
				
			||||||
 | 
					   - Version control for methodology changes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. **Implement quality assurance**:
 | 
				
			||||||
 | 
					   - Continuous integration for reproducibility
 | 
				
			||||||
 | 
					   - Regression testing for methodology changes
 | 
				
			||||||
 | 
					   - Performance monitoring for consistency
 | 
				
			||||||
 | 
					   - Alert system for unexpected variations
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### PHASE 6: Integration & Production Readiness (Weeks 9-12)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 6.1 System Integration
 | 
				
			||||||
 | 
					**Objective**: Integrate all forensic-grade components seamlessly
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					**Tasks**:
 | 
				
			||||||
 | 
					1. **Update existing components**:
 | 
				
			||||||
 | 
					   - Modify `aiPipeline.ts` to use new scoring framework
 | 
				
			||||||
 | 
					   - Update `embeddings.ts` with evidence tracking
 | 
				
			||||||
 | 
					   - Enhance `rateLimitedQueue.ts` with audit capabilities
 | 
				
			||||||
 | 
					   - Refactor `query.ts` API to return audit trails
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. **Performance optimization**:
 | 
				
			||||||
 | 
					   - Caching strategies for expensive evidence lookups
 | 
				
			||||||
 | 
					   - Parallel processing for scoring criteria
 | 
				
			||||||
 | 
					   - Efficient storage for audit trails
 | 
				
			||||||
 | 
					   - Load balancing for dual AI models
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#### 6.2 Production Features
 | 
				
			||||||
 | 
					**Objective**: Make system ready for professional forensic use
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					**Tasks**:
 | 
				
			||||||
 | 
					1. **Add professional features**:
 | 
				
			||||||
 | 
					   - Export recommendations to forensic report formats
 | 
				
			||||||
 | 
					   - Integration with existing forensic workflows
 | 
				
			||||||
 | 
					   - Batch processing for multiple scenarios
 | 
				
			||||||
 | 
					   - API endpoints for external tool integration
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					2. **Implement monitoring & maintenance**:
 | 
				
			||||||
 | 
					   - Health checks for all system components
 | 
				
			||||||
 | 
					   - Performance monitoring for response times
 | 
				
			||||||
 | 
					   - Error tracking and alerting
 | 
				
			||||||
 | 
					   - Automatic system updates for new evidence
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Technical Implementation Guidelines
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### Configuration Management
 | 
				
			||||||
 | 
					- Use YAML files for human-readable configuration
 | 
				
			||||||
 | 
					- Implement JSON Schema validation for all config files
 | 
				
			||||||
 | 
					- Support environment variable overrides
 | 
				
			||||||
 | 
					- Hot-reload for development, restart for production changes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### AI Model Routing Strategy
 | 
				
			||||||
 | 
					```typescript
 | 
				
			||||||
 | 
					// Task Classification for Model Selection
 | 
				
			||||||
 | 
					const AI_TASK_ROUTING = {
 | 
				
			||||||
 | 
					  strategic: ['tool-selection', 'bias-analysis', 'methodology-decisions'],
 | 
				
			||||||
 | 
					  content: ['descriptions', 'explanations', 'micro-tasks', 'workflows']
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Cost Optimization Logic
 | 
				
			||||||
 | 
					if (taskComplexity === 'high' && responseTokens < 500) {
 | 
				
			||||||
 | 
					  useModel = 'large';
 | 
				
			||||||
 | 
					} else if (taskComplexity === 'low' && responseTokens > 1000) {
 | 
				
			||||||
 | 
					  useModel = 'small';
 | 
				
			||||||
 | 
					} else {
 | 
				
			||||||
 | 
					  useModel = config.defaultModel;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### Evidence Database Structure
 | 
				
			||||||
 | 
					```typescript
 | 
				
			||||||
 | 
					interface EvidenceSource {
 | 
				
			||||||
 | 
					  type: 'standard' | 'paper' | 'case-law' | 'expert-survey';
 | 
				
			||||||
 | 
					  citation: string;
 | 
				
			||||||
 | 
					  reliability: number;
 | 
				
			||||||
 | 
					  lastValidated: Date;
 | 
				
			||||||
 | 
					  content: string;
 | 
				
			||||||
 | 
					  metadata: Record<string, any>;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					```
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### Quality Assurance Requirements
 | 
				
			||||||
 | 
					- All scoring criteria must have documented methodologies
 | 
				
			||||||
 | 
					- Every recommendation must include confidence levels
 | 
				
			||||||
 | 
					- All AI-generated content must be marked as such
 | 
				
			||||||
 | 
					- Reproducibility tests must pass with >95% consistency
 | 
				
			||||||
 | 
					- Expert validation rate must exceed 80% for production use
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Success Metrics
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### Forensic Quality Metrics
 | 
				
			||||||
 | 
					- **Transparency**: 100% of decisions traceable to evidence
 | 
				
			||||||
 | 
					- **Objectivity**: <5% variance in scoring between runs
 | 
				
			||||||
 | 
					- **Reproducibility**: >95% identical results for identical inputs
 | 
				
			||||||
 | 
					- **Expert Agreement**: >80% expert validation rate
 | 
				
			||||||
 | 
					- **Bias Reduction**: <10% bias score across all categories
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### Performance Metrics  
 | 
				
			||||||
 | 
					- **Response Time**: <30 seconds for workflow recommendations
 | 
				
			||||||
 | 
					- **Accuracy**: >90% real-world case validation success
 | 
				
			||||||
 | 
					- **Coverage**: Support for >95% of common forensic scenarios
 | 
				
			||||||
 | 
					- **Reliability**: <1% system error rate
 | 
				
			||||||
 | 
					- **Cost Efficiency**: >50% cost reduction vs. single large model
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Risk Mitigation
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### Technical Risks
 | 
				
			||||||
 | 
					- **AI Model Failures**: Implement robust fallback mechanisms
 | 
				
			||||||
 | 
					- **Configuration Errors**: Comprehensive validation and testing
 | 
				
			||||||
 | 
					- **Performance Issues**: Load testing and optimization
 | 
				
			||||||
 | 
					- **Data Corruption**: Backup and recovery procedures
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					### Forensic Risks
 | 
				
			||||||
 | 
					- **Bias Introduction**: Continuous monitoring and expert validation
 | 
				
			||||||
 | 
					- **Methodology Errors**: Peer review and scientific validation
 | 
				
			||||||
 | 
					- **Legal Challenges**: Ensure compliance with admissibility standards
 | 
				
			||||||
 | 
					- **Expert Disagreement**: Transparent uncertainty communication
 | 
				
			||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@ -91,119 +91,137 @@ const sortedTags = Object.entries(tagFrequency)
 | 
				
			|||||||
    </div>
 | 
					    </div>
 | 
				
			||||||
  </div>
 | 
					  </div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  <!-- Advanced Filters Section -->
 | 
					  <!-- Advanced Filters Section - COLLAPSIBLE -->
 | 
				
			||||||
  <div class="filter-section">
 | 
					  <div class="filter-section">
 | 
				
			||||||
    <div class="filter-card-compact">
 | 
					    <div class="filter-card-compact">
 | 
				
			||||||
      <div class="filter-header-compact">
 | 
					      <div class="filter-header-compact">
 | 
				
			||||||
        <h3>⚙️ Erweiterte Filter</h3>
 | 
					        <h3>⚙️ Erweiterte Filter</h3>
 | 
				
			||||||
        <button class="filter-reset" id="reset-advanced" title="Erweiterte Filter zurücksetzen">
 | 
					        <div class="filter-header-controls">
 | 
				
			||||||
          <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
 | 
					          <button class="filter-reset" id="reset-advanced" title="Erweiterte Filter zurücksetzen">
 | 
				
			||||||
            <polyline points="1 4 1 10 7 10"/>
 | 
					            <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
 | 
				
			||||||
            <path d="M3.51 15a9 9 0 1 0 2.13-9.36L1 10"/>
 | 
					              <polyline points="1 4 1 10 7 10"/>
 | 
				
			||||||
          </svg>
 | 
					              <path d="M3.51 15a9 9 0 1 0 2.13-9.36L1 10"/>
 | 
				
			||||||
        </button>
 | 
					            </svg>
 | 
				
			||||||
 | 
					          </button>
 | 
				
			||||||
 | 
					          <button class="collapse-toggle" id="toggle-advanced" data-collapsed="true" title="Erweiterte Filter ein/ausblenden">
 | 
				
			||||||
 | 
					            <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
 | 
				
			||||||
 | 
					              <polyline points="6 9 12 15 18 9"></polyline>
 | 
				
			||||||
 | 
					            </svg>
 | 
				
			||||||
 | 
					          </button>
 | 
				
			||||||
 | 
					        </div>
 | 
				
			||||||
      </div>
 | 
					      </div>
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      <div class="advanced-filters-compact">
 | 
					      <div class="collapsible-content hidden" id="advanced-filters-content">
 | 
				
			||||||
        <div class="filter-grid-compact">
 | 
					        <div class="advanced-filters-compact">
 | 
				
			||||||
          <div class="filter-group">
 | 
					          <div class="filter-grid-compact">
 | 
				
			||||||
            <label class="filter-label">Tool-Typ</label>
 | 
					            <div class="filter-group">
 | 
				
			||||||
            <select id="type-select" class="filter-select">
 | 
					              <label class="filter-label">Tool-Typ</label>
 | 
				
			||||||
              <option value="">Alle Typen</option>
 | 
					              <select id="type-select" class="filter-select">
 | 
				
			||||||
              {toolTypes.map((type: string) => (
 | 
					                <option value="">Alle Typen</option>
 | 
				
			||||||
                <option value={type}>{type}</option>
 | 
					                {toolTypes.map((type: string) => (
 | 
				
			||||||
              ))}
 | 
					                  <option value={type}>{type}</option>
 | 
				
			||||||
            </select>
 | 
					                ))}
 | 
				
			||||||
 | 
					              </select>
 | 
				
			||||||
 | 
					            </div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            <div class="filter-group">
 | 
				
			||||||
 | 
					              <label class="filter-label">Skill Level</label>
 | 
				
			||||||
 | 
					              <select id="skill-select" class="filter-select">
 | 
				
			||||||
 | 
					                <option value="">Alle Level</option>
 | 
				
			||||||
 | 
					                {skillLevels.map((level: string) => (
 | 
				
			||||||
 | 
					                  <option value={level}>{level}</option>
 | 
				
			||||||
 | 
					                ))}
 | 
				
			||||||
 | 
					              </select>
 | 
				
			||||||
 | 
					            </div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            <div class="filter-group">
 | 
				
			||||||
 | 
					              <label class="filter-label">Plattform</label>
 | 
				
			||||||
 | 
					              <select id="platform-select" class="filter-select">
 | 
				
			||||||
 | 
					                <option value="">Alle Plattformen</option>
 | 
				
			||||||
 | 
					                {platforms.map((platform: string) => (
 | 
				
			||||||
 | 
					                  <option value={platform}>{platform}</option>
 | 
				
			||||||
 | 
					                ))}
 | 
				
			||||||
 | 
					              </select>
 | 
				
			||||||
 | 
					            </div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            <div class="filter-group">
 | 
				
			||||||
 | 
					              <label class="filter-label">Lizenztyp</label>
 | 
				
			||||||
 | 
					              <select id="license-select" class="filter-select">
 | 
				
			||||||
 | 
					                <option value="">Alle Lizenzen</option>
 | 
				
			||||||
 | 
					                {licenses.map((license: string) => (
 | 
				
			||||||
 | 
					                  <option value={license}>{license}</option>
 | 
				
			||||||
 | 
					                ))}
 | 
				
			||||||
 | 
					              </select>
 | 
				
			||||||
 | 
					            </div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            <div class="filter-group">
 | 
				
			||||||
 | 
					              <label class="filter-label">Zugangsart</label>
 | 
				
			||||||
 | 
					              <select id="access-select" class="filter-select">
 | 
				
			||||||
 | 
					                <option value="">Alle Zugangsarten</option>
 | 
				
			||||||
 | 
					                {accessTypes.map((access: string) => (
 | 
				
			||||||
 | 
					                  <option value={access}>{access}</option>
 | 
				
			||||||
 | 
					                ))}
 | 
				
			||||||
 | 
					              </select>
 | 
				
			||||||
 | 
					            </div>
 | 
				
			||||||
          </div>
 | 
					          </div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
          <div class="filter-group">
 | 
					          <div class="filter-toggles-compact">
 | 
				
			||||||
            <label class="filter-label">Skill Level</label>
 | 
					            <label class="toggle-wrapper">
 | 
				
			||||||
            <select id="skill-select" class="filter-select">
 | 
					              <input type="checkbox" id="hosted-only" />
 | 
				
			||||||
              <option value="">Alle Level</option>
 | 
					              <span class="toggle-label">🟣 Nur CC24-Server Tools</span>
 | 
				
			||||||
              {skillLevels.map((level: string) => (
 | 
					            </label>
 | 
				
			||||||
                <option value={level}>{level}</option>
 | 
					            
 | 
				
			||||||
              ))}
 | 
					            <label class="toggle-wrapper">
 | 
				
			||||||
            </select>
 | 
					              <input type="checkbox" id="knowledgebase-only" />
 | 
				
			||||||
 | 
					              <span class="toggle-label">📖 Nur Tools mit Knowledgebase</span>
 | 
				
			||||||
 | 
					            </label>
 | 
				
			||||||
          </div>
 | 
					          </div>
 | 
				
			||||||
 | 
					 | 
				
			||||||
          <div class="filter-group">
 | 
					 | 
				
			||||||
            <label class="filter-label">Plattform</label>
 | 
					 | 
				
			||||||
            <select id="platform-select" class="filter-select">
 | 
					 | 
				
			||||||
              <option value="">Alle Plattformen</option>
 | 
					 | 
				
			||||||
              {platforms.map((platform: string) => (
 | 
					 | 
				
			||||||
                <option value={platform}>{platform}</option>
 | 
					 | 
				
			||||||
              ))}
 | 
					 | 
				
			||||||
            </select>
 | 
					 | 
				
			||||||
          </div>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
          <div class="filter-group">
 | 
					 | 
				
			||||||
            <label class="filter-label">Lizenztyp</label>
 | 
					 | 
				
			||||||
            <select id="license-select" class="filter-select">
 | 
					 | 
				
			||||||
              <option value="">Alle Lizenzen</option>
 | 
					 | 
				
			||||||
              {licenses.map((license: string) => (
 | 
					 | 
				
			||||||
                <option value={license}>{license}</option>
 | 
					 | 
				
			||||||
              ))}
 | 
					 | 
				
			||||||
            </select>
 | 
					 | 
				
			||||||
          </div>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
          <div class="filter-group">
 | 
					 | 
				
			||||||
            <label class="filter-label">Zugangsart</label>
 | 
					 | 
				
			||||||
            <select id="access-select" class="filter-select">
 | 
					 | 
				
			||||||
              <option value="">Alle Zugangsarten</option>
 | 
					 | 
				
			||||||
              {accessTypes.map((access: string) => (
 | 
					 | 
				
			||||||
                <option value={access}>{access}</option>
 | 
					 | 
				
			||||||
              ))}
 | 
					 | 
				
			||||||
            </select>
 | 
					 | 
				
			||||||
          </div>
 | 
					 | 
				
			||||||
        </div>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        <div class="filter-toggles-compact">
 | 
					 | 
				
			||||||
          <label class="toggle-wrapper">
 | 
					 | 
				
			||||||
            <input type="checkbox" id="hosted-only" />
 | 
					 | 
				
			||||||
            <span class="toggle-label">🟣 Nur CC24-Server Tools</span>
 | 
					 | 
				
			||||||
          </label>
 | 
					 | 
				
			||||||
          
 | 
					 | 
				
			||||||
          <label class="toggle-wrapper">
 | 
					 | 
				
			||||||
            <input type="checkbox" id="knowledgebase-only" />
 | 
					 | 
				
			||||||
            <span class="toggle-label">📖 Nur Tools mit Knowledgebase</span>
 | 
					 | 
				
			||||||
          </label>
 | 
					 | 
				
			||||||
        </div>
 | 
					        </div>
 | 
				
			||||||
      </div>
 | 
					      </div>
 | 
				
			||||||
    </div>
 | 
					    </div>
 | 
				
			||||||
  </div>
 | 
					  </div>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  <!-- Tag Filters Section -->
 | 
					  <!-- Tag Filters Section - COLLAPSIBLE -->
 | 
				
			||||||
  <div class="filter-section">
 | 
					  <div class="filter-section">
 | 
				
			||||||
    <div class="filter-card-compact">
 | 
					    <div class="filter-card-compact">
 | 
				
			||||||
      <div class="filter-header-compact">
 | 
					      <div class="filter-header-compact">
 | 
				
			||||||
        <h3>🏷️ Tag-Filter</h3>
 | 
					        <h3>🏷️ Tag-Filter</h3>
 | 
				
			||||||
        <div class="tag-controls">
 | 
					        <div class="filter-header-controls">
 | 
				
			||||||
          <button class="filter-reset" id="reset-tags" title="Tags zurücksetzen">
 | 
					          <button class="filter-reset" id="reset-tags" title="Tags zurücksetzen">
 | 
				
			||||||
            <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
 | 
					            <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
 | 
				
			||||||
              <polyline points="1 4 1 10 7 10"/>
 | 
					              <polyline points="1 4 1 10 7 10"/>
 | 
				
			||||||
              <path d="M3.51 15a9 9 0 1 0 2.13-9.36L1 10"/>
 | 
					              <path d="M3.51 15a9 9 0 1 0 2.13-9.36L1 10"/>
 | 
				
			||||||
            </svg>
 | 
					            </svg>
 | 
				
			||||||
          </button>
 | 
					          </button>
 | 
				
			||||||
          <button id="tag-cloud-toggle" class="tag-toggle" data-expanded="false">
 | 
					          <button class="collapse-toggle" id="toggle-tags" data-collapsed="true" title="Tag-Filter ein/ausblenden">
 | 
				
			||||||
            Mehr zeigen
 | 
					            <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
 | 
				
			||||||
 | 
					              <polyline points="6 9 12 15 18 9"></polyline>
 | 
				
			||||||
 | 
					            </svg>
 | 
				
			||||||
          </button>
 | 
					          </button>
 | 
				
			||||||
        </div>
 | 
					        </div>
 | 
				
			||||||
      </div>
 | 
					      </div>
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      <div class="tag-section">
 | 
					      <div class="collapsible-content hidden" id="tag-filters-content">
 | 
				
			||||||
        <div class="selected-tags" id="selected-tags"></div>
 | 
					        <div class="tag-section">
 | 
				
			||||||
        <div class="tag-cloud" id="tag-cloud">
 | 
					          <div class="selected-tags" id="selected-tags"></div>
 | 
				
			||||||
          {sortedTags.map((tag, index) => (
 | 
					          <div class="tag-controls">
 | 
				
			||||||
            <button 
 | 
					            <button id="tag-cloud-toggle" class="tag-toggle" data-expanded="false">
 | 
				
			||||||
              class="tag-cloud-item" 
 | 
					              Mehr zeigen
 | 
				
			||||||
              data-tag={tag}
 | 
					 | 
				
			||||||
              data-frequency={tagFrequency[tag]}
 | 
					 | 
				
			||||||
              data-index={index}
 | 
					 | 
				
			||||||
            >
 | 
					 | 
				
			||||||
              {tag}
 | 
					 | 
				
			||||||
              <span class="tag-frequency">({tagFrequency[tag]})</span>
 | 
					 | 
				
			||||||
            </button>
 | 
					            </button>
 | 
				
			||||||
          ))}
 | 
					          </div>
 | 
				
			||||||
 | 
					          <div class="tag-cloud" id="tag-cloud">
 | 
				
			||||||
 | 
					            {sortedTags.map((tag, index) => (
 | 
				
			||||||
 | 
					              <button 
 | 
				
			||||||
 | 
					                class="tag-cloud-item" 
 | 
				
			||||||
 | 
					                data-tag={tag}
 | 
				
			||||||
 | 
					                data-frequency={tagFrequency[tag]}
 | 
				
			||||||
 | 
					                data-index={index}
 | 
				
			||||||
 | 
					              >
 | 
				
			||||||
 | 
					                {tag}
 | 
				
			||||||
 | 
					                <span class="tag-frequency">({tagFrequency[tag]})</span>
 | 
				
			||||||
 | 
					              </button>
 | 
				
			||||||
 | 
					            ))}
 | 
				
			||||||
 | 
					          </div>
 | 
				
			||||||
        </div>
 | 
					        </div>
 | 
				
			||||||
      </div>
 | 
					      </div>
 | 
				
			||||||
    </div>
 | 
					    </div>
 | 
				
			||||||
@ -293,7 +311,12 @@ const sortedTags = Object.entries(tagFrequency)
 | 
				
			|||||||
        advanced: document.getElementById('reset-advanced'),
 | 
					        advanced: document.getElementById('reset-advanced'),
 | 
				
			||||||
        tags: document.getElementById('reset-tags'),
 | 
					        tags: document.getElementById('reset-tags'),
 | 
				
			||||||
        all: document.getElementById('reset-all-filters')
 | 
					        all: document.getElementById('reset-all-filters')
 | 
				
			||||||
      }
 | 
					      },
 | 
				
			||||||
 | 
					      // Collapsible elements
 | 
				
			||||||
 | 
					      toggleAdvanced: document.getElementById('toggle-advanced'),
 | 
				
			||||||
 | 
					      toggleTags: document.getElementById('toggle-tags'),
 | 
				
			||||||
 | 
					      advancedContent: document.getElementById('advanced-filters-content'),
 | 
				
			||||||
 | 
					      tagContent: document.getElementById('tag-filters-content')
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    // Verify critical elements exist
 | 
					    // Verify critical elements exist
 | 
				
			||||||
@ -307,6 +330,52 @@ const sortedTags = Object.entries(tagFrequency)
 | 
				
			|||||||
    let selectedPhase = '';
 | 
					    let selectedPhase = '';
 | 
				
			||||||
    let isTagCloudExpanded = false;
 | 
					    let isTagCloudExpanded = false;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
 | 
					    // Collapsible functionality
 | 
				
			||||||
 | 
					    function toggleCollapsible(toggleBtn, content, storageKey) {
 | 
				
			||||||
 | 
					      const isCollapsed = toggleBtn.getAttribute('data-collapsed') === 'true';
 | 
				
			||||||
 | 
					      const newState = !isCollapsed;
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      toggleBtn.setAttribute('data-collapsed', newState.toString());
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      if (newState) {
 | 
				
			||||||
 | 
					        // Collapse
 | 
				
			||||||
 | 
					        content.classList.add('hidden');
 | 
				
			||||||
 | 
					        toggleBtn.style.transform = 'rotate(0deg)';
 | 
				
			||||||
 | 
					      } else {
 | 
				
			||||||
 | 
					        // Expand
 | 
				
			||||||
 | 
					        content.classList.remove('hidden');
 | 
				
			||||||
 | 
					        toggleBtn.style.transform = 'rotate(180deg)';
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      // Store state in sessionStorage
 | 
				
			||||||
 | 
					      sessionStorage.setItem(storageKey, newState.toString());
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    // Initialize collapsible sections (collapsed by default)
 | 
				
			||||||
 | 
					    function initializeCollapsible() {
 | 
				
			||||||
 | 
					      // Advanced filters
 | 
				
			||||||
 | 
					      const advancedCollapsed = sessionStorage.getItem('advanced-collapsed') !== 'false';
 | 
				
			||||||
 | 
					      elements.toggleAdvanced.setAttribute('data-collapsed', advancedCollapsed.toString());
 | 
				
			||||||
 | 
					      if (advancedCollapsed) {
 | 
				
			||||||
 | 
					        elements.advancedContent.classList.add('hidden');
 | 
				
			||||||
 | 
					        elements.toggleAdvanced.style.transform = 'rotate(0deg)';
 | 
				
			||||||
 | 
					      } else {
 | 
				
			||||||
 | 
					        elements.advancedContent.classList.remove('hidden');
 | 
				
			||||||
 | 
					        elements.toggleAdvanced.style.transform = 'rotate(180deg)';
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      // Tag filters
 | 
				
			||||||
 | 
					      const tagsCollapsed = sessionStorage.getItem('tags-collapsed') !== 'false';
 | 
				
			||||||
 | 
					      elements.toggleTags.setAttribute('data-collapsed', tagsCollapsed.toString());
 | 
				
			||||||
 | 
					      if (tagsCollapsed) {
 | 
				
			||||||
 | 
					        elements.tagContent.classList.add('hidden');
 | 
				
			||||||
 | 
					        elements.toggleTags.style.transform = 'rotate(0deg)';
 | 
				
			||||||
 | 
					      } else {
 | 
				
			||||||
 | 
					        elements.tagContent.classList.remove('hidden');
 | 
				
			||||||
 | 
					        elements.toggleTags.style.transform = 'rotate(180deg)';
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
    // Helper function to check if tool is hosted
 | 
					    // Helper function to check if tool is hosted
 | 
				
			||||||
    function isToolHosted(tool) {
 | 
					    function isToolHosted(tool) {
 | 
				
			||||||
      return tool.projectUrl !== undefined && 
 | 
					      return tool.projectUrl !== undefined && 
 | 
				
			||||||
@ -418,18 +487,23 @@ const sortedTags = Object.entries(tagFrequency)
 | 
				
			|||||||
      });
 | 
					      });
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    // Add/remove tags
 | 
					    // Add/remove tags: update every element that carries the tag, not just the first match
 | 
				
			||||||
    function addTag(tag) {
 | 
					    function addTag(tag) {
 | 
				
			||||||
      selectedTags.add(tag);
 | 
					      selectedTags.add(tag);
 | 
				
			||||||
      document.querySelector(`[data-tag="${tag}"]`).classList.add('active');
 | 
					      // Use querySelectorAll so that every element with this data-tag is marked active
 | 
				
			||||||
 | 
					      document.querySelectorAll(`[data-tag="${tag}"]`).forEach(element => {
 | 
				
			||||||
 | 
					        element.classList.add('active');
 | 
				
			||||||
 | 
					      });
 | 
				
			||||||
      updateSelectedTags();
 | 
					      updateSelectedTags();
 | 
				
			||||||
      filterTools();
 | 
					      filterTools();
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    function removeTag(tag) {
 | 
					    function removeTag(tag) {
 | 
				
			||||||
      selectedTags.delete(tag);
 | 
					      selectedTags.delete(tag);
 | 
				
			||||||
      const tagElement = document.querySelector(`[data-tag="${tag}"]`);
 | 
					      // Use querySelectorAll so that every element with this data-tag is unmarked
 | 
				
			||||||
      if (tagElement) tagElement.classList.remove('active');
 | 
					      document.querySelectorAll(`[data-tag="${tag}"]`).forEach(element => {
 | 
				
			||||||
 | 
					        element.classList.remove('active');
 | 
				
			||||||
 | 
					      });
 | 
				
			||||||
      updateSelectedTags();
 | 
					      updateSelectedTags();
 | 
				
			||||||
      filterTools();
 | 
					      filterTools();
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -553,7 +627,10 @@ const sortedTags = Object.entries(tagFrequency)
 | 
				
			|||||||
    
 | 
					    
 | 
				
			||||||
    function resetTags() {
 | 
					    function resetTags() {
 | 
				
			||||||
      selectedTags.clear();
 | 
					      selectedTags.clear();
 | 
				
			||||||
      elements.tagCloudItems.forEach(item => item.classList.remove('active'));
 | 
					      // Clear the active state on every tag-cloud item currently in the document
 | 
				
			||||||
 | 
					      document.querySelectorAll('.tag-cloud-item').forEach(item => {
 | 
				
			||||||
 | 
					        item.classList.remove('active');
 | 
				
			||||||
 | 
					      });
 | 
				
			||||||
      updateSelectedTags();
 | 
					      updateSelectedTags();
 | 
				
			||||||
      filterTools();
 | 
					      filterTools();
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@ -630,11 +707,21 @@ const sortedTags = Object.entries(tagFrequency)
 | 
				
			|||||||
    elements.resetButtons.tags.addEventListener('click', resetTags);
 | 
					    elements.resetButtons.tags.addEventListener('click', resetTags);
 | 
				
			||||||
    elements.resetButtons.all.addEventListener('click', resetAllFilters);
 | 
					    elements.resetButtons.all.addEventListener('click', resetAllFilters);
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
 | 
					    // Collapsible toggle listeners
 | 
				
			||||||
 | 
					    elements.toggleAdvanced.addEventListener('click', () => {
 | 
				
			||||||
 | 
					      toggleCollapsible(elements.toggleAdvanced, elements.advancedContent, 'advanced-collapsed');
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    elements.toggleTags.addEventListener('click', () => {
 | 
				
			||||||
 | 
					      toggleCollapsible(elements.toggleTags, elements.tagContent, 'tags-collapsed');
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
    // Expose functions globally for backwards compatibility
 | 
					    // Expose functions globally for backwards compatibility
 | 
				
			||||||
    window.clearTagFilters = resetTags;
 | 
					    window.clearTagFilters = resetTags;
 | 
				
			||||||
    window.clearAllFilters = resetAllFilters;
 | 
					    window.clearAllFilters = resetAllFilters;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    // Initialize
 | 
					    // Initialize
 | 
				
			||||||
 | 
					    initializeCollapsible();
 | 
				
			||||||
    initTagCloud();
 | 
					    initTagCloud();
 | 
				
			||||||
    filterTagCloud();
 | 
					    filterTagCloud();
 | 
				
			||||||
    updateSelectedTags();
 | 
					    updateSelectedTags();
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										22
									
								
								src/pages/api/ai/embeddings.status.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								src/pages/api/ai/embeddings.status.ts
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,22 @@
 | 
				
			|||||||
 | 
					// src/pages/api/ai/embeddings.status.ts
 | 
				
			||||||
 | 
					import type { APIRoute } from 'astro';
 | 
				
			||||||
 | 
					import { embeddingsService } from '../../../utils/embeddings.js';
 | 
				
			||||||
 | 
					import { apiResponse, apiServerError } from '../../../utils/api.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export const prerender = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export const GET: APIRoute = async () => {
 | 
				
			||||||
 | 
					  try {
 | 
				
			||||||
 | 
					    const stats = embeddingsService.getStats();
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    return apiResponse.success({
 | 
				
			||||||
 | 
					      embeddings: stats,
 | 
				
			||||||
 | 
					      timestamp: new Date().toISOString(),
 | 
				
			||||||
 | 
					      status: stats.enabled && stats.initialized ? 'ready' : 
 | 
				
			||||||
 | 
					             stats.enabled && !stats.initialized ? 'initializing' : 'disabled'
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					  } catch (error) {
 | 
				
			||||||
 | 
					    console.error('Embeddings status error:', error);
 | 
				
			||||||
 | 
					    return apiServerError.internal('Failed to get embeddings status');
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
@ -1,4 +1,4 @@
 | 
				
			|||||||
// src/pages/api/ai/enhance-input.ts
 | 
					// src/pages/api/ai/enhance-input.ts - ENHANCED with forensics methodology
 | 
				
			||||||
import type { APIRoute } from 'astro';
 | 
					import type { APIRoute } from 'astro';
 | 
				
			||||||
import { withAPIAuth } from '../../../utils/auth.js';
 | 
					import { withAPIAuth } from '../../../utils/auth.js';
 | 
				
			||||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
 | 
					import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
 | 
				
			||||||
@ -14,7 +14,11 @@ function getEnv(key: string): string {
 | 
				
			|||||||
  return value;
 | 
					  return value;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const AI_MODEL = getEnv('AI_MODEL');
 | 
					// Use the analyzer AI for smart prompting (smaller, faster model)
 | 
				
			||||||
 | 
					const AI_ENDPOINT = getEnv('AI_ANALYZER_ENDPOINT');
 | 
				
			||||||
 | 
					const AI_API_KEY = getEnv('AI_ANALYZER_API_KEY');
 | 
				
			||||||
 | 
					const AI_MODEL = getEnv('AI_ANALYZER_MODEL');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
 | 
					const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
 | 
				
			||||||
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
 | 
					const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
 | 
				
			||||||
const RATE_LIMIT_MAX = 5; // 5 enhancement requests per minute per user
 | 
					const RATE_LIMIT_MAX = 5; // 5 enhancement requests per minute per user
 | 
				
			||||||
@ -59,29 +63,38 @@ function cleanupExpiredRateLimits() {
 | 
				
			|||||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
 | 
					setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function createEnhancementPrompt(input: string): string {
 | 
					function createEnhancementPrompt(input: string): string {
 | 
				
			||||||
  return `
 | 
					  return `Sie sind ein DFIR-Experte mit Spezialisierung auf forensische Methodik. Ein Nutzer beschreibt ein forensisches Szenario oder Problem. Analysieren Sie die Eingabe auf Vollständigkeit für eine wissenschaftlich fundierte forensische Untersuchung.
 | 
				
			||||||
Du bist eine KI für digitale Forensik. Der Nutzer beschreibt ein forensisches Szenario. Analysiere die Eingabe.
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Wenn die Beschreibung unvollständig oder vage ist, stelle bis zu drei präzise Rückfragen im JSON-Array-Format, um wichtige Details zu klären (z. B. Vorfalltyp, System, Ziel, Datenquellen, Zeit, Beteiligte, rechtlicher Rahmen).
 | 
					ANALYSIEREN SIE DIESE FORENSISCHEN KATEGORIEN:
 | 
				
			||||||
 | 
					1. **Incident Context**: Was ist passiert? Welche Angriffsvektoren oder technischen Probleme liegen vor?
 | 
				
			||||||
 | 
					2. **Affected Systems**: Welche spezifischen Technologien/Plattformen sind betroffen? (Windows/Linux/ICS/SCADA/Mobile/Cloud/Network Infrastructure)
 | 
				
			||||||
 | 
					3. **Available Evidence**: Welche forensischen Datenquellen stehen zur Verfügung? (RAM-Dumps, Disk-Images, Log-Files, Network-Captures, Registry-Hives)
 | 
				
			||||||
 | 
					4. **Investigation Objectives**: Was soll erreicht werden? (IOC-Extraktion, Timeline-Rekonstruktion, Attribution, Impact-Assessment)
 | 
				
			||||||
 | 
					5. **Timeline Constraints**: Wie zeitkritisch ist die Untersuchung?
 | 
				
			||||||
 | 
					6. **Legal & Compliance**: Rechtliche Anforderungen, Chain of Custody, Compliance-Rahmen (DSGVO, sector-specific regulations)
 | 
				
			||||||
 | 
					7. **Technical Constraints**: Verfügbare Ressourcen, Skills, Infrastrukturbeschränkungen
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Wenn die Eingabe bereits klar, spezifisch und vollständig ist, gib stattdessen nur eine leere Liste [] zurück.
 | 
					WENN die Beschreibung alle kritischen forensischen Aspekte abdeckt: Geben Sie eine leere Liste [] zurück.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Antwortformat strikt:
 | 
					WENN wichtige forensische Details fehlen: Formulieren Sie 2-3 präzise Fragen, die die kritischsten Lücken für eine wissenschaftlich fundierte forensische Analyse schließen.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
\`\`\`json
 | 
					QUALITÄTSKRITERIEN FÜR FRAGEN:
 | 
				
			||||||
 | 
					- Forensisch spezifisch, nicht allgemein (❌ "Mehr Details?" ✅ "Welche forensischen Artefakte (RAM-Dumps, Disk-Images, Logs) stehen zur Verfügung?")
 | 
				
			||||||
 | 
					- Methodisch relevant (❌ "Wann passierte das?" ✅ "Liegen Log-Dateien aus dem Incident-Zeitraum vor, und welche Retention-Policy gilt?")
 | 
				
			||||||
 | 
					- Priorisiert nach Auswirkung auf die forensische Untersuchungsqualität
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					ANTWORTFORMAT (NUR JSON, KEIN ZUSÄTZLICHER TEXT):
 | 
				
			||||||
[
 | 
					[
 | 
				
			||||||
  "Frage 1?",
 | 
					  "Forensisch spezifische Frage 1?",
 | 
				
			||||||
  "Frage 2?",
 | 
					  "Forensisch spezifische Frage 2?",
 | 
				
			||||||
  "Frage 3?"
 | 
					  "Forensisch spezifische Frage 3?"
 | 
				
			||||||
]
 | 
					]
 | 
				
			||||||
\`\`\`
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
Nutzer-Eingabe:
 | 
					NUTZER-EINGABE:
 | 
				
			||||||
${input}
 | 
					${input}
 | 
				
			||||||
  `.trim();
 | 
					  `.trim();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
export const POST: APIRoute = async ({ request }) => {
 | 
					export const POST: APIRoute = async ({ request }) => {
 | 
				
			||||||
  try {
 | 
					  try {
 | 
				
			||||||
    const authResult = await withAPIAuth(request, 'ai');
 | 
					    const authResult = await withAPIAuth(request, 'ai');
 | 
				
			||||||
@ -98,12 +111,12 @@ export const POST: APIRoute = async ({ request }) => {
 | 
				
			|||||||
    const body = await request.json();
 | 
					    const body = await request.json();
 | 
				
			||||||
    const { input } = body;
 | 
					    const { input } = body;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (!input || typeof input !== 'string' || input.length < 20) {
 | 
					    if (!input || typeof input !== 'string' || input.length < 40) {
 | 
				
			||||||
      return apiError.badRequest('Input too short for enhancement');
 | 
					      return apiError.badRequest('Input too short for enhancement (minimum 40 characters)');
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const sanitizedInput = sanitizeInput(input);
 | 
					    const sanitizedInput = sanitizeInput(input);
 | 
				
			||||||
    if (sanitizedInput.length < 20) {
 | 
					    if (sanitizedInput.length < 40) {
 | 
				
			||||||
      return apiError.badRequest('Input too short after sanitization');
 | 
					      return apiError.badRequest('Input too short after sanitization');
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -111,11 +124,11 @@ export const POST: APIRoute = async ({ request }) => {
 | 
				
			|||||||
    const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
 | 
					    const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    const aiResponse = await enqueueApiCall(() =>
 | 
					    const aiResponse = await enqueueApiCall(() =>
 | 
				
			||||||
      fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
 | 
					      fetch(`${AI_ENDPOINT}/v1/chat/completions`, {
 | 
				
			||||||
        method: 'POST',
 | 
					        method: 'POST',
 | 
				
			||||||
        headers: {
 | 
					        headers: {
 | 
				
			||||||
          'Content-Type': 'application/json',
 | 
					          'Content-Type': 'application/json',
 | 
				
			||||||
          'Authorization': `Bearer ${process.env.AI_API_KEY}`
 | 
					          'Authorization': `Bearer ${AI_API_KEY}`
 | 
				
			||||||
        },
 | 
					        },
 | 
				
			||||||
        body: JSON.stringify({
 | 
					        body: JSON.stringify({
 | 
				
			||||||
          model: AI_MODEL,
 | 
					          model: AI_MODEL,
 | 
				
			||||||
@ -125,8 +138,12 @@ export const POST: APIRoute = async ({ request }) => {
 | 
				
			|||||||
              content: systemPrompt
 | 
					              content: systemPrompt
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
          ],
 | 
					          ],
 | 
				
			||||||
          max_tokens: 200,
 | 
					          max_tokens: 300,
 | 
				
			||||||
          temperature: 0.7
 | 
					          temperature: 0.7,
 | 
				
			||||||
 | 
					          // Enhanced: Better parameters for consistent forensics output
 | 
				
			||||||
 | 
					          top_p: 0.9,
 | 
				
			||||||
 | 
					          frequency_penalty: 0.2,
 | 
				
			||||||
 | 
					          presence_penalty: 0.1
 | 
				
			||||||
        })
 | 
					        })
 | 
				
			||||||
      }), taskId);
 | 
					      }), taskId);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -144,36 +161,47 @@ export const POST: APIRoute = async ({ request }) => {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    let questions;
 | 
					    let questions;
 | 
				
			||||||
    try {
 | 
					    try {
 | 
				
			||||||
    const cleanedContent = aiContent
 | 
					      const cleanedContent = aiContent
 | 
				
			||||||
        .replace(/^```json\s*/i, '')
 | 
					        .replace(/^```json\s*/i, '')
 | 
				
			||||||
        .replace(/\s*```\s*$/, '')
 | 
					        .replace(/\s*```\s*$/, '')
 | 
				
			||||||
        .trim();
 | 
					        .trim();
 | 
				
			||||||
    questions = JSON.parse(cleanedContent);
 | 
					      questions = JSON.parse(cleanedContent);
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      if (!Array.isArray(questions) || questions.length === 0) {
 | 
					      if (!Array.isArray(questions)) {
 | 
				
			||||||
        throw new Error('Invalid questions format');
 | 
					        throw new Error('Response is not an array');
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      
 | 
					      
 | 
				
			||||||
      // Validate and clean questions
 | 
					      // Enhanced validation and cleaning for forensics context
 | 
				
			||||||
      questions = questions
 | 
					      questions = questions
 | 
				
			||||||
        .filter(q => typeof q === 'string' && q.length > 5 && q.length < 120)
 | 
					        .filter(q => typeof q === 'string' && q.length > 20 && q.length < 200) // More appropriate length for forensics questions
 | 
				
			||||||
        .slice(0, 3);
 | 
					        .filter(q => q.includes('?')) // Must be a question
 | 
				
			||||||
 | 
					        .filter(q => {
 | 
				
			||||||
 | 
					          // Enhanced: Filter for forensics-relevant questions
 | 
				
			||||||
 | 
					          const forensicsTerms = ['forensisch', 'log', 'dump', 'image', 'artefakt', 'evidence', 'incident', 'system', 'netzwerk', 'zeitraum', 'verfügbar'];
 | 
				
			||||||
 | 
					          const lowerQ = q.toLowerCase();
 | 
				
			||||||
 | 
					          return forensicsTerms.some(term => lowerQ.includes(term));
 | 
				
			||||||
 | 
					        })
 | 
				
			||||||
 | 
					        .map(q => q.trim())
 | 
				
			||||||
 | 
					        .slice(0, 3); // Max 3 questions
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
 | 
					      // If no valid forensics questions, return empty array (means input is complete)
 | 
				
			||||||
      if (questions.length === 0) {
 | 
					      if (questions.length === 0) {
 | 
				
			||||||
        throw new Error('No valid questions found');
 | 
					        questions = [];
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    } catch (error) {
 | 
					    } catch (error) {
 | 
				
			||||||
      console.error('Failed to parse enhancement response:', aiContent);
 | 
					      console.error('Failed to parse enhancement response:', aiContent);
 | 
				
			||||||
      return apiServerError.unavailable('Invalid enhancement response format');
 | 
					      // If parsing fails, assume input is complete enough
 | 
				
			||||||
 | 
					      questions = [];
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    console.log(`[AI Enhancement] User: ${userId}, Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
 | 
					    console.log(`[AI Enhancement] User: ${userId}, Forensics Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return new Response(JSON.stringify({
 | 
					    return new Response(JSON.stringify({
 | 
				
			||||||
      success: true,
 | 
					      success: true,
 | 
				
			||||||
      questions,
 | 
					      questions,
 | 
				
			||||||
      taskId
 | 
					      taskId,
 | 
				
			||||||
 | 
					      inputComplete: questions.length === 0 // Flag to indicate if input seems complete
 | 
				
			||||||
    }), {
 | 
					    }), {
 | 
				
			||||||
      status: 200,
 | 
					      status: 200,
 | 
				
			||||||
      headers: { 'Content-Type': 'application/json' }
 | 
					      headers: { 'Content-Type': 'application/json' }
 | 
				
			||||||
 | 
				
			|||||||
@ -1,275 +1,105 @@
 | 
				
			|||||||
// src/pages/api/ai/query.ts
 | 
					// src/pages/api/ai/query.ts - FIXED: Rate limiting for micro-task pipeline
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import type { APIRoute } from 'astro';
 | 
					import type { APIRoute } from 'astro';
 | 
				
			||||||
import { withAPIAuth } from '../../../utils/auth.js';
 | 
					import { withAPIAuth } from '../../../utils/auth.js';
 | 
				
			||||||
import { getCompressedToolsDataForAI } from '../../../utils/dataService.js';
 | 
					 | 
				
			||||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
 | 
					import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
 | 
				
			||||||
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';   
 | 
					import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';   
 | 
				
			||||||
 | 
					import { aiPipeline } from '../../../utils/aiPipeline.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
export const prerender = false;
 | 
					export const prerender = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function getEnv(key: string): string {
 | 
					interface RateLimitData {
 | 
				
			||||||
  const value = process.env[key];
 | 
					  count: number;
 | 
				
			||||||
  if (!value) {
 | 
					  resetTime: number;
 | 
				
			||||||
    throw new Error(`Missing environment variable: ${key}`);
 | 
					  microTaskCount: number; 
 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  return value;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const AI_MODEL = getEnv('AI_MODEL');
 | 
					const rateLimitStore = new Map<string, RateLimitData>();
 | 
				
			||||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
 | 
					
 | 
				
			||||||
const RATE_LIMIT_WINDOW = 60 * 1000; 
 | 
					const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
 | 
				
			||||||
const RATE_LIMIT_MAX = 10; 
 | 
					const MAIN_RATE_LIMIT_MAX = parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS || '4', 10); 
 | 
				
			||||||
 | 
					const MICRO_TASK_TOTAL_LIMIT = parseInt(process.env.AI_MICRO_TASK_TOTAL_LIMIT || '50', 10); 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function sanitizeInput(input: string): string {
 | 
					function sanitizeInput(input: string): string {
 | 
				
			||||||
  let sanitized = input
 | 
					  let sanitized = input
 | 
				
			||||||
    .replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]') // Remove code blocks
 | 
					    .replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
 | 
				
			||||||
    .replace(/\<\/?[^>]+(>|$)/g, '') // Remove HTML tags
 | 
					    .replace(/\<\/?[^>]+(>|$)/g, '')
 | 
				
			||||||
    .replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
 | 
					    .replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
 | 
				
			||||||
    .replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
 | 
					    .replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
 | 
				
			||||||
    .trim();
 | 
					    .trim();
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  sanitized = sanitized.slice(0, 2000).replace(/\s+/g, ' ');
 | 
					  sanitized = sanitized.slice(0, 2000).replace(/\s+/g, ' ');
 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  return sanitized;
 | 
					  return sanitized;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function stripMarkdownJson(content: string): string {
 | 
					function checkRateLimit(userId: string): { allowed: boolean; reason?: string; microTasksRemaining?: number } {
 | 
				
			||||||
  return content
 | 
					 | 
				
			||||||
    .replace(/^```json\s*/i, '')
 | 
					 | 
				
			||||||
    .replace(/\s*```\s*$/, '')
 | 
					 | 
				
			||||||
    .trim();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
function checkRateLimit(userId: string): boolean {
 | 
					 | 
				
			||||||
  const now = Date.now();
 | 
					  const now = Date.now();
 | 
				
			||||||
  const userLimit = rateLimitStore.get(userId);
 | 
					  const userLimit = rateLimitStore.get(userId);
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  if (!userLimit || now > userLimit.resetTime) {
 | 
					  if (!userLimit || now > userLimit.resetTime) {
 | 
				
			||||||
    rateLimitStore.set(userId, { count: 1, resetTime: now + RATE_LIMIT_WINDOW });
 | 
					    rateLimitStore.set(userId, { 
 | 
				
			||||||
    return true;
 | 
					      count: 1, 
 | 
				
			||||||
 | 
					      resetTime: now + RATE_LIMIT_WINDOW,
 | 
				
			||||||
 | 
					      microTaskCount: 0 
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					    return { 
 | 
				
			||||||
 | 
					      allowed: true, 
 | 
				
			||||||
 | 
					      microTasksRemaining: MICRO_TASK_TOTAL_LIMIT 
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  if (userLimit.count >= RATE_LIMIT_MAX) {
 | 
					  if (userLimit.count >= MAIN_RATE_LIMIT_MAX) {
 | 
				
			||||||
    return false;
 | 
					    return { 
 | 
				
			||||||
 | 
					      allowed: false, 
 | 
				
			||||||
 | 
					      reason: `Main rate limit exceeded. Max ${MAIN_RATE_LIMIT_MAX} requests per minute.`
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  if (userLimit.microTaskCount >= MICRO_TASK_TOTAL_LIMIT) {
 | 
				
			||||||
 | 
					    return { 
 | 
				
			||||||
 | 
					      allowed: false, 
 | 
				
			||||||
 | 
					      reason: `Micro-task limit exceeded. Max ${MICRO_TASK_TOTAL_LIMIT} AI calls per minute.`
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  userLimit.count++;
 | 
					  userLimit.count++;
 | 
				
			||||||
  return true;
 | 
					  
 | 
				
			||||||
 | 
					  return { 
 | 
				
			||||||
 | 
					    allowed: true, 
 | 
				
			||||||
 | 
					    microTasksRemaining: MICRO_TASK_TOTAL_LIMIT - userLimit.microTaskCount
 | 
				
			||||||
 | 
					  };
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					function incrementMicroTaskCount(userId: string, aiCallsMade: number): void {
 | 
				
			||||||
 | 
					  const userLimit = rateLimitStore.get(userId);
 | 
				
			||||||
 | 
					  if (userLimit) {
 | 
				
			||||||
 | 
					    userLimit.microTaskCount += aiCallsMade;
 | 
				
			||||||
 | 
					    console.log(`[RATE LIMIT] User ${userId} now at ${userLimit.microTaskCount}/${MICRO_TASK_TOTAL_LIMIT} micro-task calls`);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function cleanupExpiredRateLimits() {
 | 
					function cleanupExpiredRateLimits() {
 | 
				
			||||||
  const now = Date.now();
 | 
					  const now = Date.now();
 | 
				
			||||||
 | 
					  const maxStoreSize = 1000; 
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
  for (const [userId, limit] of rateLimitStore.entries()) {
 | 
					  for (const [userId, limit] of rateLimitStore.entries()) {
 | 
				
			||||||
    if (now > limit.resetTime) {
 | 
					    if (now > limit.resetTime) {
 | 
				
			||||||
      rateLimitStore.delete(userId);
 | 
					      rateLimitStore.delete(userId);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					  
 | 
				
			||||||
 | 
					  if (rateLimitStore.size > maxStoreSize) {
 | 
				
			||||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
 | 
					    const entries = Array.from(rateLimitStore.entries());
 | 
				
			||||||
 | 
					    entries.sort((a, b) => a[1].resetTime - b[1].resetTime);
 | 
				
			||||||
async function loadToolsDatabase() {
 | 
					    
 | 
				
			||||||
  try {
 | 
					    const toRemove = entries.slice(0, entries.length - maxStoreSize);
 | 
				
			||||||
    return await getCompressedToolsDataForAI();
 | 
					    toRemove.forEach(([userId]) => rateLimitStore.delete(userId));
 | 
				
			||||||
  } catch (error) {
 | 
					    
 | 
				
			||||||
    console.error('Failed to load tools database:', error);
 | 
					    console.log(`[RATE LIMIT] Cleanup: removed ${toRemove.length} old entries`);
 | 
				
			||||||
    throw new Error('Database unavailable');
 | 
					 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function createWorkflowSystemPrompt(toolsData: any): string {
 | 
					setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
 | 
				
			||||||
  const toolsList = toolsData.tools.map((tool: any) => ({
 | 
					 | 
				
			||||||
    name: tool.name,
 | 
					 | 
				
			||||||
    description: tool.description,
 | 
					 | 
				
			||||||
    domains: tool.domains,
 | 
					 | 
				
			||||||
    phases: tool.phases,
 | 
					 | 
				
			||||||
    domainAgnostic: tool['domain-agnostic-software'],
 | 
					 | 
				
			||||||
    platforms: tool.platforms,
 | 
					 | 
				
			||||||
    skillLevel: tool.skillLevel,
 | 
					 | 
				
			||||||
    license: tool.license,
 | 
					 | 
				
			||||||
    tags: tool.tags,
 | 
					 | 
				
			||||||
    related_concepts: tool.related_concepts || []
 | 
					 | 
				
			||||||
  }));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const conceptsList = toolsData.concepts.map((concept: any) => ({
 | 
					 | 
				
			||||||
    name: concept.name,
 | 
					 | 
				
			||||||
    description: concept.description,
 | 
					 | 
				
			||||||
    domains: concept.domains,
 | 
					 | 
				
			||||||
    phases: concept.phases,
 | 
					 | 
				
			||||||
    skillLevel: concept.skillLevel,
 | 
					 | 
				
			||||||
    tags: concept.tags
 | 
					 | 
				
			||||||
  }));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const regularPhases = toolsData.phases || [];
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  const domainAgnosticSoftware = toolsData['domain-agnostic-software'] || [];
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  const allPhaseItems = [
 | 
					 | 
				
			||||||
    ...regularPhases,
 | 
					 | 
				
			||||||
    ...domainAgnosticSoftware
 | 
					 | 
				
			||||||
  ];
 | 
					 | 
				
			||||||
  
 | 
					 | 
				
			||||||
  const phasesDescription = allPhaseItems.map((phase: any) => 
 | 
					 | 
				
			||||||
    `- ${phase.id}: ${phase.name}`
 | 
					 | 
				
			||||||
  ).join('\n');
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const domainsDescription = toolsData.domains.map((domain: any) => 
 | 
					 | 
				
			||||||
    `- ${domain.id}: ${domain.name}`
 | 
					 | 
				
			||||||
  ).join('\n');
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const phaseDescriptions = regularPhases.map((phase: any) => 
 | 
					 | 
				
			||||||
    `- ${phase.name}: ${phase.description || 'Tools/Methods for this phase'}`
 | 
					 | 
				
			||||||
  ).join('\n');
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const domainAgnosticDescriptions = domainAgnosticSoftware.map((section: any) => 
 | 
					 | 
				
			||||||
    `- ${section.name}: ${section.description || 'Cross-cutting software and platforms'}`
 | 
					 | 
				
			||||||
  ).join('\n');
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const validPhases = [
 | 
					 | 
				
			||||||
    ...regularPhases.map((p: any) => p.id),
 | 
					 | 
				
			||||||
    ...domainAgnosticSoftware.map((s: any) => s.id)
 | 
					 | 
				
			||||||
  ].join('|');
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte, der Ermittlern bei der Auswahl von Software und Methoden hilft.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERFÜGBARE TOOLS/METHODEN:
 | 
					 | 
				
			||||||
${JSON.stringify(toolsList, null, 2)}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERFÜGBARE HINTERGRUNDWISSEN-KONZEPTE:
 | 
					 | 
				
			||||||
${JSON.stringify(conceptsList, null, 2)}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
UNTERSUCHUNGSPHASEN (NIST Framework):
 | 
					 | 
				
			||||||
${phasesDescription}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
FORENSISCHE DOMÄNEN:
 | 
					 | 
				
			||||||
${domainsDescription}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
WICHTIGE REGELN:
 | 
					 | 
				
			||||||
1. Pro Phase 2-3 Tools/Methoden empfehlen (immer mindestens 2 wenn verfügbar)
 | 
					 | 
				
			||||||
2. Tools/Methoden können in MEHREREN Phasen empfohlen werden wenn sinnvoll - versuche ein Tool/Methode für jede Phase zu empfehlen, selbst wenn die Priorität "low" ist.
 | 
					 | 
				
			||||||
3. Für Reporting-Phase: Visualisierungs- und Dokumentationssoftware einschließen
 | 
					 | 
				
			||||||
4. Gib stets dem spezieller für den Fall geeigneten Werkzeug den Vorzug.
 | 
					 | 
				
			||||||
5. Deutsche Antworten für deutsche Anfragen, English for English queries
 | 
					 | 
				
			||||||
6. Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software.
 | 
					 | 
				
			||||||
7. Bevorzuge alles, was nicht proprietär ist (license != "Proprietary"), aber erkenne an, wenn proprietäre Software besser geeignet ist.
 | 
					 | 
				
			||||||
8. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
 | 
					 | 
				
			||||||
9. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ENHANCED CONTEXTUAL ANALYSIS:
 | 
					 | 
				
			||||||
10. Analysiere das Szenario detailliert und identifiziere Schlüsselelemente, Bedrohungen und forensische Herausforderungen
 | 
					 | 
				
			||||||
11. Entwickle einen strategischen Untersuchungsansatz basierend auf dem spezifischen Szenario
 | 
					 | 
				
			||||||
12. Identifiziere zeitkritische oder besonders wichtige Faktoren für diesen Fall
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
SOFTWARE/METHODEN-AUSWAHL NACH PHASE:
 | 
					 | 
				
			||||||
${phaseDescriptions}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
DOMÄNENAGNOSTISCHE SOFTWARE/METHODEN:
 | 
					 | 
				
			||||||
${domainAgnosticDescriptions}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ANTWORT-FORMAT (strict JSON):
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  "scenario_analysis": "Detaillierte Analyse des Szenarios: Erkannte Schlüsselelemente, Art des Vorfalls, betroffene Systeme, potentielle Bedrohungen und forensische Herausforderungen",
 | 
					 | 
				
			||||||
  "investigation_approach": "Strategischer Untersuchungsansatz für dieses spezifische Szenario: Prioritäten, Reihenfolge der Phasen, besondere Überlegungen",
 | 
					 | 
				
			||||||
  "critical_considerations": "Zeitkritische Faktoren, wichtige Sicherheitsaspekte oder besondere Vorsichtsmaßnahmen für diesen Fall",
 | 
					 | 
				
			||||||
  "recommended_tools": [
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
      "name": "EXAKTER Name aus der Tools-Database",
 | 
					 | 
				
			||||||
      "priority": "high|medium|low", 
 | 
					 | 
				
			||||||
      "phase": "${validPhases}",
 | 
					 | 
				
			||||||
      "justification": "Warum diese Methode für diese Phase und dieses spezifische Szenario geeignet ist - mit Bezug zu den erkannten Schlüsselelementen"
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  ],
 | 
					 | 
				
			||||||
  "workflow_suggestion": "Vorgeschlagener Untersuchungsablauf mit konkreten Schritten für dieses Szenario",
 | 
					 | 
				
			||||||
  "background_knowledge": [
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
      "concept_name": "EXAKTER Name aus der Konzepte-Database",
 | 
					 | 
				
			||||||
      "relevance": "Warum dieses Konzept für das Szenario relevant ist, und bei welchen der empfohlenen Methoden/Tools."
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  ],
 | 
					 | 
				
			||||||
  "additional_notes": "Wichtige Überlegungen und Hinweise"
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
function createToolSystemPrompt(toolsData: any): string {
 | 
					 | 
				
			||||||
  const toolsList = toolsData.tools.map((tool: any) => ({
 | 
					 | 
				
			||||||
    name: tool.name,
 | 
					 | 
				
			||||||
    description: tool.description,
 | 
					 | 
				
			||||||
    domains: tool.domains,
 | 
					 | 
				
			||||||
    phases: tool.phases,
 | 
					 | 
				
			||||||
    platforms: tool.platforms,
 | 
					 | 
				
			||||||
    skillLevel: tool.skillLevel,
 | 
					 | 
				
			||||||
    license: tool.license,
 | 
					 | 
				
			||||||
    tags: tool.tags,
 | 
					 | 
				
			||||||
    url: tool.url,
 | 
					 | 
				
			||||||
    projectUrl: tool.projectUrl,
 | 
					 | 
				
			||||||
    related_concepts: tool.related_concepts || []
 | 
					 | 
				
			||||||
  }));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  const conceptsList = toolsData.concepts.map((concept: any) => ({
 | 
					 | 
				
			||||||
    name: concept.name,
 | 
					 | 
				
			||||||
    description: concept.description,
 | 
					 | 
				
			||||||
    domains: concept.domains,
 | 
					 | 
				
			||||||
    phases: concept.phases,
 | 
					 | 
				
			||||||
    skillLevel: concept.skillLevel,
 | 
					 | 
				
			||||||
    tags: concept.tags
 | 
					 | 
				
			||||||
  }));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte, der bei der Auswahl spezifischer Software/Methoden für konkrete Probleme hilft.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERFÜGBARE TOOLS/METHODEN:
 | 
					 | 
				
			||||||
${JSON.stringify(toolsList, null, 2)}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
VERFÜGBARE HINTERGRUNDWISSEN-KONZEPTE:
 | 
					 | 
				
			||||||
${JSON.stringify(conceptsList, null, 2)}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
WICHTIGE REGELN:
 | 
					 | 
				
			||||||
1. Analysiere das spezifische Problem/die Anforderung sorgfältig
 | 
					 | 
				
			||||||
2. Empfehle 1-3 Methoden/Tools, sortiert nach Eignung (beste Empfehlung zuerst)
 | 
					 | 
				
			||||||
3. Gib detaillierte Erklärungen, WARUM und WIE jede Methode/Tool das Problem löst
 | 
					 | 
				
			||||||
4. Berücksichtige praktische Aspekte: Skill Level, Plattformen, Verfügbarkeit
 | 
					 | 
				
			||||||
5. Deutsche Antworten für deutsche Anfragen, English for English queries
 | 
					 | 
				
			||||||
6. Gib konkrete Anwendungshinweise, nicht nur allgemeine Beschreibungen - Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software.
 | 
					 | 
				
			||||||
7. Erwähne sowohl Stärken als auch Schwächen/Limitationen
 | 
					 | 
				
			||||||
8. Schlage alternative Ansätze vor, wenn sinnvoll
 | 
					 | 
				
			||||||
9. Gib grundsätzliche Hinweise, WIE die Methode/Tool konkret eingesetzt wird
 | 
					 | 
				
			||||||
10. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
 | 
					 | 
				
			||||||
11. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ENHANCED CONTEXTUAL ANALYSIS:
 | 
					 | 
				
			||||||
12. Analysiere das Problem detailliert und identifiziere technische Anforderungen, Herausforderungen und Erfolgsfaktoren
 | 
					 | 
				
			||||||
13. Entwickle einen strategischen Lösungsansatz basierend auf dem spezifischen Problem
 | 
					 | 
				
			||||||
14. Identifiziere wichtige Voraussetzungen oder Warnungen für die Anwendung
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
ANTWORT-FORMAT (strict JSON):
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  "problem_analysis": "Detaillierte Analyse des Problems: Erkannte technische Anforderungen, Herausforderungen, benötigte Fähigkeiten und Erfolgsfaktoren",
 | 
					 | 
				
			||||||
  "investigation_approach": "Strategischer Lösungsansatz für dieses spezifische Problem: Herangehensweise, Prioritäten, optimale Anwendungsreihenfolge",
 | 
					 | 
				
			||||||
  "critical_considerations": "Wichtige Voraussetzungen, potentielle Fallstricke oder Warnungen für die Anwendung der empfohlenen Lösungen",
 | 
					 | 
				
			||||||
  "recommended_tools": [
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
      "name": "EXAKTER Name aus der Tools-Database",
 | 
					 | 
				
			||||||
      "rank": 1,
 | 
					 | 
				
			||||||
      "suitability_score": "high|medium|low",
 | 
					 | 
				
			||||||
      "detailed_explanation": "Detaillierte Erklärung, warum dieses Tool/diese Methode das spezifische Problem löst - mit Bezug zu den erkannten Anforderungen",
 | 
					 | 
				
			||||||
      "implementation_approach": "Konkrete Schritte/Ansatz zur Anwendung für dieses spezifische Problem",
 | 
					 | 
				
			||||||
      "pros": ["Spezifische Vorteile für diesen Anwendungsfall", "Weitere Vorteile"],
 | 
					 | 
				
			||||||
      "cons": ["Potentielle Nachteile oder Limitationen", "Weitere Einschränkungen"],
 | 
					 | 
				
			||||||
      "alternatives": "Alternative Ansätze oder ergänzende Tools/Methoden, falls relevant"
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  ],
 | 
					 | 
				
			||||||
  "background_knowledge": [
 | 
					 | 
				
			||||||
    {
 | 
					 | 
				
			||||||
      "concept_name": "EXAKTER Name aus der Konzepte-Database",
 | 
					 | 
				
			||||||
      "relevance": "Warum dieses Konzept für die empfohlenen Tools/das Problem relevant ist, und für welche der empfohlenen Methoden/Tools."
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  ],
 | 
					 | 
				
			||||||
  "additional_considerations": "Wichtige Überlegungen, Voraussetzungen oder Warnungen"
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
export const POST: APIRoute = async ({ request }) => {
 | 
					export const POST: APIRoute = async ({ request }) => {
 | 
				
			||||||
  try {
 | 
					  try {
 | 
				
			||||||
@ -280,161 +110,100 @@ export const POST: APIRoute = async ({ request }) => {
 | 
				
			|||||||
    
 | 
					    
 | 
				
			||||||
    const userId = authResult.userId;
 | 
					    const userId = authResult.userId;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (!checkRateLimit(userId)) {
 | 
					    const rateLimitResult = checkRateLimit(userId);
 | 
				
			||||||
      return apiError.rateLimit('Rate limit exceeded');
 | 
					    if (!rateLimitResult.allowed) {
 | 
				
			||||||
 | 
					      return apiError.rateLimit(rateLimitResult.reason || 'Rate limit exceeded');
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const body = await request.json();
 | 
					    const body = await request.json();
 | 
				
			||||||
    const { query, mode = 'workflow', taskId: clientTaskId } = body;
 | 
					    const { query, mode = 'workflow', taskId: clientTaskId } = body;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // ADD THIS DEBUG LOGGING
 | 
					    console.log(`[MICRO-TASK API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
 | 
				
			||||||
    console.log(`[AI API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
 | 
					    console.log(`[MICRO-TASK API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (!query || typeof query !== 'string') {
 | 
					    if (!query || typeof query !== 'string') {
 | 
				
			||||||
      console.log(`[AI API] Invalid query for task ${clientTaskId}`);
 | 
					      console.log(`[MICRO-TASK API] Invalid query for task ${clientTaskId}`);
 | 
				
			||||||
      return apiError.badRequest('Query required');
 | 
					      return apiError.badRequest('Query required');
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (!['workflow', 'tool'].includes(mode)) {
 | 
					    if (!['workflow', 'tool'].includes(mode)) {
 | 
				
			||||||
      console.log(`[AI API] Invalid mode for task ${clientTaskId}: ${mode}`);
 | 
					      console.log(`[MICRO-TASK API] Invalid mode for task ${clientTaskId}: ${mode}`);
 | 
				
			||||||
      return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
 | 
					      return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const sanitizedQuery = sanitizeInput(query);
 | 
					    const sanitizedQuery = sanitizeInput(query);
 | 
				
			||||||
    if (sanitizedQuery.includes('[FILTERED]')) {
 | 
					    if (sanitizedQuery.includes('[FILTERED]')) {
 | 
				
			||||||
      console.log(`[AI API] Filtered input detected for task ${clientTaskId}`);
 | 
					      console.log(`[MICRO-TASK API] Filtered input detected for task ${clientTaskId}`);
 | 
				
			||||||
      return apiError.badRequest('Invalid input detected');
 | 
					      return apiError.badRequest('Invalid input detected');
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const toolsData = await loadToolsDatabase();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    const systemPrompt = mode === 'workflow' 
 | 
					 | 
				
			||||||
      ? createWorkflowSystemPrompt(toolsData)
 | 
					 | 
				
			||||||
      : createToolSystemPrompt(toolsData);
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
 | 
					    const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    console.log(`[AI API] About to enqueue task ${taskId}`);
 | 
					    console.log(`[MICRO-TASK API] About to enqueue micro-task pipeline ${taskId}`);
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    
 | 
					    const result = await enqueueApiCall(() => 
 | 
				
			||||||
    const aiResponse = await enqueueApiCall(() =>
 | 
					      aiPipeline.processQuery(sanitizedQuery, mode)
 | 
				
			||||||
      fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
 | 
					 | 
				
			||||||
        method: 'POST',
 | 
					 | 
				
			||||||
        headers: {
 | 
					 | 
				
			||||||
          'Content-Type': 'application/json',
 | 
					 | 
				
			||||||
          'Authorization': `Bearer ${process.env.AI_API_KEY}`
 | 
					 | 
				
			||||||
        },
 | 
					 | 
				
			||||||
        body: JSON.stringify({
 | 
					 | 
				
			||||||
          model: AI_MODEL,
 | 
					 | 
				
			||||||
          messages: [
 | 
					 | 
				
			||||||
            {
 | 
					 | 
				
			||||||
              role: 'system',
 | 
					 | 
				
			||||||
              content: systemPrompt
 | 
					 | 
				
			||||||
            },
 | 
					 | 
				
			||||||
            {
 | 
					 | 
				
			||||||
              role: 'user',
 | 
					 | 
				
			||||||
              content: sanitizedQuery
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
          ],
 | 
					 | 
				
			||||||
          max_tokens: 3500,
 | 
					 | 
				
			||||||
          temperature: 0.3
 | 
					 | 
				
			||||||
        })
 | 
					 | 
				
			||||||
      })
 | 
					 | 
				
			||||||
    , taskId);
 | 
					    , taskId);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (!aiResponse.ok) {
 | 
					    if (!result || !result.recommendation) {
 | 
				
			||||||
      console.error('AI API error:', await aiResponse.text());
 | 
					      return apiServerError.unavailable('No response from micro-task AI pipeline');
 | 
				
			||||||
      return apiServerError.unavailable('AI service unavailable');
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const aiData = await aiResponse.json();
 | 
					    const stats = result.processingStats;
 | 
				
			||||||
    const aiContent = aiData.choices?.[0]?.message?.content;
 | 
					    const estimatedAICallsMade = stats.microTasksCompleted + stats.microTasksFailed;
 | 
				
			||||||
 | 
					    incrementMicroTaskCount(userId, estimatedAICallsMade);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if (!aiContent) {
 | 
					    console.log(`[MICRO-TASK API] Pipeline completed for ${taskId}:`);
 | 
				
			||||||
      return apiServerError.unavailable('No response from AI');
 | 
					    console.log(`  - Mode: ${mode}`);
 | 
				
			||||||
    }
 | 
					    console.log(`  - User: ${userId}`);
 | 
				
			||||||
 | 
					    console.log(`  - Query length: ${sanitizedQuery.length}`);
 | 
				
			||||||
 | 
					    console.log(`  - Processing time: ${stats.processingTimeMs}ms`);
 | 
				
			||||||
 | 
					    console.log(`  - Micro-tasks completed: ${stats.microTasksCompleted}`);
 | 
				
			||||||
 | 
					    console.log(`  - Micro-tasks failed: ${stats.microTasksFailed}`);
 | 
				
			||||||
 | 
					    console.log(`  - Estimated AI calls: ${estimatedAICallsMade}`);
 | 
				
			||||||
 | 
					    console.log(`  - Embeddings used: ${stats.embeddingsUsed}`);
 | 
				
			||||||
 | 
					    console.log(`  - Final items: ${stats.finalSelectedItems}`);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let recommendation;
 | 
					    const currentLimit = rateLimitStore.get(userId);
 | 
				
			||||||
    try {
 | 
					    const remainingMicroTasks = currentLimit ? 
 | 
				
			||||||
      const cleanedContent = stripMarkdownJson(aiContent);
 | 
					      MICRO_TASK_TOTAL_LIMIT - currentLimit.microTaskCount : MICRO_TASK_TOTAL_LIMIT;
 | 
				
			||||||
      recommendation = JSON.parse(cleanedContent);
 | 
					 | 
				
			||||||
    } catch (error) {
 | 
					 | 
				
			||||||
      console.error('Failed to parse AI response:', aiContent);
 | 
					 | 
				
			||||||
      return apiServerError.unavailable('Invalid AI response format');
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    const validToolNames = new Set(toolsData.tools.map((t: any) => t.name));
 | 
					 | 
				
			||||||
    const validConceptNames = new Set(toolsData.concepts.map((c: any) => c.name));
 | 
					 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    let validatedRecommendation;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (mode === 'workflow') {
 | 
					 | 
				
			||||||
      validatedRecommendation = {
 | 
					 | 
				
			||||||
        ...recommendation,
 | 
					 | 
				
			||||||
        // Ensure all new fields are included with fallbacks
 | 
					 | 
				
			||||||
        scenario_analysis: recommendation.scenario_analysis || recommendation.problem_analysis || '',
 | 
					 | 
				
			||||||
        investigation_approach: recommendation.investigation_approach || '',
 | 
					 | 
				
			||||||
        critical_considerations: recommendation.critical_considerations || '',
 | 
					 | 
				
			||||||
        recommended_tools: recommendation.recommended_tools?.filter((tool: any) => {
 | 
					 | 
				
			||||||
          if (!validToolNames.has(tool.name)) {
 | 
					 | 
				
			||||||
            console.warn(`AI recommended unknown tool: ${tool.name}`);
 | 
					 | 
				
			||||||
            return false;
 | 
					 | 
				
			||||||
          }
 | 
					 | 
				
			||||||
          return true;
 | 
					 | 
				
			||||||
        }) || [],
 | 
					 | 
				
			||||||
        background_knowledge: recommendation.background_knowledge?.filter((concept: any) => {
 | 
					 | 
				
			||||||
          if (!validConceptNames.has(concept.concept_name)) {
 | 
					 | 
				
			||||||
            console.warn(`AI referenced unknown concept: ${concept.concept_name}`);
 | 
					 | 
				
			||||||
            return false;
 | 
					 | 
				
			||||||
          }
 | 
					 | 
				
			||||||
          return true;
 | 
					 | 
				
			||||||
        }) || []
 | 
					 | 
				
			||||||
      };
 | 
					 | 
				
			||||||
    } else {
 | 
					 | 
				
			||||||
      validatedRecommendation = {
 | 
					 | 
				
			||||||
        ...recommendation,
 | 
					 | 
				
			||||||
        // Ensure all new fields are included with fallbacks
 | 
					 | 
				
			||||||
        problem_analysis: recommendation.problem_analysis || recommendation.scenario_analysis || '',
 | 
					 | 
				
			||||||
        investigation_approach: recommendation.investigation_approach || '',
 | 
					 | 
				
			||||||
        critical_considerations: recommendation.critical_considerations || '',
 | 
					 | 
				
			||||||
        recommended_tools: recommendation.recommended_tools?.filter((tool: any) => {
 | 
					 | 
				
			||||||
          if (!validToolNames.has(tool.name)) {
 | 
					 | 
				
			||||||
            console.warn(`AI recommended unknown tool: ${tool.name}`);
 | 
					 | 
				
			||||||
            return false;
 | 
					 | 
				
			||||||
          }
 | 
					 | 
				
			||||||
          return true;
 | 
					 | 
				
			||||||
        }).map((tool: any, index: number) => ({
 | 
					 | 
				
			||||||
          ...tool,
 | 
					 | 
				
			||||||
          rank: tool.rank || (index + 1),
 | 
					 | 
				
			||||||
          suitability_score: tool.suitability_score || 'medium',
 | 
					 | 
				
			||||||
          pros: Array.isArray(tool.pros) ? tool.pros : [],
 | 
					 | 
				
			||||||
          cons: Array.isArray(tool.cons) ? tool.cons : []
 | 
					 | 
				
			||||||
        })) || [],
 | 
					 | 
				
			||||||
        background_knowledge: recommendation.background_knowledge?.filter((concept: any) => {
 | 
					 | 
				
			||||||
          if (!validConceptNames.has(concept.concept_name)) {
 | 
					 | 
				
			||||||
            console.warn(`AI referenced unknown concept: ${concept.concept_name}`);
 | 
					 | 
				
			||||||
            return false;
 | 
					 | 
				
			||||||
          }
 | 
					 | 
				
			||||||
          return true;
 | 
					 | 
				
			||||||
        }) || []
 | 
					 | 
				
			||||||
      };
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    console.log(`[AI Query] Mode: ${mode}, User: ${userId}, Query length: ${sanitizedQuery.length}, Tools: ${validatedRecommendation.recommended_tools.length}, Concepts: ${validatedRecommendation.background_knowledge?.length || 0}`);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return new Response(JSON.stringify({
 | 
					    return new Response(JSON.stringify({
 | 
				
			||||||
      success: true,
 | 
					      success: true,
 | 
				
			||||||
      mode,
 | 
					      mode,
 | 
				
			||||||
      taskId,
 | 
					      taskId,
 | 
				
			||||||
      recommendation: validatedRecommendation,
 | 
					      recommendation: result.recommendation,
 | 
				
			||||||
      query: sanitizedQuery
 | 
					      query: sanitizedQuery,
 | 
				
			||||||
 | 
					      processingStats: {
 | 
				
			||||||
 | 
					        ...result.processingStats,
 | 
				
			||||||
 | 
					        pipelineType: 'micro-task',
 | 
				
			||||||
 | 
					        microTasksSuccessRate: stats.microTasksCompleted / (stats.microTasksCompleted + stats.microTasksFailed),
 | 
				
			||||||
 | 
					        averageTaskTime: stats.processingTimeMs / (stats.microTasksCompleted + stats.microTasksFailed),
 | 
				
			||||||
 | 
					        estimatedAICallsMade
 | 
				
			||||||
 | 
					      },
 | 
				
			||||||
 | 
					      rateLimitInfo: {
 | 
				
			||||||
 | 
					        mainRequestsRemaining: MAIN_RATE_LIMIT_MAX - (currentLimit?.count || 0),
 | 
				
			||||||
 | 
					        microTaskCallsRemaining: remainingMicroTasks,
 | 
				
			||||||
 | 
					        resetTime: Date.now() + RATE_LIMIT_WINDOW
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
    }), {
 | 
					    }), {
 | 
				
			||||||
      status: 200,
 | 
					      status: 200,
 | 
				
			||||||
      headers: { 'Content-Type': 'application/json' }
 | 
					      headers: { 'Content-Type': 'application/json' }
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  } catch (error) {
 | 
					  } catch (error) {
 | 
				
			||||||
    console.error('AI query error:', error);
 | 
					    console.error('[MICRO-TASK API] Pipeline error:', error);
 | 
				
			||||||
    return apiServerError.internal('Internal server error');
 | 
					    
 | 
				
			||||||
 | 
					    if (error.message.includes('embeddings')) {
 | 
				
			||||||
 | 
					      return apiServerError.unavailable('Embeddings service error - using AI fallback');
 | 
				
			||||||
 | 
					    } else if (error.message.includes('micro-task')) {
 | 
				
			||||||
 | 
					      return apiServerError.unavailable('Micro-task pipeline error - some analysis steps failed');
 | 
				
			||||||
 | 
					    } else if (error.message.includes('selector')) {
 | 
				
			||||||
 | 
					      return apiServerError.unavailable('AI selector service error');
 | 
				
			||||||
 | 
					    } else if (error.message.includes('rate limit')) {
 | 
				
			||||||
 | 
					      return apiError.rateLimit('AI service rate limits exceeded during micro-task processing');
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					      return apiServerError.internal('Micro-task AI pipeline error');
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
@ -1263,6 +1263,12 @@ input[type="checkbox"] {
 | 
				
			|||||||
  gap: 0.5rem; 
 | 
					  gap: 0.5rem; 
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.filter-header-controls {
 | 
				
			||||||
 | 
					  display: flex;
 | 
				
			||||||
 | 
					  align-items: center;
 | 
				
			||||||
 | 
					  gap: 0.5rem;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Search Components */
 | 
					/* Search Components */
 | 
				
			||||||
.search-wrapper { 
 | 
					.search-wrapper { 
 | 
				
			||||||
  position: relative; 
 | 
					  position: relative; 
 | 
				
			||||||
@ -1315,6 +1321,64 @@ input[type="checkbox"] {
 | 
				
			|||||||
  color: var(--color-text);
 | 
					  color: var(--color-text);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.collapse-toggle {
 | 
				
			||||||
 | 
					  background: none;
 | 
				
			||||||
 | 
					  border: 1px solid var(--color-border);
 | 
				
			||||||
 | 
					  border-radius: 0.375rem;
 | 
				
			||||||
 | 
					  color: var(--color-text-secondary);
 | 
				
			||||||
 | 
					  cursor: pointer;
 | 
				
			||||||
 | 
					  padding: 0.375rem;
 | 
				
			||||||
 | 
					  transition: var(--transition-fast);
 | 
				
			||||||
 | 
					  display: inline-flex;
 | 
				
			||||||
 | 
					  align-items: center;
 | 
				
			||||||
 | 
					  justify-content: center;
 | 
				
			||||||
 | 
					  width: 32px;
 | 
				
			||||||
 | 
					  height: 32px;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.collapse-toggle:hover {
 | 
				
			||||||
 | 
					  background-color: var(--color-bg-secondary);
 | 
				
			||||||
 | 
					  border-color: var(--color-primary);
 | 
				
			||||||
 | 
					  color: var(--color-text);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.collapse-toggle svg {
 | 
				
			||||||
 | 
					  transition: transform var(--transition-medium);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* When expanded, rotate the chevron */
 | 
				
			||||||
 | 
					.collapse-toggle[data-collapsed="false"] svg {
 | 
				
			||||||
 | 
					  transform: rotate(180deg);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Collapsible Content */
 | 
				
			||||||
 | 
					.collapsible-content {
 | 
				
			||||||
 | 
					  overflow: hidden;
 | 
				
			||||||
 | 
					  transition: all var(--transition-medium);
 | 
				
			||||||
 | 
					  opacity: 1;
 | 
				
			||||||
 | 
					  max-height: 1000px;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.collapsible-content.hidden {
 | 
				
			||||||
 | 
					  opacity: 0;
 | 
				
			||||||
 | 
					  max-height: 0;
 | 
				
			||||||
 | 
					  padding-top: 0;
 | 
				
			||||||
 | 
					  padding-bottom: 0;
 | 
				
			||||||
 | 
					  margin-top: 0;
 | 
				
			||||||
 | 
					  margin-bottom: 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Smooth animation for expanding content */
 | 
				
			||||||
 | 
					.collapsible-content:not(.hidden) {
 | 
				
			||||||
 | 
					  animation: expandContent 0.3s ease-out;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Content spacing when expanded */
 | 
				
			||||||
 | 
					.collapsible-content:not(.hidden) .advanced-filters-compact,
 | 
				
			||||||
 | 
					.collapsible-content:not(.hidden) .tag-section {
 | 
				
			||||||
 | 
					  padding-top: 0.75rem;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Filter Grids & Groups */
 | 
					/* Filter Grids & Groups */
 | 
				
			||||||
.filter-grid-compact { 
 | 
					.filter-grid-compact { 
 | 
				
			||||||
  display: grid; 
 | 
					  display: grid; 
 | 
				
			||||||
@ -1429,11 +1493,9 @@ input[type="checkbox"] {
 | 
				
			|||||||
  user-select: none;
 | 
					  user-select: none;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Tag System */
 | 
					.tag-section .tag-controls {
 | 
				
			||||||
.tag-section {
 | 
					  order: -1; 
 | 
				
			||||||
  display: flex;
 | 
					  margin-bottom: 0.75rem;
 | 
				
			||||||
  flex-direction: column;
 | 
					 | 
				
			||||||
  gap: 1rem;
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.selected-tags { 
 | 
					.selected-tags { 
 | 
				
			||||||
@ -1574,6 +1636,14 @@ input[type="checkbox"] {
 | 
				
			|||||||
  transition: var(--transition-fast); 
 | 
					  transition: var(--transition-fast); 
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.filter-reset {
 | 
				
			||||||
 | 
					  width: 32px;
 | 
				
			||||||
 | 
					  height: 32px;
 | 
				
			||||||
 | 
					  display: inline-flex;
 | 
				
			||||||
 | 
					  align-items: center;
 | 
				
			||||||
 | 
					  justify-content: center;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
.filter-reset:hover { 
 | 
					.filter-reset:hover { 
 | 
				
			||||||
  background-color: var(--color-bg-secondary); 
 | 
					  background-color: var(--color-bg-secondary); 
 | 
				
			||||||
  border-color: var(--color-warning); 
 | 
					  border-color: var(--color-warning); 
 | 
				
			||||||
@ -1591,13 +1661,6 @@ input[type="checkbox"] {
 | 
				
			|||||||
  opacity: 0.9;
 | 
					  opacity: 0.9;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* Tag Controls */
 | 
					 | 
				
			||||||
.tag-controls { 
 | 
					 | 
				
			||||||
  display: flex; 
 | 
					 | 
				
			||||||
  align-items: center; 
 | 
					 | 
				
			||||||
  gap: 0.75rem; 
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
.tag-toggle { 
 | 
					.tag-toggle { 
 | 
				
			||||||
  padding: 0.375rem 0.75rem; 
 | 
					  padding: 0.375rem 0.75rem; 
 | 
				
			||||||
  border: 1px solid var(--color-border); 
 | 
					  border: 1px solid var(--color-border); 
 | 
				
			||||||
@ -1818,6 +1881,130 @@ input[type="checkbox"] {
 | 
				
			|||||||
  border-left-color: var(--color-warning);
 | 
					  border-left-color: var(--color-warning);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Add to src/styles/global.css - Micro-Task Progress Styles */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Micro-task progress indicator */
 | 
				
			||||||
 | 
					.micro-task-progress {
 | 
				
			||||||
 | 
					  background-color: var(--color-bg-secondary);
 | 
				
			||||||
 | 
					  border: 1px solid var(--color-border);
 | 
				
			||||||
 | 
					  border-radius: 0.5rem;
 | 
				
			||||||
 | 
					  padding: 1rem;
 | 
				
			||||||
 | 
					  margin: 1rem 0;
 | 
				
			||||||
 | 
					  transition: var(--transition-fast);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.micro-task-header {
 | 
				
			||||||
 | 
					  display: flex;
 | 
				
			||||||
 | 
					  justify-content: space-between;
 | 
				
			||||||
 | 
					  align-items: center;
 | 
				
			||||||
 | 
					  margin-bottom: 0.75rem;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.micro-task-label {
 | 
				
			||||||
 | 
					  font-weight: 600;
 | 
				
			||||||
 | 
					  color: var(--color-primary);
 | 
				
			||||||
 | 
					  font-size: 0.875rem;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.micro-task-counter {
 | 
				
			||||||
 | 
					  background-color: var(--color-primary);
 | 
				
			||||||
 | 
					  color: white;
 | 
				
			||||||
 | 
					  padding: 0.25rem 0.5rem;
 | 
				
			||||||
 | 
					  border-radius: 1rem;
 | 
				
			||||||
 | 
					  font-size: 0.75rem;
 | 
				
			||||||
 | 
					  font-weight: 600;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.micro-task-steps {
 | 
				
			||||||
 | 
					  display: grid;
 | 
				
			||||||
 | 
					  grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
 | 
				
			||||||
 | 
					  gap: 0.5rem;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.micro-step {
 | 
				
			||||||
 | 
					  background-color: var(--color-bg);
 | 
				
			||||||
 | 
					  border: 1px solid var(--color-border);
 | 
				
			||||||
 | 
					  border-radius: 0.375rem;
 | 
				
			||||||
 | 
					  padding: 0.5rem;
 | 
				
			||||||
 | 
					  font-size: 0.75rem;
 | 
				
			||||||
 | 
					  text-align: center;
 | 
				
			||||||
 | 
					  transition: var(--transition-fast);
 | 
				
			||||||
 | 
					  opacity: 0.6;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.micro-step.active {
 | 
				
			||||||
 | 
					  background-color: var(--color-primary);
 | 
				
			||||||
 | 
					  color: white;
 | 
				
			||||||
 | 
					  border-color: var(--color-primary);
 | 
				
			||||||
 | 
					  opacity: 1;
 | 
				
			||||||
 | 
					  transform: scale(1.05);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.micro-step.completed {
 | 
				
			||||||
 | 
					  background-color: var(--color-accent);
 | 
				
			||||||
 | 
					  color: white;
 | 
				
			||||||
 | 
					  border-color: var(--color-accent);
 | 
				
			||||||
 | 
					  opacity: 1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.micro-step.failed {
 | 
				
			||||||
 | 
					  background-color: var(--color-error);
 | 
				
			||||||
 | 
					  color: white;
 | 
				
			||||||
 | 
					  border-color: var(--color-error);
 | 
				
			||||||
 | 
					  opacity: 1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Enhanced queue status for micro-tasks */
 | 
				
			||||||
 | 
					.queue-status-card.micro-task-mode {
 | 
				
			||||||
 | 
					  border-left: 4px solid var(--color-primary);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.queue-status-card.micro-task-mode .queue-header {
 | 
				
			||||||
 | 
					  background: linear-gradient(135deg, var(--color-primary) 0%, var(--color-accent) 100%);
 | 
				
			||||||
 | 
					  color: white;
 | 
				
			||||||
 | 
					  margin: -1rem -1rem 1rem -1rem;
 | 
				
			||||||
 | 
					  padding: 1rem;
 | 
				
			||||||
 | 
					  border-radius: 0.5rem 0.5rem 0 0;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Mobile responsive adjustments */
 | 
				
			||||||
 | 
					@media (max-width: 768px) {
 | 
				
			||||||
 | 
					  .micro-task-steps {
 | 
				
			||||||
 | 
					    grid-template-columns: repeat(2, 1fr);
 | 
				
			||||||
 | 
					    gap: 0.375rem;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  .micro-step {
 | 
				
			||||||
 | 
					    font-size: 0.6875rem;
 | 
				
			||||||
 | 
					    padding: 0.375rem;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  .micro-task-header {
 | 
				
			||||||
 | 
					    flex-direction: column;
 | 
				
			||||||
 | 
					    gap: 0.5rem;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Animation for micro-task progress */
 | 
				
			||||||
 | 
					@keyframes micro-task-pulse {
 | 
				
			||||||
 | 
					  0%, 100% { opacity: 1; }
 | 
				
			||||||
 | 
					  50% { opacity: 0.7; }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.micro-step.active {
 | 
				
			||||||
 | 
					  animation: micro-task-pulse 2s ease-in-out infinite;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@keyframes micro-task-complete {
 | 
				
			||||||
 | 
					  0% { transform: scale(1); }
 | 
				
			||||||
 | 
					  50% { transform: scale(1.1); }
 | 
				
			||||||
 | 
					  100% { transform: scale(1); }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.micro-step.completed {
 | 
				
			||||||
 | 
					  animation: micro-task-complete 0.6s ease-out;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* ===================================================================
 | 
					/* ===================================================================
 | 
				
			||||||
   17. WORKFLOW SYSTEM (CONSOLIDATED)
 | 
					   17. WORKFLOW SYSTEM (CONSOLIDATED)
 | 
				
			||||||
   ================================================================= */
 | 
					   ================================================================= */
 | 
				
			||||||
@ -2267,6 +2454,17 @@ footer {
 | 
				
			|||||||
  to { opacity: 1; }
 | 
					  to { opacity: 1; }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@keyframes expandContent {
 | 
				
			||||||
 | 
					  from {
 | 
				
			||||||
 | 
					    opacity: 0;
 | 
				
			||||||
 | 
					    transform: translateY(-10px);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  to {
 | 
				
			||||||
 | 
					    opacity: 1;
 | 
				
			||||||
 | 
					    transform: translateY(0);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@keyframes fadeInUp {
 | 
					@keyframes fadeInUp {
 | 
				
			||||||
  from {
 | 
					  from {
 | 
				
			||||||
    opacity: 0;
 | 
					    opacity: 0;
 | 
				
			||||||
@ -3261,6 +3459,23 @@ footer {
 | 
				
			|||||||
  .view-toggle {
 | 
					  .view-toggle {
 | 
				
			||||||
    justify-content: center;
 | 
					    justify-content: center;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  .filter-header-controls {
 | 
				
			||||||
 | 
					    gap: 0.375rem;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  .collapse-toggle,
 | 
				
			||||||
 | 
					  .filter-reset {
 | 
				
			||||||
 | 
					    width: 28px;
 | 
				
			||||||
 | 
					    height: 28px;
 | 
				
			||||||
 | 
					    padding: 0.25rem;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  .collapse-toggle svg,
 | 
				
			||||||
 | 
					  .filter-reset svg {
 | 
				
			||||||
 | 
					    width: 14px;
 | 
				
			||||||
 | 
					    height: 14px;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@media (width <= 640px) {
 | 
					@media (width <= 640px) {
 | 
				
			||||||
@ -3395,6 +3610,21 @@ footer {
 | 
				
			|||||||
  .filter-card-compact {
 | 
					  .filter-card-compact {
 | 
				
			||||||
    padding: 0.5rem;
 | 
					    padding: 0.5rem;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /* Lets the compact filter header break its children onto several rows. */
					  .filter-header-compact {
					    gap: 0.5rem;
					    flex-wrap: wrap;
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  /* The heading takes a full row of the wrapping header. */
					  .filter-header-compact h3 {
					    margin-bottom: 0.25rem;
					    flex: 1 1 100%;
					  }
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  /* The controls take a full row of their own and are aligned to its end. */
					  .filter-header-controls {
					    justify-content: flex-end;
					    flex: 1 1 100%;
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										882
									
								
								src/utils/aiPipeline.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										882
									
								
								src/utils/aiPipeline.ts
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,882 @@
 | 
				
			|||||||
 | 
					// src/utils/aiPipeline.ts - FIXED: Critical error corrections
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import { getCompressedToolsDataForAI } from './dataService.js';
 | 
				
			||||||
 | 
					import { embeddingsService, type EmbeddingData } from './embeddings.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** Connection settings for the AI analyzer backend. */
					interface AIConfig {
					  /** Analyzer endpoint, read from AI_ANALYZER_ENDPOINT. */
					  endpoint: string;
					  /** Analyzer API key, read from AI_ANALYZER_API_KEY. */
					  apiKey: string;
					  /** Analyzer model name, read from AI_ANALYZER_MODEL. */
					  model: string;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** Outcome of a single micro-task AI call. */
					interface MicroTaskResult {
					  /** Label of the task that produced this result (e.g. 'micro-task'). */
					  taskType: string;
					  /** Trimmed model response; the empty string when the call failed. */
					  content: string;
					  /** Wall-clock duration of the call in milliseconds. */
					  processingTimeMs: number;
					  /** True when the AI call returned without throwing. */
					  success: boolean;
					  /** Failure message; only set when `success` is false. */
					  error?: string;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Final result of a pipeline run: the recommendation plus run statistics.
					 *
					 * NOTE(review): this type is populated outside the visible excerpt; the
					 * per-field notes below are inferred from the field names - confirm against
					 * the code that builds the result.
					 */
					interface AnalysisResult {
					  /** The generated recommendation (untyped). */
					  recommendation: any;
					  /** Counters and flags describing how the run went. */
					  processingStats: {
					    /** Presumably whether the embeddings pre-filter was used. */
					    embeddingsUsed: boolean;
					    /** Presumably the number of candidates supplied by the embeddings search. */
					    candidatesFromEmbeddings: number;
					    /** Presumably the number of items in the final selection. */
					    finalSelectedItems: number;
					    /** Presumably the total run time in milliseconds. */
					    processingTimeMs: number;
					    /** Presumably the number of micro-tasks that succeeded. */
					    microTasksCompleted: number;
					    /** Presumably the number of micro-tasks that failed. */
					    microTasksFailed: number;
					    /** Presumably whether earlier results were fed into later prompts. */
					    contextContinuityUsed: boolean;
					  };
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** Mutable state shared by all micro-tasks of one analysis run. */
					interface AnalysisContext {
					  /** The user's query text; embedded verbatim into the prompts. */
					  userQuery: string;
					  /** Analysis mode; 'workflow' selects the scenario-oriented prompts. */
					  mode: string;
					  /** Tool/concept data the run works on (untyped). */
					  filteredData: any;
					  /** Summaries of earlier micro-task results, oldest first. */
					  contextHistory: string[];

					  /**
					   * Maximum context length.
					   * NOTE(review): not read by the code visible in this excerpt, where pruning
					   * uses the pipeline's own maxContextTokens - confirm whether it is used elsewhere.
					   */
					  maxContextLength: number;
					  /** Running estimated token count of `contextHistory`. */
					  currentContextLength: number;

					  /** Scenario analysis text (set in 'workflow' mode). */
					  scenarioAnalysis?: string;
					  /** Problem analysis text (set in non-workflow mode). */
					  problemAnalysis?: string;
					  /** Investigation / solution approach text. */
					  investigationApproach?: string;
					  /** NOTE(review): written outside the visible excerpt. */
					  criticalConsiderations?: string;
					  /** Tools chosen so far, each with its phase, priority and optional justification. */
					  selectedTools?: { tool: any; phase: string; priority: string; justification?: string }[];
					  /** NOTE(review): written outside the visible excerpt. */
					  backgroundKnowledge?: { concept: any; relevance: string }[];

					  /** Names of tools already in `selectedTools`; used to reject duplicates. */
					  seenToolNames: Set<string>;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class ImprovedMicroTaskAIPipeline {
 | 
				
			||||||
 | 
					  private config: AIConfig;
 | 
				
			||||||
 | 
					  private maxSelectedItems: number;
 | 
				
			||||||
 | 
					  private embeddingCandidates: number;
 | 
				
			||||||
 | 
					  private similarityThreshold: number;
 | 
				
			||||||
 | 
					  private microTaskDelay: number;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  // FIXED: Add proper token management
 | 
				
			||||||
 | 
					  private maxContextTokens: number;
 | 
				
			||||||
 | 
					  private maxPromptTokens: number;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Builds the pipeline configuration from environment variables.
					   *
					   * Required: AI_ANALYZER_ENDPOINT, AI_ANALYZER_API_KEY, AI_ANALYZER_MODEL.
					   * Optional integers (default in parentheses): AI_MAX_SELECTED_ITEMS (60),
					   * AI_EMBEDDING_CANDIDATES (60), AI_MICRO_TASK_DELAY_MS (500),
					   * AI_MAX_CONTEXT_TOKENS (4000), AI_MAX_PROMPT_TOKENS (1500).
					   *
					   * @throws Error when one of the required variables is missing or empty.
					   */
					  constructor() {
					    this.config = {
					      endpoint: this.getEnv('AI_ANALYZER_ENDPOINT'),
					      apiKey: this.getEnv('AI_ANALYZER_API_KEY'),
					      model: this.getEnv('AI_ANALYZER_MODEL')
					    };

					    // parseInt returns NaN for values such as "abc"; without this guard the NaN
					    // would be stored and silently break every later comparison against the limit.
					    const readInt = (key: string, fallback: number): number => {
					      const parsed = parseInt(process.env[key] || '', 10);
					      return Number.isNaN(parsed) ? fallback : parsed;
					    };

					    this.maxSelectedItems = readInt('AI_MAX_SELECTED_ITEMS', 60);
					    this.embeddingCandidates = readInt('AI_EMBEDDING_CANDIDATES', 60);
					    this.similarityThreshold = 0.3;
					    this.microTaskDelay = readInt('AI_MICRO_TASK_DELAY_MS', 500);

					    // Budgets used when building prompts and pruning the context history.
					    this.maxContextTokens = readInt('AI_MAX_CONTEXT_TOKENS', 4000);
					    this.maxPromptTokens = readInt('AI_MAX_PROMPT_TOKENS', 1500);
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Returns the value of a required environment variable.
					   *
					   * @param key Name of the variable to look up in `process.env`.
					   * @returns The variable's value.
					   * @throws Error when the variable is unset or is the empty string.
					   */
					  private getEnv(key: string): string {
					    const value = process.env[key];
					    if (value) {
					      return value;
					    }
					    throw new Error(`Missing environment variable: ${key}`);
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // FIXED: Estimate token count (rough approximation)
 | 
				
			||||||
 | 
					  /**
					   * Rough token estimate for a text, assuming four characters per token.
					   *
					   * @param text Text to measure.
					   * @returns The text length divided by four, rounded up.
					   */
					  private estimateTokens(text: string): number {
					    const charsPerToken = 4;
					    return Math.ceil(text.length / charsPerToken);
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // FIXED: Manage context history with token limits
 | 
				
			||||||
 | 
					  /**
					   * Appends an entry to the context history and drops the oldest entries while
					   * the estimated token total exceeds `maxContextTokens`.
					   *
					   * The last remaining entry is never removed, so the history may stay over
					   * budget when a single entry is larger than the limit.
					   *
					   * @param context Run state whose history and token counter are updated in place.
					   * @param newEntry Text to append.
					   */
					  private addToContextHistory(context: AnalysisContext, newEntry: string): void {
					    const history = context.contextHistory;

					    history.push(newEntry);
					    context.currentContextLength += this.estimateTokens(newEntry);

					    for (;;) {
					      const withinBudget = context.currentContextLength <= this.maxContextTokens;
					      if (withinBudget || history.length <= 1) {
					        break;
					      }
					      const oldest = history.shift()!;
					      context.currentContextLength -= this.estimateTokens(oldest);
					    }
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // FIXED: Safe JSON parsing with validation
 | 
				
			||||||
 | 
					  /**
					   * Parses a model response as JSON, tolerating a surrounding Markdown code fence.
					   *
					   * The input is trimmed first so that a fence preceded by whitespace is still
					   * recognised; both "```json" and a bare "```" opening fence are stripped.
					   *
					   * @param jsonString Raw response text.
					   * @param fallback Value returned when the text cannot be parsed (default `null`).
					   * @returns The parsed value, or `fallback` on any failure. Never throws.
					   */
					  private safeParseJSON(jsonString: string, fallback: any = null): any {
					    try {
					      const cleaned = jsonString
					        .trim()
					        .replace(/^```(?:json)?\s*/i, '')
					        .replace(/\s*```$/, '')
					        .trim();

					      return JSON.parse(cleaned);
					    } catch (error) {
					      // The catch variable is `unknown` under strict settings, so narrow before use.
					      const reason = error instanceof Error ? error.message : String(error);
					      console.warn('[AI PIPELINE] JSON parsing failed:', reason);
					      // String() keeps this log line from throwing when the input was not a string.
					      console.warn('[AI PIPELINE] Raw content:', String(jsonString).slice(0, 200));
					      return fallback;
					    }
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // FIXED: Add tool deduplication
 | 
				
			||||||
 | 
					  /**
					   * Adds a tool to the run's selection unless a tool with the same name was
					   * already added.
					   *
					   * @param context Run state; `seenToolNames` and `selectedTools` are updated in place.
					   * @param tool Tool object; its `name` is the deduplication key.
					   * @param phase Phase the tool is selected for.
					   * @param priority Priority label stored with the tool.
					   * @param justification Optional explanation stored with the tool.
					   * @returns `true` when the tool was added, `false` when it was a duplicate.
					   */
					  private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string): boolean {
					    const toolName = tool.name;

					    const alreadySelected = context.seenToolNames.has(toolName);
					    if (alreadySelected) {
					      console.log(`[AI PIPELINE] Skipping duplicate tool: ${toolName}`);
					      return false;
					    }

					    context.seenToolNames.add(toolName);

					    // Create the selection list on first use.
					    const selection = context.selectedTools || (context.selectedTools = []);
					    selection.push({ tool, phase, priority, justification });

					    return true;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Chooses the candidate tools and concepts for a query and lets the AI make
					   * the final selection from them.
					   *
					   * When the embeddings service is enabled and its search names at least 15
					   * tools, the candidates are the dataset entries with those names; otherwise
					   * the full dataset is used.
					   *
					   * @param userQuery The user's query text.
					   * @param toolsData Dataset with `tools`, `concepts`, `domains`, `phases` and
					   *   `'domain-agnostic-software'`.
					   * @param mode Analysis mode, passed through to the AI selection step.
					   * @returns The dataset with `tools` and `concepts` replaced by the AI's selection.
					   */
					  private async getIntelligentCandidates(userQuery: string, toolsData: any, mode: string) {
					    let candidateTools: any[] = [];
					    let candidateConcepts: any[] = [];
					    let selectionMethod = 'unknown';

					    // Shared by both paths that cannot rely on embeddings.
					    const useFullDataset = (): void => {
					      candidateTools = toolsData.tools;
					      candidateConcepts = toolsData.concepts;
					      selectionMethod = 'full_dataset';
					    };

					    if (!embeddingsService.isEnabled()) {
					      console.log(`[IMPROVED PIPELINE] Embeddings disabled, using full dataset`);
					      useFullDataset();
					    } else {
					      const similarItems = await embeddingsService.findSimilar(
					        userQuery,
					        this.embeddingCandidates,
					        this.similarityThreshold
					      );

					      // Split the hits into tool names and concept names.
					      const toolNames = new Set<string>();
					      const conceptNames = new Set<string>();
					      for (const item of similarItems) {
					        if (item.type === 'tool') {
					          toolNames.add(item.name);
					        } else if (item.type === 'concept') {
					          conceptNames.add(item.name);
					        }
					      }

					      console.log(`[IMPROVED PIPELINE] Embeddings found: ${toolNames.size} tools, ${conceptNames.size} concepts`);

					      const enoughTools = toolNames.size >= 15;
					      if (enoughTools) {
					        // Look the hits up in the dataset to get their complete records.
					        candidateTools = toolsData.tools.filter((tool: any) => toolNames.has(tool.name));
					        candidateConcepts = toolsData.concepts.filter((concept: any) => conceptNames.has(concept.name));
					        selectionMethod = 'embeddings_candidates';

					        console.log(`[IMPROVED PIPELINE] Using embeddings candidates: ${candidateTools.length} tools`);
					      } else {
					        console.log(`[IMPROVED PIPELINE] Embeddings insufficient (${toolNames.size} < 15), using full dataset`);
					        useFullDataset();
					      }
					    }

					    console.log(`[IMPROVED PIPELINE] AI will analyze FULL DATA of ${candidateTools.length} candidate tools`);
					    const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod);

					    return {
					      tools: finalSelection.selectedTools,
					      concepts: finalSelection.selectedConcepts,
					      domains: toolsData.domains,
					      phases: toolsData.phases,
					      'domain-agnostic-software': toolsData['domain-agnostic-software']
					    };
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// src/utils/aiPipeline.ts - FIXED: De-biased AI selection prompt
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Asks the AI to pick the most relevant tools and concepts from the candidates.
					   *
					   * The prompt contains the detailed records of at most the first 30 candidate
					   * tools and the first 10 candidate concepts. The AI answers with lists of
					   * names, which are matched back against the candidate lists.
					   *
					   * Falls back to `emergencyKeywordSelection` when the AI call fails, the answer
					   * is not valid JSON of the expected shape, the answer is empty, or none of the
					   * returned names matches a candidate.
					   *
					   * NOTE(review): candidates beyond the first 30 tools / 10 concepts are never
					   * shown to the AI, so they can effectively not be selected - confirm that this
					   * cap is intended.
					   *
					   * @param userQuery The user's query text.
					   * @param candidateTools Tool records to choose from.
					   * @param candidateConcepts Concept records to choose from.
					   * @param mode 'workflow' asks for a broad selection, anything else for a focused one.
					   * @param selectionMethod How the candidates were obtained; reported in the prompt.
					   * @returns The selected tool and concept records.
					   */
					  private async aiSelectionWithFullData(
					    userQuery: string,
					    candidateTools: any[],
					    candidateConcepts: any[],
					    mode: string,
					    selectionMethod: string
					  ) {
					    const modeInstruction = mode === 'workflow'
					      ? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases. Select 15-25 tools that cover the full investigation lifecycle.'
					      : 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem. Select 3-8 tools that are most relevant and effective.';

					    // Project each record onto the fields that are useful for the model.
					    const toolsWithFullData = candidateTools.map((tool: any) => ({
					      name: tool.name,
					      type: tool.type,
					      description: tool.description,
					      domains: tool.domains,
					      phases: tool.phases,
					      platforms: tool.platforms || [],
					      tags: tool.tags || [],
					      skillLevel: tool.skillLevel,
					      license: tool.license,
					      accessType: tool.accessType,
					      projectUrl: tool.projectUrl,
					      knowledgebase: tool.knowledgebase,
					      related_concepts: tool.related_concepts || [],
					      related_software: tool.related_software || []
					    }));

					    const conceptsWithFullData = candidateConcepts.map((concept: any) => ({
					      name: concept.name,
					      type: 'concept',
					      description: concept.description,
					      domains: concept.domains,
					      phases: concept.phases,
					      tags: concept.tags || [],
					      skillLevel: concept.skillLevel,
					      related_concepts: concept.related_concepts || [],
					      related_software: concept.related_software || []
					    }));

					    const prompt = `You are a DFIR expert with access to the complete forensics tool database. You need to select the most relevant tools and concepts for this specific query.
					
					SELECTION METHOD: ${selectionMethod}
					${selectionMethod === 'embeddings_candidates' ? 
					  'These tools were pre-filtered by vector similarity, so they are already relevant. Your job is to select the BEST ones from this relevant set.' :
					  'You have access to the full tool database. Select the most relevant tools for the query.'}
					
					${modeInstruction}
					
					USER QUERY: "${userQuery}"
					
					CRITICAL SELECTION PRINCIPLES:
					1. **CONTEXT OVER POPULARITY**: Don't default to "famous" tools like Volatility, Wireshark, or Autopsy just because they're well-known. Choose based on SPECIFIC scenario needs.
					
					2. **METHODOLOGY vs SOFTWARE**: 
					   - For RAPID/URGENT scenarios → Prioritize METHODS and rapid response approaches
					   - For TIME-CRITICAL incidents → Choose triage methods over deep analysis tools
					   - For COMPREHENSIVE analysis → Then consider detailed software tools
					   - METHODS (type: "method") are often better than SOFTWARE for procedural guidance
					
					3. **SCENARIO-SPECIFIC LOGIC**:
					   - "Rapid/Quick/Urgent/Triage" scenarios → Rapid Incident Response and Triage METHOD > Volatility
					   - "Industrial/SCADA/ICS" scenarios → Specialized ICS tools > generic network tools
					   - "Mobile/Android/iOS" scenarios → Mobile-specific tools > desktop forensics tools
					   - "Memory analysis needed urgently" → Quick memory tools/methods > comprehensive Volatility analysis
					
					4. **AVOID TOOL BIAS**:
					   - Volatility is NOT always the answer for memory analysis
					   - Wireshark is NOT always the answer for network analysis  
					   - Autopsy is NOT always the answer for disk analysis
					   - Consider lighter, faster, more appropriate alternatives
					
					AVAILABLE TOOLS (with complete data):
					${JSON.stringify(toolsWithFullData.slice(0, 30), null, 2)}
					
					AVAILABLE CONCEPTS (with complete data):
					${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}
					
					ANALYSIS INSTRUCTIONS:
					1. Read the FULL description of each tool/concept
					2. Consider ALL tags, platforms, related tools, and metadata
					3. **MATCH URGENCY LEVEL**: Rapid scenarios need rapid methods, not deep analysis tools
					4. **MATCH SPECIFICITY**: Specialized scenarios need specialized tools, not generic ones
					5. **CONSIDER TYPE**: Methods provide procedural guidance, software provides technical capability
					6. For SCADA/ICS queries: prioritize specialized ICS tools over generic network tools
					7. For mobile queries: prioritize mobile-specific tools over desktop tools
					8. For rapid/urgent queries: prioritize methodology and triage approaches
					
					BIAS PREVENTION:
					- If query mentions "rapid", "quick", "urgent", "triage" → Strongly favor METHODS over deep analysis SOFTWARE
					- If query mentions specific technologies (SCADA, Android, etc.) → Strongly favor specialized tools
					- Don't recommend Volatility unless deep memory analysis is specifically needed AND time allows
					- Don't recommend generic tools when specialized ones are available
					- Consider the SKILL LEVEL and TIME CONSTRAINTS implied by the query
					
					Select the most relevant items (max ${this.maxSelectedItems} total).
					
					Respond with ONLY this JSON format:
					{
					  "selectedTools": ["Tool Name 1", "Tool Name 2", ...],
					  "selectedConcepts": ["Concept Name 1", "Concept Name 2", ...],
					  "reasoning": "Detailed explanation of why these specific tools were selected for this query, addressing why certain popular tools were NOT selected if they were inappropriate for the scenario context"
					}`;

					    try {
					      const response = await this.callAI(prompt, 2500);

					      const result = this.safeParseJSON(response, null);

					      if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
					        console.error('[IMPROVED PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
					        throw new Error('AI selection failed to return valid tool selection');
					      }

					      const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
					      if (totalSelected === 0) {
					        console.error('[IMPROVED PIPELINE] AI selection returned no tools');
					        throw new Error('AI selection returned empty selection');
					      }

					      console.log(`[IMPROVED PIPELINE] AI selected: ${result.selectedTools.length} tools, ${result.selectedConcepts.length} concepts`);
					      console.log(`[IMPROVED PIPELINE] AI reasoning: ${result.reasoning}`);

					      // Map the returned names back to the candidate records; Sets keep the
					      // lookup linear in the number of candidates.
					      const pickedToolNames = new Set(result.selectedTools);
					      const pickedConceptNames = new Set(result.selectedConcepts);
					      const selectedTools = candidateTools.filter(tool => pickedToolNames.has(tool.name));
					      const selectedConcepts = candidateConcepts.filter(concept => pickedConceptNames.has(concept.name));

					      // The model may answer with names that do not exist in the candidate lists.
					      // Treat "nothing matched" like any other failed selection so the keyword
					      // fallback runs instead of returning an empty result.
					      if (selectedTools.length + selectedConcepts.length === 0) {
					        console.error('[IMPROVED PIPELINE] AI selection matched none of the candidates');
					        throw new Error('AI selection did not match any candidate');
					      }

					      console.log(`[IMPROVED PIPELINE] Final selection: ${selectedTools.length} tools with bias prevention applied`);

					      return {
					        selectedTools,
					        selectedConcepts
					      };

					    } catch (error) {
					      console.error('[IMPROVED PIPELINE] AI selection failed:', error);

					      console.log('[IMPROVED PIPELINE] Using emergency keyword-based selection');
					      return this.emergencyKeywordSelection(userQuery, candidateTools, candidateConcepts, mode);
					    }
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Keyword-based selection used when the AI selection fails.
					   *
					   * Query words longer than three characters (after stripping surrounding
					   * punctuation, each counted once) are matched against every tool's name,
					   * description, tags, platforms and domains. Tools are ranked by the number
					   * of matching keywords. When no tool matches at all, the leading candidates
					   * are returned so the emergency path does not end with an empty selection.
					   *
					   * @param userQuery The user's query text.
					   * @param candidateTools Tool records to choose from.
					   * @param candidateConcepts Concept records; the first three are returned unranked.
					   * @param mode 'workflow' allows up to 20 tools, anything else up to 8.
					   * @returns The selected tool and concept records.
					   */
					  private emergencyKeywordSelection(userQuery: string, candidateTools: any[], candidateConcepts: any[], mode: string) {
					    // Punctuation glued to a word ("ransomware?") would otherwise prevent a match.
					    const edgePunctuation = /^[.,;:!?"'()\[\]{}<>]+|[.,;:!?"'()\[\]{}<>]+$/g;
					    const keywords = Array.from(new Set(
					      userQuery
					        .toLowerCase()
					        .split(/\s+/)
					        .map(word => word.replace(edgePunctuation, ''))
					        .filter(word => word.length > 3)
					    ));

					    const scoredTools = candidateTools
					      .map(tool => {
					        const toolText = [
					          tool.name,
					          tool.description || '', // avoid matching against the literal text "undefined"
					          (tool.tags || []).join(' '),
					          (tool.platforms || []).join(' '),
					          (tool.domains || []).join(' ')
					        ].join(' ').toLowerCase();

					        const score = keywords.filter(keyword => toolText.includes(keyword)).length;
					        return { tool, score };
					      })
					      .filter(item => item.score > 0)
					      .sort((a, b) => b.score - a.score);

					    const maxTools = mode === 'workflow' ? 20 : 8;
					    let selectedTools = scoredTools.slice(0, maxTools).map(item => item.tool);

					    if (selectedTools.length === 0 && candidateTools.length > 0) {
					      console.warn('[IMPROVED PIPELINE] Emergency selection found no keyword matches, using leading candidates');
					      selectedTools = candidateTools.slice(0, maxTools);
					    }

					    console.log(`[IMPROVED PIPELINE] Emergency selection: ${selectedTools.length} tools, keywords: ${keywords.slice(0, 5).join(', ')}`);

					    return {
					      selectedTools,
					      selectedConcepts: candidateConcepts.slice(0, 3)
					    };
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Waits for the given time.
					   *
					   * @param ms Time to wait in milliseconds.
					   * @returns A promise that resolves once the timer has fired.
					   */
					  private async delay(ms: number): Promise<void> {
					    await new Promise<void>(wake => {
					      setTimeout(wake, ms);
					    });
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Runs one micro-task prompt, prefixed with as much of the context history as
					   * fits into `maxPromptTokens`.
					   *
					   * When the complete history does not fit, the oldest entries are left out
					   * until the combined prompt fits; only when not even the newest entry fits
					   * is the bare prompt sent. The history itself is not modified.
					   *
					   * @param prompt Task prompt.
					   * @param context Run state supplying the context history.
					   * @param maxTokens Token limit passed to the AI call (default 300).
					   * @returns A result object; failures are reported through `success: false`
					   *   and `error` rather than by throwing.
					   */
					  private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens: number = 300): Promise<MicroTaskResult> {
					    const startTime = Date.now();

					    let contextPrompt = prompt;
					    const history = context.contextHistory;
					    if (history.length > 0) {
					      const withContext = (entries: string[]): string =>
					        `BISHERIGE ANALYSE:\n${entries.join('\n\n')}\n\nAKTUELLE AUFGABE:\n` + prompt;

					      // Skip the oldest entries until the combined prompt fits the budget.
					      let firstKept = 0;
					      while (
					        firstKept < history.length &&
					        this.estimateTokens(withContext(history.slice(firstKept))) > this.maxPromptTokens
					      ) {
					        firstKept++;
					      }

					      if (firstKept < history.length) {
					        contextPrompt = withContext(history.slice(firstKept));
					        if (firstKept > 0) {
					          console.warn(`[AI PIPELINE] Context too long, omitted ${firstKept} oldest entries`);
					        }
					      } else {
					        console.warn('[AI PIPELINE] Context too long, using prompt only');
					      }
					    }

					    try {
					      const response = await this.callAI(contextPrompt, maxTokens);

					      return {
					        taskType: 'micro-task',
					        content: response.trim(),
					        processingTimeMs: Date.now() - startTime,
					        success: true
					      };

					    } catch (error) {
					      return {
					        taskType: 'micro-task',
					        content: '',
					        processingTimeMs: Date.now() - startTime,
					        success: false,
					        // The catch variable is `unknown` under strict settings, so narrow before use.
					        error: error instanceof Error ? error.message : String(error)
					      };
					    }
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * First micro-task: asks the AI for a short analysis of the user's scenario
					   * ('workflow' mode) or technical problem (any other mode).
					   *
					   * On success the text is stored in `context.scenarioAnalysis` or
					   * `context.problemAnalysis`, and its first 200 characters are appended to
					   * the context history.
					   *
					   * @param context Run state; read for the query and mode, updated with the result.
					   * @returns The micro-task result, successful or not.
					   */
					  private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
					    const isWorkflow = context.mode === 'workflow';

					    // Mode-dependent prompt fragments.
					    const subject = isWorkflow ? 'forensische Szenario' : 'technische Problem';
					    const heading = isWorkflow ? 'FORENSISCHES SZENARIO' : 'TECHNISCHES PROBLEM';
					    const analysisKind = isWorkflow ? 'Szenario-Analyse' : 'Problem-Analyse';
					    const focusPoints = isWorkflow
					      ? `- Angriffsvektoren und Bedrohungsmodellierung nach MITRE ATT&CK
					- Betroffene Systeme und kritische Infrastrukturen
					- Zeitkritische Faktoren und Beweiserhaltung
					- Forensische Artefakte und Datenquellen`
					      : `- Spezifische forensische Herausforderungen
					- Verfügbare Datenquellen und deren Integrität
					- Methodische Anforderungen für rechtssichere Analyse`;

					    const prompt = `Sie sind ein erfahrener DFIR-Experte. Analysieren Sie das folgende ${subject}.
					
					${heading}: "${context.userQuery}"
					
					Führen Sie eine systematische ${analysisKind} durch und berücksichtigen Sie dabei:
					
					${focusPoints}
					
					WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählungen oder Markdown-Formatierung. Maximum 150 Wörter.`;

					    const result = await this.callMicroTaskAI(prompt, context, 220);
					    if (!result.success) {
					      return result;
					    }

					    if (isWorkflow) {
					      context.scenarioAnalysis = result.content;
					    } else {
					      context.problemAnalysis = result.content;
					    }

					    const label = isWorkflow ? 'Szenario' : 'Problem';
					    this.addToContextHistory(context, `${label}-Analyse: ${result.content.slice(0, 200)}...`);

					    return result;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Micro-task: asks the model for an investigation approach (workflow mode)
					   * or a solution approach (any other mode) for the user's query.
					   *
					   * On success the answer is stored in `context.investigationApproach` and its
					   * first 200 characters are appended to the context history.
					   *
					   * @param context - Shared analysis state; `mode` and `userQuery` are read.
					   * @returns The result object produced by `callMicroTaskAI`, unchanged.
					   */
					  private async generateApproach(context: AnalysisContext): Promise<MicroTaskResult> {
					    const workflowMode = context.mode === 'workflow';
					    const approachNoun = workflowMode ? 'Untersuchungsansatz' : 'Lösungsansatz';

					    // Mode-specific bullet list embedded in the prompt.
					    const focusPoints = workflowMode
					      ? [
					          '- Triage-Prioritäten nach forensischer Dringlichkeit',
					          '- Phasenabfolge nach NIST-Methodik',
					          '- Kontaminationsvermeidung und forensische Isolierung'
					        ]
					      : [
					          '- Methodik-Auswahl nach wissenschaftlichen Kriterien',
					          '- Validierung und Verifizierung der gewählten Ansätze',
					          '- Integration in bestehende forensische Workflows'
					        ];

					    const prompt = [
					      `Basierend auf der Analyse entwickeln Sie einen fundierten ${approachNoun} nach NIST SP 800-86 Methodik.`,
					      '',
					      `${workflowMode ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"`,
					      '',
					      `Entwickeln Sie einen systematischen ${approachNoun} unter Berücksichtigung von:`,
					      '',
					      ...focusPoints,
					      '',
					      'WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 150 Wörter.'
					    ].join('\n');

					    // Third argument is presumably the token budget; callMicroTaskAI is defined outside this view.
					    const outcome = await this.callMicroTaskAI(prompt, context, 220);
					    if (!outcome.success) {
					      return outcome;
					    }

					    context.investigationApproach = outcome.content;
					    const historyLabel = workflowMode ? 'Untersuchungs' : 'Lösungs';
					    this.addToContextHistory(context, `${historyLabel}ansatz: ${outcome.content.slice(0, 200)}...`);

					    return outcome;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Micro-task: asks the model for critical forensic considerations (workflow
					   * mode) or methodological prerequisites (any other mode).
					   *
					   * On success the answer is stored in `context.criticalConsiderations` and its
					   * first 200 characters are appended to the context history.
					   *
					   * @param context - Shared analysis state; `mode` and `userQuery` are read.
					   * @returns The result object produced by `callMicroTaskAI`, unchanged.
					   */
					  private async generateCriticalConsiderations(context: AnalysisContext): Promise<MicroTaskResult> {
					    const workflowMode = context.mode === 'workflow';

					    const subject = workflowMode
					      ? 'kritische forensische Überlegungen'
					      : 'wichtige methodische Voraussetzungen';

					    // Mode-specific bullet list embedded in the prompt.
					    const aspects = workflowMode
					      ? [
					          '- Time-sensitive evidence preservation',
					          '- Chain of custody requirements und rechtliche Verwertbarkeit',
					          '- Incident containment vs. evidence preservation Dilemma',
					          '- Privacy- und Compliance-Anforderungen'
					        ]
					      : [
					          '- Tool-Validierung und Nachvollziehbarkeit',
					          '- False positive/negative Risiken bei der gewählten Methodik',
					          '- Qualifikationsanforderungen für die Durchführung',
					          '- Dokumentations- und Reporting-Standards'
					        ];

					    const prompt = [
					      `Identifizieren Sie ${subject} für diesen Fall.`,
					      '',
					      `${workflowMode ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"`,
					      '',
					      'Berücksichtigen Sie folgende forensische Aspekte:',
					      '',
					      ...aspects,
					      '',
					      'WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.'
					    ].join('\n');

					    const outcome = await this.callMicroTaskAI(prompt, context, 180);
					    if (!outcome.success) {
					      return outcome;
					    }

					    context.criticalConsiderations = outcome.content;
					    this.addToContextHistory(context, `Kritische Überlegungen: ${outcome.content.slice(0, 200)}...`);

					    return outcome;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Micro-task: lets the model pick tools for one workflow phase.
					   *
					   * Only tools from `context.filteredData.tools` whose `phases` include
					   * `phase.id` are offered. When none qualify, a successful result carrying an
					   * empty JSON array is returned without calling the model.
					   *
					   * Each entry of the model's answer is accepted only if it is an object whose
					   * `toolName` exactly matches one of the offered tools; accepted tools are
					   * registered through `addToolToSelection`.
					   *
					   * @param context - Shared analysis state; `filteredData.tools` and `userQuery` are read.
					   * @param phase - Phase descriptor; `id` and `name` are read.
					   * @returns The result object produced by `callMicroTaskAI` (or the synthetic
					   *          empty result described above).
					   */
					  private async selectToolsForPhase(context: AnalysisContext, phase: any): Promise<MicroTaskResult> {
					    const phaseTools = context.filteredData.tools.filter((tool: any) =>
					      tool.phases && tool.phases.includes(phase.id)
					    );

					    if (phaseTools.length === 0) {
					      return {
					        taskType: 'tool-selection',
					        content: JSON.stringify([]),
					        processingTimeMs: 0,
					        success: true
					      };
					    }

					    const toolListing = phaseTools
					      .map((tool: any) => `- ${tool.name}: ${tool.description.slice(0, 100)}...`)
					      .join('\n');

					    const prompt = [
					      `Wählen Sie 2-3 Methoden/Tools für die Phase "${phase.name}" basierend auf objektiven, fallbezogenen Kriterien.`,
					      '',
					      `SZENARIO: "${context.userQuery}"`,
					      '',
					      `VERFÜGBARE TOOLS FÜR ${phase.name.toUpperCase()}:`,
					      toolListing,
					      '',
					      'Wählen Sie Methoden/Tools nach forensischen Kriterien aus:',
					      '- Court admissibility und Chain of Custody Kompatibilität  ',
					      '- Integration in forensische Standard-Workflows',
					      '- Reproduzierbarkeit und Dokumentationsqualität',
					      '- Objektivität',
					      '',
					      'Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):',
					      '[',
					      '  {',
					      '    "toolName": "Exakter Methoden/Tool-Name",',
					      '    "priority": "high|medium|low", ',
					      '    "justification": "Objektive Begründung warum diese Methode/Tool für das spezifische Szenario besser geeignet ist"',
					      '  }',
					      ']'
					    ].join('\n');

					    const result = await this.callMicroTaskAI(prompt, context, 450);
					    if (!result.success) {
					      return result;
					    }

					    const selections = this.safeParseJSON(result.content, []);
					    if (!Array.isArray(selections)) {
					      return result;
					    }

					    for (const sel of selections) {
					      // Model output is untrusted: a null or primitive array entry would throw
					      // on property access and abort the whole pipeline run, so skip it.
					      if (sel === null || typeof sel !== 'object' || !sel.toolName) {
					        continue;
					      }

					      // Single lookup; unknown (hallucinated) tool names are ignored.
					      const tool = phaseTools.find((candidate: any) => candidate.name === sel.toolName);
					      if (tool) {
					        this.addToolToSelection(context, tool, phase.id, sel.priority, sel.justification);
					      }
					    }

					    return result;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Micro-task: asks the model to evaluate one tool against the user's problem
					   * and registers the tool, with the evaluation attached, via
					   * `addToolToSelection`.
					   *
					   * @param context - Shared analysis state; `userQuery` is read.
					   * @param tool - Tool record; `name`, `description`, `platforms` and `skillLevel`
					   *               are placed in the prompt.
					   * @param rank - Rank stored on the evaluation; it overrides any `rank` key
					   *               present in the model's answer.
					   * @returns The result object produced by `callMicroTaskAI`, unchanged.
					   */
					  private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
					    const prompt = [
					      'Bewerten Sie diese Methode/Tool fallbezogen für das spezifische Problem nach forensischen Qualitätskriterien.',
					      '',
					      `PROBLEM: "${context.userQuery}"`,
					      '',
					      `TOOL: ${tool.name}`,
					      `BESCHREIBUNG: ${tool.description}`,
					      `PLATTFORMEN: ${tool.platforms?.join(', ') || 'N/A'}`,
					      `SKILL LEVEL: ${tool.skillLevel}`,
					      '',
					      'Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:',
					      '{',
					      '  "suitability_score": "high|medium|low",',
					      '  "detailed_explanation": "Detaillierte forensische Begründung warum diese Methode/Tool das Problem löst",',
					      '  "implementation_approach": "Konkrete methodische Schritte zur korrekten Anwendung für dieses spezifische Problem",',
					      '  "pros": ["Forensischer Vorteil 1", "Validierter Vorteil 2"],',
					      '  "cons": ["Methodische Limitation 1", "Potenzielle Schwäche 2"],',
					      '  "alternatives": "Alternative Ansätze falls diese Methode/Tool nicht optimal ist"',
					      '}'
					    ].join('\n');

					    const result = await this.callMicroTaskAI(prompt, context, 650);
					    if (!result.success) {
					      return result;
					    }

					    const fallbackEvaluation = {
					      suitability_score: 'medium',
					      detailed_explanation: 'Evaluation failed',
					      implementation_approach: '',
					      pros: [],
					      cons: [],
					      alternatives: ''
					    };

					    const parsed = this.safeParseJSON(result.content, fallbackEvaluation);

					    // safeParseJSON's contract is not visible here. Guard against syntactically
					    // valid JSON of the wrong shape (null, an array, a bare string): null would
					    // throw on `.suitability_score`, the others would yield a garbage evaluation.
					    const isPlainObject = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
					    const evaluation = isPlainObject ? parsed : fallbackEvaluation;

					    this.addToolToSelection(context, {
					      ...tool,
					      evaluation: {
					        ...evaluation,
					        rank
					      }
					    }, 'evaluation', evaluation.suitability_score);

					    return result;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Micro-task: lets the model choose background concepts that help explain
					   * the recommended tools.
					   *
					   * When `context.filteredData.concepts` is empty, a successful result carrying
					   * an empty JSON array is returned without calling the model. Only the first
					   * 15 concepts are listed in the prompt, but answers are matched by exact name
					   * against the full concept list.
					   *
					   * On success with an array answer, `context.backgroundKnowledge` is replaced
					   * by the matched `{ concept, relevance }` pairs.
					   *
					   * @param context - Shared analysis state; `filteredData.concepts`,
					   *                  `selectedTools`, `mode` and `userQuery` are read.
					   * @returns The result object produced by `callMicroTaskAI` (or the synthetic
					   *          empty result described above).
					   */
					  private async selectBackgroundKnowledge(context: AnalysisContext): Promise<MicroTaskResult> {
					    const availableConcepts = context.filteredData.concepts;

					    if (availableConcepts.length === 0) {
					      return {
					        taskType: 'background-knowledge',
					        content: JSON.stringify([]),
					        processingTimeMs: 0,
					        success: true
					      };
					    }

					    const selectedToolNames = context.selectedTools?.map(st => st.tool.name) || [];

					    const conceptListing = availableConcepts
					      .slice(0, 15)
					      .map((concept: any) => `- ${concept.name}: ${concept.description.slice(0, 80)}...`)
					      .join('\n');

					    const prompt = [
					      'Wählen Sie relevante forensische Konzepte für das Verständnis der empfohlenen Methodik.',
					      '',
					      `${context.mode === 'workflow' ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"`,
					      `EMPFOHLENE TOOLS: ${selectedToolNames.join(', ')}`,
					      '',
					      'VERFÜGBARE KONZEPTE:',
					      conceptListing,
					      '',
					      'Wählen Sie 2-4 Konzepte aus, die für das Verständnis der forensischen Methodik essentiell sind.',
					      '',
					      'Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:',
					      '[',
					      '  {',
					      '    "conceptName": "Exakter Konzept-Name",',
					      '    "relevance": "Forensische Relevanz: Warum dieses Konzept für das Verständnis der Methodik kritisch ist"',
					      '  }',
					      ']'
					    ].join('\n');

					    const result = await this.callMicroTaskAI(prompt, context, 400);
					    if (!result.success) {
					      return result;
					    }

					    const selections = this.safeParseJSON(result.content, []);
					    if (!Array.isArray(selections)) {
					      return result;
					    }

					    const matched: { concept: any; relevance: any }[] = [];
					    for (const sel of selections) {
					      // Model output is untrusted: a null or primitive array entry would throw
					      // on property access and abort the whole pipeline run, so skip it.
					      if (sel === null || typeof sel !== 'object' || !sel.conceptName) {
					        continue;
					      }

					      // Single lookup; names that match no known concept are ignored.
					      const concept = availableConcepts.find((candidate: any) => candidate.name === sel.conceptName);
					      if (concept) {
					        matched.push({ concept, relevance: sel.relevance });
					      }
					    }
					    context.backgroundKnowledge = matched;

					    return result;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Micro-task: asks the model for the closing text of the recommendation, a
					   * workflow suggestion in workflow mode or methodological considerations in
					   * any other mode.
					   *
					   * The names of `context.selectedTools` are listed in the prompt; when there
					   * are none, a mode-specific placeholder is used instead.
					   *
					   * @param context - Shared analysis state; `mode`, `userQuery` and
					   *                  `selectedTools` are read. Nothing is written.
					   * @returns The result object produced by `callMicroTaskAI`, unchanged.
					   */
					  private async generateFinalRecommendations(context: AnalysisContext): Promise<MicroTaskResult> {
					    // An empty join result is falsy, so an empty selection also triggers the placeholder below.
					    const toolNames = context.selectedTools?.map(entry => entry.tool.name).join(', ');

					    let promptLines: string[];
					    if (context.mode === 'workflow') {
					      promptLines = [
					        'Erstellen Sie eine forensisch fundierte Workflow-Empfehlung basierend auf DFIR-Prinzipien.',
					        '',
					        `SZENARIO: "${context.userQuery}"`,
					        `AUSGEWÄHLTE TOOLS: ${toolNames || 'Keine Tools ausgewählt'}`,
					        '',
					        'Erstellen Sie konkrete methodische Workflow-Schritte für dieses spezifische Szenario unter Berücksichtigung forensischer Best Practices, Objektivität und rechtlicher Verwertbarkeit.',
					        '',
					        'WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.'
					      ];
					    } else {
					      promptLines = [
					        'Erstellen Sie wichtige methodische Überlegungen für die korrekte Methoden-/Tool-Anwendung.',
					        '',
					        `PROBLEM: "${context.userQuery}"`,
					        `EMPFOHLENE TOOLS: ${toolNames || 'Keine Methoden/Tools ausgewählt'}`,
					        '',
					        'Geben Sie kritische methodische Überlegungen, Validierungsanforderungen und Qualitätssicherungsmaßnahmen für die korrekte Anwendung der empfohlenen Methoden/Tools.',
					        '',
					        'WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 100 Wörter.'
					      ];
					    }

					    return this.callMicroTaskAI(promptLines.join('\n'), context, 180);
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Sends a single user message to `${this.config.endpoint}/v1/chat/completions`
					   * and returns the text of the first choice.
					   *
					   * @param prompt - Text sent as the only message, with role `user`.
					   * @param maxTokens - Value sent as `max_tokens`; defaults to 1000.
					   * @returns The `content` of `choices[0].message` from the JSON response.
					   * @throws Error when the HTTP status is not OK (status and response body are
					   *         included in the message) or when the response carries no content.
					   */
					  private async callAI(prompt: string, maxTokens: number = 1000): Promise<string> {
					    const url = `${this.config.endpoint}/v1/chat/completions`;

					    const payload = {
					      model: this.config.model,
					      messages: [{ role: 'user', content: prompt }],
					      max_tokens: maxTokens,
					      temperature: 0.3
					    };

					    const response = await fetch(url, {
					      method: 'POST',
					      headers: {
					        'Content-Type': 'application/json',
					        'Authorization': `Bearer ${this.config.apiKey}`
					      },
					      body: JSON.stringify(payload)
					    });

					    if (!response.ok) {
					      const errorBody = await response.text();
					      throw new Error(`AI API error: ${response.status} - ${errorBody}`);
					    }

					    const parsed = await response.json();
					    const answer = parsed.choices?.[0]?.message?.content;

					    // Covers a missing choice as well as an empty string.
					    if (!answer) {
					      throw new Error('No response from AI model');
					    }

					    return answer;
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Runs the full micro-task pipeline for one user query.
					   *
					   * Steps, in order: load the compressed tool data, narrow it to candidates,
					   * then run scenario/problem analysis, approach, critical considerations,
					   * tool selection per phase (workflow mode) or evaluation of the first three
					   * candidate tools (any other mode), background knowledge, and final
					   * recommendations. `this.delay(this.microTaskDelay)` is awaited after every
					   * task except the last one.
					   *
					   * A task reporting `success: false` is only counted as failed; it does not
					   * stop the sequence.
					   *
					   * @param userQuery - The user's free-text query.
					   * @param mode - `'workflow'` selects per-phase tool selection; any other
					   *               value selects the single-problem evaluation path.
					   * @returns The assembled recommendation together with processing statistics.
					   * @throws Re-throws any error raised during processing after logging it.
					   */
					  async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {
					    const startedAt = Date.now();
					    const tally = { completed: 0, failed: 0 };

					    // Counts one finished micro-task by its success flag.
					    const record = (outcome: { success: boolean }): void => {
					      if (outcome.success) {
					        tally.completed++;
					      } else {
					        tally.failed++;
					      }
					    };

					    console.log(`[IMPROVED PIPELINE] Starting ${mode} query processing with context continuity`);

					    try {
					      // Stage 1: candidate retrieval
					      const toolsData = await getCompressedToolsDataForAI();
					      const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode);

					      const context: AnalysisContext = {
					        userQuery,
					        mode,
					        filteredData,
					        contextHistory: [],
					        maxContextLength: this.maxContextTokens,
					        currentContextLength: 0,
					        seenToolNames: new Set<string>()
					      };

					      console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);

					      // Task 1: scenario / problem analysis
					      record(await this.analyzeScenario(context));
					      await this.delay(this.microTaskDelay);

					      // Task 2: investigation / solution approach
					      record(await this.generateApproach(context));
					      await this.delay(this.microTaskDelay);

					      // Task 3: critical considerations
					      record(await this.generateCriticalConsiderations(context));
					      await this.delay(this.microTaskDelay);

					      // Task 4: tool selection or evaluation, depending on mode
					      if (mode === 'workflow') {
					        for (const phase of toolsData.phases || []) {
					          record(await this.selectToolsForPhase(context, phase));
					          await this.delay(this.microTaskDelay);
					        }
					      } else {
					        let rank = 0;
					        for (const candidate of filteredData.tools.slice(0, 3)) {
					          rank += 1;
					          record(await this.evaluateSpecificTool(context, candidate, rank));
					          await this.delay(this.microTaskDelay);
					        }
					      }

					      // Task 5: background knowledge
					      record(await this.selectBackgroundKnowledge(context));
					      await this.delay(this.microTaskDelay);

					      // Task 6: final recommendations (no delay afterwards)
					      const finalResult = await this.generateFinalRecommendations(context);
					      record(finalResult);

					      const recommendation = this.buildRecommendation(context, mode, finalResult.content);

					      const selectedToolCount = context.selectedTools?.length || 0;
					      const knowledgeCount = context.backgroundKnowledge?.length || 0;

					      const processingStats = {
					        embeddingsUsed: embeddingsService.isEnabled(),
					        candidatesFromEmbeddings: filteredData.tools.length,
					        finalSelectedItems: selectedToolCount + knowledgeCount,
					        processingTimeMs: Date.now() - startedAt,
					        microTasksCompleted: tally.completed,
					        microTasksFailed: tally.failed,
					        contextContinuityUsed: true
					      };

					      console.log(`[IMPROVED PIPELINE] Completed: ${tally.completed} tasks, Failed: ${tally.failed} tasks`);
					      console.log(`[IMPROVED PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);

					      return { recommendation, processingStats };
					    } catch (error) {
					      console.error('[IMPROVED PIPELINE] Processing failed:', error);
					      throw error;
					    }
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Assembles the response payload from the accumulated context.
					   *
					   * Both modes share the analysis text (keyed `scenario_analysis` in workflow
					   * mode, `problem_analysis` otherwise), the approach, the critical
					   * considerations and the background knowledge. They differ in the shape of
					   * `recommended_tools` and in the key that carries `finalContent`.
					   *
					   * @param context - Accumulated analysis state; only read here.
					   * @param mode - `'workflow'` selects the workflow-shaped payload.
					   * @param finalContent - Text stored as `workflow_suggestion` (workflow mode)
					   *                       or `additional_considerations` (otherwise).
					   * @returns A plain object whose shape depends on `mode`.
					   */
					  private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any {
					    const workflowMode = mode === 'workflow';

					    const analysisKey = workflowMode ? 'scenario_analysis' : 'problem_analysis';
					    const analysisText = workflowMode ? context.scenarioAnalysis : context.problemAnalysis;

					    const backgroundKnowledge = context.backgroundKnowledge?.map(entry => ({
					      concept_name: entry.concept.name,
					      relevance: entry.relevance
					    })) || [];

					    const shared = {
					      [analysisKey]: analysisText,
					      investigation_approach: context.investigationApproach,
					      critical_considerations: context.criticalConsiderations,
					      background_knowledge: backgroundKnowledge
					    };

					    if (workflowMode) {
					      const phaseTools = context.selectedTools?.map(entry => ({
					        name: entry.tool.name,
					        phase: entry.phase,
					        priority: entry.priority,
					        justification: entry.justification || `Empfohlen für ${entry.phase}`
					      })) || [];

					      return {
					        ...shared,
					        recommended_tools: phaseTools,
					        workflow_suggestion: finalContent
					      };
					    }

					    const rankedTools = context.selectedTools?.map(entry => {
					      const evaluation = entry.tool.evaluation;
					      return {
					        name: entry.tool.name,
					        rank: evaluation?.rank || 1,
					        suitability_score: entry.priority,
					        detailed_explanation: evaluation?.detailed_explanation || '',
					        implementation_approach: evaluation?.implementation_approach || '',
					        pros: evaluation?.pros || [],
					        cons: evaluation?.cons || [],
					        alternatives: evaluation?.alternatives || ''
					      };
					    }) || [];

					    return {
					      ...shared,
					      recommended_tools: rankedTools,
					      additional_considerations: finalContent
					    };
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Single pipeline instance created at module load and shared by all importers.
					export const aiPipeline = new ImprovedMicroTaskAIPipeline();

					// Type-only re-export; emits no runtime code.
					export type { AnalysisResult };
 | 
				
			||||||
@ -1,3 +1,4 @@
 | 
				
			|||||||
 | 
					// src/utils/dataService.ts - Enhanced for micro-task AI pipeline
 | 
				
			||||||
import { promises as fs } from 'fs';
 | 
					import { promises as fs } from 'fs';
 | 
				
			||||||
import { load } from 'js-yaml';
 | 
					import { load } from 'js-yaml';
 | 
				
			||||||
import path from 'path';
 | 
					import path from 'path';
 | 
				
			||||||
@ -21,30 +22,44 @@ const ToolSchema = z.object({
 | 
				
			|||||||
  accessType: z.string().optional().nullable(),
 | 
					  accessType: z.string().optional().nullable(),
 | 
				
			||||||
  'domain-agnostic-software': z.array(z.string()).optional().nullable(),
 | 
					  'domain-agnostic-software': z.array(z.string()).optional().nullable(),
 | 
				
			||||||
  related_concepts: z.array(z.string()).optional().nullable().default([]),
 | 
					  related_concepts: z.array(z.string()).optional().nullable().default([]),
 | 
				
			||||||
  related_software: z.array(z.string()).optional().nullable().default([]), // Added this line
 | 
					  related_software: z.array(z.string()).optional().nullable().default([]),
 | 
				
			||||||
});
 | 
					});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const ToolsDataSchema = z.object({
 | 
					const ToolsDataSchema = z.object({
 | 
				
			||||||
  tools: z.array(ToolSchema),
 | 
					  tools: z.array(ToolSchema),
 | 
				
			||||||
  domains: z.array(z.object({
 | 
					  domains: z.array(z.object({
 | 
				
			||||||
    id: z.string(),
 | 
					    id: z.string(),
 | 
				
			||||||
    name: z.string()
 | 
					    name: z.string(),
 | 
				
			||||||
 | 
					    description: z.string().optional() 
 | 
				
			||||||
  })),
 | 
					  })),
 | 
				
			||||||
  phases: z.array(z.object({
 | 
					  phases: z.array(z.object({
 | 
				
			||||||
    id: z.string(), 
 | 
					    id: z.string(), 
 | 
				
			||||||
    name: z.string(),
 | 
					    name: z.string(),
 | 
				
			||||||
    description: z.string().optional()
 | 
					    description: z.string().optional(),
 | 
				
			||||||
 | 
					    typical_tools: z.array(z.string()).optional().default([]), 
 | 
				
			||||||
 | 
					    key_activities: z.array(z.string()).optional().default([]) 
 | 
				
			||||||
  })),
 | 
					  })),
 | 
				
			||||||
  'domain-agnostic-software': z.array(z.object({
 | 
					  'domain-agnostic-software': z.array(z.object({
 | 
				
			||||||
    id: z.string(),
 | 
					    id: z.string(),
 | 
				
			||||||
    name: z.string(),
 | 
					    name: z.string(),
 | 
				
			||||||
    description: z.string().optional()
 | 
					    description: z.string().optional(),
 | 
				
			||||||
 | 
					    use_cases: z.array(z.string()).optional().default([]) 
 | 
				
			||||||
  })).optional().default([]),
 | 
					  })).optional().default([]),
 | 
				
			||||||
  scenarios: z.array(z.object({
 | 
					  scenarios: z.array(z.object({
 | 
				
			||||||
    id: z.string(),
 | 
					    id: z.string(),
 | 
				
			||||||
    icon: z.string(),
 | 
					    icon: z.string(),
 | 
				
			||||||
    friendly_name: z.string()
 | 
					    friendly_name: z.string(),
 | 
				
			||||||
 | 
					    description: z.string().optional(), 
 | 
				
			||||||
 | 
					    typical_phases: z.array(z.string()).optional().default([]), 
 | 
				
			||||||
 | 
					    complexity: z.enum(['low', 'medium', 'high']).optional() 
 | 
				
			||||||
  })).optional().default([]),
 | 
					  })).optional().default([]),
 | 
				
			||||||
 | 
					  skill_levels: z.object({
 | 
				
			||||||
 | 
					    novice: z.string().optional(),
 | 
				
			||||||
 | 
					    beginner: z.string().optional(), 
 | 
				
			||||||
 | 
					    intermediate: z.string().optional(),
 | 
				
			||||||
 | 
					    advanced: z.string().optional(),
 | 
				
			||||||
 | 
					    expert: z.string().optional()
 | 
				
			||||||
 | 
					  }).optional().default({})
 | 
				
			||||||
});
 | 
					});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
interface ToolsData {
 | 
					interface ToolsData {
 | 
				
			||||||
@ -53,20 +68,49 @@ interface ToolsData {
 | 
				
			|||||||
  phases: any[];
 | 
					  phases: any[];
 | 
				
			||||||
  'domain-agnostic-software': any[];
 | 
					  'domain-agnostic-software': any[];
 | 
				
			||||||
  scenarios: any[];
 | 
					  scenarios: any[];
 | 
				
			||||||
 | 
					  skill_levels?: any;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 * Data set produced by `getCompressedToolsDataForAI`: the catalogue collections
 * plus three derived context structures.
 */
interface EnhancedCompressedToolsData {
  tools: any[];
  concepts: any[];
  domains: any[];
  phases: any[];
  'domain-agnostic-software': any[];
  /** Optional for AI processing. */
  scenarios?: any[];
  skill_levels: any;

  // Enhanced context for micro-tasks
  domain_relationships: DomainRelationship[];
  phase_dependencies: PhaseDependency[];
  tool_compatibility_matrix: CompatibilityMatrix[];
}

/** Per-domain summary: how many tools reference the domain and what they look like. */
interface DomainRelationship {
  domain_id: string;
  /** Number of tools that list this domain. */
  tool_count: number;
  /** Most frequent tags among those tools. */
  common_tags: string[];
  /** Tool count per skill level. */
  skill_distribution: Record<string, number>;
}

/** Position of one phase within the ordered list of phases. */
interface PhaseDependency {
  phase_id: string;
  /** 1-based position in the phase list. */
  order: number;
  /** Id of the preceding phase, or null for the first one. */
  depends_on: string | null;
  /** Id of the following phase, or null for the last one. */
  enables: string | null;
  is_parallel_capable: boolean;
  typical_duration: string;
}

/** One grouping of tool names, keyed by a shared characteristic. */
interface CompatibilityMatrix {
  /** Name of the grouping criterion. */
  type: string;
  /** Tool names per group key. */
  groups: Record<string, string[]>;
}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
let cachedData: ToolsData | null = null;
 | 
					let cachedData: ToolsData | null = null;
 | 
				
			||||||
let cachedRandomizedData: ToolsData | null = null;
 | 
					let cachedRandomizedData: ToolsData | null = null;
 | 
				
			||||||
let cachedCompressedData: CompressedToolsData | null = null;
 | 
					let cachedCompressedData: EnhancedCompressedToolsData | null = null;
 | 
				
			||||||
let lastRandomizationDate: string | null = null;
 | 
					let lastRandomizationDate: string | null = null;
 | 
				
			||||||
 | 
					let dataVersion: string | null = null;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
function seededRandom(seed: number): () => number {
 | 
					function seededRandom(seed: number): () => number {
 | 
				
			||||||
  let x = Math.sin(seed) * 10000;
 | 
					  let x = Math.sin(seed) * 10000;
 | 
				
			||||||
@ -91,6 +135,115 @@ function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
 | 
				
			|||||||
  return shuffled;
 | 
					  return shuffled;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Derives a short, deterministic version tag from the content of `data`.
					 *
					 * The value is serialised to JSON with the keys of every object sorted, so two
					 * structurally equal values produce the same tag regardless of property order,
					 * and any change to a nested value changes the serialised text.
					 *
					 * @param data - Any JSON-serialisable value.
					 * @returns A base-36 string built from a 32-bit rolling hash of the serialised text.
					 */
					function generateDataVersion(data: any): string {
					  // NOTE: an array passed as the JSON.stringify replacer is a property allowlist
					  // applied at *every* depth, which would strip all nested fields. A replacer
					  // function that re-creates each object with sorted keys gives stable output
					  // without losing content.
					  const canonical =
					    JSON.stringify(data, (_key, value) => {
					      if (value === null || typeof value !== 'object' || Array.isArray(value)) {
					        return value;
					      }
					      return Object.fromEntries(
					        Object.keys(value)
					          .sort()
					          .map((key) => [key, value[key]])
					      );
					    }) ?? ''; // JSON.stringify yields undefined for non-serialisable roots

					  let hash = 0;
					  for (let i = 0; i < canonical.length; i++) {
					    hash = (hash << 5) - hash + canonical.charCodeAt(i);
					    hash |= 0; // clamp to a signed 32-bit integer
					  }
					  return Math.abs(hash).toString(36);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Summarises, for every domain, the tools that reference it.
					 *
					 * @param domains - Domain records; only `id` is read.
					 * @param tools - Tool records; `domains`, `tags` and `skillLevel` are read.
					 * @returns One entry per domain (input order) with the number of matching tools,
					 *   the five most frequent tags among them (ties keep first-seen order) and a
					 *   count of tools per `skillLevel`.
					 */
					function generateDomainRelationships(domains: any[], tools: any[]): DomainRelationship[] {
					  // Maps are used for counting so that keys such as "constructor" or "toString"
					  // cannot collide with properties inherited from Object.prototype.
					  const increment = (counts: Map<string, number>, key: string): void => {
					    counts.set(key, (counts.get(key) ?? 0) + 1);
					  };

					  return domains.map((domain) => {
					    const domainTools = tools.filter(
					      (tool) => tool.domains && tool.domains.includes(domain.id)
					    );

					    const tagCounts = new Map<string, number>();
					    const skillCounts = new Map<string, number>();
					    for (const tool of domainTools) {
					      for (const tag of [tool].flatMap((t) => t.tags || [])) {
					        increment(tagCounts, String(tag));
					      }
					      // A missing skillLevel is counted under the key "undefined".
					      increment(skillCounts, String(tool.skillLevel));
					    }

					    const topTags = [...tagCounts.entries()]
					      .sort(([, left], [, right]) => right - left)
					      .slice(0, 5)
					      .map(([tag]) => tag);

					    return {
					      domain_id: domain.id,
					      tool_count: domainTools.length,
					      common_tags: topTags,
					      skill_distribution: Object.fromEntries(skillCounts),
					    };
					  });
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Describes how each phase relates to its neighbours in the given list.
					 *
					 * @param phases - Phase records in list order; only `id` is read.
					 * @returns One entry per phase with its 1-based position, the ids of the
					 *   previous and next phase (null at either end), a parallel-capability flag
					 *   and a duration label.
					 */
					function generatePhaseDependencies(phases: any[]): PhaseDependency[] {
					  const result: PhaseDependency[] = [];

					  for (const [index, current] of phases.entries()) {
					    const id = current.id;
					    const previous = phases[index - 1];
					    const following = phases[index + 1];

					    // Some phases can run in parallel
					    const parallel = id === 'examination' || id === 'analysis';

					    let duration: string;
					    switch (id) {
					      case 'examination':
					        duration = 'hours-weeks';
					        break;
					      case 'analysis':
					        duration = 'days-weeks';
					        break;
					      default:
					        // includes 'data-collection'
					        duration = 'hours-days';
					    }

					    result.push({
					      phase_id: id,
					      order: index + 1,
					      depends_on: previous?.id || null,
					      enables: following?.id || null,
					      is_parallel_capable: parallel,
					      typical_duration: duration,
					    });
					  }

					  return result;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Collects tool names under each value found in one of a tool's list fields.
					 *
					 * A Map is used while grouping so that values such as "constructor" cannot
					 * collide with properties inherited from Object.prototype.
					 */
					function groupToolNamesBy(tools: any[], field: 'platforms' | 'phases'): Record<string, string[]> {
					  const groups = new Map<string, string[]>();

					  for (const tool of tools) {
					    const values = tool[field];
					    if (!Array.isArray(values)) continue; // field missing or malformed

					    for (const value of values) {
					      const key = String(value);
					      const names = groups.get(key);
					      if (names) {
					        names.push(tool.name);
					      } else {
					        groups.set(key, [tool.name]);
					      }
					    }
					  }

					  return Object.fromEntries(groups);
					}

					/**
					 * Groups tool names by shared characteristics.
					 *
					 * @param tools - Tool records; `name`, `platforms` and `phases` are read.
					 * @returns Two entries, in this order: `platform_compatibility` (tool names per
					 *   platform) and `phase_synergy` (tool names per phase). Tools without the
					 *   respective list are left out of that grouping.
					 */
					function generateToolCompatibilityMatrix(tools: any[]): CompatibilityMatrix[] {
					  return [
					    { type: 'platform_compatibility', groups: groupToolNamesBy(tools, 'platforms') },
					    { type: 'phase_synergy', groups: groupToolNamesBy(tools, 'phases') },
					  ];
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
async function loadRawData(): Promise<ToolsData> {
 | 
					async function loadRawData(): Promise<ToolsData> {
 | 
				
			||||||
  if (!cachedData) {
 | 
					  if (!cachedData) {
 | 
				
			||||||
    const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
 | 
					    const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
 | 
				
			||||||
@ -99,6 +252,21 @@ async function loadRawData(): Promise<ToolsData> {
 | 
				
			|||||||
    
 | 
					    
 | 
				
			||||||
    try {
 | 
					    try {
 | 
				
			||||||
      cachedData = ToolsDataSchema.parse(rawData);
 | 
					      cachedData = ToolsDataSchema.parse(rawData);
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      // Enhanced: Add default skill level descriptions if not provided
 | 
				
			||||||
 | 
					      if (!cachedData.skill_levels || Object.keys(cachedData.skill_levels).length === 0) {
 | 
				
			||||||
 | 
					        cachedData.skill_levels = {
 | 
				
			||||||
 | 
					          novice: "Minimal technical background required, guided interfaces",
 | 
				
			||||||
 | 
					          beginner: "Basic IT knowledge, some command-line familiarity helpful",
 | 
				
			||||||
 | 
					          intermediate: "Solid technical foundation, comfortable with various tools",
 | 
				
			||||||
 | 
					          advanced: "Extensive experience, deep technical understanding required",
 | 
				
			||||||
 | 
					          expert: "Specialist knowledge, cutting-edge techniques and complex scenarios"
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
 | 
					      dataVersion = generateDataVersion(cachedData);
 | 
				
			||||||
 | 
					      console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);
 | 
				
			||||||
 | 
					      
 | 
				
			||||||
    } catch (error) {
 | 
					    } catch (error) {
 | 
				
			||||||
      console.error('YAML validation failed:', error);
 | 
					      console.error('YAML validation failed:', error);
 | 
				
			||||||
      throw new Error('Invalid tools.yaml structure');
 | 
					      throw new Error('Invalid tools.yaml structure');
 | 
				
			||||||
@ -123,47 +291,88 @@ export async function getToolsData(): Promise<ToolsData> {
 | 
				
			|||||||
    };
 | 
					    };
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    lastRandomizationDate = today;
 | 
					    lastRandomizationDate = today;
 | 
				
			||||||
    
 | 
					 | 
				
			||||||
    cachedCompressedData = null;
 | 
					    cachedCompressedData = null;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
  
 | 
					  
 | 
				
			||||||
  return cachedRandomizedData;
 | 
					  return cachedRandomizedData;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 * Returns the data set handed to the AI pipeline, building it on first use.
 *
 * Built from `getToolsData()`:
 * - `tools`: every entry whose `type` is not 'concept', without `projectUrl` and
 *   `statusUrl`, plus the computed fields `is_hosted`, `is_open_source`,
 *   `complexity_score` and `phase_suitability`;
 * - `concepts`: every entry whose `type` is 'concept', without `projectUrl`,
 *   `statusUrl`, `platforms`, `accessType` and `license`, plus `learning_complexity`;
 * - the remaining collections unchanged, and the derived domain relationships,
 *   phase dependencies and tool compatibility matrix.
 *
 * The result is kept in `cachedCompressedData` until that variable is reset.
 */
export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedToolsData> {
  if (cachedCompressedData) {
    return cachedCompressedData;
  }

  const data = await getToolsData();

  // Numeric difficulty: expert 5 … beginner 2, anything else 1.
  const complexityScore = (skillLevel: unknown): number => {
    switch (skillLevel) {
      case 'expert':
        return 5;
      case 'advanced':
        return 4;
      case 'intermediate':
        return 3;
      case 'beginner':
        return 2;
      default:
        return 1;
    }
  };

  // Learning difficulty label for concepts; anything below intermediate is 'low'.
  const learningComplexity = (skillLevel: unknown): string => {
    switch (skillLevel) {
      case 'expert':
        return 'very_high';
      case 'advanced':
        return 'high';
      case 'intermediate':
        return 'medium';
      default:
        return 'low';
    }
  };

  // Enhanced: More detailed tool information for micro-tasks
  const compressedTools = data.tools
    .filter((tool: any) => tool.type !== 'concept')
    .map((tool: any) => {
      const { projectUrl, statusUrl, ...compressedTool } = tool;
      return {
        ...compressedTool,
        // True only for a non-blank string; non-string values no longer throw on trim().
        is_hosted: typeof projectUrl === 'string' && projectUrl.trim() !== '',
        // Always a boolean: a missing or empty license counts as "not open source".
        is_open_source: Boolean(tool.license) && tool.license !== 'Proprietary',
        complexity_score: complexityScore(tool.skillLevel),
        // A phase is 'primary' when one of the tool's tags contains the phase id.
        phase_suitability:
          tool.phases?.map((phase: string) => ({
            phase,
            primary_use: tool.tags?.some((tag: string) => tag.includes(phase)) ? 'primary' : 'secondary'
          })) || []
      };
    });

  const concepts = data.tools
    .filter((tool: any) => tool.type === 'concept')
    .map((concept: any) => {
      const { projectUrl, statusUrl, platforms, accessType, license, ...compressedConcept } = concept;
      return {
        ...compressedConcept,
        learning_complexity: learningComplexity(concept.skillLevel)
      };
    });

  // Enhanced: Add rich context data
  const domainRelationships = generateDomainRelationships(data.domains, compressedTools);
  const phaseDependencies = generatePhaseDependencies(data.phases);
  const toolCompatibilityMatrix = generateToolCompatibilityMatrix(compressedTools);

  const compressed: EnhancedCompressedToolsData = {
    tools: compressedTools,
    concepts: concepts,
    domains: data.domains,
    phases: data.phases,
    'domain-agnostic-software': data['domain-agnostic-software'],
    scenarios: data.scenarios, // Include scenarios for context
    skill_levels: data.skill_levels || {},
    // Enhanced context for micro-tasks
    domain_relationships: domainRelationships,
    phase_dependencies: phaseDependencies,
    tool_compatibility_matrix: toolCompatibilityMatrix
  };
  cachedCompressedData = compressed;

  console.log(`[DATA SERVICE] Generated enhanced compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
  console.log(`[DATA SERVICE] Added context: ${domainRelationships.length} domain relationships, ${phaseDependencies.length} phase dependencies`);

  return compressed;
}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Exposes the module-level `dataVersion` value.
					 *
					 * @returns The current version tag, or null while none is set.
					 */
					export function getDataVersion(): string | null {
					  return dataVersion;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 * Resets every module-level cache (raw, randomized and compressed data, the
 * randomization date and the data version) to null and logs the reset.
 */
export function clearCache(): void {
  cachedData = cachedRandomizedData = cachedCompressedData = null;
  lastRandomizationDate = dataVersion = null;

  console.log('[DATA SERVICE] Enhanced cache cleared');
}
 | 
				
			||||||
							
								
								
									
										267
									
								
								src/utils/embeddings.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										267
									
								
								src/utils/embeddings.ts
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,267 @@
 | 
				
			|||||||
 | 
					// src/utils/embeddings.ts
 | 
				
			||||||
 | 
					import { createHash } from 'crypto';
					import { promises as fs } from 'fs';
					import path from 'path';
					import { getCompressedToolsDataForAI } from './dataService.js';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** One embedded catalogue item together with the text it was embedded from. */
					interface EmbeddingData {
					  /** Item type and name joined by an underscore, non-alphanumerics replaced by `_`. */
					  id: string;
					  type: 'tool' | 'concept';
					  name: string;
					  /** Lower-cased text that was sent to the embeddings API. */
					  content: string;
					  /** Vector returned by the embeddings API for `content`. */
					  embedding: number[];
					  /** Selected fields copied from the source item. */
					  metadata: {
					    domains?: string[];
					    phases?: string[];
					    tags?: string[];
					    skillLevel?: string;
					    type?: string;
					  };
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/** On-disk layout of the embeddings cache file. */
					interface EmbeddingsDatabase {
					  /** Hash of the data the embeddings were generated from. */
					  version: string;
					  /** `Date.now()` timestamp of the last write. */
					  lastUpdated: number;
					  embeddings: EmbeddingData[];
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Generates, caches and queries embedding vectors for the tools and concepts
					 * returned by `getCompressedToolsDataForAI`.
					 *
					 * Vectors are requested by POSTing `{ model, input }` with a Bearer token to
					 * `AI_EMBEDDINGS_ENDPOINT`; the response is expected to have the form
					 * `{ data: [{ embedding: number[] }, …] }`. Results are persisted to
					 * `data/embeddings.json` below the current working directory and re-used while
					 * the hash of the source data is unchanged.
					 *
					 * Environment variables read: `AI_EMBEDDINGS_ENABLED`, `AI_EMBEDDINGS_BATCH_SIZE`
					 * (default 20), `AI_EMBEDDINGS_BATCH_DELAY_MS` (default 1000),
					 * `AI_EMBEDDINGS_ENDPOINT`, `AI_EMBEDDINGS_API_KEY`, `AI_EMBEDDINGS_MODEL`.
					 */
					class EmbeddingsService {
					  private embeddings: EmbeddingData[] = [];
					  private isInitialized = false;
					  private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
					  private readonly batchSize: number;
					  private readonly batchDelay: number;
					  private readonly enabled: boolean;

					  constructor() {
					    this.enabled = process.env.AI_EMBEDDINGS_ENABLED === 'true';
					    // A batch size below 1 (or a non-numeric value) would stall the batching
					    // loop, so invalid settings fall back to the defaults.
					    this.batchSize = EmbeddingsService.readIntEnv(process.env.AI_EMBEDDINGS_BATCH_SIZE, 20, 1);
					    this.batchDelay = EmbeddingsService.readIntEnv(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS, 1000, 0);
					  }

					  /** Parses a base-10 integer setting; returns `fallback` when missing, invalid or below `min`. */
					  private static readIntEnv(raw: string | undefined, fallback: number, min: number): number {
					    const parsed = Number.parseInt(raw ?? '', 10);
					    return Number.isNaN(parsed) || parsed < min ? fallback : parsed;
					  }

					  /**
					   * Loads cached embeddings when their version matches the current data,
					   * otherwise generates and stores new ones. Does nothing when embeddings are
					   * disabled. Failures are logged and leave the service uninitialized; this
					   * method does not reject.
					   */
					  async initialize(): Promise<void> {
					    if (!this.enabled) {
					      console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
					      return;
					    }

					    try {
					      console.log('[EMBEDDINGS] Initializing embeddings system...');

					      // Create data directory if it doesn't exist
					      await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true });

					      const toolsData = await getCompressedToolsDataForAI();
					      const currentDataHash = this.hashData(toolsData);

					      // Try to load existing embeddings
					      const existingEmbeddings = await this.loadEmbeddings();

					      if (existingEmbeddings && existingEmbeddings.version === currentDataHash) {
					        console.log('[EMBEDDINGS] Using cached embeddings');
					        this.embeddings = existingEmbeddings.embeddings;
					      } else {
					        console.log('[EMBEDDINGS] Generating new embeddings...');
					        await this.generateEmbeddings(toolsData, currentDataHash);
					      }

					      this.isInitialized = true;
					      console.log(`[EMBEDDINGS] Initialized with ${this.embeddings.length} embeddings`);
					    } catch (error) {
					      console.error('[EMBEDDINGS] Failed to initialize:', error);
					      this.isInitialized = false;
					    }
					  }

					  /**
					   * Computes the cache version for a data set.
					   *
					   * The digest covers exactly the per-item fields that end up in an embedding
					   * record (name, description, tags, domains, phases, skillLevel) plus the
					   * configured model name. Item fingerprints are sorted before hashing so the
					   * order of `tools` / `concepts` does not matter.
					   * NOTE(review): the data service appears to keep a per-day randomized copy
					   * of the data; order independence avoids needless regeneration if so — confirm.
					   *
					   * @returns The first 32 hex characters of a SHA-256 digest.
					   */
					  private hashData(data: any): string {
					    const fingerprint = (kind: string, item: any): string =>
					      JSON.stringify([
					        kind,
					        item?.name,
					        item?.description,
					        item?.tags,
					        item?.domains,
					        item?.phases,
					        item?.skillLevel
					      ]);

					    const tools: any[] = Array.isArray(data?.tools) ? data.tools : [];
					    const concepts: any[] = Array.isArray(data?.concepts) ? data.concepts : [];
					    const entries = [
					      ...tools.map((tool) => fingerprint('tool', tool)),
					      ...concepts.map((concept) => fingerprint('concept', concept))
					    ].sort();

					    return createHash('sha256')
					      .update(JSON.stringify({ model: process.env.AI_EMBEDDINGS_MODEL ?? '', entries }))
					      .digest('hex')
					      .slice(0, 32);
					  }

					  /**
					   * Reads the cache file.
					   *
					   * @returns The parsed database, or null when the file is missing, unreadable,
					   *   not valid JSON, or lacks a string `version` / array `embeddings`.
					   */
					  private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> {
					    let raw: string;
					    try {
					      raw = await fs.readFile(this.embeddingsPath, 'utf8');
					    } catch (error) {
					      if ((error as { code?: string } | null)?.code === 'ENOENT') {
					        console.log('[EMBEDDINGS] No existing embeddings found');
					      } else {
					        console.warn('[EMBEDDINGS] Could not read embeddings cache:', error);
					      }
					      return null;
					    }

					    try {
					      const parsed: any = JSON.parse(raw);
					      if (parsed && typeof parsed.version === 'string' && Array.isArray(parsed.embeddings)) {
					        return parsed as EmbeddingsDatabase;
					      }
					      console.warn('[EMBEDDINGS] Embeddings cache has an unexpected structure, ignoring it');
					    } catch (error) {
					      console.warn('[EMBEDDINGS] Embeddings cache is not valid JSON, ignoring it:', error);
					    }
					    return null;
					  }

					  /** Writes the in-memory embeddings to the cache file under the given version. */
					  private async saveEmbeddings(version: string): Promise<void> {
					    const database: EmbeddingsDatabase = {
					      version,
					      lastUpdated: Date.now(),
					      embeddings: this.embeddings
					    };

					    await fs.writeFile(this.embeddingsPath, JSON.stringify(database, null, 2));
					    console.log(`[EMBEDDINGS] Saved ${this.embeddings.length} embeddings to disk`);
					  }

					  /** Joins name, description, tags, domains and phases into one lower-cased string. */
					  private createContentString(item: any): string {
					    const parts = [
					      item.name,
					      item.description || '',
					      ...(item.tags || []),
					      ...(item.domains || []),
					      ...(item.phases || [])
					    ];

					    return parts.filter(Boolean).join(' ').toLowerCase();
					  }

					  /**
					   * Requests embeddings for a list of texts in a single API call.
					   *
					   * @returns One vector per input, in the order the API returned them.
					   * @throws Error when the API configuration is missing, the request fails, or
					   *   the response does not contain exactly one embedding array per input.
					   */
					  private async generateEmbeddingsBatch(contents: string[]): Promise<number[][]> {
					    const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
					    const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
					    const model = process.env.AI_EMBEDDINGS_MODEL;

					    if (!endpoint || !apiKey || !model) {
					      throw new Error('Missing embeddings API configuration');
					    }

					    const response = await fetch(endpoint, {
					      method: 'POST',
					      headers: {
					        'Content-Type': 'application/json',
					        'Authorization': `Bearer ${apiKey}`
					      },
					      body: JSON.stringify({
					        model,
					        input: contents
					      })
					    });

					    if (!response.ok) {
					      const error = await response.text();
					      throw new Error(`Embeddings API error: ${response.status} - ${error}`);
					    }

					    const payload: any = await response.json();
					    const rows: any[] | undefined = payload?.data;
					    // Guard against malformed or short responses; otherwise vectors would be
					    // paired with the wrong items (or with undefined) further down the line.
					    if (
					      !Array.isArray(rows) ||
					      rows.length !== contents.length ||
					      rows.some((row) => !Array.isArray(row?.embedding))
					    ) {
					      throw new Error(
					        `Embeddings API returned an unexpected response (expected ${contents.length} embeddings)`
					      );
					    }

					    return rows.map((row) => row.embedding);
					  }

					  /**
					   * Embeds every tool and concept in batches of `batchSize`, waiting
					   * `batchDelay` ms between batches, then stores the result under `version`.
					   * The in-memory list is only replaced once every batch has succeeded.
					   *
					   * @throws Re-throws the first batch error after logging it.
					   */
					  private async generateEmbeddings(toolsData: any, version: string): Promise<void> {
					    const allItems = [
					      ...toolsData.tools.map((tool: any) => ({ ...tool, type: 'tool' })),
					      ...toolsData.concepts.map((concept: any) => ({ ...concept, type: 'concept' }))
					    ];

					    const contents = allItems.map(item => this.createContentString(item));
					    const totalBatches = Math.ceil(contents.length / this.batchSize);
					    const generated: EmbeddingData[] = [];

					    // Process in batches to respect rate limits
					    for (let i = 0; i < contents.length; i += this.batchSize) {
					      const batchNumber = Math.floor(i / this.batchSize) + 1;
					      const batch = contents.slice(i, i + this.batchSize);
					      const batchItems = allItems.slice(i, i + this.batchSize);

					      console.log(`[EMBEDDINGS] Processing batch ${batchNumber} of ${totalBatches}`);

					      try {
					        const vectors = await this.generateEmbeddingsBatch(batch);

					        vectors.forEach((embedding, index) => {
					          const item = batchItems[index];
					          generated.push({
					            id: `${item.type}_${item.name.replace(/[^a-zA-Z0-9]/g, '_')}`,
					            type: item.type,
					            name: item.name,
					            content: batch[index],
					            embedding,
					            metadata: {
					              domains: item.domains,
					              phases: item.phases,
					              tags: item.tags,
					              skillLevel: item.skillLevel,
					              type: item.type
					            }
					          });
					        });

					        // Rate limiting delay between batches
					        if (i + this.batchSize < contents.length) {
					          await new Promise(resolve => setTimeout(resolve, this.batchDelay));
					        }
					      } catch (error) {
					        console.error(`[EMBEDDINGS] Failed to process batch ${batchNumber}:`, error);
					        throw error;
					      }
					    }

					    this.embeddings = generated;
					    await this.saveEmbeddings(version);
					  }

					  /**
					   * Embeds a single text (lower-cased first).
					   *
					   * @throws Error under the same conditions as a batch request.
					   */
					  public async embedText(text: string): Promise<number[]> {
					    // Re-use the private batch helper to avoid auth duplication
					    const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
					    return embedding;
					  }

					  /**
					   * Cosine similarity of two vectors.
					   *
					   * @returns A value in [-1, 1]; 0 when the vectors differ in length, are
					   *   empty, or either has zero magnitude (instead of NaN).
					   */
					  private cosineSimilarity(a: number[], b: number[]): number {
					    if (a.length === 0 || a.length !== b.length) {
					      return 0;
					    }

					    let dotProduct = 0;
					    let normA = 0;
					    let normB = 0;

					    for (let i = 0; i < a.length; i++) {
					      dotProduct += a[i] * b[i];
					      normA += a[i] * a[i];
					      normB += b[i] * b[i];
					    }

					    const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
					    return magnitude === 0 ? 0 : dotProduct / magnitude;
					  }

					  /**
					   * Finds the stored items most similar to a query text.
					   *
					   * @param query - Free text; it is lower-cased before embedding.
					   * @param maxResults - Maximum number of items returned.
					   * @param threshold - Minimum cosine similarity for an item to be included.
					   * @returns Matching items (each with an added `similarity` value), best first.
					   *   Returns an empty list when the service is disabled, not initialized,
					   *   has no embeddings, or the query could not be embedded.
					   */
					  async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<EmbeddingData[]> {
					    if (!this.enabled || !this.isInitialized || this.embeddings.length === 0) {
					      return [];
					    }

					    try {
					      const queryEmbedding = await this.embedText(query);

					      return this.embeddings
					        .map(item => ({
					          ...item,
					          similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
					        }))
					        .filter(item => item.similarity >= threshold)
					        .sort((a, b) => b.similarity - a.similarity)
					        .slice(0, maxResults);
					    } catch (error) {
					      console.error('[EMBEDDINGS] Failed to find similar items:', error);
					      return [];
					    }
					  }

					  /** True when embeddings are switched on and initialization has succeeded. */
					  isEnabled(): boolean {
					    return this.enabled && this.isInitialized;
					  }

					  /** Current configuration/initialization flags and the number of loaded embeddings. */
					  getStats(): { enabled: boolean; initialized: boolean; count: number } {
					    return {
					      enabled: this.enabled,
					      initialized: this.isInitialized,
					      count: this.embeddings.length
					    };
					  }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Module-wide singleton shared by all importers
					const embeddingsService = new EmbeddingsService();

					export { embeddingsService, type EmbeddingData };

					// Kick off initialization as soon as the module is imported, but only where
					// no `window` global exists and NODE_ENV is not 'test'.
					if (typeof window === 'undefined') {
					  if (process.env.NODE_ENV !== 'test') {
					    void embeddingsService.initialize().catch((error) => {
					      console.error('[EMBEDDINGS] Auto-initialization failed:', error);
					    });
					  }
					}
 | 
				
			||||||
@ -1,4 +1,4 @@
 | 
				
			|||||||
// src/utils/rateLimitedQueue.ts
 | 
					// src/utils/rateLimitedQueue.ts - FIXED: Memory leak and better cleanup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import dotenv from "dotenv";
 | 
					import dotenv from "dotenv";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -31,6 +31,43 @@ class RateLimitedQueue {
 | 
				
			|||||||
  private delayMs = RATE_LIMIT_DELAY_MS;
 | 
					  private delayMs = RATE_LIMIT_DELAY_MS;
 | 
				
			||||||
  private lastProcessedAt = 0;
 | 
					  private lastProcessedAt = 0;
 | 
				
			||||||
  private currentlyProcessingTaskId: string | null = null;
 | 
					  private currentlyProcessingTaskId: string | null = null;
 | 
				
			||||||
 | 
					  
 | 
				
			||||||
 | 
					  private cleanupInterval: NodeJS.Timeout;
 | 
				
			||||||
 | 
					  private readonly TASK_RETENTION_MS = 30000; 
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Starts the periodic sweep that evicts finished tasks via
					   * `cleanupOldTasks()` every 30 seconds. The timer handle is kept in
					   * `cleanupInterval` so `shutdown()` can cancel it.
					   */
					  constructor() {
					    this.cleanupInterval = setInterval(() => {
					      this.cleanupOldTasks();
					    }, 30000);
					    // A housekeeping timer must not be the only thing keeping the process
					    // alive: without unref(), scripts or build steps that import this
					    // module would never exit unless shutdown() is called. The optional
					    // call tolerates runtimes whose timer handle has no unref().
					    this.cleanupInterval.unref?.();
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Drops finished tasks whose `completedAt` timestamp is older than
					   * `TASK_RETENTION_MS`. Tasks that are still 'queued' or 'processing'
					   * are always kept, as are tasks without a truthy `completedAt`.
					   * Logs a summary line when at least one task was removed.
					   */
					  private cleanupOldTasks(): void {
					    const sweepTime = Date.now();
					    const countBefore = this.tasks.length;
					    this.tasks = this.tasks.filter(entry => {
					      const isActive = entry.status === 'queued' || entry.status === 'processing';
					      if (isActive) {
					        return true;
					      }
					      // Only entries with a recorded completion time can expire.
					      const finishedAt = entry.completedAt;
					      const isExpired = finishedAt
					        ? sweepTime - finishedAt > this.TASK_RETENTION_MS
					        : false;
					      return !isExpired;
					    });
					    const removed = countBefore - this.tasks.length;
					    if (removed > 0) {
					      console.log(`[QUEUE] Cleaned up ${removed} old tasks, ${this.tasks.length} remaining`);
					    }
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  /**
					   * Cancels the periodic cleanup timer stored in `cleanupInterval`.
					   * Does nothing when no timer handle is set.
					   */
					  public shutdown(): void {
					    if (!this.cleanupInterval) {
					      return;
					    }
					    clearInterval(this.cleanupInterval);
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  add<T>(task: Task<T>, taskId?: string): Promise<T> {
 | 
					  add<T>(task: Task<T>, taskId?: string): Promise<T> {
 | 
				
			||||||
    const id = taskId || this.generateTaskId();
 | 
					    const id = taskId || this.generateTaskId();
 | 
				
			||||||
@ -103,7 +140,6 @@ class RateLimitedQueue {
 | 
				
			|||||||
            const processingOffset = processingTasks.length > 0 ? 1 : 0;
 | 
					            const processingOffset = processingTasks.length > 0 ? 1 : 0;
 | 
				
			||||||
            status.currentPosition = processingOffset + positionInQueue + 1;
 | 
					            status.currentPosition = processingOffset + positionInQueue + 1;
 | 
				
			||||||
          }
 | 
					          }
 | 
				
			||||||
        } else if (task.status === 'completed' || task.status === 'failed') {
 | 
					 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
      } else {        
 | 
					      } else {        
 | 
				
			||||||
        const taskTimestamp = taskId.match(/ai_(\d+)_/)?.[1];
 | 
					        const taskTimestamp = taskId.match(/ai_(\d+)_/)?.[1];
 | 
				
			||||||
@ -152,7 +188,6 @@ class RateLimitedQueue {
 | 
				
			|||||||
        this.currentlyProcessingTaskId = nextTask.id;
 | 
					        this.currentlyProcessingTaskId = nextTask.id;
 | 
				
			||||||
        this.lastProcessedAt = Date.now();
 | 
					        this.lastProcessedAt = Date.now();
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        
 | 
					 | 
				
			||||||
        try {
 | 
					        try {
 | 
				
			||||||
          await nextTask.task();
 | 
					          await nextTask.task();
 | 
				
			||||||
          nextTask.status = 'completed';
 | 
					          nextTask.status = 'completed';
 | 
				
			||||||
@ -166,14 +201,6 @@ class RateLimitedQueue {
 | 
				
			|||||||
        
 | 
					        
 | 
				
			||||||
        this.currentlyProcessingTaskId = null;
 | 
					        this.currentlyProcessingTaskId = null;
 | 
				
			||||||
        
 | 
					        
 | 
				
			||||||
        setTimeout(() => {
 | 
					 | 
				
			||||||
          const index = this.tasks.findIndex(t => t.id === nextTask.id);
 | 
					 | 
				
			||||||
          if (index >= 0) {
 | 
					 | 
				
			||||||
            console.log(`[QUEUE] Removing completed task ${nextTask.id}`);
 | 
					 | 
				
			||||||
            this.tasks.splice(index, 1);
 | 
					 | 
				
			||||||
          }
 | 
					 | 
				
			||||||
        }, 10000); 
 | 
					 | 
				
			||||||
        
 | 
					 | 
				
			||||||
        const hasMoreQueued = this.tasks.some(t => t.status === 'queued');
 | 
					        const hasMoreQueued = this.tasks.some(t => t.status === 'queued');
 | 
				
			||||||
        if (hasMoreQueued) {
 | 
					        if (hasMoreQueued) {
 | 
				
			||||||
          console.log(`[QUEUE] Waiting ${this.delayMs}ms before next task`);
 | 
					          console.log(`[QUEUE] Waiting ${this.delayMs}ms before next task`);
 | 
				
			||||||
@ -201,4 +228,8 @@ export function getQueueStatus(taskId?: string): QueueStatus {
 | 
				
			|||||||
  return queue.getStatus(taskId);
 | 
					  return queue.getStatus(taskId);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Module-level convenience wrapper: forwards to `shutdown()` on the
					 * `queue` instance that this module exports as its default.
					 */
					export function shutdownQueue(): void {
					  queue.shutdown();
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
export default queue;
 | 
					export default queue;
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user