Merge pull request 'embeddings-1' (#2) from embeddings-1 into main
Reviewed-on: #2
This commit is contained in:
		
						commit
						f329955c62
					
				
							
								
								
									
										84
									
								
								.env.example
									
									
									
									
									
								
							
							
						
						
									
										84
									
								
								.env.example
									
									
									
									
									
								
							@ -2,34 +2,74 @@
 | 
			
		||||
# ForensicPathways Environment Configuration
 | 
			
		||||
# ===========================================
 | 
			
		||||
 | 
			
		||||
# Authentication & OIDC (Required)
 | 
			
		||||
AUTH_SECRET=change-this-to-a-strong-secret-key-in-production
 | 
			
		||||
# === Authentication Configuration ===
 | 
			
		||||
AUTHENTICATION_NECESSARY=false
 | 
			
		||||
AUTHENTICATION_NECESSARY_CONTRIBUTIONS=false
 | 
			
		||||
AUTHENTICATION_NECESSARY_AI=false
 | 
			
		||||
AUTH_SECRET=your-secret-key-change-in-production
 | 
			
		||||
 | 
			
		||||
# OIDC Configuration (if authentication enabled)
 | 
			
		||||
OIDC_ENDPOINT=https://your-oidc-provider.com
 | 
			
		||||
OIDC_CLIENT_ID=your-oidc-client-id
 | 
			
		||||
OIDC_CLIENT_SECRET=your-oidc-client-secret
 | 
			
		||||
OIDC_CLIENT_ID=your-client-id
 | 
			
		||||
OIDC_CLIENT_SECRET=your-client-secret
 | 
			
		||||
 | 
			
		||||
# Auth Scopes - set to true in prod
 | 
			
		||||
AUTHENTICATION_NECESSARY_CONTRIBUTIONS=true
 | 
			
		||||
AUTHENTICATION_NECESSARY_AI=true
 | 
			
		||||
# ===================================================================
 | 
			
		||||
# AI CONFIGURATION - Complete Reference for Improved Pipeline
 | 
			
		||||
# ===================================================================
 | 
			
		||||
 | 
			
		||||
# Application Configuration (Required)
 | 
			
		||||
PUBLIC_BASE_URL=https://your-domain.com
 | 
			
		||||
NODE_ENV=production
 | 
			
		||||
# === CORE AI ENDPOINTS & MODELS ===
 | 
			
		||||
AI_API_ENDPOINT=https://llm.mikoshi.de
 | 
			
		||||
AI_API_KEY=sREDACTED3w
 | 
			
		||||
AI_MODEL='mistral/mistral-small-latest'
 | 
			
		||||
 | 
			
		||||
# AI Service Configuration (Required for AI features)
 | 
			
		||||
AI_MODEL=mistral-large-latest
 | 
			
		||||
AI_API_ENDPOINT=https://api.mistral.ai
 | 
			
		||||
AI_API_KEY=your-mistral-api-key
 | 
			
		||||
AI_RATE_LIMIT_DELAY_MS=1000
 | 
			
		||||
# === IMPROVED PIPELINE: Use separate analyzer model (mistral-small is fine) ===
 | 
			
		||||
AI_ANALYZER_ENDPOINT=https://llm.mikoshi.de
 | 
			
		||||
AI_ANALYZER_API_KEY=skREDACTEDw3w  
 | 
			
		||||
AI_ANALYZER_MODEL='mistral/mistral-small-latest'
 | 
			
		||||
 | 
			
		||||
# Git Integration (Required for contributions)
 | 
			
		||||
GIT_REPO_URL=https://git.cc24.dev/mstoeck3/forensic-pathways
 | 
			
		||||
GIT_PROVIDER=gitea
 | 
			
		||||
GIT_API_ENDPOINT=https://git.cc24.dev/api/v1
 | 
			
		||||
GIT_API_TOKEN=your-git-api-token
 | 
			
		||||
# === EMBEDDINGS CONFIGURATION ===
 | 
			
		||||
AI_EMBEDDINGS_ENABLED=true
 | 
			
		||||
AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
 | 
			
		||||
AI_EMBEDDINGS_API_KEY=ZREDACTED3wL
 | 
			
		||||
AI_EMBEDDINGS_MODEL=mistral-embed
 | 
			
		||||
AI_EMBEDDINGS_BATCH_SIZE=20
 | 
			
		||||
AI_EMBEDDINGS_BATCH_DELAY_MS=1000
 | 
			
		||||
 | 
			
		||||
# File Upload Configuration (Optional)
 | 
			
		||||
LOCAL_UPLOAD_PATH=./public/uploads
 | 
			
		||||
# === PIPELINE: VectorIndex (HNSW) Configuration ===
 | 
			
		||||
AI_MAX_SELECTED_ITEMS=60                    # Tools visible to each micro-task 
 | 
			
		||||
AI_EMBEDDING_CANDIDATES=60                  # VectorIndex candidates (HNSW is more efficient)
 | 
			
		||||
AI_SIMILARITY_THRESHOLD=0.3                # Not used by VectorIndex (uses cosine distance internally)
 | 
			
		||||
 | 
			
		||||
# === MICRO-TASK CONFIGURATION ===
 | 
			
		||||
AI_MICRO_TASK_DELAY_MS=500                 # Delay between micro-tasks  
 | 
			
		||||
AI_MICRO_TASK_TIMEOUT_MS=25000             # Timeout per micro-task (increased for full context)
 | 
			
		||||
 | 
			
		||||
# === RATE LIMITING ===
 | 
			
		||||
AI_RATE_LIMIT_DELAY_MS=3000                # Main rate limit delay
 | 
			
		||||
AI_RATE_LIMIT_MAX_REQUESTS=6               # Main requests per minute (reduced - fewer but richer calls)
 | 
			
		||||
AI_MICRO_TASK_RATE_LIMIT=15                # Micro-task requests per minute (was 30)
 | 
			
		||||
 | 
			
		||||
# === QUEUE MANAGEMENT ===
 | 
			
		||||
AI_QUEUE_MAX_SIZE=50
 | 
			
		||||
AI_QUEUE_CLEANUP_INTERVAL_MS=300000
 | 
			
		||||
 | 
			
		||||
# === PERFORMANCE & MONITORING ===
 | 
			
		||||
AI_MICRO_TASK_DEBUG=true
 | 
			
		||||
AI_PERFORMANCE_METRICS=true
 | 
			
		||||
AI_RESPONSE_CACHE_TTL_MS=3600000
 | 
			
		||||
 | 
			
		||||
# ===================================================================
 | 
			
		||||
# LEGACY VARIABLES (still used but less important)
 | 
			
		||||
# ===================================================================
 | 
			
		||||
 | 
			
		||||
# These are still used by other parts of the system:
 | 
			
		||||
AI_RESPONSE_CACHE_TTL_MS=3600000           # For caching responses
 | 
			
		||||
AI_QUEUE_MAX_SIZE=50                       # Queue management
 | 
			
		||||
AI_QUEUE_CLEANUP_INTERVAL_MS=300000       # Queue cleanup
 | 
			
		||||
 | 
			
		||||
# === Application Configuration ===
 | 
			
		||||
PUBLIC_BASE_URL=http://localhost:4321
 | 
			
		||||
NODE_ENV=development
 | 
			
		||||
 | 
			
		||||
# Nextcloud Integration (Optional)
 | 
			
		||||
NEXTCLOUD_ENDPOINT=https://your-nextcloud.com
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@ -85,3 +85,4 @@ temp/
 | 
			
		||||
.astro/data-store.json
 | 
			
		||||
.astro/content.d.ts
 | 
			
		||||
prompt.md
 | 
			
		||||
data/embeddings.json
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										358
									
								
								RAG-Roadmap.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										358
									
								
								RAG-Roadmap.md
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,358 @@
 | 
			
		||||
# Forensic-Grade RAG Implementation Roadmap
 | 
			
		||||
 | 
			
		||||
## Context & Current State Analysis
 | 
			
		||||
 | 
			
		||||
You have access to a forensic tools recommendation system built with:
 | 
			
		||||
- **Embeddings-based retrieval** (src/utils/embeddings.ts)
 | 
			
		||||
- **Multi-stage AI pipeline** (src/utils/aiPipeline.ts) 
 | 
			
		||||
- **Micro-task processing** for detailed analysis
 | 
			
		||||
- **Rate limiting and queue management** (src/utils/rateLimitedQueue.ts)
 | 
			
		||||
- **YAML-based tool database** (src/data/tools.yaml)
 | 
			
		||||
 | 
			
		||||
**Current Architecture**: Basic RAG (Retrieve → AI Selection → Micro-task Generation)
 | 
			
		||||
 | 
			
		||||
**Target Architecture**: Forensic-Grade RAG with transparency, objectivity, and reproducibility
 | 
			
		||||
 | 
			
		||||
## Implementation Roadmap
 | 
			
		||||
 | 
			
		||||
### PHASE 1: Configuration Externalization & AI Architecture Enhancement (Weeks 1-2)
 | 
			
		||||
 | 
			
		||||
#### 1.1 Complete Configuration Externalization
 | 
			
		||||
**Objective**: Remove all hard-coded values from codebase (except AI prompts)
 | 
			
		||||
 | 
			
		||||
**Tasks**:
 | 
			
		||||
1. **Create comprehensive configuration schema** in `src/config/`
 | 
			
		||||
   - `forensic-scoring.yaml` - All scoring criteria, weights, thresholds
 | 
			
		||||
   - `ai-models.yaml` - AI model configurations and routing
 | 
			
		||||
   - `system-parameters.yaml` - Rate limits, queue settings, processing parameters
 | 
			
		||||
   - `validation-criteria.yaml` - Expert validation rules, bias detection parameters
 | 
			
		||||
 | 
			
		||||
2. **Implement configuration loader** (`src/utils/configLoader.ts`)
 | 
			
		||||
   - Hot-reload capability for configuration changes
 | 
			
		||||
   - Environment-specific overrides (dev/staging/prod)
 | 
			
		||||
   - Configuration validation and schema enforcement
 | 
			
		||||
   - Default fallbacks for missing values
 | 
			
		||||
 | 
			
		||||
3. **Audit existing codebase** for hard-coded values:
 | 
			
		||||
   - Search for literal numbers, strings, arrays in TypeScript files
 | 
			
		||||
   - Extract to configuration files with meaningful names
 | 
			
		||||
   - Ensure all thresholds (similarity scores, rate limits, token counts) are configurable
 | 
			
		||||
 | 
			
		||||
#### 1.2 Dual AI Model Architecture Implementation
 | 
			
		||||
**Objective**: Implement large + small model strategy for optimal cost/performance
 | 
			
		||||
 | 
			
		||||
**Tasks**:
 | 
			
		||||
1. **Extend environment configuration**:
 | 
			
		||||
   ```
 | 
			
		||||
   # Strategic Analysis Model (Large, Few Tokens)
 | 
			
		||||
   AI_STRATEGIC_ENDPOINT=
 | 
			
		||||
   AI_STRATEGIC_API_KEY=
 | 
			
		||||
   AI_STRATEGIC_MODEL=mistral-large-latest
 | 
			
		||||
   AI_STRATEGIC_MAX_TOKENS=500
 | 
			
		||||
   AI_STRATEGIC_CONTEXT_WINDOW=32000
 | 
			
		||||
   
 | 
			
		||||
   # Content Generation Model (Small, Many Tokens)  
 | 
			
		||||
   AI_CONTENT_ENDPOINT=
 | 
			
		||||
   AI_CONTENT_API_KEY=
 | 
			
		||||
   AI_CONTENT_MODEL=mistral-small-latest
 | 
			
		||||
   AI_CONTENT_MAX_TOKENS=2000
 | 
			
		||||
   AI_CONTENT_CONTEXT_WINDOW=8000
 | 
			
		||||
   ```
 | 
			
		||||
 | 
			
		||||
2. **Create AI router** (`src/utils/aiRouter.ts`):
 | 
			
		||||
   - Route different task types to appropriate models
 | 
			
		||||
   - **Strategic tasks** → Large model: tool selection, bias analysis, methodology decisions
 | 
			
		||||
   - **Content tasks** → Small model: descriptions, explanations, micro-task outputs
 | 
			
		||||
   - Automatic fallback logic if primary model fails
 | 
			
		||||
   - Usage tracking and cost optimization
 | 
			
		||||
 | 
			
		||||
3. **Update aiPipeline.ts**:
 | 
			
		||||
   - Replace single `callAI()` method with task-specific methods
 | 
			
		||||
   - Implement intelligent routing based on task complexity
 | 
			
		||||
   - Add token estimation for optimal model selection
 | 
			
		||||
 | 
			
		||||
### PHASE 2: Evidence-Based Scoring Framework (Weeks 3-5)
 | 
			
		||||
 | 
			
		||||
#### 2.1 Forensic Scoring Engine Implementation
 | 
			
		||||
**Objective**: Replace subjective AI selection with objective, measurable criteria
 | 
			
		||||
 | 
			
		||||
**Tasks**:
 | 
			
		||||
1. **Create scoring framework** (`src/scoring/ForensicScorer.ts`):
 | 
			
		||||
   ```typescript
 | 
			
		||||
   interface ScoringCriterion {
 | 
			
		||||
     name: string;
 | 
			
		||||
     weight: number;
 | 
			
		||||
     methodology: string;
 | 
			
		||||
     dataSources: string[];
 | 
			
		||||
     calculator: (tool: Tool, scenario: Scenario) => Promise<CriterionScore>;
 | 
			
		||||
   }
 | 
			
		||||
   
 | 
			
		||||
   interface CriterionScore {
 | 
			
		||||
     value: number;           // 0-100
 | 
			
		||||
     confidence: number;      // 0-100  
 | 
			
		||||
     evidence: Evidence[];
 | 
			
		||||
     lastUpdated: Date;
 | 
			
		||||
   }
 | 
			
		||||
   ```
 | 
			
		||||
 | 
			
		||||
2. **Implement core scoring criteria**:
 | 
			
		||||
   - **Court Admissibility Scorer**: Based on legal precedent database
 | 
			
		||||
   - **Scientific Validity Scorer**: Based on peer-reviewed research citations
 | 
			
		||||
   - **Methodology Alignment Scorer**: NIST SP 800-86 compliance assessment
 | 
			
		||||
   - **Expert Consensus Scorer**: Practitioner survey data integration
 | 
			
		||||
   - **Error Rate Scorer**: Known false positive/negative rates
 | 
			
		||||
 | 
			
		||||
3. **Build evidence provenance system**:
 | 
			
		||||
   - Track source of every score component
 | 
			
		||||
   - Maintain citation database for all claims
 | 
			
		||||
   - Version control for scoring methodologies
 | 
			
		||||
   - Automatic staleness detection for outdated evidence
 | 
			
		||||
 | 
			
		||||
#### 2.2 Deterministic Core Implementation  
 | 
			
		||||
**Objective**: Ensure reproducible results for identical inputs
 | 
			
		||||
 | 
			
		||||
**Tasks**:
 | 
			
		||||
1. **Implement deterministic pipeline** (`src/analysis/DeterministicAnalyzer.ts`):
 | 
			
		||||
   - Rule-based scenario classification (SCADA/Mobile/Network/etc.)
 | 
			
		||||
   - Mathematical scoring combination (weighted averages, not AI decisions)
 | 
			
		||||
   - Consistent tool ranking algorithms
 | 
			
		||||
   - Reproducibility validation tests
 | 
			
		||||
 | 
			
		||||
2. **Add AI enhancement layer**:
 | 
			
		||||
   - AI provides explanations, NOT decisions
 | 
			
		||||
   - AI generates workflow descriptions based on deterministic selections
 | 
			
		||||
   - AI creates contextual advice around objective tool choices
 | 
			
		||||
 | 
			
		||||
### PHASE 3: Transparency & Audit Trail System (Weeks 4-6)
 | 
			
		||||
 | 
			
		||||
#### 3.1 Complete Audit Trail Implementation
 | 
			
		||||
**Objective**: Track every decision with forensic-grade documentation
 | 
			
		||||
 | 
			
		||||
**Tasks**:
 | 
			
		||||
1. **Create audit framework** (`src/audit/AuditTrail.ts`):
 | 
			
		||||
   ```typescript
 | 
			
		||||
   interface ForensicAuditTrail {
 | 
			
		||||
     queryId: string;
 | 
			
		||||
     userQuery: string;
 | 
			
		||||
     processingSteps: AuditStep[];
 | 
			
		||||
     finalRecommendation: RecommendationWithEvidence;
 | 
			
		||||
     reproducibilityHash: string;
 | 
			
		||||
     validationStatus: ValidationStatus;
 | 
			
		||||
   }
 | 
			
		||||
   
 | 
			
		||||
   interface AuditStep {
 | 
			
		||||
     stepName: string;
 | 
			
		||||
     input: any;
 | 
			
		||||
     methodology: string;
 | 
			
		||||
     output: any;
 | 
			
		||||
     evidence: Evidence[];
 | 
			
		||||
     confidence: number;
 | 
			
		||||
     processingTime: number;
 | 
			
		||||
     modelUsed?: string;
 | 
			
		||||
   }
 | 
			
		||||
   ```
 | 
			
		||||
 | 
			
		||||
2. **Implement evidence citation system**:
 | 
			
		||||
   - Automatic citation generation for all claims
 | 
			
		||||
   - Link to source standards (NIST, ISO, RFC)
 | 
			
		||||
   - Reference scientific papers for methodology choices
 | 
			
		||||
   - Track expert validation contributors
 | 
			
		||||
 | 
			
		||||
3. **Build explanation generator**:
 | 
			
		||||
   - Human-readable reasoning for every recommendation
 | 
			
		||||
   - "Why this tool" and "Why not alternatives" explanations
 | 
			
		||||
   - Confidence level communication
 | 
			
		||||
   - Uncertainty quantification
 | 
			
		||||
 | 
			
		||||
#### 3.2 Bias Detection & Mitigation System
 | 
			
		||||
**Objective**: Actively detect and correct recommendation biases
 | 
			
		||||
 | 
			
		||||
**Tasks**:
 | 
			
		||||
1. **Implement bias detection** (`src/bias/BiasDetector.ts`):
 | 
			
		||||
   - **Popularity bias**: Over-recommendation of well-known tools
 | 
			
		||||
   - **Availability bias**: Preference for easily accessible tools
 | 
			
		||||
   - **Recency bias**: Over-weighting of newest tools
 | 
			
		||||
   - **Cultural bias**: Platform or methodology preferences
 | 
			
		||||
 | 
			
		||||
2. **Create mitigation strategies**:
 | 
			
		||||
   - Automatic bias adjustment algorithms
 | 
			
		||||
   - Diversity requirements for recommendations
 | 
			
		||||
   - Fairness metrics across tool categories
 | 
			
		||||
   - Bias reporting in audit trails
 | 
			
		||||
 | 
			
		||||
### PHASE 4: Expert Validation & Learning System (Weeks 6-8)
 | 
			
		||||
 | 
			
		||||
#### 4.1 Expert Review Integration
 | 
			
		||||
**Objective**: Enable forensic experts to validate and improve recommendations
 | 
			
		||||
 | 
			
		||||
**Tasks**:
 | 
			
		||||
1. **Build expert validation interface** (`src/validation/ExpertReview.ts`):
 | 
			
		||||
   - Structured feedback collection from forensic practitioners
 | 
			
		||||
   - Agreement/disagreement tracking with detailed reasoning
 | 
			
		||||
   - Expert consensus building over time
 | 
			
		||||
   - Minority opinion preservation
 | 
			
		||||
 | 
			
		||||
2. **Implement validation loop**:
 | 
			
		||||
   - Flag recommendations requiring expert review
 | 
			
		||||
   - Track expert validation rates and patterns
 | 
			
		||||
   - Update scoring based on real-world feedback
 | 
			
		||||
   - Methodology improvement based on expert input
 | 
			
		||||
 | 
			
		||||
#### 4.2 Real-World Case Learning
 | 
			
		||||
**Objective**: Learn from actual forensic investigations
 | 
			
		||||
 | 
			
		||||
**Tasks**:
 | 
			
		||||
1. **Create case study integration** (`src/learning/CaseStudyLearner.ts`):
 | 
			
		||||
   - Anonymous case outcome tracking
 | 
			
		||||
   - Tool effectiveness measurement in real scenarios
 | 
			
		||||
   - Methodology success/failure analysis
 | 
			
		||||
   - Continuous improvement based on field results
 | 
			
		||||
 | 
			
		||||
2. **Implement feedback loops**:
 | 
			
		||||
   - Post-case recommendation validation
 | 
			
		||||
   - Tool performance tracking in actual investigations
 | 
			
		||||
   - Methodology refinement based on outcomes
 | 
			
		||||
   - Success rate improvement over time
 | 
			
		||||
 | 
			
		||||
### PHASE 5: Advanced Features & Scientific Rigor (Weeks 7-10)
 | 
			
		||||
 | 
			
		||||
#### 5.1 Confidence & Uncertainty Quantification
 | 
			
		||||
**Objective**: Provide scientific confidence levels for all recommendations
 | 
			
		||||
 | 
			
		||||
**Tasks**:
 | 
			
		||||
1. **Implement uncertainty quantification** (`src/uncertainty/ConfidenceCalculator.ts`):
 | 
			
		||||
   - Statistical confidence intervals for scores
 | 
			
		||||
   - Uncertainty propagation through scoring pipeline
 | 
			
		||||
   - Risk assessment for recommendation reliability
 | 
			
		||||
   - Alternative recommendation ranking
 | 
			
		||||
 | 
			
		||||
2. **Add fallback recommendation system**:
 | 
			
		||||
   - Multiple ranked alternatives for each recommendation
 | 
			
		||||
   - Contingency planning for tool failures
 | 
			
		||||
   - Risk-based recommendation portfolios
 | 
			
		||||
   - Sensitivity analysis for critical decisions
 | 
			
		||||
 | 
			
		||||
#### 5.2 Reproducibility Testing Framework
 | 
			
		||||
**Objective**: Ensure consistent results across time and implementations
 | 
			
		||||
 | 
			
		||||
**Tasks**:
 | 
			
		||||
1. **Build reproducibility testing** (`src/testing/ReproducibilityTester.ts`):
 | 
			
		||||
   - Automated consistency validation
 | 
			
		||||
   - Inter-rater reliability testing
 | 
			
		||||
   - Cross-temporal stability analysis
 | 
			
		||||
   - Version control for methodology changes
 | 
			
		||||
 | 
			
		||||
2. **Implement quality assurance**:
 | 
			
		||||
   - Continuous integration for reproducibility
 | 
			
		||||
   - Regression testing for methodology changes
 | 
			
		||||
   - Performance monitoring for consistency
 | 
			
		||||
   - Alert system for unexpected variations
 | 
			
		||||
 | 
			
		||||
### PHASE 6: Integration & Production Readiness (Weeks 9-12)
 | 
			
		||||
 | 
			
		||||
#### 6.1 System Integration
 | 
			
		||||
**Objective**: Integrate all forensic-grade components seamlessly
 | 
			
		||||
 | 
			
		||||
**Tasks**:
 | 
			
		||||
1. **Update existing components**:
 | 
			
		||||
   - Modify `aiPipeline.ts` to use new scoring framework
 | 
			
		||||
   - Update `embeddings.ts` with evidence tracking
 | 
			
		||||
   - Enhance `rateLimitedQueue.ts` with audit capabilities
 | 
			
		||||
   - Refactor `query.ts` API to return audit trails
 | 
			
		||||
 | 
			
		||||
2. **Performance optimization**:
 | 
			
		||||
   - Caching strategies for expensive evidence lookups
 | 
			
		||||
   - Parallel processing for scoring criteria
 | 
			
		||||
   - Efficient storage for audit trails
 | 
			
		||||
   - Load balancing for dual AI models
 | 
			
		||||
 | 
			
		||||
#### 6.2 Production Features
 | 
			
		||||
**Objective**: Make system ready for professional forensic use
 | 
			
		||||
 | 
			
		||||
**Tasks**:
 | 
			
		||||
1. **Add professional features**:
 | 
			
		||||
   - Export recommendations to forensic report formats
 | 
			
		||||
   - Integration with existing forensic workflows
 | 
			
		||||
   - Batch processing for multiple scenarios
 | 
			
		||||
   - API endpoints for external tool integration
 | 
			
		||||
 | 
			
		||||
2. **Implement monitoring & maintenance**:
 | 
			
		||||
   - Health checks for all system components
 | 
			
		||||
   - Performance monitoring for response times
 | 
			
		||||
   - Error tracking and alerting
 | 
			
		||||
   - Automatic system updates for new evidence
 | 
			
		||||
 | 
			
		||||
## Technical Implementation Guidelines
 | 
			
		||||
 | 
			
		||||
### Configuration Management
 | 
			
		||||
- Use YAML files for human-readable configuration
 | 
			
		||||
- Implement JSON Schema validation for all config files
 | 
			
		||||
- Support environment variable overrides
 | 
			
		||||
- Hot-reload for development, restart for production changes
 | 
			
		||||
 | 
			
		||||
### AI Model Routing Strategy
 | 
			
		||||
```typescript
 | 
			
		||||
// Task Classification for Model Selection
 | 
			
		||||
const AI_TASK_ROUTING = {
 | 
			
		||||
  strategic: ['tool-selection', 'bias-analysis', 'methodology-decisions'],
 | 
			
		||||
  content: ['descriptions', 'explanations', 'micro-tasks', 'workflows']
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Cost Optimization Logic
 | 
			
		||||
if (taskComplexity === 'high' && responseTokens < 500) {
 | 
			
		||||
  useModel = 'large';
 | 
			
		||||
} else if (taskComplexity === 'low' && responseTokens > 1000) {
 | 
			
		||||
  useModel = 'small';
 | 
			
		||||
} else {
 | 
			
		||||
  useModel = config.defaultModel;
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
### Evidence Database Structure
 | 
			
		||||
```typescript
 | 
			
		||||
interface EvidenceSource {
 | 
			
		||||
  type: 'standard' | 'paper' | 'case-law' | 'expert-survey';
 | 
			
		||||
  citation: string;
 | 
			
		||||
  reliability: number;
 | 
			
		||||
  lastValidated: Date;
 | 
			
		||||
  content: string;
 | 
			
		||||
  metadata: Record<string, any>;
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
### Quality Assurance Requirements
 | 
			
		||||
- All scoring criteria must have documented methodologies
 | 
			
		||||
- Every recommendation must include confidence levels
 | 
			
		||||
- All AI-generated content must be marked as such
 | 
			
		||||
- Reproducibility tests must pass with >95% consistency
 | 
			
		||||
- Expert validation rate must exceed 80% for production use
 | 
			
		||||
 | 
			
		||||
## Success Metrics
 | 
			
		||||
 | 
			
		||||
### Forensic Quality Metrics
 | 
			
		||||
- **Transparency**: 100% of decisions traceable to evidence
 | 
			
		||||
- **Objectivity**: <5% variance in scoring between runs
 | 
			
		||||
- **Reproducibility**: >95% identical results for identical inputs
 | 
			
		||||
- **Expert Agreement**: >80% expert validation rate
 | 
			
		||||
- **Bias Reduction**: <10% bias score across all categories
 | 
			
		||||
 | 
			
		||||
### Performance Metrics  
 | 
			
		||||
- **Response Time**: <30 seconds for workflow recommendations
 | 
			
		||||
- **Accuracy**: >90% real-world case validation success
 | 
			
		||||
- **Coverage**: Support for >95% of common forensic scenarios
 | 
			
		||||
- **Reliability**: <1% system error rate
 | 
			
		||||
- **Cost Efficiency**: <50% cost reduction vs. single large model
 | 
			
		||||
 | 
			
		||||
## Risk Mitigation
 | 
			
		||||
 | 
			
		||||
### Technical Risks
 | 
			
		||||
- **AI Model Failures**: Implement robust fallback mechanisms
 | 
			
		||||
- **Configuration Errors**: Comprehensive validation and testing
 | 
			
		||||
- **Performance Issues**: Load testing and optimization
 | 
			
		||||
- **Data Corruption**: Backup and recovery procedures
 | 
			
		||||
 | 
			
		||||
### Forensic Risks
 | 
			
		||||
- **Bias Introduction**: Continuous monitoring and expert validation
 | 
			
		||||
- **Methodology Errors**: Peer review and scientific validation
 | 
			
		||||
- **Legal Challenges**: Ensure compliance with admissibility standards
 | 
			
		||||
- **Expert Disagreement**: Transparent uncertainty communication
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@ -91,119 +91,137 @@ const sortedTags = Object.entries(tagFrequency)
 | 
			
		||||
    </div>
 | 
			
		||||
  </div>
 | 
			
		||||
 | 
			
		||||
  <!-- Advanced Filters Section -->
 | 
			
		||||
  <!-- Advanced Filters Section - COLLAPSIBLE -->
 | 
			
		||||
  <div class="filter-section">
 | 
			
		||||
    <div class="filter-card-compact">
 | 
			
		||||
      <div class="filter-header-compact">
 | 
			
		||||
        <h3>⚙️ Erweiterte Filter</h3>
 | 
			
		||||
        <button class="filter-reset" id="reset-advanced" title="Erweiterte Filter zurücksetzen">
 | 
			
		||||
          <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
 | 
			
		||||
            <polyline points="1 4 1 10 7 10"/>
 | 
			
		||||
            <path d="M3.51 15a9 9 0 1 0 2.13-9.36L1 10"/>
 | 
			
		||||
          </svg>
 | 
			
		||||
        </button>
 | 
			
		||||
        <div class="filter-header-controls">
 | 
			
		||||
          <button class="filter-reset" id="reset-advanced" title="Erweiterte Filter zurücksetzen">
 | 
			
		||||
            <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
 | 
			
		||||
              <polyline points="1 4 1 10 7 10"/>
 | 
			
		||||
              <path d="M3.51 15a9 9 0 1 0 2.13-9.36L1 10"/>
 | 
			
		||||
            </svg>
 | 
			
		||||
          </button>
 | 
			
		||||
          <button class="collapse-toggle" id="toggle-advanced" data-collapsed="true" title="Erweiterte Filter ein/ausblenden">
 | 
			
		||||
            <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
 | 
			
		||||
              <polyline points="6 9 12 15 18 9"></polyline>
 | 
			
		||||
            </svg>
 | 
			
		||||
          </button>
 | 
			
		||||
        </div>
 | 
			
		||||
      </div>
 | 
			
		||||
      
 | 
			
		||||
      <div class="advanced-filters-compact">
 | 
			
		||||
        <div class="filter-grid-compact">
 | 
			
		||||
          <div class="filter-group">
 | 
			
		||||
            <label class="filter-label">Tool-Typ</label>
 | 
			
		||||
            <select id="type-select" class="filter-select">
 | 
			
		||||
              <option value="">Alle Typen</option>
 | 
			
		||||
              {toolTypes.map((type: string) => (
 | 
			
		||||
                <option value={type}>{type}</option>
 | 
			
		||||
              ))}
 | 
			
		||||
            </select>
 | 
			
		||||
      <div class="collapsible-content hidden" id="advanced-filters-content">
 | 
			
		||||
        <div class="advanced-filters-compact">
 | 
			
		||||
          <div class="filter-grid-compact">
 | 
			
		||||
            <div class="filter-group">
 | 
			
		||||
              <label class="filter-label">Tool-Typ</label>
 | 
			
		||||
              <select id="type-select" class="filter-select">
 | 
			
		||||
                <option value="">Alle Typen</option>
 | 
			
		||||
                {toolTypes.map((type: string) => (
 | 
			
		||||
                  <option value={type}>{type}</option>
 | 
			
		||||
                ))}
 | 
			
		||||
              </select>
 | 
			
		||||
            </div>
 | 
			
		||||
 | 
			
		||||
            <div class="filter-group">
 | 
			
		||||
              <label class="filter-label">Skill Level</label>
 | 
			
		||||
              <select id="skill-select" class="filter-select">
 | 
			
		||||
                <option value="">Alle Level</option>
 | 
			
		||||
                {skillLevels.map((level: string) => (
 | 
			
		||||
                  <option value={level}>{level}</option>
 | 
			
		||||
                ))}
 | 
			
		||||
              </select>
 | 
			
		||||
            </div>
 | 
			
		||||
 | 
			
		||||
            <div class="filter-group">
 | 
			
		||||
              <label class="filter-label">Plattform</label>
 | 
			
		||||
              <select id="platform-select" class="filter-select">
 | 
			
		||||
                <option value="">Alle Plattformen</option>
 | 
			
		||||
                {platforms.map((platform: string) => (
 | 
			
		||||
                  <option value={platform}>{platform}</option>
 | 
			
		||||
                ))}
 | 
			
		||||
              </select>
 | 
			
		||||
            </div>
 | 
			
		||||
 | 
			
		||||
            <div class="filter-group">
 | 
			
		||||
              <label class="filter-label">Lizenztyp</label>
 | 
			
		||||
              <select id="license-select" class="filter-select">
 | 
			
		||||
                <option value="">Alle Lizenzen</option>
 | 
			
		||||
                {licenses.map((license: string) => (
 | 
			
		||||
                  <option value={license}>{license}</option>
 | 
			
		||||
                ))}
 | 
			
		||||
              </select>
 | 
			
		||||
            </div>
 | 
			
		||||
 | 
			
		||||
            <div class="filter-group">
 | 
			
		||||
              <label class="filter-label">Zugangsart</label>
 | 
			
		||||
              <select id="access-select" class="filter-select">
 | 
			
		||||
                <option value="">Alle Zugangsarten</option>
 | 
			
		||||
                {accessTypes.map((access: string) => (
 | 
			
		||||
                  <option value={access}>{access}</option>
 | 
			
		||||
                ))}
 | 
			
		||||
              </select>
 | 
			
		||||
            </div>
 | 
			
		||||
          </div>
 | 
			
		||||
 | 
			
		||||
          <div class="filter-group">
 | 
			
		||||
            <label class="filter-label">Skill Level</label>
 | 
			
		||||
            <select id="skill-select" class="filter-select">
 | 
			
		||||
              <option value="">Alle Level</option>
 | 
			
		||||
              {skillLevels.map((level: string) => (
 | 
			
		||||
                <option value={level}>{level}</option>
 | 
			
		||||
              ))}
 | 
			
		||||
            </select>
 | 
			
		||||
          <div class="filter-toggles-compact">
 | 
			
		||||
            <label class="toggle-wrapper">
 | 
			
		||||
              <input type="checkbox" id="hosted-only" />
 | 
			
		||||
              <span class="toggle-label">🟣 Nur CC24-Server Tools</span>
 | 
			
		||||
            </label>
 | 
			
		||||
            
 | 
			
		||||
            <label class="toggle-wrapper">
 | 
			
		||||
              <input type="checkbox" id="knowledgebase-only" />
 | 
			
		||||
              <span class="toggle-label">📖 Nur Tools mit Knowledgebase</span>
 | 
			
		||||
            </label>
 | 
			
		||||
          </div>
 | 
			
		||||
 | 
			
		||||
          <div class="filter-group">
 | 
			
		||||
            <label class="filter-label">Plattform</label>
 | 
			
		||||
            <select id="platform-select" class="filter-select">
 | 
			
		||||
              <option value="">Alle Plattformen</option>
 | 
			
		||||
              {platforms.map((platform: string) => (
 | 
			
		||||
                <option value={platform}>{platform}</option>
 | 
			
		||||
              ))}
 | 
			
		||||
            </select>
 | 
			
		||||
          </div>
 | 
			
		||||
 | 
			
		||||
          <div class="filter-group">
 | 
			
		||||
            <label class="filter-label">Lizenztyp</label>
 | 
			
		||||
            <select id="license-select" class="filter-select">
 | 
			
		||||
              <option value="">Alle Lizenzen</option>
 | 
			
		||||
              {licenses.map((license: string) => (
 | 
			
		||||
                <option value={license}>{license}</option>
 | 
			
		||||
              ))}
 | 
			
		||||
            </select>
 | 
			
		||||
          </div>
 | 
			
		||||
 | 
			
		||||
          <div class="filter-group">
 | 
			
		||||
            <label class="filter-label">Zugangsart</label>
 | 
			
		||||
            <select id="access-select" class="filter-select">
 | 
			
		||||
              <option value="">Alle Zugangsarten</option>
 | 
			
		||||
              {accessTypes.map((access: string) => (
 | 
			
		||||
                <option value={access}>{access}</option>
 | 
			
		||||
              ))}
 | 
			
		||||
            </select>
 | 
			
		||||
          </div>
 | 
			
		||||
        </div>
 | 
			
		||||
 | 
			
		||||
        <div class="filter-toggles-compact">
 | 
			
		||||
          <label class="toggle-wrapper">
 | 
			
		||||
            <input type="checkbox" id="hosted-only" />
 | 
			
		||||
            <span class="toggle-label">🟣 Nur CC24-Server Tools</span>
 | 
			
		||||
          </label>
 | 
			
		||||
          
 | 
			
		||||
          <label class="toggle-wrapper">
 | 
			
		||||
            <input type="checkbox" id="knowledgebase-only" />
 | 
			
		||||
            <span class="toggle-label">📖 Nur Tools mit Knowledgebase</span>
 | 
			
		||||
          </label>
 | 
			
		||||
        </div>
 | 
			
		||||
      </div>
 | 
			
		||||
    </div>
 | 
			
		||||
  </div>
 | 
			
		||||
 | 
			
		||||
  <!-- Tag Filters Section -->
 | 
			
		||||
  <!-- Tag Filters Section - COLLAPSIBLE -->
 | 
			
		||||
  <div class="filter-section">
 | 
			
		||||
    <div class="filter-card-compact">
 | 
			
		||||
      <div class="filter-header-compact">
 | 
			
		||||
        <h3>🏷️ Tag-Filter</h3>
 | 
			
		||||
        <div class="tag-controls">
 | 
			
		||||
        <div class="filter-header-controls">
 | 
			
		||||
          <button class="filter-reset" id="reset-tags" title="Tags zurücksetzen">
 | 
			
		||||
            <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
 | 
			
		||||
              <polyline points="1 4 1 10 7 10"/>
 | 
			
		||||
              <path d="M3.51 15a9 9 0 1 0 2.13-9.36L1 10"/>
 | 
			
		||||
            </svg>
 | 
			
		||||
          </button>
 | 
			
		||||
          <button id="tag-cloud-toggle" class="tag-toggle" data-expanded="false">
 | 
			
		||||
            Mehr zeigen
 | 
			
		||||
          <button class="collapse-toggle" id="toggle-tags" data-collapsed="true" title="Tag-Filter ein/ausblenden">
 | 
			
		||||
            <svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
 | 
			
		||||
              <polyline points="6 9 12 15 18 9"></polyline>
 | 
			
		||||
            </svg>
 | 
			
		||||
          </button>
 | 
			
		||||
        </div>
 | 
			
		||||
      </div>
 | 
			
		||||
      
 | 
			
		||||
      <div class="tag-section">
 | 
			
		||||
        <div class="selected-tags" id="selected-tags"></div>
 | 
			
		||||
        <div class="tag-cloud" id="tag-cloud">
 | 
			
		||||
          {sortedTags.map((tag, index) => (
 | 
			
		||||
            <button 
 | 
			
		||||
              class="tag-cloud-item" 
 | 
			
		||||
              data-tag={tag}
 | 
			
		||||
              data-frequency={tagFrequency[tag]}
 | 
			
		||||
              data-index={index}
 | 
			
		||||
            >
 | 
			
		||||
              {tag}
 | 
			
		||||
              <span class="tag-frequency">({tagFrequency[tag]})</span>
 | 
			
		||||
      <div class="collapsible-content hidden" id="tag-filters-content">
 | 
			
		||||
        <div class="tag-section">
 | 
			
		||||
          <div class="selected-tags" id="selected-tags"></div>
 | 
			
		||||
          <div class="tag-controls">
 | 
			
		||||
            <button id="tag-cloud-toggle" class="tag-toggle" data-expanded="false">
 | 
			
		||||
              Mehr zeigen
 | 
			
		||||
            </button>
 | 
			
		||||
          ))}
 | 
			
		||||
          </div>
 | 
			
		||||
          <div class="tag-cloud" id="tag-cloud">
 | 
			
		||||
            {sortedTags.map((tag, index) => (
 | 
			
		||||
              <button 
 | 
			
		||||
                class="tag-cloud-item" 
 | 
			
		||||
                data-tag={tag}
 | 
			
		||||
                data-frequency={tagFrequency[tag]}
 | 
			
		||||
                data-index={index}
 | 
			
		||||
              >
 | 
			
		||||
                {tag}
 | 
			
		||||
                <span class="tag-frequency">({tagFrequency[tag]})</span>
 | 
			
		||||
              </button>
 | 
			
		||||
            ))}
 | 
			
		||||
          </div>
 | 
			
		||||
        </div>
 | 
			
		||||
      </div>
 | 
			
		||||
    </div>
 | 
			
		||||
@ -293,7 +311,12 @@ const sortedTags = Object.entries(tagFrequency)
 | 
			
		||||
        advanced: document.getElementById('reset-advanced'),
 | 
			
		||||
        tags: document.getElementById('reset-tags'),
 | 
			
		||||
        all: document.getElementById('reset-all-filters')
 | 
			
		||||
      }
 | 
			
		||||
      },
 | 
			
		||||
      // Collapsible elements
 | 
			
		||||
      toggleAdvanced: document.getElementById('toggle-advanced'),
 | 
			
		||||
      toggleTags: document.getElementById('toggle-tags'),
 | 
			
		||||
      advancedContent: document.getElementById('advanced-filters-content'),
 | 
			
		||||
      tagContent: document.getElementById('tag-filters-content')
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    // Verify critical elements exist
 | 
			
		||||
@ -307,6 +330,52 @@ const sortedTags = Object.entries(tagFrequency)
 | 
			
		||||
    let selectedPhase = '';
 | 
			
		||||
    let isTagCloudExpanded = false;
 | 
			
		||||
    
 | 
			
		||||
    // Collapsible functionality
 | 
			
		||||
    function toggleCollapsible(toggleBtn, content, storageKey) {
 | 
			
		||||
      const isCollapsed = toggleBtn.getAttribute('data-collapsed') === 'true';
 | 
			
		||||
      const newState = !isCollapsed;
 | 
			
		||||
      
 | 
			
		||||
      toggleBtn.setAttribute('data-collapsed', newState.toString());
 | 
			
		||||
      
 | 
			
		||||
      if (newState) {
 | 
			
		||||
        // Collapse
 | 
			
		||||
        content.classList.add('hidden');
 | 
			
		||||
        toggleBtn.style.transform = 'rotate(0deg)';
 | 
			
		||||
      } else {
 | 
			
		||||
        // Expand
 | 
			
		||||
        content.classList.remove('hidden');
 | 
			
		||||
        toggleBtn.style.transform = 'rotate(180deg)';
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      // Store state in sessionStorage
 | 
			
		||||
      sessionStorage.setItem(storageKey, newState.toString());
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    // Initialize collapsible sections (collapsed by default)
 | 
			
		||||
    function initializeCollapsible() {
 | 
			
		||||
      // Advanced filters
 | 
			
		||||
      const advancedCollapsed = sessionStorage.getItem('advanced-collapsed') !== 'false';
 | 
			
		||||
      elements.toggleAdvanced.setAttribute('data-collapsed', advancedCollapsed.toString());
 | 
			
		||||
      if (advancedCollapsed) {
 | 
			
		||||
        elements.advancedContent.classList.add('hidden');
 | 
			
		||||
        elements.toggleAdvanced.style.transform = 'rotate(0deg)';
 | 
			
		||||
      } else {
 | 
			
		||||
        elements.advancedContent.classList.remove('hidden');
 | 
			
		||||
        elements.toggleAdvanced.style.transform = 'rotate(180deg)';
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      // Tag filters
 | 
			
		||||
      const tagsCollapsed = sessionStorage.getItem('tags-collapsed') !== 'false';
 | 
			
		||||
      elements.toggleTags.setAttribute('data-collapsed', tagsCollapsed.toString());
 | 
			
		||||
      if (tagsCollapsed) {
 | 
			
		||||
        elements.tagContent.classList.add('hidden');
 | 
			
		||||
        elements.toggleTags.style.transform = 'rotate(0deg)';
 | 
			
		||||
      } else {
 | 
			
		||||
        elements.tagContent.classList.remove('hidden');
 | 
			
		||||
        elements.toggleTags.style.transform = 'rotate(180deg)';
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    // Helper function to check if tool is hosted
 | 
			
		||||
    function isToolHosted(tool) {
 | 
			
		||||
      return tool.projectUrl !== undefined && 
 | 
			
		||||
@ -418,18 +487,23 @@ const sortedTags = Object.entries(tagFrequency)
 | 
			
		||||
      });
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    // Add/remove tags
 | 
			
		||||
    // Add/remove tags - FIXED: Update ALL matching elements
 | 
			
		||||
    function addTag(tag) {
 | 
			
		||||
      selectedTags.add(tag);
 | 
			
		||||
      document.querySelector(`[data-tag="${tag}"]`).classList.add('active');
 | 
			
		||||
      // FIXED: Use querySelectorAll to update ALL matching tag elements
 | 
			
		||||
      document.querySelectorAll(`[data-tag="${tag}"]`).forEach(element => {
 | 
			
		||||
        element.classList.add('active');
 | 
			
		||||
      });
 | 
			
		||||
      updateSelectedTags();
 | 
			
		||||
      filterTools();
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    function removeTag(tag) {
 | 
			
		||||
      selectedTags.delete(tag);
 | 
			
		||||
      const tagElement = document.querySelector(`[data-tag="${tag}"]`);
 | 
			
		||||
      if (tagElement) tagElement.classList.remove('active');
 | 
			
		||||
      // FIXED: Use querySelectorAll to update ALL matching tag elements
 | 
			
		||||
      document.querySelectorAll(`[data-tag="${tag}"]`).forEach(element => {
 | 
			
		||||
        element.classList.remove('active');
 | 
			
		||||
      });
 | 
			
		||||
      updateSelectedTags();
 | 
			
		||||
      filterTools();
 | 
			
		||||
    }
 | 
			
		||||
@ -553,7 +627,10 @@ const sortedTags = Object.entries(tagFrequency)
 | 
			
		||||
    
 | 
			
		||||
    function resetTags() {
 | 
			
		||||
      selectedTags.clear();
 | 
			
		||||
      elements.tagCloudItems.forEach(item => item.classList.remove('active'));
 | 
			
		||||
      // FIXED: Update ALL tag elements
 | 
			
		||||
      document.querySelectorAll('.tag-cloud-item').forEach(item => {
 | 
			
		||||
        item.classList.remove('active');
 | 
			
		||||
      });
 | 
			
		||||
      updateSelectedTags();
 | 
			
		||||
      filterTools();
 | 
			
		||||
    }
 | 
			
		||||
@ -630,11 +707,21 @@ const sortedTags = Object.entries(tagFrequency)
 | 
			
		||||
    elements.resetButtons.tags.addEventListener('click', resetTags);
 | 
			
		||||
    elements.resetButtons.all.addEventListener('click', resetAllFilters);
 | 
			
		||||
    
 | 
			
		||||
    // Collapsible toggle listeners
 | 
			
		||||
    elements.toggleAdvanced.addEventListener('click', () => {
 | 
			
		||||
      toggleCollapsible(elements.toggleAdvanced, elements.advancedContent, 'advanced-collapsed');
 | 
			
		||||
    });
 | 
			
		||||
    
 | 
			
		||||
    elements.toggleTags.addEventListener('click', () => {
 | 
			
		||||
      toggleCollapsible(elements.toggleTags, elements.tagContent, 'tags-collapsed');
 | 
			
		||||
    });
 | 
			
		||||
    
 | 
			
		||||
    // Expose functions globally for backwards compatibility
 | 
			
		||||
    window.clearTagFilters = resetTags;
 | 
			
		||||
    window.clearAllFilters = resetAllFilters;
 | 
			
		||||
    
 | 
			
		||||
    // Initialize
 | 
			
		||||
    initializeCollapsible();
 | 
			
		||||
    initTagCloud();
 | 
			
		||||
    filterTagCloud();
 | 
			
		||||
    updateSelectedTags();
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										22
									
								
								src/pages/api/ai/embeddings.status.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								src/pages/api/ai/embeddings.status.ts
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,22 @@
 | 
			
		||||
// src/pages/api/ai/embeddings-status.ts
 | 
			
		||||
import type { APIRoute } from 'astro';
 | 
			
		||||
import { embeddingsService } from '../../../utils/embeddings.js';
 | 
			
		||||
import { apiResponse, apiServerError } from '../../../utils/api.js';
 | 
			
		||||
 | 
			
		||||
export const prerender = false;
 | 
			
		||||
 | 
			
		||||
export const GET: APIRoute = async () => {
 | 
			
		||||
  try {
 | 
			
		||||
    const stats = embeddingsService.getStats();
 | 
			
		||||
    
 | 
			
		||||
    return apiResponse.success({
 | 
			
		||||
      embeddings: stats,
 | 
			
		||||
      timestamp: new Date().toISOString(),
 | 
			
		||||
      status: stats.enabled && stats.initialized ? 'ready' : 
 | 
			
		||||
             stats.enabled && !stats.initialized ? 'initializing' : 'disabled'
 | 
			
		||||
    });
 | 
			
		||||
  } catch (error) {
 | 
			
		||||
    console.error('Embeddings status error:', error);
 | 
			
		||||
    return apiServerError.internal('Failed to get embeddings status');
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
@ -1,4 +1,4 @@
 | 
			
		||||
// src/pages/api/ai/enhance-input.ts
 | 
			
		||||
// src/pages/api/ai/enhance-input.ts - ENHANCED with forensics methodology
 | 
			
		||||
import type { APIRoute } from 'astro';
 | 
			
		||||
import { withAPIAuth } from '../../../utils/auth.js';
 | 
			
		||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
 | 
			
		||||
@ -14,7 +14,11 @@ function getEnv(key: string): string {
 | 
			
		||||
  return value;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const AI_MODEL = getEnv('AI_MODEL');
 | 
			
		||||
// Use the analyzer AI for smart prompting (smaller, faster model)
 | 
			
		||||
const AI_ENDPOINT = getEnv('AI_ANALYZER_ENDPOINT');
 | 
			
		||||
const AI_API_KEY = getEnv('AI_ANALYZER_API_KEY');
 | 
			
		||||
const AI_MODEL = getEnv('AI_ANALYZER_MODEL');
 | 
			
		||||
 | 
			
		||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
 | 
			
		||||
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
 | 
			
		||||
const RATE_LIMIT_MAX = 5; // 5 enhancement requests per minute per user
 | 
			
		||||
@ -59,29 +63,38 @@ function cleanupExpiredRateLimits() {
 | 
			
		||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
 | 
			
		||||
 | 
			
		||||
function createEnhancementPrompt(input: string): string {
 | 
			
		||||
  return `
 | 
			
		||||
Du bist eine KI für digitale Forensik. Der Nutzer beschreibt ein forensisches Szenario. Analysiere die Eingabe.
 | 
			
		||||
  return `Sie sind ein DFIR-Experte mit Spezialisierung auf forensische Methodik. Ein Nutzer beschreibt ein forensisches Szenario oder Problem. Analysieren Sie die Eingabe auf Vollständigkeit für eine wissenschaftlich fundierte forensische Untersuchung.
 | 
			
		||||
 | 
			
		||||
Wenn die Beschreibung unvollständig oder vage ist, stelle bis zu drei präzise Rückfragen im JSON-Array-Format, um wichtige Details zu klären (z. B. Vorfalltyp, System, Ziel, Datenquellen, Zeit, Beteiligte, rechtlicher Rahmen).
 | 
			
		||||
ANALYSIEREN SIE DIESE FORENSISCHEN KATEGORIEN:
 | 
			
		||||
1. **Incident Context**: Was ist passiert? Welche Angriffsvektoren oder technischen Probleme liegen vor?
 | 
			
		||||
2. **Affected Systems**: Welche spezifischen Technologien/Plattformen sind betroffen? (Windows/Linux/ICS/SCADA/Mobile/Cloud/Network Infrastructure)
 | 
			
		||||
3. **Available Evidence**: Welche forensischen Datenquellen stehen zur Verfügung? (RAM-Dumps, Disk-Images, Log-Files, Network-Captures, Registry-Hives)
 | 
			
		||||
4. **Investigation Objectives**: Was soll erreicht werden? (IOC-Extraktion, Timeline-Rekonstruktion, Attribution, Impact-Assessment)
 | 
			
		||||
5. **Timeline Constraints**: Wie zeitkritisch ist die Untersuchung?
 | 
			
		||||
6. **Legal & Compliance**: Rechtliche Anforderungen, Chain of Custody, Compliance-Rahmen (DSGVO, sector-specific regulations)
 | 
			
		||||
7. **Technical Constraints**: Verfügbare Ressourcen, Skills, Infrastrukturbeschränkungen
 | 
			
		||||
 | 
			
		||||
Wenn die Eingabe bereits klar, spezifisch und vollständig ist, gib stattdessen nur eine leere Liste [] zurück.
 | 
			
		||||
WENN die Beschreibung alle kritischen forensischen Aspekte abdeckt: Geben Sie eine leere Liste [] zurück.
 | 
			
		||||
 | 
			
		||||
Antwortformat strikt:
 | 
			
		||||
WENN wichtige forensische Details fehlen: Formulieren Sie 2-3 präzise Fragen, die die kritischsten Lücken für eine wissenschaftlich fundierte forensische Analyse schließen.
 | 
			
		||||
 | 
			
		||||
\`\`\`json
 | 
			
		||||
QUALITÄTSKRITERIEN FÜR FRAGEN:
 | 
			
		||||
- Forensisch spezifisch, nicht allgemein (❌ "Mehr Details?" ✅ "Welche forensischen Artefakte (RAM-Dumps, Disk-Images, Logs) stehen zur Verfügung?")
 | 
			
		||||
- Methodisch relevant (❌ "Wann passierte das?" ✅ "Liegen Log-Dateien aus dem Incident-Zeitraum vor, und welche Retention-Policy gilt?")
 | 
			
		||||
- Priorisiert nach Auswirkung auf die forensische Untersuchungsqualität
 | 
			
		||||
 | 
			
		||||
ANTWORTFORMAT (NUR JSON, KEIN ZUSÄTZLICHER TEXT):
 | 
			
		||||
[
 | 
			
		||||
  "Frage 1?",
 | 
			
		||||
  "Frage 2?",
 | 
			
		||||
  "Frage 3?"
 | 
			
		||||
  "Forensisch spezifische Frage 1?",
 | 
			
		||||
  "Forensisch spezifische Frage 2?",
 | 
			
		||||
  "Forensisch spezifische Frage 3?"
 | 
			
		||||
]
 | 
			
		||||
\`\`\`
 | 
			
		||||
 | 
			
		||||
Nutzer-Eingabe:
 | 
			
		||||
NUTZER-EINGABE:
 | 
			
		||||
${input}
 | 
			
		||||
  `.trim();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
  try {
 | 
			
		||||
    const authResult = await withAPIAuth(request, 'ai');
 | 
			
		||||
@ -98,12 +111,12 @@ export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
    const body = await request.json();
 | 
			
		||||
    const { input } = body;
 | 
			
		||||
 | 
			
		||||
    if (!input || typeof input !== 'string' || input.length < 20) {
 | 
			
		||||
      return apiError.badRequest('Input too short for enhancement');
 | 
			
		||||
    if (!input || typeof input !== 'string' || input.length < 40) {
 | 
			
		||||
      return apiError.badRequest('Input too short for enhancement (minimum 40 characters)');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const sanitizedInput = sanitizeInput(input);
 | 
			
		||||
    if (sanitizedInput.length < 20) {
 | 
			
		||||
    if (sanitizedInput.length < 40) {
 | 
			
		||||
      return apiError.badRequest('Input too short after sanitization');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@ -111,11 +124,11 @@ export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
    const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
 | 
			
		||||
    
 | 
			
		||||
    const aiResponse = await enqueueApiCall(() =>
 | 
			
		||||
      fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
 | 
			
		||||
      fetch(`${AI_ENDPOINT}/v1/chat/completions`, {
 | 
			
		||||
        method: 'POST',
 | 
			
		||||
        headers: {
 | 
			
		||||
          'Content-Type': 'application/json',
 | 
			
		||||
          'Authorization': `Bearer ${process.env.AI_API_KEY}`
 | 
			
		||||
          'Authorization': `Bearer ${AI_API_KEY}`
 | 
			
		||||
        },
 | 
			
		||||
        body: JSON.stringify({
 | 
			
		||||
          model: AI_MODEL,
 | 
			
		||||
@ -125,8 +138,12 @@ export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
              content: systemPrompt
 | 
			
		||||
            }
 | 
			
		||||
          ],
 | 
			
		||||
          max_tokens: 200,
 | 
			
		||||
          temperature: 0.7
 | 
			
		||||
          max_tokens: 300,
 | 
			
		||||
          temperature: 0.7,
 | 
			
		||||
          // Enhanced: Better parameters for consistent forensics output
 | 
			
		||||
          top_p: 0.9,
 | 
			
		||||
          frequency_penalty: 0.2,
 | 
			
		||||
          presence_penalty: 0.1
 | 
			
		||||
        })
 | 
			
		||||
      }), taskId);
 | 
			
		||||
 | 
			
		||||
@ -144,36 +161,47 @@ export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
 | 
			
		||||
    let questions;
 | 
			
		||||
    try {
 | 
			
		||||
    const cleanedContent = aiContent
 | 
			
		||||
      const cleanedContent = aiContent
 | 
			
		||||
        .replace(/^```json\s*/i, '')
 | 
			
		||||
        .replace(/\s*```\s*$/, '')
 | 
			
		||||
        .trim();
 | 
			
		||||
    questions = JSON.parse(cleanedContent);
 | 
			
		||||
      questions = JSON.parse(cleanedContent);
 | 
			
		||||
      
 | 
			
		||||
      if (!Array.isArray(questions) || questions.length === 0) {
 | 
			
		||||
        throw new Error('Invalid questions format');
 | 
			
		||||
      if (!Array.isArray(questions)) {
 | 
			
		||||
        throw new Error('Response is not an array');
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      // Validate and clean questions
 | 
			
		||||
      // Enhanced validation and cleaning for forensics context
 | 
			
		||||
      questions = questions
 | 
			
		||||
        .filter(q => typeof q === 'string' && q.length > 5 && q.length < 120)
 | 
			
		||||
        .slice(0, 3);
 | 
			
		||||
        .filter(q => typeof q === 'string' && q.length > 20 && q.length < 200) // More appropriate length for forensics questions
 | 
			
		||||
        .filter(q => q.includes('?')) // Must be a question
 | 
			
		||||
        .filter(q => {
 | 
			
		||||
          // Enhanced: Filter for forensics-relevant questions
 | 
			
		||||
          const forensicsTerms = ['forensisch', 'log', 'dump', 'image', 'artefakt', 'evidence', 'incident', 'system', 'netzwerk', 'zeitraum', 'verfügbar'];
 | 
			
		||||
          const lowerQ = q.toLowerCase();
 | 
			
		||||
          return forensicsTerms.some(term => lowerQ.includes(term));
 | 
			
		||||
        })
 | 
			
		||||
        .map(q => q.trim())
 | 
			
		||||
        .slice(0, 3); // Max 3 questions
 | 
			
		||||
        
 | 
			
		||||
      // If no valid forensics questions, return empty array (means input is complete)
 | 
			
		||||
      if (questions.length === 0) {
 | 
			
		||||
        throw new Error('No valid questions found');
 | 
			
		||||
        questions = [];
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.error('Failed to parse enhancement response:', aiContent);
 | 
			
		||||
      return apiServerError.unavailable('Invalid enhancement response format');
 | 
			
		||||
      // If parsing fails, assume input is complete enough
 | 
			
		||||
      questions = [];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    console.log(`[AI Enhancement] User: ${userId}, Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
 | 
			
		||||
    console.log(`[AI Enhancement] User: ${userId}, Forensics Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
 | 
			
		||||
 | 
			
		||||
    return new Response(JSON.stringify({
 | 
			
		||||
      success: true,
 | 
			
		||||
      questions,
 | 
			
		||||
      taskId
 | 
			
		||||
      taskId,
 | 
			
		||||
      inputComplete: questions.length === 0 // Flag to indicate if input seems complete
 | 
			
		||||
    }), {
 | 
			
		||||
      status: 200,
 | 
			
		||||
      headers: { 'Content-Type': 'application/json' }
 | 
			
		||||
 | 
			
		||||
@ -1,275 +1,105 @@
 | 
			
		||||
// src/pages/api/ai/query.ts
 | 
			
		||||
// src/pages/api/ai/query.ts - FIXED: Rate limiting for micro-task pipeline
 | 
			
		||||
 | 
			
		||||
import type { APIRoute } from 'astro';
 | 
			
		||||
import { withAPIAuth } from '../../../utils/auth.js';
 | 
			
		||||
import { getCompressedToolsDataForAI } from '../../../utils/dataService.js';
 | 
			
		||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
 | 
			
		||||
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';   
 | 
			
		||||
import { aiPipeline } from '../../../utils/aiPipeline.js';
 | 
			
		||||
 | 
			
		||||
export const prerender = false;
 | 
			
		||||
 | 
			
		||||
function getEnv(key: string): string {
 | 
			
		||||
  const value = process.env[key];
 | 
			
		||||
  if (!value) {
 | 
			
		||||
    throw new Error(`Missing environment variable: ${key}`);
 | 
			
		||||
  }
 | 
			
		||||
  return value;
 | 
			
		||||
interface RateLimitData {
 | 
			
		||||
  count: number;
 | 
			
		||||
  resetTime: number;
 | 
			
		||||
  microTaskCount: number; 
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
const AI_MODEL = getEnv('AI_MODEL');
 | 
			
		||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
 | 
			
		||||
const RATE_LIMIT_WINDOW = 60 * 1000; 
 | 
			
		||||
const RATE_LIMIT_MAX = 10; 
 | 
			
		||||
const rateLimitStore = new Map<string, RateLimitData>();
 | 
			
		||||
 | 
			
		||||
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
 | 
			
		||||
const MAIN_RATE_LIMIT_MAX = parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS || '4', 10); 
 | 
			
		||||
const MICRO_TASK_TOTAL_LIMIT = parseInt(process.env.AI_MICRO_TASK_TOTAL_LIMIT || '50', 10); 
 | 
			
		||||
 | 
			
		||||
function sanitizeInput(input: string): string {
 | 
			
		||||
  let sanitized = input
 | 
			
		||||
    .replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]') // Remove code blocks
 | 
			
		||||
    .replace(/\<\/?[^>]+(>|$)/g, '') // Remove HTML tags
 | 
			
		||||
    .replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
 | 
			
		||||
    .replace(/\<\/?[^>]+(>|$)/g, '')
 | 
			
		||||
    .replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
 | 
			
		||||
    .replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
 | 
			
		||||
    .trim();
 | 
			
		||||
  
 | 
			
		||||
  sanitized = sanitized.slice(0, 2000).replace(/\s+/g, ' ');
 | 
			
		||||
  
 | 
			
		||||
  return sanitized;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function stripMarkdownJson(content: string): string {
 | 
			
		||||
  return content
 | 
			
		||||
    .replace(/^```json\s*/i, '')
 | 
			
		||||
    .replace(/\s*```\s*$/, '')
 | 
			
		||||
    .trim();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function checkRateLimit(userId: string): boolean {
 | 
			
		||||
function checkRateLimit(userId: string): { allowed: boolean; reason?: string; microTasksRemaining?: number } {
 | 
			
		||||
  const now = Date.now();
 | 
			
		||||
  const userLimit = rateLimitStore.get(userId);
 | 
			
		||||
  
 | 
			
		||||
  if (!userLimit || now > userLimit.resetTime) {
 | 
			
		||||
    rateLimitStore.set(userId, { count: 1, resetTime: now + RATE_LIMIT_WINDOW });
 | 
			
		||||
    return true;
 | 
			
		||||
    rateLimitStore.set(userId, { 
 | 
			
		||||
      count: 1, 
 | 
			
		||||
      resetTime: now + RATE_LIMIT_WINDOW,
 | 
			
		||||
      microTaskCount: 0 
 | 
			
		||||
    });
 | 
			
		||||
    return { 
 | 
			
		||||
      allowed: true, 
 | 
			
		||||
      microTasksRemaining: MICRO_TASK_TOTAL_LIMIT 
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  if (userLimit.count >= RATE_LIMIT_MAX) {
 | 
			
		||||
    return false;
 | 
			
		||||
  if (userLimit.count >= MAIN_RATE_LIMIT_MAX) {
 | 
			
		||||
    return { 
 | 
			
		||||
      allowed: false, 
 | 
			
		||||
      reason: `Main rate limit exceeded. Max ${MAIN_RATE_LIMIT_MAX} requests per minute.`
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  if (userLimit.microTaskCount >= MICRO_TASK_TOTAL_LIMIT) {
 | 
			
		||||
    return { 
 | 
			
		||||
      allowed: false, 
 | 
			
		||||
      reason: `Micro-task limit exceeded. Max ${MICRO_TASK_TOTAL_LIMIT} AI calls per minute.`
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  userLimit.count++;
 | 
			
		||||
  return true;
 | 
			
		||||
  
 | 
			
		||||
  return { 
 | 
			
		||||
    allowed: true, 
 | 
			
		||||
    microTasksRemaining: MICRO_TASK_TOTAL_LIMIT - userLimit.microTaskCount
 | 
			
		||||
  };
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function incrementMicroTaskCount(userId: string, aiCallsMade: number): void {
 | 
			
		||||
  const userLimit = rateLimitStore.get(userId);
 | 
			
		||||
  if (userLimit) {
 | 
			
		||||
    userLimit.microTaskCount += aiCallsMade;
 | 
			
		||||
    console.log(`[RATE LIMIT] User ${userId} now at ${userLimit.microTaskCount}/${MICRO_TASK_TOTAL_LIMIT} micro-task calls`);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function cleanupExpiredRateLimits() {
 | 
			
		||||
  const now = Date.now();
 | 
			
		||||
  const maxStoreSize = 1000; 
 | 
			
		||||
  
 | 
			
		||||
  for (const [userId, limit] of rateLimitStore.entries()) {
 | 
			
		||||
    if (now > limit.resetTime) {
 | 
			
		||||
      rateLimitStore.delete(userId);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
 | 
			
		||||
 | 
			
		||||
async function loadToolsDatabase() {
 | 
			
		||||
  try {
 | 
			
		||||
    return await getCompressedToolsDataForAI();
 | 
			
		||||
  } catch (error) {
 | 
			
		||||
    console.error('Failed to load tools database:', error);
 | 
			
		||||
    throw new Error('Database unavailable');
 | 
			
		||||
  
 | 
			
		||||
  if (rateLimitStore.size > maxStoreSize) {
 | 
			
		||||
    const entries = Array.from(rateLimitStore.entries());
 | 
			
		||||
    entries.sort((a, b) => a[1].resetTime - b[1].resetTime);
 | 
			
		||||
    
 | 
			
		||||
    const toRemove = entries.slice(0, entries.length - maxStoreSize);
 | 
			
		||||
    toRemove.forEach(([userId]) => rateLimitStore.delete(userId));
 | 
			
		||||
    
 | 
			
		||||
    console.log(`[RATE LIMIT] Cleanup: removed ${toRemove.length} old entries`);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function createWorkflowSystemPrompt(toolsData: any): string {
 | 
			
		||||
  const toolsList = toolsData.tools.map((tool: any) => ({
 | 
			
		||||
    name: tool.name,
 | 
			
		||||
    description: tool.description,
 | 
			
		||||
    domains: tool.domains,
 | 
			
		||||
    phases: tool.phases,
 | 
			
		||||
    domainAgnostic: tool['domain-agnostic-software'],
 | 
			
		||||
    platforms: tool.platforms,
 | 
			
		||||
    skillLevel: tool.skillLevel,
 | 
			
		||||
    license: tool.license,
 | 
			
		||||
    tags: tool.tags,
 | 
			
		||||
    related_concepts: tool.related_concepts || []
 | 
			
		||||
  }));
 | 
			
		||||
 | 
			
		||||
  const conceptsList = toolsData.concepts.map((concept: any) => ({
 | 
			
		||||
    name: concept.name,
 | 
			
		||||
    description: concept.description,
 | 
			
		||||
    domains: concept.domains,
 | 
			
		||||
    phases: concept.phases,
 | 
			
		||||
    skillLevel: concept.skillLevel,
 | 
			
		||||
    tags: concept.tags
 | 
			
		||||
  }));
 | 
			
		||||
 | 
			
		||||
  const regularPhases = toolsData.phases || [];
 | 
			
		||||
  
 | 
			
		||||
  const domainAgnosticSoftware = toolsData['domain-agnostic-software'] || [];
 | 
			
		||||
  
 | 
			
		||||
  const allPhaseItems = [
 | 
			
		||||
    ...regularPhases,
 | 
			
		||||
    ...domainAgnosticSoftware
 | 
			
		||||
  ];
 | 
			
		||||
  
 | 
			
		||||
  const phasesDescription = allPhaseItems.map((phase: any) => 
 | 
			
		||||
    `- ${phase.id}: ${phase.name}`
 | 
			
		||||
  ).join('\n');
 | 
			
		||||
 | 
			
		||||
  const domainsDescription = toolsData.domains.map((domain: any) => 
 | 
			
		||||
    `- ${domain.id}: ${domain.name}`
 | 
			
		||||
  ).join('\n');
 | 
			
		||||
 | 
			
		||||
  const phaseDescriptions = regularPhases.map((phase: any) => 
 | 
			
		||||
    `- ${phase.name}: ${phase.description || 'Tools/Methods for this phase'}`
 | 
			
		||||
  ).join('\n');
 | 
			
		||||
 | 
			
		||||
  const domainAgnosticDescriptions = domainAgnosticSoftware.map((section: any) => 
 | 
			
		||||
    `- ${section.name}: ${section.description || 'Cross-cutting software and platforms'}`
 | 
			
		||||
  ).join('\n');
 | 
			
		||||
 | 
			
		||||
  const validPhases = [
 | 
			
		||||
    ...regularPhases.map((p: any) => p.id),
 | 
			
		||||
    ...domainAgnosticSoftware.map((s: any) => s.id)
 | 
			
		||||
  ].join('|');
 | 
			
		||||
 | 
			
		||||
  return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte, der Ermittlern bei der Auswahl von Software und Methoden hilft.
 | 
			
		||||
 | 
			
		||||
VERFÜGBARE TOOLS/METHODEN:
 | 
			
		||||
${JSON.stringify(toolsList, null, 2)}
 | 
			
		||||
 | 
			
		||||
VERFÜGBARE HINTERGRUNDWISSEN-KONZEPTE:
 | 
			
		||||
${JSON.stringify(conceptsList, null, 2)}
 | 
			
		||||
 | 
			
		||||
UNTERSUCHUNGSPHASEN (NIST Framework):
 | 
			
		||||
${phasesDescription}
 | 
			
		||||
 | 
			
		||||
FORENSISCHE DOMÄNEN:
 | 
			
		||||
${domainsDescription}
 | 
			
		||||
 | 
			
		||||
WICHTIGE REGELN:
 | 
			
		||||
1. Pro Phase 2-3 Tools/Methoden empfehlen (immer mindestens 2 wenn verfügbar)
 | 
			
		||||
2. Tools/Methoden können in MEHREREN Phasen empfohlen werden wenn sinnvoll - versuche ein Tool/Methode für jede Phase zu empfehlen, selbst wenn die Priorität "low" ist.
 | 
			
		||||
3. Für Reporting-Phase: Visualisierungs- und Dokumentationssoftware einschließen
 | 
			
		||||
4. Gib stets dem spezieller für den Fall geeigneten Werkzeug den Vorzug.
 | 
			
		||||
5. Deutsche Antworten für deutsche Anfragen, English for English queries
 | 
			
		||||
6. Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software.
 | 
			
		||||
7. Bevorzuge alles, was nicht proprietär ist (license != "Proprietary"), aber erkenne an, wenn proprietäre Software besser geeignet ist.
 | 
			
		||||
8. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
 | 
			
		||||
9. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
 | 
			
		||||
 | 
			
		||||
ENHANCED CONTEXTUAL ANALYSIS:
 | 
			
		||||
10. Analysiere das Szenario detailliert und identifiziere Schlüsselelemente, Bedrohungen und forensische Herausforderungen
 | 
			
		||||
11. Entwickle einen strategischen Untersuchungsansatz basierend auf dem spezifischen Szenario
 | 
			
		||||
12. Identifiziere zeitkritische oder besonders wichtige Faktoren für diesen Fall
 | 
			
		||||
 | 
			
		||||
SOFTWARE/METHODEN-AUSWAHL NACH PHASE:
 | 
			
		||||
${phaseDescriptions}
 | 
			
		||||
 | 
			
		||||
DOMÄNENAGNOSTISCHE SOFTWARE/METHODEN:
 | 
			
		||||
${domainAgnosticDescriptions}
 | 
			
		||||
 | 
			
		||||
ANTWORT-FORMAT (strict JSON):
 | 
			
		||||
{
 | 
			
		||||
  "scenario_analysis": "Detaillierte Analyse des Szenarios: Erkannte Schlüsselelemente, Art des Vorfalls, betroffene Systeme, potentielle Bedrohungen und forensische Herausforderungen",
 | 
			
		||||
  "investigation_approach": "Strategischer Untersuchungsansatz für dieses spezifische Szenario: Prioritäten, Reihenfolge der Phasen, besondere Überlegungen",
 | 
			
		||||
  "critical_considerations": "Zeitkritische Faktoren, wichtige Sicherheitsaspekte oder besondere Vorsichtsmaßnahmen für diesen Fall",
 | 
			
		||||
  "recommended_tools": [
 | 
			
		||||
    {
 | 
			
		||||
      "name": "EXAKTER Name aus der Tools-Database",
 | 
			
		||||
      "priority": "high|medium|low", 
 | 
			
		||||
      "phase": "${validPhases}",
 | 
			
		||||
      "justification": "Warum diese Methode für diese Phase und dieses spezifische Szenario geeignet ist - mit Bezug zu den erkannten Schlüsselelementen"
 | 
			
		||||
    }
 | 
			
		||||
  ],
 | 
			
		||||
  "workflow_suggestion": "Vorgeschlagener Untersuchungsablauf mit konkreten Schritten für dieses Szenario",
 | 
			
		||||
  "background_knowledge": [
 | 
			
		||||
    {
 | 
			
		||||
      "concept_name": "EXAKTER Name aus der Konzepte-Database",
 | 
			
		||||
      "relevance": "Warum dieses Konzept für das Szenario relevant ist, und bei welchen der empfohlenen Methoden/Tools."
 | 
			
		||||
    }
 | 
			
		||||
  ],
 | 
			
		||||
  "additional_notes": "Wichtige Überlegungen und Hinweise"
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function createToolSystemPrompt(toolsData: any): string {
 | 
			
		||||
  const toolsList = toolsData.tools.map((tool: any) => ({
 | 
			
		||||
    name: tool.name,
 | 
			
		||||
    description: tool.description,
 | 
			
		||||
    domains: tool.domains,
 | 
			
		||||
    phases: tool.phases,
 | 
			
		||||
    platforms: tool.platforms,
 | 
			
		||||
    skillLevel: tool.skillLevel,
 | 
			
		||||
    license: tool.license,
 | 
			
		||||
    tags: tool.tags,
 | 
			
		||||
    url: tool.url,
 | 
			
		||||
    projectUrl: tool.projectUrl,
 | 
			
		||||
    related_concepts: tool.related_concepts || []
 | 
			
		||||
  }));
 | 
			
		||||
 | 
			
		||||
  const conceptsList = toolsData.concepts.map((concept: any) => ({
 | 
			
		||||
    name: concept.name,
 | 
			
		||||
    description: concept.description,
 | 
			
		||||
    domains: concept.domains,
 | 
			
		||||
    phases: concept.phases,
 | 
			
		||||
    skillLevel: concept.skillLevel,
 | 
			
		||||
    tags: concept.tags
 | 
			
		||||
  }));
 | 
			
		||||
 | 
			
		||||
  return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte, der bei der Auswahl spezifischer Software/Methoden für konkrete Probleme hilft.
 | 
			
		||||
 | 
			
		||||
VERFÜGBARE TOOLS/METHODEN:
 | 
			
		||||
${JSON.stringify(toolsList, null, 2)}
 | 
			
		||||
 | 
			
		||||
VERFÜGBARE HINTERGRUNDWISSEN-KONZEPTE:
 | 
			
		||||
${JSON.stringify(conceptsList, null, 2)}
 | 
			
		||||
 | 
			
		||||
WICHTIGE REGELN:
 | 
			
		||||
1. Analysiere das spezifische Problem/die Anforderung sorgfältig
 | 
			
		||||
2. Empfehle 1-3 Methoden/Tools, sortiert nach Eignung (beste Empfehlung zuerst)
 | 
			
		||||
3. Gib detaillierte Erklärungen, WARUM und WIE jede Methode/Tool das Problem löst
 | 
			
		||||
4. Berücksichtige praktische Aspekte: Skill Level, Plattformen, Verfügbarkeit
 | 
			
		||||
5. Deutsche Antworten für deutsche Anfragen, English for English queries
 | 
			
		||||
6. Gib konkrete Anwendungshinweise, nicht nur allgemeine Beschreibungen - Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software.
 | 
			
		||||
7. Erwähne sowohl Stärken als auch Schwächen/Limitationen
 | 
			
		||||
8. Schlage alternative Ansätze vor, wenn sinnvoll
 | 
			
		||||
9. Gib grundsätzliche Hinweise, WIE die Methode/Tool konkret eingesetzt wird
 | 
			
		||||
10. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
 | 
			
		||||
11. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
 | 
			
		||||
 | 
			
		||||
ENHANCED CONTEXTUAL ANALYSIS:
 | 
			
		||||
12. Analysiere das Problem detailliert und identifiziere technische Anforderungen, Herausforderungen und Erfolgsfaktoren
 | 
			
		||||
13. Entwickle einen strategischen Lösungsansatz basierend auf dem spezifischen Problem
 | 
			
		||||
14. Identifiziere wichtige Voraussetzungen oder Warnungen für die Anwendung
 | 
			
		||||
 | 
			
		||||
ANTWORT-FORMAT (strict JSON):
 | 
			
		||||
{
 | 
			
		||||
  "problem_analysis": "Detaillierte Analyse des Problems: Erkannte technische Anforderungen, Herausforderungen, benötigte Fähigkeiten und Erfolgsfaktoren",
 | 
			
		||||
  "investigation_approach": "Strategischer Lösungsansatz für dieses spezifische Problem: Herangehensweise, Prioritäten, optimale Anwendungsreihenfolge",
 | 
			
		||||
  "critical_considerations": "Wichtige Voraussetzungen, potentielle Fallstricke oder Warnungen für die Anwendung der empfohlenen Lösungen",
 | 
			
		||||
  "recommended_tools": [
 | 
			
		||||
    {
 | 
			
		||||
      "name": "EXAKTER Name aus der Tools-Database",
 | 
			
		||||
      "rank": 1,
 | 
			
		||||
      "suitability_score": "high|medium|low",
 | 
			
		||||
      "detailed_explanation": "Detaillierte Erklärung, warum dieses Tool/diese Methode das spezifische Problem löst - mit Bezug zu den erkannten Anforderungen",
 | 
			
		||||
      "implementation_approach": "Konkrete Schritte/Ansatz zur Anwendung für dieses spezifische Problem",
 | 
			
		||||
      "pros": ["Spezifische Vorteile für diesen Anwendungsfall", "Weitere Vorteile"],
 | 
			
		||||
      "cons": ["Potentielle Nachteile oder Limitationen", "Weitere Einschränkungen"],
 | 
			
		||||
      "alternatives": "Alternative Ansätze oder ergänzende Tools/Methoden, falls relevant"
 | 
			
		||||
    }
 | 
			
		||||
  ],
 | 
			
		||||
  "background_knowledge": [
 | 
			
		||||
    {
 | 
			
		||||
      "concept_name": "EXAKTER Name aus der Konzepte-Database",
 | 
			
		||||
      "relevance": "Warum dieses Konzept für die empfohlenen Tools/das Problem relevant ist, und für welche der empfohlenen Methoden/Tools."
 | 
			
		||||
    }
 | 
			
		||||
  ],
 | 
			
		||||
  "additional_considerations": "Wichtige Überlegungen, Voraussetzungen oder Warnungen"
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
 | 
			
		||||
}
 | 
			
		||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
 | 
			
		||||
 | 
			
		||||
export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
  try {
 | 
			
		||||
@ -280,161 +110,100 @@ export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
    
 | 
			
		||||
    const userId = authResult.userId;
 | 
			
		||||
 | 
			
		||||
    if (!checkRateLimit(userId)) {
 | 
			
		||||
      return apiError.rateLimit('Rate limit exceeded');
 | 
			
		||||
    const rateLimitResult = checkRateLimit(userId);
 | 
			
		||||
    if (!rateLimitResult.allowed) {
 | 
			
		||||
      return apiError.rateLimit(rateLimitResult.reason || 'Rate limit exceeded');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const body = await request.json();
 | 
			
		||||
    const { query, mode = 'workflow', taskId: clientTaskId } = body;
 | 
			
		||||
 | 
			
		||||
    // ADD THIS DEBUG LOGGING
 | 
			
		||||
    console.log(`[AI API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
 | 
			
		||||
    console.log(`[MICRO-TASK API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
 | 
			
		||||
    console.log(`[MICRO-TASK API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);
 | 
			
		||||
 | 
			
		||||
    if (!query || typeof query !== 'string') {
 | 
			
		||||
      console.log(`[AI API] Invalid query for task ${clientTaskId}`);
 | 
			
		||||
      console.log(`[MICRO-TASK API] Invalid query for task ${clientTaskId}`);
 | 
			
		||||
      return apiError.badRequest('Query required');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (!['workflow', 'tool'].includes(mode)) {
 | 
			
		||||
      console.log(`[AI API] Invalid mode for task ${clientTaskId}: ${mode}`);
 | 
			
		||||
      console.log(`[MICRO-TASK API] Invalid mode for task ${clientTaskId}: ${mode}`);
 | 
			
		||||
      return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const sanitizedQuery = sanitizeInput(query);
 | 
			
		||||
    if (sanitizedQuery.includes('[FILTERED]')) {
 | 
			
		||||
      console.log(`[AI API] Filtered input detected for task ${clientTaskId}`);
 | 
			
		||||
      console.log(`[MICRO-TASK API] Filtered input detected for task ${clientTaskId}`);
 | 
			
		||||
      return apiError.badRequest('Invalid input detected');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const toolsData = await loadToolsDatabase();
 | 
			
		||||
 | 
			
		||||
    const systemPrompt = mode === 'workflow' 
 | 
			
		||||
      ? createWorkflowSystemPrompt(toolsData)
 | 
			
		||||
      : createToolSystemPrompt(toolsData);
 | 
			
		||||
    
 | 
			
		||||
    const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
 | 
			
		||||
    
 | 
			
		||||
    console.log(`[AI API] About to enqueue task ${taskId}`);
 | 
			
		||||
    console.log(`[MICRO-TASK API] About to enqueue micro-task pipeline ${taskId}`);
 | 
			
		||||
    
 | 
			
		||||
    
 | 
			
		||||
    const aiResponse = await enqueueApiCall(() =>
 | 
			
		||||
      fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
 | 
			
		||||
        method: 'POST',
 | 
			
		||||
        headers: {
 | 
			
		||||
          'Content-Type': 'application/json',
 | 
			
		||||
          'Authorization': `Bearer ${process.env.AI_API_KEY}`
 | 
			
		||||
        },
 | 
			
		||||
        body: JSON.stringify({
 | 
			
		||||
          model: AI_MODEL,
 | 
			
		||||
          messages: [
 | 
			
		||||
            {
 | 
			
		||||
              role: 'system',
 | 
			
		||||
              content: systemPrompt
 | 
			
		||||
            },
 | 
			
		||||
            {
 | 
			
		||||
              role: 'user',
 | 
			
		||||
              content: sanitizedQuery
 | 
			
		||||
            }
 | 
			
		||||
          ],
 | 
			
		||||
          max_tokens: 3500,
 | 
			
		||||
          temperature: 0.3
 | 
			
		||||
        })
 | 
			
		||||
      })
 | 
			
		||||
    const result = await enqueueApiCall(() => 
 | 
			
		||||
      aiPipeline.processQuery(sanitizedQuery, mode)
 | 
			
		||||
    , taskId);
 | 
			
		||||
 | 
			
		||||
    if (!aiResponse.ok) {
 | 
			
		||||
      console.error('AI API error:', await aiResponse.text());
 | 
			
		||||
      return apiServerError.unavailable('AI service unavailable');
 | 
			
		||||
    if (!result || !result.recommendation) {
 | 
			
		||||
      return apiServerError.unavailable('No response from micro-task AI pipeline');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const aiData = await aiResponse.json();
 | 
			
		||||
    const aiContent = aiData.choices?.[0]?.message?.content;
 | 
			
		||||
    const stats = result.processingStats;
 | 
			
		||||
    const estimatedAICallsMade = stats.microTasksCompleted + stats.microTasksFailed;
 | 
			
		||||
    incrementMicroTaskCount(userId, estimatedAICallsMade);
 | 
			
		||||
 | 
			
		||||
    if (!aiContent) {
 | 
			
		||||
      return apiServerError.unavailable('No response from AI');
 | 
			
		||||
    }
 | 
			
		||||
    console.log(`[MICRO-TASK API] Pipeline completed for ${taskId}:`);
 | 
			
		||||
    console.log(`  - Mode: ${mode}`);
 | 
			
		||||
    console.log(`  - User: ${userId}`);
 | 
			
		||||
    console.log(`  - Query length: ${sanitizedQuery.length}`);
 | 
			
		||||
    console.log(`  - Processing time: ${stats.processingTimeMs}ms`);
 | 
			
		||||
    console.log(`  - Micro-tasks completed: ${stats.microTasksCompleted}`);
 | 
			
		||||
    console.log(`  - Micro-tasks failed: ${stats.microTasksFailed}`);
 | 
			
		||||
    console.log(`  - Estimated AI calls: ${estimatedAICallsMade}`);
 | 
			
		||||
    console.log(`  - Embeddings used: ${stats.embeddingsUsed}`);
 | 
			
		||||
    console.log(`  - Final items: ${stats.finalSelectedItems}`);
 | 
			
		||||
 | 
			
		||||
    let recommendation;
 | 
			
		||||
    try {
 | 
			
		||||
      const cleanedContent = stripMarkdownJson(aiContent);
 | 
			
		||||
      recommendation = JSON.parse(cleanedContent);
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.error('Failed to parse AI response:', aiContent);
 | 
			
		||||
      return apiServerError.unavailable('Invalid AI response format');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const validToolNames = new Set(toolsData.tools.map((t: any) => t.name));
 | 
			
		||||
    const validConceptNames = new Set(toolsData.concepts.map((c: any) => c.name));
 | 
			
		||||
    
 | 
			
		||||
    let validatedRecommendation;
 | 
			
		||||
 | 
			
		||||
    if (mode === 'workflow') {
 | 
			
		||||
      validatedRecommendation = {
 | 
			
		||||
        ...recommendation,
 | 
			
		||||
        // Ensure all new fields are included with fallbacks
 | 
			
		||||
        scenario_analysis: recommendation.scenario_analysis || recommendation.problem_analysis || '',
 | 
			
		||||
        investigation_approach: recommendation.investigation_approach || '',
 | 
			
		||||
        critical_considerations: recommendation.critical_considerations || '',
 | 
			
		||||
        recommended_tools: recommendation.recommended_tools?.filter((tool: any) => {
 | 
			
		||||
          if (!validToolNames.has(tool.name)) {
 | 
			
		||||
            console.warn(`AI recommended unknown tool: ${tool.name}`);
 | 
			
		||||
            return false;
 | 
			
		||||
          }
 | 
			
		||||
          return true;
 | 
			
		||||
        }) || [],
 | 
			
		||||
        background_knowledge: recommendation.background_knowledge?.filter((concept: any) => {
 | 
			
		||||
          if (!validConceptNames.has(concept.concept_name)) {
 | 
			
		||||
            console.warn(`AI referenced unknown concept: ${concept.concept_name}`);
 | 
			
		||||
            return false;
 | 
			
		||||
          }
 | 
			
		||||
          return true;
 | 
			
		||||
        }) || []
 | 
			
		||||
      };
 | 
			
		||||
    } else {
 | 
			
		||||
      validatedRecommendation = {
 | 
			
		||||
        ...recommendation,
 | 
			
		||||
        // Ensure all new fields are included with fallbacks
 | 
			
		||||
        problem_analysis: recommendation.problem_analysis || recommendation.scenario_analysis || '',
 | 
			
		||||
        investigation_approach: recommendation.investigation_approach || '',
 | 
			
		||||
        critical_considerations: recommendation.critical_considerations || '',
 | 
			
		||||
        recommended_tools: recommendation.recommended_tools?.filter((tool: any) => {
 | 
			
		||||
          if (!validToolNames.has(tool.name)) {
 | 
			
		||||
            console.warn(`AI recommended unknown tool: ${tool.name}`);
 | 
			
		||||
            return false;
 | 
			
		||||
          }
 | 
			
		||||
          return true;
 | 
			
		||||
        }).map((tool: any, index: number) => ({
 | 
			
		||||
          ...tool,
 | 
			
		||||
          rank: tool.rank || (index + 1),
 | 
			
		||||
          suitability_score: tool.suitability_score || 'medium',
 | 
			
		||||
          pros: Array.isArray(tool.pros) ? tool.pros : [],
 | 
			
		||||
          cons: Array.isArray(tool.cons) ? tool.cons : []
 | 
			
		||||
        })) || [],
 | 
			
		||||
        background_knowledge: recommendation.background_knowledge?.filter((concept: any) => {
 | 
			
		||||
          if (!validConceptNames.has(concept.concept_name)) {
 | 
			
		||||
            console.warn(`AI referenced unknown concept: ${concept.concept_name}`);
 | 
			
		||||
            return false;
 | 
			
		||||
          }
 | 
			
		||||
          return true;
 | 
			
		||||
        }) || []
 | 
			
		||||
      };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    console.log(`[AI Query] Mode: ${mode}, User: ${userId}, Query length: ${sanitizedQuery.length}, Tools: ${validatedRecommendation.recommended_tools.length}, Concepts: ${validatedRecommendation.background_knowledge?.length || 0}`);
 | 
			
		||||
    const currentLimit = rateLimitStore.get(userId);
 | 
			
		||||
    const remainingMicroTasks = currentLimit ? 
 | 
			
		||||
      MICRO_TASK_TOTAL_LIMIT - currentLimit.microTaskCount : MICRO_TASK_TOTAL_LIMIT;
 | 
			
		||||
 | 
			
		||||
    return new Response(JSON.stringify({
 | 
			
		||||
      success: true,
 | 
			
		||||
      mode,
 | 
			
		||||
      taskId,
 | 
			
		||||
      recommendation: validatedRecommendation,
 | 
			
		||||
      query: sanitizedQuery
 | 
			
		||||
      recommendation: result.recommendation,
 | 
			
		||||
      query: sanitizedQuery,
 | 
			
		||||
      processingStats: {
 | 
			
		||||
        ...result.processingStats,
 | 
			
		||||
        pipelineType: 'micro-task',
 | 
			
		||||
        microTasksSuccessRate: stats.microTasksCompleted / (stats.microTasksCompleted + stats.microTasksFailed),
 | 
			
		||||
        averageTaskTime: stats.processingTimeMs / (stats.microTasksCompleted + stats.microTasksFailed),
 | 
			
		||||
        estimatedAICallsMade
 | 
			
		||||
      },
 | 
			
		||||
      rateLimitInfo: {
 | 
			
		||||
        mainRequestsRemaining: MAIN_RATE_LIMIT_MAX - (currentLimit?.count || 0),
 | 
			
		||||
        microTaskCallsRemaining: remainingMicroTasks,
 | 
			
		||||
        resetTime: Date.now() + RATE_LIMIT_WINDOW
 | 
			
		||||
      }
 | 
			
		||||
    }), {
 | 
			
		||||
      status: 200,
 | 
			
		||||
      headers: { 'Content-Type': 'application/json' }
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
  } catch (error) {
 | 
			
		||||
    console.error('AI query error:', error);
 | 
			
		||||
    return apiServerError.internal('Internal server error');
 | 
			
		||||
    console.error('[MICRO-TASK API] Pipeline error:', error);
 | 
			
		||||
    
 | 
			
		||||
    if (error.message.includes('embeddings')) {
 | 
			
		||||
      return apiServerError.unavailable('Embeddings service error - using AI fallback');
 | 
			
		||||
    } else if (error.message.includes('micro-task')) {
 | 
			
		||||
      return apiServerError.unavailable('Micro-task pipeline error - some analysis steps failed');
 | 
			
		||||
    } else if (error.message.includes('selector')) {
 | 
			
		||||
      return apiServerError.unavailable('AI selector service error');
 | 
			
		||||
    } else if (error.message.includes('rate limit')) {
 | 
			
		||||
      return apiError.rateLimit('AI service rate limits exceeded during micro-task processing');
 | 
			
		||||
    } else {
 | 
			
		||||
      return apiServerError.internal('Micro-task AI pipeline error');
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
@ -1263,6 +1263,12 @@ input[type="checkbox"] {
 | 
			
		||||
  gap: 0.5rem; 
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.filter-header-controls {
 | 
			
		||||
  display: flex;
 | 
			
		||||
  align-items: center;
 | 
			
		||||
  gap: 0.5rem;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Search Components */
 | 
			
		||||
.search-wrapper { 
 | 
			
		||||
  position: relative; 
 | 
			
		||||
@ -1315,6 +1321,64 @@ input[type="checkbox"] {
 | 
			
		||||
  color: var(--color-text);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.collapse-toggle {
 | 
			
		||||
  background: none;
 | 
			
		||||
  border: 1px solid var(--color-border);
 | 
			
		||||
  border-radius: 0.375rem;
 | 
			
		||||
  color: var(--color-text-secondary);
 | 
			
		||||
  cursor: pointer;
 | 
			
		||||
  padding: 0.375rem;
 | 
			
		||||
  transition: var(--transition-fast);
 | 
			
		||||
  display: inline-flex;
 | 
			
		||||
  align-items: center;
 | 
			
		||||
  justify-content: center;
 | 
			
		||||
  width: 32px;
 | 
			
		||||
  height: 32px;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.collapse-toggle:hover {
 | 
			
		||||
  background-color: var(--color-bg-secondary);
 | 
			
		||||
  border-color: var(--color-primary);
 | 
			
		||||
  color: var(--color-text);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.collapse-toggle svg {
 | 
			
		||||
  transition: transform var(--transition-medium);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* When expanded, rotate the chevron */
 | 
			
		||||
.collapse-toggle[data-collapsed="false"] svg {
 | 
			
		||||
  transform: rotate(180deg);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Collapsible Content */
 | 
			
		||||
.collapsible-content {
 | 
			
		||||
  overflow: hidden;
 | 
			
		||||
  transition: all var(--transition-medium);
 | 
			
		||||
  opacity: 1;
 | 
			
		||||
  max-height: 1000px;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.collapsible-content.hidden {
 | 
			
		||||
  opacity: 0;
 | 
			
		||||
  max-height: 0;
 | 
			
		||||
  padding-top: 0;
 | 
			
		||||
  padding-bottom: 0;
 | 
			
		||||
  margin-top: 0;
 | 
			
		||||
  margin-bottom: 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Smooth animation for expanding content */
 | 
			
		||||
.collapsible-content:not(.hidden) {
 | 
			
		||||
  animation: expandContent 0.3s ease-out;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Content spacing when expanded */
 | 
			
		||||
.collapsible-content:not(.hidden) .advanced-filters-compact,
 | 
			
		||||
.collapsible-content:not(.hidden) .tag-section {
 | 
			
		||||
  padding-top: 0.75rem;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Filter Grids & Groups */
 | 
			
		||||
.filter-grid-compact { 
 | 
			
		||||
  display: grid; 
 | 
			
		||||
@ -1429,11 +1493,9 @@ input[type="checkbox"] {
 | 
			
		||||
  user-select: none;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Tag System */
 | 
			
		||||
.tag-section {
 | 
			
		||||
  display: flex;
 | 
			
		||||
  flex-direction: column;
 | 
			
		||||
  gap: 1rem;
 | 
			
		||||
.tag-section .tag-controls {
 | 
			
		||||
  order: -1; 
 | 
			
		||||
  margin-bottom: 0.75rem;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.selected-tags { 
 | 
			
		||||
@ -1574,6 +1636,14 @@ input[type="checkbox"] {
 | 
			
		||||
  transition: var(--transition-fast); 
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.filter-reset {
 | 
			
		||||
  width: 32px;
 | 
			
		||||
  height: 32px;
 | 
			
		||||
  display: inline-flex;
 | 
			
		||||
  align-items: center;
 | 
			
		||||
  justify-content: center;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.filter-reset:hover { 
 | 
			
		||||
  background-color: var(--color-bg-secondary); 
 | 
			
		||||
  border-color: var(--color-warning); 
 | 
			
		||||
@ -1591,13 +1661,6 @@ input[type="checkbox"] {
 | 
			
		||||
  opacity: 0.9;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Tag Controls */
 | 
			
		||||
.tag-controls { 
 | 
			
		||||
  display: flex; 
 | 
			
		||||
  align-items: center; 
 | 
			
		||||
  gap: 0.75rem; 
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.tag-toggle { 
 | 
			
		||||
  padding: 0.375rem 0.75rem; 
 | 
			
		||||
  border: 1px solid var(--color-border); 
 | 
			
		||||
@ -1818,6 +1881,130 @@ input[type="checkbox"] {
 | 
			
		||||
  border-left-color: var(--color-warning);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Add to src/styles/global.css - Micro-Task Progress Styles */
 | 
			
		||||
 | 
			
		||||
/* Micro-task progress indicator */
 | 
			
		||||
.micro-task-progress {
 | 
			
		||||
  background-color: var(--color-bg-secondary);
 | 
			
		||||
  border: 1px solid var(--color-border);
 | 
			
		||||
  border-radius: 0.5rem;
 | 
			
		||||
  padding: 1rem;
 | 
			
		||||
  margin: 1rem 0;
 | 
			
		||||
  transition: var(--transition-fast);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-task-header {
 | 
			
		||||
  display: flex;
 | 
			
		||||
  justify-content: space-between;
 | 
			
		||||
  align-items: center;
 | 
			
		||||
  margin-bottom: 0.75rem;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-task-label {
 | 
			
		||||
  font-weight: 600;
 | 
			
		||||
  color: var(--color-primary);
 | 
			
		||||
  font-size: 0.875rem;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-task-counter {
 | 
			
		||||
  background-color: var(--color-primary);
 | 
			
		||||
  color: white;
 | 
			
		||||
  padding: 0.25rem 0.5rem;
 | 
			
		||||
  border-radius: 1rem;
 | 
			
		||||
  font-size: 0.75rem;
 | 
			
		||||
  font-weight: 600;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-task-steps {
 | 
			
		||||
  display: grid;
 | 
			
		||||
  grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
 | 
			
		||||
  gap: 0.5rem;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-step {
 | 
			
		||||
  background-color: var(--color-bg);
 | 
			
		||||
  border: 1px solid var(--color-border);
 | 
			
		||||
  border-radius: 0.375rem;
 | 
			
		||||
  padding: 0.5rem;
 | 
			
		||||
  font-size: 0.75rem;
 | 
			
		||||
  text-align: center;
 | 
			
		||||
  transition: var(--transition-fast);
 | 
			
		||||
  opacity: 0.6;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-step.active {
 | 
			
		||||
  background-color: var(--color-primary);
 | 
			
		||||
  color: white;
 | 
			
		||||
  border-color: var(--color-primary);
 | 
			
		||||
  opacity: 1;
 | 
			
		||||
  transform: scale(1.05);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-step.completed {
 | 
			
		||||
  background-color: var(--color-accent);
 | 
			
		||||
  color: white;
 | 
			
		||||
  border-color: var(--color-accent);
 | 
			
		||||
  opacity: 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-step.failed {
 | 
			
		||||
  background-color: var(--color-error);
 | 
			
		||||
  color: white;
 | 
			
		||||
  border-color: var(--color-error);
 | 
			
		||||
  opacity: 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Enhanced queue status for micro-tasks */
 | 
			
		||||
.queue-status-card.micro-task-mode {
 | 
			
		||||
  border-left: 4px solid var(--color-primary);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.queue-status-card.micro-task-mode .queue-header {
 | 
			
		||||
  background: linear-gradient(135deg, var(--color-primary) 0%, var(--color-accent) 100%);
 | 
			
		||||
  color: white;
 | 
			
		||||
  margin: -1rem -1rem 1rem -1rem;
 | 
			
		||||
  padding: 1rem;
 | 
			
		||||
  border-radius: 0.5rem 0.5rem 0 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Mobile responsive adjustments */
 | 
			
		||||
@media (max-width: 768px) {
 | 
			
		||||
  .micro-task-steps {
 | 
			
		||||
    grid-template-columns: repeat(2, 1fr);
 | 
			
		||||
    gap: 0.375rem;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  .micro-step {
 | 
			
		||||
    font-size: 0.6875rem;
 | 
			
		||||
    padding: 0.375rem;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  .micro-task-header {
 | 
			
		||||
    flex-direction: column;
 | 
			
		||||
    gap: 0.5rem;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Animation for micro-task progress */
 | 
			
		||||
@keyframes micro-task-pulse {
 | 
			
		||||
  0%, 100% { opacity: 1; }
 | 
			
		||||
  50% { opacity: 0.7; }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-step.active {
 | 
			
		||||
  animation: micro-task-pulse 2s ease-in-out infinite;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@keyframes micro-task-complete {
 | 
			
		||||
  0% { transform: scale(1); }
 | 
			
		||||
  50% { transform: scale(1.1); }
 | 
			
		||||
  100% { transform: scale(1); }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-step.completed {
 | 
			
		||||
  animation: micro-task-complete 0.6s ease-out;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* ===================================================================
 | 
			
		||||
   17. WORKFLOW SYSTEM (CONSOLIDATED)
 | 
			
		||||
   ================================================================= */
 | 
			
		||||
@ -2267,6 +2454,17 @@ footer {
 | 
			
		||||
  to { opacity: 1; }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@keyframes expandContent {
 | 
			
		||||
  from {
 | 
			
		||||
    opacity: 0;
 | 
			
		||||
    transform: translateY(-10px);
 | 
			
		||||
  }
 | 
			
		||||
  to {
 | 
			
		||||
    opacity: 1;
 | 
			
		||||
    transform: translateY(0);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@keyframes fadeInUp {
 | 
			
		||||
  from {
 | 
			
		||||
    opacity: 0;
 | 
			
		||||
@ -3261,6 +3459,23 @@ footer {
 | 
			
		||||
  .view-toggle {
 | 
			
		||||
    justify-content: center;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  .filter-header-controls {
 | 
			
		||||
    gap: 0.375rem;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  .collapse-toggle,
 | 
			
		||||
  .filter-reset {
 | 
			
		||||
    width: 28px;
 | 
			
		||||
    height: 28px;
 | 
			
		||||
    padding: 0.25rem;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  .collapse-toggle svg,
 | 
			
		||||
  .filter-reset svg {
 | 
			
		||||
    width: 14px;
 | 
			
		||||
    height: 14px;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@media (width <= 640px) {
 | 
			
		||||
@ -3395,6 +3610,21 @@ footer {
 | 
			
		||||
  .filter-card-compact {
 | 
			
		||||
    padding: 0.5rem;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  .filter-header-compact {
 | 
			
		||||
    flex-wrap: wrap;
 | 
			
		||||
    gap: 0.5rem;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  .filter-header-compact h3 {
 | 
			
		||||
    flex: 1 1 100%;
 | 
			
		||||
    margin-bottom: 0.25rem;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  .filter-header-controls {
 | 
			
		||||
    flex: 1 1 100%;
 | 
			
		||||
    justify-content: flex-end;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										882
									
								
								src/utils/aiPipeline.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										882
									
								
								src/utils/aiPipeline.ts
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,882 @@
 | 
			
		||||
// src/utils/aiPipeline.ts - FIXED: Critical error corrections
 | 
			
		||||
 | 
			
		||||
import { getCompressedToolsDataForAI } from './dataService.js';
 | 
			
		||||
import { embeddingsService, type EmbeddingData } from './embeddings.js';
 | 
			
		||||
 | 
			
		||||
interface AIConfig {
 | 
			
		||||
  endpoint: string;
 | 
			
		||||
  apiKey: string;
 | 
			
		||||
  model: string;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
interface MicroTaskResult {
 | 
			
		||||
  taskType: string;
 | 
			
		||||
  content: string;
 | 
			
		||||
  processingTimeMs: number;
 | 
			
		||||
  success: boolean;
 | 
			
		||||
  error?: string;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
interface AnalysisResult {
 | 
			
		||||
  recommendation: any;
 | 
			
		||||
  processingStats: {
 | 
			
		||||
    embeddingsUsed: boolean;
 | 
			
		||||
    candidatesFromEmbeddings: number;
 | 
			
		||||
    finalSelectedItems: number;
 | 
			
		||||
    processingTimeMs: number;
 | 
			
		||||
    microTasksCompleted: number;
 | 
			
		||||
    microTasksFailed: number;
 | 
			
		||||
    contextContinuityUsed: boolean;
 | 
			
		||||
  };
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
interface AnalysisContext {
 | 
			
		||||
  userQuery: string;
 | 
			
		||||
  mode: string;
 | 
			
		||||
  filteredData: any;
 | 
			
		||||
  contextHistory: string[];
 | 
			
		||||
  
 | 
			
		||||
  // FIXED: Add max context length tracking
 | 
			
		||||
  maxContextLength: number;
 | 
			
		||||
  currentContextLength: number;
 | 
			
		||||
  
 | 
			
		||||
  scenarioAnalysis?: string;
 | 
			
		||||
  problemAnalysis?: string;
 | 
			
		||||
  investigationApproach?: string;
 | 
			
		||||
  criticalConsiderations?: string;
 | 
			
		||||
  selectedTools?: Array<{tool: any, phase: string, priority: string, justification?: string}>;
 | 
			
		||||
  backgroundKnowledge?: Array<{concept: any, relevance: string}>;
 | 
			
		||||
  
 | 
			
		||||
  // FIXED: Add seen tools tracking to prevent duplicates
 | 
			
		||||
  seenToolNames: Set<string>;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
class ImprovedMicroTaskAIPipeline {
 | 
			
		||||
  private config: AIConfig;
 | 
			
		||||
  private maxSelectedItems: number;
 | 
			
		||||
  private embeddingCandidates: number;
 | 
			
		||||
  private similarityThreshold: number;
 | 
			
		||||
  private microTaskDelay: number;
 | 
			
		||||
  
 | 
			
		||||
  // FIXED: Add proper token management
 | 
			
		||||
  private maxContextTokens: number;
 | 
			
		||||
  private maxPromptTokens: number;
 | 
			
		||||
 | 
			
		||||
  constructor() {
 | 
			
		||||
    this.config = {
 | 
			
		||||
      endpoint: this.getEnv('AI_ANALYZER_ENDPOINT'),
 | 
			
		||||
      apiKey: this.getEnv('AI_ANALYZER_API_KEY'), 
 | 
			
		||||
      model: this.getEnv('AI_ANALYZER_MODEL')
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '60', 10);
 | 
			
		||||
    this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '60', 10); 
 | 
			
		||||
    this.similarityThreshold = 0.3; 
 | 
			
		||||
    this.microTaskDelay = parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10);
 | 
			
		||||
    
 | 
			
		||||
    // FIXED: Token management
 | 
			
		||||
    this.maxContextTokens = parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10);
 | 
			
		||||
    this.maxPromptTokens = parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private getEnv(key: string): string {
 | 
			
		||||
    const value = process.env[key];
 | 
			
		||||
    if (!value) {
 | 
			
		||||
      throw new Error(`Missing environment variable: ${key}`);
 | 
			
		||||
    }
 | 
			
		||||
    return value;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // FIXED: Estimate token count (rough approximation)
 | 
			
		||||
  private estimateTokens(text: string): number {
 | 
			
		||||
    return Math.ceil(text.length / 4); // Rough estimate: 4 chars per token
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // FIXED: Manage context history with token limits
 | 
			
		||||
  private addToContextHistory(context: AnalysisContext, newEntry: string): void {
 | 
			
		||||
    const entryTokens = this.estimateTokens(newEntry);
 | 
			
		||||
    
 | 
			
		||||
    // Add new entry
 | 
			
		||||
    context.contextHistory.push(newEntry);
 | 
			
		||||
    context.currentContextLength += entryTokens;
 | 
			
		||||
    
 | 
			
		||||
    // Prune old entries if exceeding limits
 | 
			
		||||
    while (context.currentContextLength > this.maxContextTokens && context.contextHistory.length > 1) {
 | 
			
		||||
      const removed = context.contextHistory.shift()!;
 | 
			
		||||
      context.currentContextLength -= this.estimateTokens(removed);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // FIXED: Safe JSON parsing with validation
 | 
			
		||||
  private safeParseJSON(jsonString: string, fallback: any = null): any {
 | 
			
		||||
    try {
 | 
			
		||||
      const cleaned = jsonString
 | 
			
		||||
        .replace(/^```json\s*/i, '')
 | 
			
		||||
        .replace(/\s*```\s*$/g, '')
 | 
			
		||||
        .trim();
 | 
			
		||||
      
 | 
			
		||||
      const parsed = JSON.parse(cleaned);
 | 
			
		||||
      return parsed;
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.warn('[AI PIPELINE] JSON parsing failed:', error.message);
 | 
			
		||||
      console.warn('[AI PIPELINE] Raw content:', jsonString.slice(0, 200));
 | 
			
		||||
      return fallback;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // FIXED: Add tool deduplication
 | 
			
		||||
  private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string): boolean {
 | 
			
		||||
    if (context.seenToolNames.has(tool.name)) {
 | 
			
		||||
      console.log(`[AI PIPELINE] Skipping duplicate tool: ${tool.name}`);
 | 
			
		||||
      return false;
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    context.seenToolNames.add(tool.name);
 | 
			
		||||
    if (!context.selectedTools) context.selectedTools = [];
 | 
			
		||||
    
 | 
			
		||||
    context.selectedTools.push({
 | 
			
		||||
      tool,
 | 
			
		||||
      phase,
 | 
			
		||||
      priority,
 | 
			
		||||
      justification
 | 
			
		||||
    });
 | 
			
		||||
    
 | 
			
		||||
    return true;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async getIntelligentCandidates(userQuery: string, toolsData: any, mode: string) {
 | 
			
		||||
    let candidateTools: any[] = [];
 | 
			
		||||
    let candidateConcepts: any[] = [];
 | 
			
		||||
    let selectionMethod = 'unknown';
 | 
			
		||||
    
 | 
			
		||||
    if (embeddingsService.isEnabled()) {
 | 
			
		||||
      const similarItems = await embeddingsService.findSimilar(
 | 
			
		||||
        userQuery, 
 | 
			
		||||
        this.embeddingCandidates, 
 | 
			
		||||
        this.similarityThreshold
 | 
			
		||||
      );
 | 
			
		||||
      
 | 
			
		||||
      const toolNames = new Set<string>();
 | 
			
		||||
      const conceptNames = new Set<string>();
 | 
			
		||||
      
 | 
			
		||||
      similarItems.forEach(item => {
 | 
			
		||||
        if (item.type === 'tool') toolNames.add(item.name);
 | 
			
		||||
        if (item.type === 'concept') conceptNames.add(item.name);
 | 
			
		||||
      });
 | 
			
		||||
      
 | 
			
		||||
      console.log(`[IMPROVED PIPELINE] Embeddings found: ${toolNames.size} tools, ${conceptNames.size} concepts`);
 | 
			
		||||
      
 | 
			
		||||
      // FIXED: Use your expected flow - get full data of embeddings results
 | 
			
		||||
      if (toolNames.size >= 15) { // Reasonable threshold for quality
 | 
			
		||||
        candidateTools = toolsData.tools.filter((tool: any) => toolNames.has(tool.name));
 | 
			
		||||
        candidateConcepts = toolsData.concepts.filter((concept: any) => conceptNames.has(concept.name));
 | 
			
		||||
        selectionMethod = 'embeddings_candidates';
 | 
			
		||||
        
 | 
			
		||||
        console.log(`[IMPROVED PIPELINE] Using embeddings candidates: ${candidateTools.length} tools`);
 | 
			
		||||
      } else {
 | 
			
		||||
        console.log(`[IMPROVED PIPELINE] Embeddings insufficient (${toolNames.size} < 15), using full dataset`);
 | 
			
		||||
        candidateTools = toolsData.tools;
 | 
			
		||||
        candidateConcepts = toolsData.concepts;
 | 
			
		||||
        selectionMethod = 'full_dataset';
 | 
			
		||||
      }
 | 
			
		||||
    } else {
 | 
			
		||||
      console.log(`[IMPROVED PIPELINE] Embeddings disabled, using full dataset`);
 | 
			
		||||
      candidateTools = toolsData.tools;
 | 
			
		||||
      candidateConcepts = toolsData.concepts;
 | 
			
		||||
      selectionMethod = 'full_dataset';
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // FIXED: NOW AI ANALYZES FULL DATA of the candidates
 | 
			
		||||
    console.log(`[IMPROVED PIPELINE] AI will analyze FULL DATA of ${candidateTools.length} candidate tools`);
 | 
			
		||||
    const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod);
 | 
			
		||||
    
 | 
			
		||||
    return {
 | 
			
		||||
      tools: finalSelection.selectedTools,
 | 
			
		||||
      concepts: finalSelection.selectedConcepts,
 | 
			
		||||
      domains: toolsData.domains,
 | 
			
		||||
      phases: toolsData.phases,
 | 
			
		||||
      'domain-agnostic-software': toolsData['domain-agnostic-software']
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
// src/utils/aiPipeline.ts - FIXED: De-biased AI selection prompt
 | 
			
		||||
 | 
			
		||||
  private async aiSelectionWithFullData(
 | 
			
		||||
    userQuery: string, 
 | 
			
		||||
    candidateTools: any[], 
 | 
			
		||||
    candidateConcepts: any[], 
 | 
			
		||||
    mode: string,
 | 
			
		||||
    selectionMethod: string
 | 
			
		||||
  ) {
 | 
			
		||||
    const modeInstruction = mode === 'workflow' 
 | 
			
		||||
      ? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases. Select 15-25 tools that cover the full investigation lifecycle.'
 | 
			
		||||
      : 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem. Select 3-8 tools that are most relevant and effective.';
 | 
			
		||||
 | 
			
		||||
    // FIXED: Give AI the COMPLETE tool data, not truncated
 | 
			
		||||
    const toolsWithFullData = candidateTools.map((tool: any) => ({
 | 
			
		||||
      name: tool.name,
 | 
			
		||||
      type: tool.type,
 | 
			
		||||
      description: tool.description,
 | 
			
		||||
      domains: tool.domains,
 | 
			
		||||
      phases: tool.phases,
 | 
			
		||||
      platforms: tool.platforms || [],
 | 
			
		||||
      tags: tool.tags || [],
 | 
			
		||||
      skillLevel: tool.skillLevel,
 | 
			
		||||
      license: tool.license,
 | 
			
		||||
      accessType: tool.accessType,
 | 
			
		||||
      projectUrl: tool.projectUrl,
 | 
			
		||||
      knowledgebase: tool.knowledgebase,
 | 
			
		||||
      related_concepts: tool.related_concepts || [],
 | 
			
		||||
      related_software: tool.related_software || []
 | 
			
		||||
    }));
 | 
			
		||||
 | 
			
		||||
    const conceptsWithFullData = candidateConcepts.map((concept: any) => ({
 | 
			
		||||
      name: concept.name,
 | 
			
		||||
      type: 'concept',
 | 
			
		||||
      description: concept.description,
 | 
			
		||||
      domains: concept.domains,
 | 
			
		||||
      phases: concept.phases,
 | 
			
		||||
      tags: concept.tags || [],
 | 
			
		||||
      skillLevel: concept.skillLevel,
 | 
			
		||||
      related_concepts: concept.related_concepts || [],
 | 
			
		||||
      related_software: concept.related_software || []
 | 
			
		||||
    }));
 | 
			
		||||
 | 
			
		||||
    const prompt = `You are a DFIR expert with access to the complete forensics tool database. You need to select the most relevant tools and concepts for this specific query.
 | 
			
		||||
 | 
			
		||||
SELECTION METHOD: ${selectionMethod}
 | 
			
		||||
${selectionMethod === 'embeddings_candidates' ? 
 | 
			
		||||
  'These tools were pre-filtered by vector similarity, so they are already relevant. Your job is to select the BEST ones from this relevant set.' :
 | 
			
		||||
  'You have access to the full tool database. Select the most relevant tools for the query.'}
 | 
			
		||||
 | 
			
		||||
${modeInstruction}
 | 
			
		||||
 | 
			
		||||
USER QUERY: "${userQuery}"
 | 
			
		||||
 | 
			
		||||
CRITICAL SELECTION PRINCIPLES:
 | 
			
		||||
1. **CONTEXT OVER POPULARITY**: Don't default to "famous" tools like Volatility, Wireshark, or Autopsy just because they're well-known. Choose based on SPECIFIC scenario needs.
 | 
			
		||||
 | 
			
		||||
2. **METHODOLOGY vs SOFTWARE**: 
 | 
			
		||||
   - For RAPID/URGENT scenarios → Prioritize METHODS and rapid response approaches
 | 
			
		||||
   - For TIME-CRITICAL incidents → Choose triage methods over deep analysis tools
 | 
			
		||||
   - For COMPREHENSIVE analysis → Then consider detailed software tools
 | 
			
		||||
   - METHODS (type: "method") are often better than SOFTWARE for procedural guidance
 | 
			
		||||
 | 
			
		||||
3. **SCENARIO-SPECIFIC LOGIC**:
 | 
			
		||||
   - "Rapid/Quick/Urgent/Triage" scenarios → Rapid Incident Response and Triage METHOD > Volatility
 | 
			
		||||
   - "Industrial/SCADA/ICS" scenarios → Specialized ICS tools > generic network tools
 | 
			
		||||
   - "Mobile/Android/iOS" scenarios → Mobile-specific tools > desktop forensics tools
 | 
			
		||||
   - "Memory analysis needed urgently" → Quick memory tools/methods > comprehensive Volatility analysis
 | 
			
		||||
 | 
			
		||||
4. **AVOID TOOL BIAS**:
 | 
			
		||||
   - Volatility is NOT always the answer for memory analysis
 | 
			
		||||
   - Wireshark is NOT always the answer for network analysis  
 | 
			
		||||
   - Autopsy is NOT always the answer for disk analysis
 | 
			
		||||
   - Consider lighter, faster, more appropriate alternatives
 | 
			
		||||
 | 
			
		||||
AVAILABLE TOOLS (with complete data):
 | 
			
		||||
${JSON.stringify(toolsWithFullData.slice(0, 30), null, 2)}
 | 
			
		||||
 | 
			
		||||
AVAILABLE CONCEPTS (with complete data):
 | 
			
		||||
${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}
 | 
			
		||||
 | 
			
		||||
ANALYSIS INSTRUCTIONS:
 | 
			
		||||
1. Read the FULL description of each tool/concept
 | 
			
		||||
2. Consider ALL tags, platforms, related tools, and metadata
 | 
			
		||||
3. **MATCH URGENCY LEVEL**: Rapid scenarios need rapid methods, not deep analysis tools
 | 
			
		||||
4. **MATCH SPECIFICITY**: Specialized scenarios need specialized tools, not generic ones
 | 
			
		||||
5. **CONSIDER TYPE**: Methods provide procedural guidance, software provides technical capability
 | 
			
		||||
6. For SCADA/ICS queries: prioritize specialized ICS tools over generic network tools
 | 
			
		||||
7. For mobile queries: prioritize mobile-specific tools over desktop tools
 | 
			
		||||
8. For rapid/urgent queries: prioritize methodology and triage approaches
 | 
			
		||||
 | 
			
		||||
BIAS PREVENTION:
 | 
			
		||||
- If query mentions "rapid", "quick", "urgent", "triage" → Strongly favor METHODS over deep analysis SOFTWARE
 | 
			
		||||
- If query mentions specific technologies (SCADA, Android, etc.) → Strongly favor specialized tools
 | 
			
		||||
- Don't recommend Volatility unless deep memory analysis is specifically needed AND time allows
 | 
			
		||||
- Don't recommend generic tools when specialized ones are available
 | 
			
		||||
- Consider the SKILL LEVEL and TIME CONSTRAINTS implied by the query
 | 
			
		||||
 | 
			
		||||
Select the most relevant items (max ${this.maxSelectedItems} total).
 | 
			
		||||
 | 
			
		||||
Respond with ONLY this JSON format:
 | 
			
		||||
{
 | 
			
		||||
  "selectedTools": ["Tool Name 1", "Tool Name 2", ...],
 | 
			
		||||
  "selectedConcepts": ["Concept Name 1", "Concept Name 2", ...],
 | 
			
		||||
  "reasoning": "Detailed explanation of why these specific tools were selected for this query, addressing why certain popular tools were NOT selected if they were inappropriate for the scenario context"
 | 
			
		||||
}`;
 | 
			
		||||
 | 
			
		||||
    try {
 | 
			
		||||
      const response = await this.callAI(prompt, 2500); // More tokens for bias prevention logic
 | 
			
		||||
      
 | 
			
		||||
      const result = this.safeParseJSON(response, null);
 | 
			
		||||
      
 | 
			
		||||
      if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
 | 
			
		||||
        console.error('[IMPROVED PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
 | 
			
		||||
        throw new Error('AI selection failed to return valid tool selection');
 | 
			
		||||
      }
 | 
			
		||||
 | 
			
		||||
      const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
 | 
			
		||||
      if (totalSelected === 0) {
 | 
			
		||||
        console.error('[IMPROVED PIPELINE] AI selection returned no tools');
 | 
			
		||||
        throw new Error('AI selection returned empty selection');
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      console.log(`[IMPROVED PIPELINE] AI selected: ${result.selectedTools.length} tools, ${result.selectedConcepts.length} concepts`);
 | 
			
		||||
      console.log(`[IMPROVED PIPELINE] AI reasoning: ${result.reasoning}`);
 | 
			
		||||
 | 
			
		||||
      // Return the actual tool/concept objects
 | 
			
		||||
      const selectedTools = candidateTools.filter(tool => result.selectedTools.includes(tool.name));
 | 
			
		||||
      const selectedConcepts = candidateConcepts.filter(concept => result.selectedConcepts.includes(concept.name));
 | 
			
		||||
      
 | 
			
		||||
      console.log(`[IMPROVED PIPELINE] Final selection: ${selectedTools.length} tools with bias prevention applied`);
 | 
			
		||||
      
 | 
			
		||||
      return {
 | 
			
		||||
        selectedTools,
 | 
			
		||||
        selectedConcepts
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.error('[IMPROVED PIPELINE] AI selection failed:', error);
 | 
			
		||||
      
 | 
			
		||||
      // Emergency fallback with bias awareness
 | 
			
		||||
      console.log('[IMPROVED PIPELINE] Using emergency keyword-based selection');
 | 
			
		||||
      return this.emergencyKeywordSelection(userQuery, candidateTools, candidateConcepts, mode);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private emergencyKeywordSelection(userQuery: string, candidateTools: any[], candidateConcepts: any[], mode: string) {
 | 
			
		||||
    const queryLower = userQuery.toLowerCase();
 | 
			
		||||
    const keywords = queryLower.split(/\s+/).filter(word => word.length > 3);
 | 
			
		||||
    
 | 
			
		||||
    // Score tools based on keyword matches in full data
 | 
			
		||||
    const scoredTools = candidateTools.map(tool => {
 | 
			
		||||
      const toolText = (
 | 
			
		||||
        tool.name + ' ' + 
 | 
			
		||||
        tool.description + ' ' + 
 | 
			
		||||
        (tool.tags || []).join(' ') + ' ' +
 | 
			
		||||
        (tool.platforms || []).join(' ') + ' ' +
 | 
			
		||||
        (tool.domains || []).join(' ')
 | 
			
		||||
      ).toLowerCase();
 | 
			
		||||
      
 | 
			
		||||
      const score = keywords.reduce((acc, keyword) => {
 | 
			
		||||
        return acc + (toolText.includes(keyword) ? 1 : 0);
 | 
			
		||||
      }, 0);
 | 
			
		||||
      
 | 
			
		||||
      return { tool, score };
 | 
			
		||||
    }).filter(item => item.score > 0)
 | 
			
		||||
      .sort((a, b) => b.score - a.score);
 | 
			
		||||
    
 | 
			
		||||
    const maxTools = mode === 'workflow' ? 20 : 8;
 | 
			
		||||
    const selectedTools = scoredTools.slice(0, maxTools).map(item => item.tool);
 | 
			
		||||
    
 | 
			
		||||
    console.log(`[IMPROVED PIPELINE] Emergency selection: ${selectedTools.length} tools, keywords: ${keywords.slice(0, 5).join(', ')}`);
 | 
			
		||||
    
 | 
			
		||||
    return {
 | 
			
		||||
      selectedTools,
 | 
			
		||||
      selectedConcepts: candidateConcepts.slice(0, 3)
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async delay(ms: number): Promise<void> {
 | 
			
		||||
    return new Promise(resolve => setTimeout(resolve, ms));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens: number = 300): Promise<MicroTaskResult> {
 | 
			
		||||
    const startTime = Date.now();
 | 
			
		||||
    
 | 
			
		||||
    // FIXED: Build context prompt with token management
 | 
			
		||||
    let contextPrompt = prompt;
 | 
			
		||||
    if (context.contextHistory.length > 0) {
 | 
			
		||||
      const contextSection = `BISHERIGE ANALYSE:\n${context.contextHistory.join('\n\n')}\n\nAKTUELLE AUFGABE:\n`;
 | 
			
		||||
      const combinedPrompt = contextSection + prompt;
 | 
			
		||||
      
 | 
			
		||||
      // Check if combined prompt exceeds limits
 | 
			
		||||
      if (this.estimateTokens(combinedPrompt) <= this.maxPromptTokens) {
 | 
			
		||||
        contextPrompt = combinedPrompt;
 | 
			
		||||
      } else {
 | 
			
		||||
        console.warn('[AI PIPELINE] Context too long, using prompt only');
 | 
			
		||||
        // Could implement smarter context truncation here
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    try {
 | 
			
		||||
      const response = await this.callAI(contextPrompt, maxTokens);
 | 
			
		||||
      
 | 
			
		||||
      return {
 | 
			
		||||
        taskType: 'micro-task',
 | 
			
		||||
        content: response.trim(),
 | 
			
		||||
        processingTimeMs: Date.now() - startTime,
 | 
			
		||||
        success: true
 | 
			
		||||
      };
 | 
			
		||||
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      return {
 | 
			
		||||
        taskType: 'micro-task',
 | 
			
		||||
        content: '',
 | 
			
		||||
        processingTimeMs: Date.now() - startTime,
 | 
			
		||||
        success: false,
 | 
			
		||||
        error: error.message
 | 
			
		||||
      };
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
 | 
			
		||||
    const isWorkflow = context.mode === 'workflow';
 | 
			
		||||
    
 | 
			
		||||
    const prompt = `Sie sind ein erfahrener DFIR-Experte. Analysieren Sie das folgende ${isWorkflow ? 'forensische Szenario' : 'technische Problem'}.
 | 
			
		||||
 | 
			
		||||
${isWorkflow ? 'FORENSISCHES SZENARIO' : 'TECHNISCHES PROBLEM'}: "${context.userQuery}"
 | 
			
		||||
 | 
			
		||||
Führen Sie eine systematische ${isWorkflow ? 'Szenario-Analyse' : 'Problem-Analyse'} durch und berücksichtigen Sie dabei:
 | 
			
		||||
 | 
			
		||||
${isWorkflow ? 
 | 
			
		||||
  `- Angriffsvektoren und Bedrohungsmodellierung nach MITRE ATT&CK
 | 
			
		||||
- Betroffene Systeme und kritische Infrastrukturen
 | 
			
		||||
- Zeitkritische Faktoren und Beweiserhaltung
 | 
			
		||||
- Forensische Artefakte und Datenquellen` :
 | 
			
		||||
  `- Spezifische forensische Herausforderungen
 | 
			
		||||
- Verfügbare Datenquellen und deren Integrität
 | 
			
		||||
- Methodische Anforderungen für rechtssichere Analyse`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählungen oder Markdown-Formatierung. Maximum 150 Wörter.`;
 | 
			
		||||
 | 
			
		||||
    const result = await this.callMicroTaskAI(prompt, context, 220);
 | 
			
		||||
    
 | 
			
		||||
    if (result.success) {
 | 
			
		||||
      if (isWorkflow) {
 | 
			
		||||
        context.scenarioAnalysis = result.content;
 | 
			
		||||
      } else {
 | 
			
		||||
        context.problemAnalysis = result.content;
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      // FIXED: Use new context management
 | 
			
		||||
      this.addToContextHistory(context, `${isWorkflow ? 'Szenario' : 'Problem'}-Analyse: ${result.content.slice(0, 200)}...`);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    return result;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async generateApproach(context: AnalysisContext): Promise<MicroTaskResult> {
 | 
			
		||||
    const isWorkflow = context.mode === 'workflow';
 | 
			
		||||
    
 | 
			
		||||
    const prompt = `Basierend auf der Analyse entwickeln Sie einen fundierten ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} nach NIST SP 800-86 Methodik.
 | 
			
		||||
 | 
			
		||||
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
 | 
			
		||||
 | 
			
		||||
Entwickeln Sie einen systematischen ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} unter Berücksichtigung von:
 | 
			
		||||
 | 
			
		||||
${isWorkflow ?
 | 
			
		||||
  `- Triage-Prioritäten nach forensischer Dringlichkeit
 | 
			
		||||
- Phasenabfolge nach NIST-Methodik
 | 
			
		||||
- Kontaminationsvermeidung und forensische Isolierung` :
 | 
			
		||||
  `- Methodik-Auswahl nach wissenschaftlichen Kriterien
 | 
			
		||||
- Validierung und Verifizierung der gewählten Ansätze
 | 
			
		||||
- Integration in bestehende forensische Workflows`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 150 Wörter.`;
 | 
			
		||||
 | 
			
		||||
    const result = await this.callMicroTaskAI(prompt, context, 220);
 | 
			
		||||
    
 | 
			
		||||
    if (result.success) {
 | 
			
		||||
      context.investigationApproach = result.content;
 | 
			
		||||
      this.addToContextHistory(context, `${isWorkflow ? 'Untersuchungs' : 'Lösungs'}ansatz: ${result.content.slice(0, 200)}...`);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    return result;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async generateCriticalConsiderations(context: AnalysisContext): Promise<MicroTaskResult> {
 | 
			
		||||
    const isWorkflow = context.mode === 'workflow';
 | 
			
		||||
    
 | 
			
		||||
    const prompt = `Identifizieren Sie ${isWorkflow ? 'kritische forensische Überlegungen' : 'wichtige methodische Voraussetzungen'} für diesen Fall.
 | 
			
		||||
 | 
			
		||||
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
 | 
			
		||||
 | 
			
		||||
Berücksichtigen Sie folgende forensische Aspekte:
 | 
			
		||||
 | 
			
		||||
${isWorkflow ?
 | 
			
		||||
  `- Time-sensitive evidence preservation
 | 
			
		||||
- Chain of custody requirements und rechtliche Verwertbarkeit
 | 
			
		||||
- Incident containment vs. evidence preservation Dilemma
 | 
			
		||||
- Privacy- und Compliance-Anforderungen` :
 | 
			
		||||
  `- Tool-Validierung und Nachvollziehbarkeit
 | 
			
		||||
- False positive/negative Risiken bei der gewählten Methodik
 | 
			
		||||
- Qualifikationsanforderungen für die Durchführung
 | 
			
		||||
- Dokumentations- und Reporting-Standards`
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.`;
 | 
			
		||||
 | 
			
		||||
    const result = await this.callMicroTaskAI(prompt, context, 180);
 | 
			
		||||
    
 | 
			
		||||
    if (result.success) {
 | 
			
		||||
      context.criticalConsiderations = result.content;
 | 
			
		||||
      this.addToContextHistory(context, `Kritische Überlegungen: ${result.content.slice(0, 200)}...`);
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    return result;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async selectToolsForPhase(context: AnalysisContext, phase: any): Promise<MicroTaskResult> {
 | 
			
		||||
    const phaseTools = context.filteredData.tools.filter((tool: any) => 
 | 
			
		||||
      tool.phases && tool.phases.includes(phase.id)
 | 
			
		||||
    );
 | 
			
		||||
 | 
			
		||||
    if (phaseTools.length === 0) {
 | 
			
		||||
      return {
 | 
			
		||||
        taskType: 'tool-selection',
 | 
			
		||||
        content: JSON.stringify([]),
 | 
			
		||||
        processingTimeMs: 0,
 | 
			
		||||
        success: true
 | 
			
		||||
      };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const prompt = `Wählen Sie 2-3 Methoden/Tools für die Phase "${phase.name}" basierend auf objektiven, fallbezogenen Kriterien.
 | 
			
		||||
 | 
			
		||||
SZENARIO: "${context.userQuery}"
 | 
			
		||||
 | 
			
		||||
VERFÜGBARE TOOLS FÜR ${phase.name.toUpperCase()}:
 | 
			
		||||
${phaseTools.map((tool: any) => `- ${tool.name}: ${tool.description.slice(0, 100)}...`).join('\n')}
 | 
			
		||||
 | 
			
		||||
Wählen Sie Methoden/Tools nach forensischen Kriterien aus:
 | 
			
		||||
- Court admissibility und Chain of Custody Kompatibilität  
 | 
			
		||||
- Integration in forensische Standard-Workflows
 | 
			
		||||
- Reproduzierbarkeit und Dokumentationsqualität
 | 
			
		||||
- Objektivität
 | 
			
		||||
 | 
			
		||||
Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
 | 
			
		||||
[
 | 
			
		||||
  {
 | 
			
		||||
    "toolName": "Exakter Methoden/Tool-Name",
 | 
			
		||||
    "priority": "high|medium|low", 
 | 
			
		||||
    "justification": "Objektive Begründung warum diese Methode/Tool für das spezifische Szenario besser geeignet ist"
 | 
			
		||||
  }
 | 
			
		||||
]`;
 | 
			
		||||
 | 
			
		||||
    const result = await this.callMicroTaskAI(prompt, context, 450);
 | 
			
		||||
    
 | 
			
		||||
    if (result.success) {
 | 
			
		||||
      // FIXED: Safe JSON parsing with validation
 | 
			
		||||
      const selections = this.safeParseJSON(result.content, []);
 | 
			
		||||
      
 | 
			
		||||
      if (Array.isArray(selections)) {
 | 
			
		||||
        const validSelections = selections.filter((sel: any) => 
 | 
			
		||||
          sel.toolName && phaseTools.some((tool: any) => tool.name === sel.toolName)
 | 
			
		||||
        );
 | 
			
		||||
        
 | 
			
		||||
        validSelections.forEach((sel: any) => {
 | 
			
		||||
          const tool = phaseTools.find((t: any) => t.name === sel.toolName);
 | 
			
		||||
          if (tool) {
 | 
			
		||||
            // FIXED: Use deduplication helper
 | 
			
		||||
            this.addToolToSelection(context, tool, phase.id, sel.priority, sel.justification);
 | 
			
		||||
          }
 | 
			
		||||
        });
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    return result;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Micro-task: rate one candidate tool against the user's specific problem.
   * The AI must answer with a fixed JSON object (score, explanation, pros,
   * cons, alternatives). On success the evaluation (plus the caller-supplied
   * rank) is attached to a copy of the tool and recorded via the
   * deduplicating addToolToSelection helper under the 'evaluation' phase key.
   *
   * @param tool - Candidate tool object (name/description/platforms/skillLevel read here).
   * @param rank - 1-based position of this tool among the evaluated candidates.
   */
  private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
    const prompt = `Bewerten Sie diese Methode/Tool fallbezogen für das spezifische Problem nach forensischen Qualitätskriterien.

PROBLEM: "${context.userQuery}"

TOOL: ${tool.name}
BESCHREIBUNG: ${tool.description}
PLATTFORMEN: ${tool.platforms?.join(', ') || 'N/A'}
SKILL LEVEL: ${tool.skillLevel}

Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
{
  "suitability_score": "high|medium|low",
  "detailed_explanation": "Detaillierte forensische Begründung warum diese Methode/Tool das Problem löst",
  "implementation_approach": "Konkrete methodische Schritte zur korrekten Anwendung für dieses spezifische Problem",
  "pros": ["Forensischer Vorteil 1", "Validierter Vorteil 2"],
  "cons": ["Methodische Limitation 1", "Potenzielle Schwäche 2"],
  "alternatives": "Alternative Ansätze falls diese Methode/Tool nicht optimal ist"
}`;

    const result = await this.callMicroTaskAI(prompt, context, 650);

    if (result.success) {
      // Defensive parse: malformed AI output falls back to a neutral evaluation.
      const evaluation = this.safeParseJSON(result.content, {
        suitability_score: 'medium',
        detailed_explanation: 'Evaluation failed',
        implementation_approach: '',
        pros: [],
        cons: [],
        alternatives: ''
      });

      // Deduplicating helper prevents the same tool being recorded twice.
      this.addToolToSelection(context, {
        ...tool,
        evaluation: {
          ...evaluation,
          rank
        }
      }, 'evaluation', evaluation.suitability_score);
    }

    return result;
  }
 | 
			
		||||
 | 
			
		||||
  private async selectBackgroundKnowledge(context: AnalysisContext): Promise<MicroTaskResult> {
 | 
			
		||||
    const availableConcepts = context.filteredData.concepts;
 | 
			
		||||
    
 | 
			
		||||
    if (availableConcepts.length === 0) {
 | 
			
		||||
      return {
 | 
			
		||||
        taskType: 'background-knowledge',
 | 
			
		||||
        content: JSON.stringify([]),
 | 
			
		||||
        processingTimeMs: 0,
 | 
			
		||||
        success: true
 | 
			
		||||
      };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const selectedToolNames = context.selectedTools?.map(st => st.tool.name) || [];
 | 
			
		||||
    
 | 
			
		||||
    const prompt = `Wählen Sie relevante forensische Konzepte für das Verständnis der empfohlenen Methodik.
 | 
			
		||||
 | 
			
		||||
${context.mode === 'workflow' ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
 | 
			
		||||
EMPFOHLENE TOOLS: ${selectedToolNames.join(', ')}
 | 
			
		||||
 | 
			
		||||
VERFÜGBARE KONZEPTE:
 | 
			
		||||
${availableConcepts.slice(0, 15).map((concept: any) => `- ${concept.name}: ${concept.description.slice(0, 80)}...`).join('\n')}
 | 
			
		||||
 | 
			
		||||
Wählen Sie 2-4 Konzepte aus, die für das Verständnis der forensischen Methodik essentiell sind.
 | 
			
		||||
 | 
			
		||||
Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
 | 
			
		||||
[
 | 
			
		||||
  {
 | 
			
		||||
    "conceptName": "Exakter Konzept-Name",
 | 
			
		||||
    "relevance": "Forensische Relevanz: Warum dieses Konzept für das Verständnis der Methodik kritisch ist"
 | 
			
		||||
  }
 | 
			
		||||
]`;
 | 
			
		||||
 | 
			
		||||
    const result = await this.callMicroTaskAI(prompt, context, 400);
 | 
			
		||||
    
 | 
			
		||||
    if (result.success) {
 | 
			
		||||
      // FIXED: Safe JSON parsing
 | 
			
		||||
      const selections = this.safeParseJSON(result.content, []);
 | 
			
		||||
      
 | 
			
		||||
      if (Array.isArray(selections)) {
 | 
			
		||||
        context.backgroundKnowledge = selections.filter((sel: any) => 
 | 
			
		||||
          sel.conceptName && availableConcepts.some((concept: any) => concept.name === sel.conceptName)
 | 
			
		||||
        ).map((sel: any) => ({
 | 
			
		||||
          concept: availableConcepts.find((c: any) => c.name === sel.conceptName),
 | 
			
		||||
          relevance: sel.relevance
 | 
			
		||||
        }));
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    return result;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Final micro-task: produce the closing recommendation text.
   * Workflow mode asks for concrete DFIR workflow steps (max 120 words);
   * tool mode asks for methodological/QA considerations (max 100 words).
   * Both prompts forbid lists and markdown so the result is plain prose.
   * The result is returned as-is; it is not written into the context.
   */
  private async generateFinalRecommendations(context: AnalysisContext): Promise<MicroTaskResult> {
    const isWorkflow = context.mode === 'workflow';
    
    const prompt = isWorkflow ? 
      `Erstellen Sie eine forensisch fundierte Workflow-Empfehlung basierend auf DFIR-Prinzipien.

SZENARIO: "${context.userQuery}"
AUSGEWÄHLTE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Tools ausgewählt'}

Erstellen Sie konkrete methodische Workflow-Schritte für dieses spezifische Szenario unter Berücksichtigung forensischer Best Practices, Objektivität und rechtlicher Verwertbarkeit.

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.` :
      
      `Erstellen Sie wichtige methodische Überlegungen für die korrekte Methoden-/Tool-Anwendung.

PROBLEM: "${context.userQuery}"
EMPFOHLENE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Methoden/Tools ausgewählt'}

Geben Sie kritische methodische Überlegungen, Validierungsanforderungen und Qualitätssicherungsmaßnahmen für die korrekte Anwendung der empfohlenen Methoden/Tools.

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 100 Wörter.`;

    // 180-token budget matches the word limits requested in the prompts.
    const result = await this.callMicroTaskAI(prompt, context, 180);
    return result;
  }
 | 
			
		||||
 | 
			
		||||
  private async callAI(prompt: string, maxTokens: number = 1000): Promise<string> {
 | 
			
		||||
    const response = await fetch(`${this.config.endpoint}/v1/chat/completions`, {
 | 
			
		||||
      method: 'POST',
 | 
			
		||||
      headers: {
 | 
			
		||||
        'Content-Type': 'application/json',
 | 
			
		||||
        'Authorization': `Bearer ${this.config.apiKey}`
 | 
			
		||||
      },
 | 
			
		||||
      body: JSON.stringify({
 | 
			
		||||
        model: this.config.model,
 | 
			
		||||
        messages: [{ role: 'user', content: prompt }],
 | 
			
		||||
        max_tokens: maxTokens,
 | 
			
		||||
        temperature: 0.3
 | 
			
		||||
      })
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    if (!response.ok) {
 | 
			
		||||
      const errorText = await response.text();
 | 
			
		||||
      throw new Error(`AI API error: ${response.status} - ${errorText}`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const data = await response.json();
 | 
			
		||||
    const content = data.choices?.[0]?.message?.content;
 | 
			
		||||
    
 | 
			
		||||
    if (!content) {
 | 
			
		||||
      throw new Error('No response from AI model');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return content;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Main entry point: run the full micro-task pipeline for one user query.
   *
   * Stages: fetch compressed tool data, narrow it to intelligent candidates,
   * then run a fixed sequence of AI micro-tasks that each read and extend a
   * shared AnalysisContext. Individual task failures are counted but do not
   * abort the pipeline; each task is separated by a configurable delay.
   *
   * @param userQuery - Free-text query from the user.
   * @param mode - 'workflow' (per-phase tool selection) or tool mode
   *               (evaluate the top 3 candidate tools).
   * @returns The assembled recommendation plus processing statistics.
   * @throws Re-throws any error from data loading or the micro-task calls.
   */
  async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {
    const startTime = Date.now();
    let completedTasks = 0;
    let failedTasks = 0;
    
    console.log(`[IMPROVED PIPELINE] Starting ${mode} query processing with context continuity`);

    try {
      // Stage 1: Get intelligent candidates (embeddings + AI selection)
      const toolsData = await getCompressedToolsDataForAI();
      const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode);
      
      // Shared mutable context threaded through every micro-task below.
      const context: AnalysisContext = {
        userQuery,
        mode,
        filteredData,
        contextHistory: [],
        maxContextLength: this.maxContextTokens,
        currentContextLength: 0,
        seenToolNames: new Set<string>() // deduplication tracking across tasks
      };

      console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);

      // MICRO-TASK SEQUENCE — order matters: later tasks read context fields
      // written by earlier ones.
      
      // Task 1: Scenario/Problem Analysis
      const analysisResult = await this.analyzeScenario(context);
      if (analysisResult.success) completedTasks++; else failedTasks++;
      await this.delay(this.microTaskDelay);

      // Task 2: Investigation/Solution Approach
      const approachResult = await this.generateApproach(context);
      if (approachResult.success) completedTasks++; else failedTasks++;
      await this.delay(this.microTaskDelay);

      // Task 3: Critical Considerations
      const considerationsResult = await this.generateCriticalConsiderations(context);
      if (considerationsResult.success) completedTasks++; else failedTasks++;
      await this.delay(this.microTaskDelay);

      // Task 4: Tool Selection/Evaluation (mode-dependent)
      if (mode === 'workflow') {
        // Select tools for each phase
        const phases = toolsData.phases || [];
        for (const phase of phases) {
          const toolSelectionResult = await this.selectToolsForPhase(context, phase);
          if (toolSelectionResult.success) completedTasks++; else failedTasks++;
          await this.delay(this.microTaskDelay);
        }
      } else {
        // Evaluate top 3 tools for specific problem
        const topTools = filteredData.tools.slice(0, 3);
        for (let i = 0; i < topTools.length; i++) {
          const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1);
          if (evaluationResult.success) completedTasks++; else failedTasks++;
          await this.delay(this.microTaskDelay);
        }
      }

      // Task 5: Background Knowledge Selection
      const knowledgeResult = await this.selectBackgroundKnowledge(context);
      if (knowledgeResult.success) completedTasks++; else failedTasks++;
      await this.delay(this.microTaskDelay);

      // Task 6: Final Recommendations
      const finalResult = await this.generateFinalRecommendations(context);
      if (finalResult.success) completedTasks++; else failedTasks++;

      // Build final recommendation
      const recommendation = this.buildRecommendation(context, mode, finalResult.content);

      const processingStats = {
        embeddingsUsed: embeddingsService.isEnabled(),
        candidatesFromEmbeddings: filteredData.tools.length,
        finalSelectedItems: (context.selectedTools?.length || 0) + 
                           (context.backgroundKnowledge?.length || 0),
        processingTimeMs: Date.now() - startTime,
        microTasksCompleted: completedTasks,
        microTasksFailed: failedTasks,
        contextContinuityUsed: true
      };

      console.log(`[IMPROVED PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
      console.log(`[IMPROVED PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);

      return {
        recommendation,
        processingStats
      };

    } catch (error) {
      console.error('[IMPROVED PIPELINE] Processing failed:', error);
      throw error;
    }
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Assemble the final recommendation object from the accumulated context.
   * Workflow mode yields phase-tagged tools plus a workflow suggestion;
   * tool mode yields ranked, evaluated tools plus additional considerations.
   * Shared fields (analysis, approach, considerations, background knowledge)
   * are common to both shapes.
   */
  private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any {
    const isWorkflow = mode === 'workflow';
    const selected = context.selectedTools || [];
    const analysisKey = isWorkflow ? 'scenario_analysis' : 'problem_analysis';

    const base = {
      [analysisKey]: isWorkflow ? context.scenarioAnalysis : context.problemAnalysis,
      investigation_approach: context.investigationApproach,
      critical_considerations: context.criticalConsiderations,
      background_knowledge: (context.backgroundKnowledge || []).map(bk => ({
        concept_name: bk.concept.name,
        relevance: bk.relevance
      }))
    };

    if (isWorkflow) {
      return {
        ...base,
        recommended_tools: selected.map(st => ({
          name: st.tool.name,
          phase: st.phase,
          priority: st.priority,
          justification: st.justification || `Empfohlen für ${st.phase}`
        })),
        workflow_suggestion: finalContent
      };
    }

    return {
      ...base,
      recommended_tools: selected.map(st => ({
        name: st.tool.name,
        rank: st.tool.evaluation?.rank || 1,
        suitability_score: st.priority,
        detailed_explanation: st.tool.evaluation?.detailed_explanation || '',
        implementation_approach: st.tool.evaluation?.implementation_approach || '',
        pros: st.tool.evaluation?.pros || [],
        cons: st.tool.evaluation?.cons || [],
        alternatives: st.tool.evaluation?.alternatives || ''
      })),
      additional_considerations: finalContent
    };
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Module-level singleton: all consumers share one pipeline instance.
const aiPipeline = new ImprovedMicroTaskAIPipeline();

export { aiPipeline, type AnalysisResult };
 | 
			
		||||
@ -1,3 +1,4 @@
 | 
			
		||||
// src/utils/dataService.ts - Enhanced for micro-task AI pipeline
 | 
			
		||||
import { promises as fs } from 'fs';
 | 
			
		||||
import { load } from 'js-yaml';
 | 
			
		||||
import path from 'path';
 | 
			
		||||
@ -21,30 +22,44 @@ const ToolSchema = z.object({
 | 
			
		||||
  accessType: z.string().optional().nullable(),
 | 
			
		||||
  'domain-agnostic-software': z.array(z.string()).optional().nullable(),
 | 
			
		||||
  related_concepts: z.array(z.string()).optional().nullable().default([]),
 | 
			
		||||
  related_software: z.array(z.string()).optional().nullable().default([]), // Added this line
 | 
			
		||||
  related_software: z.array(z.string()).optional().nullable().default([]),
 | 
			
		||||
});
 | 
			
		||||
 | 
			
		||||
const ToolsDataSchema = z.object({
 | 
			
		||||
  tools: z.array(ToolSchema),
 | 
			
		||||
  domains: z.array(z.object({
 | 
			
		||||
    id: z.string(),
 | 
			
		||||
    name: z.string()
 | 
			
		||||
    name: z.string(),
 | 
			
		||||
    description: z.string().optional() 
 | 
			
		||||
  })),
 | 
			
		||||
  phases: z.array(z.object({
 | 
			
		||||
    id: z.string(), 
 | 
			
		||||
    name: z.string(),
 | 
			
		||||
    description: z.string().optional()
 | 
			
		||||
    description: z.string().optional(),
 | 
			
		||||
    typical_tools: z.array(z.string()).optional().default([]), 
 | 
			
		||||
    key_activities: z.array(z.string()).optional().default([]) 
 | 
			
		||||
  })),
 | 
			
		||||
  'domain-agnostic-software': z.array(z.object({
 | 
			
		||||
    id: z.string(),
 | 
			
		||||
    name: z.string(),
 | 
			
		||||
    description: z.string().optional()
 | 
			
		||||
    description: z.string().optional(),
 | 
			
		||||
    use_cases: z.array(z.string()).optional().default([]) 
 | 
			
		||||
  })).optional().default([]),
 | 
			
		||||
  scenarios: z.array(z.object({
 | 
			
		||||
    id: z.string(),
 | 
			
		||||
    icon: z.string(),
 | 
			
		||||
    friendly_name: z.string()
 | 
			
		||||
    friendly_name: z.string(),
 | 
			
		||||
    description: z.string().optional(), 
 | 
			
		||||
    typical_phases: z.array(z.string()).optional().default([]), 
 | 
			
		||||
    complexity: z.enum(['low', 'medium', 'high']).optional() 
 | 
			
		||||
  })).optional().default([]),
 | 
			
		||||
  skill_levels: z.object({
 | 
			
		||||
    novice: z.string().optional(),
 | 
			
		||||
    beginner: z.string().optional(), 
 | 
			
		||||
    intermediate: z.string().optional(),
 | 
			
		||||
    advanced: z.string().optional(),
 | 
			
		||||
    expert: z.string().optional()
 | 
			
		||||
  }).optional().default({})
 | 
			
		||||
});
 | 
			
		||||
 | 
			
		||||
interface ToolsData {
 | 
			
		||||
@ -53,20 +68,49 @@ interface ToolsData {
 | 
			
		||||
  phases: any[];
 | 
			
		||||
  'domain-agnostic-software': any[];
 | 
			
		||||
  scenarios: any[];
 | 
			
		||||
  skill_levels?: any;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
interface CompressedToolsData {
 | 
			
		||||
interface EnhancedCompressedToolsData {
 | 
			
		||||
  tools: any[];
 | 
			
		||||
  concepts: any[];
 | 
			
		||||
  domains: any[];
 | 
			
		||||
  phases: any[];
 | 
			
		||||
  'domain-agnostic-software': any[];
 | 
			
		||||
  scenarios?: any[]; // Optional for AI processing
 | 
			
		||||
  skill_levels: any;
 | 
			
		||||
  // Enhanced context for micro-tasks
 | 
			
		||||
  domain_relationships: DomainRelationship[];
 | 
			
		||||
  phase_dependencies: PhaseDependency[];
 | 
			
		||||
  tool_compatibility_matrix: CompatibilityMatrix[];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/** Per-domain statistics derived from the tool catalogue (see generateDomainRelationships). */
interface DomainRelationship {
  domain_id: string;            // id of the domain these stats describe
  tool_count: number;           // number of tools listing this domain
  common_tags: string[];        // up to five most frequent tags among those tools
  skill_distribution: Record<string, number>; // skillLevel -> tool count
}
 | 
			
		||||
 | 
			
		||||
/** Ordering metadata for one investigation phase (see generatePhaseDependencies). */
interface PhaseDependency {
  phase_id: string;             // id of the phase
  order: number;                // 1-based position in the phase sequence
  depends_on: string | null;    // id of the preceding phase, null for the first
  enables: string | null;       // id of the following phase, null for the last
  is_parallel_capable: boolean; // true for phases flagged as parallelizable
  typical_duration: string;     // rough duration hint, e.g. 'hours-days'
}
 | 
			
		||||
 | 
			
		||||
/** Groups of tool names sharing one characteristic (see generateToolCompatibilityMatrix). */
interface CompatibilityMatrix {
  type: string;                     // 'platform_compatibility' or 'phase_synergy'
  groups: Record<string, string[]>; // group key (platform/phase id) -> tool names
}
 | 
			
		||||
 | 
			
		||||
let cachedData: ToolsData | null = null;
 | 
			
		||||
let cachedRandomizedData: ToolsData | null = null;
 | 
			
		||||
let cachedCompressedData: CompressedToolsData | null = null;
 | 
			
		||||
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
 | 
			
		||||
let lastRandomizationDate: string | null = null;
 | 
			
		||||
let dataVersion: string | null = null;
 | 
			
		||||
 | 
			
		||||
function seededRandom(seed: number): () => number {
 | 
			
		||||
  let x = Math.sin(seed) * 10000;
 | 
			
		||||
@ -91,6 +135,115 @@ function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
 | 
			
		||||
  return shuffled;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
function generateDataVersion(data: any): string {
 | 
			
		||||
  const str = JSON.stringify(data, Object.keys(data).sort());
 | 
			
		||||
  let hash = 0;
 | 
			
		||||
  for (let i = 0; i < str.length; i++) {
 | 
			
		||||
    const char = str.charCodeAt(i);
 | 
			
		||||
    hash = ((hash << 5) - hash) + char;
 | 
			
		||||
    hash = hash & hash;
 | 
			
		||||
  }
 | 
			
		||||
  return Math.abs(hash).toString(36);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Enhanced: Generate domain relationships for better AI understanding
 | 
			
		||||
function generateDomainRelationships(domains: any[], tools: any[]): DomainRelationship[] {
 | 
			
		||||
  const relationships: DomainRelationship[] = [];
 | 
			
		||||
  
 | 
			
		||||
  for (const domain of domains) {
 | 
			
		||||
    const domainTools = tools.filter(tool => 
 | 
			
		||||
      tool.domains && tool.domains.includes(domain.id)
 | 
			
		||||
    );
 | 
			
		||||
    
 | 
			
		||||
    const commonTags = domainTools
 | 
			
		||||
      .flatMap(tool => tool.tags || [])
 | 
			
		||||
      .reduce((acc: any, tag: string) => {
 | 
			
		||||
        acc[tag] = (acc[tag] || 0) + 1;
 | 
			
		||||
        return acc;
 | 
			
		||||
      }, {});
 | 
			
		||||
      
 | 
			
		||||
    const topTags = Object.entries(commonTags)
 | 
			
		||||
      .sort(([,a], [,b]) => (b as number) - (a as number))
 | 
			
		||||
      .slice(0, 5)
 | 
			
		||||
      .map(([tag]) => tag);
 | 
			
		||||
    
 | 
			
		||||
    relationships.push({
 | 
			
		||||
      domain_id: domain.id,
 | 
			
		||||
      tool_count: domainTools.length,
 | 
			
		||||
      common_tags: topTags,
 | 
			
		||||
      skill_distribution: domainTools.reduce((acc: any, tool: any) => {
 | 
			
		||||
        acc[tool.skillLevel] = (acc[tool.skillLevel] || 0) + 1;
 | 
			
		||||
        return acc;
 | 
			
		||||
      }, {})
 | 
			
		||||
    });
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  return relationships;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Enhanced: Generate phase dependencies
 | 
			
		||||
function generatePhaseDependencies(phases: any[]): PhaseDependency[] {
 | 
			
		||||
  const dependencies: PhaseDependency[] = [];
 | 
			
		||||
  
 | 
			
		||||
  for (let i = 0; i < phases.length; i++) {
 | 
			
		||||
    const phase = phases[i];
 | 
			
		||||
    const nextPhase = phases[i + 1];
 | 
			
		||||
    const prevPhase = phases[i - 1];
 | 
			
		||||
    
 | 
			
		||||
    dependencies.push({
 | 
			
		||||
      phase_id: phase.id,
 | 
			
		||||
      order: i + 1,
 | 
			
		||||
      depends_on: prevPhase?.id || null,
 | 
			
		||||
      enables: nextPhase?.id || null,
 | 
			
		||||
      is_parallel_capable: ['examination', 'analysis'].includes(phase.id), // Some phases can run in parallel
 | 
			
		||||
      typical_duration: phase.id === 'data-collection' ? 'hours-days' :
 | 
			
		||||
                       phase.id === 'examination' ? 'hours-weeks' :
 | 
			
		||||
                       phase.id === 'analysis' ? 'days-weeks' :
 | 
			
		||||
                       'hours-days'
 | 
			
		||||
    });
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  return dependencies;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Enhanced: Generate tool compatibility matrix
 | 
			
		||||
function generateToolCompatibilityMatrix(tools: any[]): CompatibilityMatrix[] {
 | 
			
		||||
  const matrix: CompatibilityMatrix[] = [];
 | 
			
		||||
  
 | 
			
		||||
  // Group tools by common characteristics
 | 
			
		||||
  const platformGroups = tools.reduce((acc: any, tool: any) => {
 | 
			
		||||
    if (tool.platforms) {
 | 
			
		||||
      tool.platforms.forEach((platform: string) => {
 | 
			
		||||
        if (!acc[platform]) acc[platform] = [];
 | 
			
		||||
        acc[platform].push(tool.name);
 | 
			
		||||
      });
 | 
			
		||||
    }
 | 
			
		||||
    return acc;
 | 
			
		||||
  }, {});
 | 
			
		||||
  
 | 
			
		||||
  const phaseGroups = tools.reduce((acc: any, tool: any) => {
 | 
			
		||||
    if (tool.phases) {
 | 
			
		||||
      tool.phases.forEach((phase: string) => {
 | 
			
		||||
        if (!acc[phase]) acc[phase] = [];
 | 
			
		||||
        acc[phase].push(tool.name);
 | 
			
		||||
      });
 | 
			
		||||
    }
 | 
			
		||||
    return acc;
 | 
			
		||||
  }, {});
 | 
			
		||||
  
 | 
			
		||||
  matrix.push({
 | 
			
		||||
    type: 'platform_compatibility',
 | 
			
		||||
    groups: platformGroups
 | 
			
		||||
  });
 | 
			
		||||
  
 | 
			
		||||
  matrix.push({
 | 
			
		||||
    type: 'phase_synergy',
 | 
			
		||||
    groups: phaseGroups
 | 
			
		||||
  });
 | 
			
		||||
  
 | 
			
		||||
  return matrix;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
async function loadRawData(): Promise<ToolsData> {
 | 
			
		||||
  if (!cachedData) {
 | 
			
		||||
    const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
 | 
			
		||||
@ -99,6 +252,21 @@ async function loadRawData(): Promise<ToolsData> {
 | 
			
		||||
    
 | 
			
		||||
    try {
 | 
			
		||||
      cachedData = ToolsDataSchema.parse(rawData);
 | 
			
		||||
      
 | 
			
		||||
      // Enhanced: Add default skill level descriptions if not provided
 | 
			
		||||
      if (!cachedData.skill_levels || Object.keys(cachedData.skill_levels).length === 0) {
 | 
			
		||||
        cachedData.skill_levels = {
 | 
			
		||||
          novice: "Minimal technical background required, guided interfaces",
 | 
			
		||||
          beginner: "Basic IT knowledge, some command-line familiarity helpful",
 | 
			
		||||
          intermediate: "Solid technical foundation, comfortable with various tools",
 | 
			
		||||
          advanced: "Extensive experience, deep technical understanding required",
 | 
			
		||||
          expert: "Specialist knowledge, cutting-edge techniques and complex scenarios"
 | 
			
		||||
        };
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      dataVersion = generateDataVersion(cachedData);
 | 
			
		||||
      console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);
 | 
			
		||||
      
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.error('YAML validation failed:', error);
 | 
			
		||||
      throw new Error('Invalid tools.yaml structure');
 | 
			
		||||
@ -123,47 +291,88 @@ export async function getToolsData(): Promise<ToolsData> {
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    lastRandomizationDate = today;
 | 
			
		||||
    
 | 
			
		||||
    cachedCompressedData = null;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  return cachedRandomizedData;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
export async function getCompressedToolsDataForAI(): Promise<CompressedToolsData> {
 | 
			
		||||
export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedToolsData> {
 | 
			
		||||
  if (!cachedCompressedData) {
 | 
			
		||||
    const data = await getToolsData();
 | 
			
		||||
    
 | 
			
		||||
    // Enhanced: More detailed tool information for micro-tasks
 | 
			
		||||
    const compressedTools = data.tools
 | 
			
		||||
      .filter(tool => tool.type !== 'concept') 
 | 
			
		||||
      .map(tool => {
 | 
			
		||||
        const { projectUrl, statusUrl, ...compressedTool } = tool;
 | 
			
		||||
        return compressedTool;
 | 
			
		||||
        return {
 | 
			
		||||
          ...compressedTool,
 | 
			
		||||
          // Enhanced: Add computed fields for AI
 | 
			
		||||
          is_hosted: projectUrl !== undefined && projectUrl !== null && projectUrl !== "" && projectUrl.trim() !== "",
 | 
			
		||||
          is_open_source: tool.license && tool.license !== 'Proprietary',
 | 
			
		||||
          complexity_score: tool.skillLevel === 'expert' ? 5 :
 | 
			
		||||
                           tool.skillLevel === 'advanced' ? 4 :
 | 
			
		||||
                           tool.skillLevel === 'intermediate' ? 3 :
 | 
			
		||||
                           tool.skillLevel === 'beginner' ? 2 : 1,
 | 
			
		||||
          // Enhanced: Phase-specific suitability hints
 | 
			
		||||
          phase_suitability: tool.phases?.map(phase => ({
 | 
			
		||||
            phase,
 | 
			
		||||
            primary_use: tool.tags?.find(tag => tag.includes(phase)) ? 'primary' : 'secondary'
 | 
			
		||||
          })) || []
 | 
			
		||||
        };
 | 
			
		||||
      });
 | 
			
		||||
    
 | 
			
		||||
    const concepts = data.tools
 | 
			
		||||
      .filter(tool => tool.type === 'concept')
 | 
			
		||||
      .map(concept => {
 | 
			
		||||
        const { projectUrl, statusUrl, platforms, accessType, license, ...compressedConcept } = concept;
 | 
			
		||||
        return compressedConcept;
 | 
			
		||||
        return {
 | 
			
		||||
          ...compressedConcept,
 | 
			
		||||
          // Enhanced: Learning difficulty indicator
 | 
			
		||||
          learning_complexity: concept.skillLevel === 'expert' ? 'very_high' :
 | 
			
		||||
                              concept.skillLevel === 'advanced' ? 'high' :
 | 
			
		||||
                              concept.skillLevel === 'intermediate' ? 'medium' :
 | 
			
		||||
                              'low'
 | 
			
		||||
        };
 | 
			
		||||
      });
 | 
			
		||||
    
 | 
			
		||||
    // Enhanced: Add rich context data
 | 
			
		||||
    const domainRelationships = generateDomainRelationships(data.domains, compressedTools);
 | 
			
		||||
    const phaseDependencies = generatePhaseDependencies(data.phases);
 | 
			
		||||
    const toolCompatibilityMatrix = generateToolCompatibilityMatrix(compressedTools);
 | 
			
		||||
    
 | 
			
		||||
    cachedCompressedData = {
 | 
			
		||||
      tools: compressedTools,
 | 
			
		||||
      concepts: concepts,
 | 
			
		||||
      domains: data.domains,
 | 
			
		||||
      phases: data.phases,
 | 
			
		||||
      'domain-agnostic-software': data['domain-agnostic-software']
 | 
			
		||||
      // scenarios intentionally excluded from AI data
 | 
			
		||||
      'domain-agnostic-software': data['domain-agnostic-software'],
 | 
			
		||||
      scenarios: data.scenarios, // Include scenarios for context
 | 
			
		||||
      skill_levels: data.skill_levels || {},
 | 
			
		||||
      // Enhanced context for micro-tasks
 | 
			
		||||
      domain_relationships: domainRelationships,
 | 
			
		||||
      phase_dependencies: phaseDependencies,
 | 
			
		||||
      tool_compatibility_matrix: toolCompatibilityMatrix
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    console.log(`[DATA SERVICE] Generated enhanced compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
 | 
			
		||||
    console.log(`[DATA SERVICE] Added context: ${domainRelationships.length} domain relationships, ${phaseDependencies.length} phase dependencies`);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  return cachedCompressedData;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Returns the current data version string, or null when the tools data has
 * not been loaded (or the cache was cleared) since startup.
 */
export function getDataVersion(): string | null {
  return dataVersion;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Resets every module-level cache (raw, randomized, compressed) together
 * with the randomization date and data version, forcing a full reload and
 * re-validation on the next data access.
 */
export function clearCache(): void {
  cachedData = null;
  cachedRandomizedData = null;
  cachedCompressedData = null;
  lastRandomizationDate = null;
  dataVersion = null;
  
  console.log('[DATA SERVICE] Enhanced cache cleared');
}
 | 
			
		||||
							
								
								
									
										267
									
								
								src/utils/embeddings.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										267
									
								
								src/utils/embeddings.ts
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,267 @@
 | 
			
		||||
// src/utils/embeddings.ts
 | 
			
		||||
import { createHash } from 'crypto';
import { promises as fs } from 'fs';
import path from 'path';
import { getCompressedToolsDataForAI } from './dataService.js';
 | 
			
		||||
 | 
			
		||||
/**
 * A single embedded item (tool or concept): its vector, the text that was
 * embedded, and the metadata carried along for filtering search results.
 */
interface EmbeddingData {
  id: string;                  // stable id: `${type}_${name with non-alphanumerics replaced}`
  type: 'tool' | 'concept';
  name: string;
  content: string;             // lowercased text that was sent to the embeddings API
  embedding: number[];         // embedding vector returned by the API
  metadata: {
    domains?: string[];
    phases?: string[];
    tags?: string[];
    skillLevel?: string;
    type?: string;
  };
}
 | 
			
		||||
 | 
			
		||||
/**
 * On-disk persistence format for the embeddings cache.
 * `version` is derived from the source tools data; a mismatch with the
 * current data triggers full regeneration.
 */
interface EmbeddingsDatabase {
  version: string;
  lastUpdated: number;         // epoch milliseconds of the last save
  embeddings: EmbeddingData[];
}
 | 
			
		||||
 | 
			
		||||
class EmbeddingsService {
 | 
			
		||||
  private embeddings: EmbeddingData[] = [];
 | 
			
		||||
  private isInitialized = false;
 | 
			
		||||
  private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
 | 
			
		||||
  private readonly batchSize: number;
 | 
			
		||||
  private readonly batchDelay: number;
 | 
			
		||||
  private readonly enabled: boolean;
 | 
			
		||||
 | 
			
		||||
  constructor() {
 | 
			
		||||
    this.enabled = process.env.AI_EMBEDDINGS_ENABLED === 'true';
 | 
			
		||||
    this.batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20', 10);
 | 
			
		||||
    this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  async initialize(): Promise<void> {
 | 
			
		||||
    if (!this.enabled) {
 | 
			
		||||
      console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
 | 
			
		||||
      return;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    try {
 | 
			
		||||
      console.log('[EMBEDDINGS] Initializing embeddings system...');
 | 
			
		||||
      
 | 
			
		||||
      // Create data directory if it doesn't exist
 | 
			
		||||
      await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true });
 | 
			
		||||
      
 | 
			
		||||
      const toolsData = await getCompressedToolsDataForAI();
 | 
			
		||||
      const currentDataHash = this.hashData(toolsData);
 | 
			
		||||
      
 | 
			
		||||
      // Try to load existing embeddings
 | 
			
		||||
      const existingEmbeddings = await this.loadEmbeddings();
 | 
			
		||||
      
 | 
			
		||||
      if (existingEmbeddings && existingEmbeddings.version === currentDataHash) {
 | 
			
		||||
        console.log('[EMBEDDINGS] Using cached embeddings');
 | 
			
		||||
        this.embeddings = existingEmbeddings.embeddings;
 | 
			
		||||
      } else {
 | 
			
		||||
        console.log('[EMBEDDINGS] Generating new embeddings...');
 | 
			
		||||
        await this.generateEmbeddings(toolsData, currentDataHash);
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      this.isInitialized = true;
 | 
			
		||||
      console.log(`[EMBEDDINGS] Initialized with ${this.embeddings.length} embeddings`);
 | 
			
		||||
      
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.error('[EMBEDDINGS] Failed to initialize:', error);
 | 
			
		||||
      this.isInitialized = false;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private hashData(data: any): string {
 | 
			
		||||
    return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> {
 | 
			
		||||
    try {
 | 
			
		||||
      const data = await fs.readFile(this.embeddingsPath, 'utf8');
 | 
			
		||||
      return JSON.parse(data);
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.log('[EMBEDDINGS] No existing embeddings found');
 | 
			
		||||
      return null;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async saveEmbeddings(version: string): Promise<void> {
 | 
			
		||||
    const database: EmbeddingsDatabase = {
 | 
			
		||||
      version,
 | 
			
		||||
      lastUpdated: Date.now(),
 | 
			
		||||
      embeddings: this.embeddings
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    await fs.writeFile(this.embeddingsPath, JSON.stringify(database, null, 2));
 | 
			
		||||
    console.log(`[EMBEDDINGS] Saved ${this.embeddings.length} embeddings to disk`);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private createContentString(item: any): string {
 | 
			
		||||
    const parts = [
 | 
			
		||||
      item.name,
 | 
			
		||||
      item.description || '',
 | 
			
		||||
      ...(item.tags || []),
 | 
			
		||||
      ...(item.domains || []),
 | 
			
		||||
      ...(item.phases || [])
 | 
			
		||||
    ];
 | 
			
		||||
    
 | 
			
		||||
    return parts.filter(Boolean).join(' ').toLowerCase();
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async generateEmbeddingsBatch(contents: string[]): Promise<number[][]> {
 | 
			
		||||
    const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
 | 
			
		||||
    const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
 | 
			
		||||
    const model = process.env.AI_EMBEDDINGS_MODEL;
 | 
			
		||||
 | 
			
		||||
    if (!endpoint || !apiKey || !model) {
 | 
			
		||||
      throw new Error('Missing embeddings API configuration');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const response = await fetch(endpoint, {
 | 
			
		||||
      method: 'POST',
 | 
			
		||||
      headers: {
 | 
			
		||||
        'Content-Type': 'application/json',
 | 
			
		||||
        'Authorization': `Bearer ${apiKey}`
 | 
			
		||||
      },
 | 
			
		||||
      body: JSON.stringify({
 | 
			
		||||
        model,
 | 
			
		||||
        input: contents
 | 
			
		||||
      })
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    if (!response.ok) {
 | 
			
		||||
      const error = await response.text();
 | 
			
		||||
      throw new Error(`Embeddings API error: ${response.status} - ${error}`);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const data = await response.json();
 | 
			
		||||
    return data.data.map((item: any) => item.embedding);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private async generateEmbeddings(toolsData: any, version: string): Promise<void> {
 | 
			
		||||
    const allItems = [
 | 
			
		||||
      ...toolsData.tools.map((tool: any) => ({ ...tool, type: 'tool' })),
 | 
			
		||||
      ...toolsData.concepts.map((concept: any) => ({ ...concept, type: 'concept' }))
 | 
			
		||||
    ];
 | 
			
		||||
 | 
			
		||||
    const contents = allItems.map(item => this.createContentString(item));
 | 
			
		||||
    this.embeddings = [];
 | 
			
		||||
 | 
			
		||||
    // Process in batches to respect rate limits
 | 
			
		||||
    for (let i = 0; i < contents.length; i += this.batchSize) {
 | 
			
		||||
      const batch = contents.slice(i, i + this.batchSize);
 | 
			
		||||
      const batchItems = allItems.slice(i, i + this.batchSize);
 | 
			
		||||
      
 | 
			
		||||
      console.log(`[EMBEDDINGS] Processing batch ${Math.ceil((i + 1) / this.batchSize)} of ${Math.ceil(contents.length / this.batchSize)}`);
 | 
			
		||||
      
 | 
			
		||||
      try {
 | 
			
		||||
        const embeddings = await this.generateEmbeddingsBatch(batch);
 | 
			
		||||
        
 | 
			
		||||
        embeddings.forEach((embedding, index) => {
 | 
			
		||||
          const item = batchItems[index];
 | 
			
		||||
          this.embeddings.push({
 | 
			
		||||
            id: `${item.type}_${item.name.replace(/[^a-zA-Z0-9]/g, '_')}`,
 | 
			
		||||
            type: item.type,
 | 
			
		||||
            name: item.name,
 | 
			
		||||
            content: batch[index],
 | 
			
		||||
            embedding,
 | 
			
		||||
            metadata: {
 | 
			
		||||
              domains: item.domains,
 | 
			
		||||
              phases: item.phases,
 | 
			
		||||
              tags: item.tags,
 | 
			
		||||
              skillLevel: item.skillLevel,
 | 
			
		||||
              type: item.type
 | 
			
		||||
            }
 | 
			
		||||
          });
 | 
			
		||||
        });
 | 
			
		||||
        
 | 
			
		||||
        // Rate limiting delay between batches
 | 
			
		||||
        if (i + this.batchSize < contents.length) {
 | 
			
		||||
          await new Promise(resolve => setTimeout(resolve, this.batchDelay));
 | 
			
		||||
        }
 | 
			
		||||
        
 | 
			
		||||
      } catch (error) {
 | 
			
		||||
        console.error(`[EMBEDDINGS] Failed to process batch ${Math.ceil((i + 1) / this.batchSize)}:`, error);
 | 
			
		||||
        throw error;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    await this.saveEmbeddings(version);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  public async embedText(text: string): Promise<number[]> {
 | 
			
		||||
    // Re‑use the private batch helper to avoid auth duplication
 | 
			
		||||
    const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
 | 
			
		||||
    return embedding;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  private cosineSimilarity(a: number[], b: number[]): number {
 | 
			
		||||
    let dotProduct = 0;
 | 
			
		||||
    let normA = 0;
 | 
			
		||||
    let normB = 0;
 | 
			
		||||
    
 | 
			
		||||
    for (let i = 0; i < a.length; i++) {
 | 
			
		||||
      dotProduct += a[i] * b[i];
 | 
			
		||||
      normA += a[i] * a[i];
 | 
			
		||||
      normB += b[i] * b[i];
 | 
			
		||||
    }
 | 
			
		||||
    
 | 
			
		||||
    return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<EmbeddingData[]> {
 | 
			
		||||
    if (!this.enabled || !this.isInitialized || this.embeddings.length === 0) {
 | 
			
		||||
      return [];
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    try {
 | 
			
		||||
      // Generate embedding for query
 | 
			
		||||
      const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
 | 
			
		||||
      const queryEmbedding = queryEmbeddings[0];
 | 
			
		||||
 | 
			
		||||
      // Calculate similarities
 | 
			
		||||
      const similarities = this.embeddings.map(item => ({
 | 
			
		||||
        ...item,
 | 
			
		||||
        similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
 | 
			
		||||
      }));
 | 
			
		||||
 | 
			
		||||
      // Filter by threshold and sort by similarity
 | 
			
		||||
      return similarities
 | 
			
		||||
        .filter(item => item.similarity >= threshold)
 | 
			
		||||
        .sort((a, b) => b.similarity - a.similarity)
 | 
			
		||||
        .slice(0, maxResults);
 | 
			
		||||
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.error('[EMBEDDINGS] Failed to find similar items:', error);
 | 
			
		||||
      return [];
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  isEnabled(): boolean {
 | 
			
		||||
    return this.enabled && this.isInitialized;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  getStats(): { enabled: boolean; initialized: boolean; count: number } {
 | 
			
		||||
    return {
 | 
			
		||||
      enabled: this.enabled,
 | 
			
		||||
      initialized: this.isInitialized,
 | 
			
		||||
      count: this.embeddings.length
 | 
			
		||||
    };
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// Global singleton instance shared by every importer of this module.
const embeddingsService = new EmbeddingsService();

export { embeddingsService, type EmbeddingData };

// Auto-initialize on import in server environments (skipped in browser
// bundles and under NODE_ENV=test). Fire-and-forget: a failure is logged
// and the service simply reports itself as not initialized.
if (typeof window === 'undefined' && process.env.NODE_ENV !== 'test') {
  embeddingsService.initialize().catch(error => {
    console.error('[EMBEDDINGS] Auto-initialization failed:', error);
  });
}
 | 
			
		||||
@ -1,4 +1,4 @@
 | 
			
		||||
// src/utils/rateLimitedQueue.ts
 | 
			
		||||
// src/utils/rateLimitedQueue.ts - FIXED: Memory leak and better cleanup
 | 
			
		||||
 | 
			
		||||
import dotenv from "dotenv";
 | 
			
		||||
 | 
			
		||||
@ -31,6 +31,43 @@ class RateLimitedQueue {
 | 
			
		||||
  private delayMs = RATE_LIMIT_DELAY_MS;
 | 
			
		||||
  private lastProcessedAt = 0;
 | 
			
		||||
  private currentlyProcessingTaskId: string | null = null;
 | 
			
		||||
  
 | 
			
		||||
  private cleanupInterval: NodeJS.Timeout;
 | 
			
		||||
  private readonly TASK_RETENTION_MS = 30000; 
 | 
			
		||||
 | 
			
		||||
  constructor() {
    // Periodically evict finished tasks so the in-memory task list cannot
    // grow without bound; the interval is stopped via shutdown().
    this.cleanupInterval = setInterval(() => {
      this.cleanupOldTasks();
    }, 30000);
  }
 | 
			
		||||
 | 
			
		||||
  /**
   * Drops finished tasks that have been retained longer than
   * TASK_RETENTION_MS. Queued and in-flight tasks are always kept, as are
   * finished tasks that carry no completedAt timestamp.
   */
  private cleanupOldTasks(): void {
    const now = Date.now();
    const initialLength = this.tasks.length;
    
    this.tasks = this.tasks.filter(task => {
      // Active work is never evicted.
      if (task.status === 'queued' || task.status === 'processing') {
        return true;
      }
      
      // Evict finished tasks once their retention window has elapsed.
      if (task.completedAt && (now - task.completedAt) > this.TASK_RETENTION_MS) {
        return false;
      }
      
      return true;
    });
    
    const cleaned = initialLength - this.tasks.length;
    if (cleaned > 0) {
      console.log(`[QUEUE] Cleaned up ${cleaned} old tasks, ${this.tasks.length} remaining`);
    }
  }
 | 
			
		||||
 | 
			
		||||
  /** Stops the periodic cleanup timer; call when disposing of the queue. */
  public shutdown(): void {
    if (this.cleanupInterval) {
      clearInterval(this.cleanupInterval);
    }
  }
 | 
			
		||||
 | 
			
		||||
  add<T>(task: Task<T>, taskId?: string): Promise<T> {
 | 
			
		||||
    const id = taskId || this.generateTaskId();
 | 
			
		||||
@ -103,7 +140,6 @@ class RateLimitedQueue {
 | 
			
		||||
            const processingOffset = processingTasks.length > 0 ? 1 : 0;
 | 
			
		||||
            status.currentPosition = processingOffset + positionInQueue + 1;
 | 
			
		||||
          }
 | 
			
		||||
        } else if (task.status === 'completed' || task.status === 'failed') {
 | 
			
		||||
        }
 | 
			
		||||
      } else {        
 | 
			
		||||
        const taskTimestamp = taskId.match(/ai_(\d+)_/)?.[1];
 | 
			
		||||
@ -152,7 +188,6 @@ class RateLimitedQueue {
 | 
			
		||||
        this.currentlyProcessingTaskId = nextTask.id;
 | 
			
		||||
        this.lastProcessedAt = Date.now();
 | 
			
		||||
        
 | 
			
		||||
        
 | 
			
		||||
        try {
 | 
			
		||||
          await nextTask.task();
 | 
			
		||||
          nextTask.status = 'completed';
 | 
			
		||||
@ -166,14 +201,6 @@ class RateLimitedQueue {
 | 
			
		||||
        
 | 
			
		||||
        this.currentlyProcessingTaskId = null;
 | 
			
		||||
        
 | 
			
		||||
        setTimeout(() => {
 | 
			
		||||
          const index = this.tasks.findIndex(t => t.id === nextTask.id);
 | 
			
		||||
          if (index >= 0) {
 | 
			
		||||
            console.log(`[QUEUE] Removing completed task ${nextTask.id}`);
 | 
			
		||||
            this.tasks.splice(index, 1);
 | 
			
		||||
          }
 | 
			
		||||
        }, 10000); 
 | 
			
		||||
        
 | 
			
		||||
        const hasMoreQueued = this.tasks.some(t => t.status === 'queued');
 | 
			
		||||
        if (hasMoreQueued) {
 | 
			
		||||
          console.log(`[QUEUE] Waiting ${this.delayMs}ms before next task`);
 | 
			
		||||
@ -201,4 +228,8 @@ export function getQueueStatus(taskId?: string): QueueStatus {
 | 
			
		||||
  return queue.getStatus(taskId);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Stops the module-level queue's cleanup timer (e.g. on process shutdown)
 * so the interval does not keep the event loop alive.
 */
export function shutdownQueue(): void {
  queue.shutdown();
}
 | 
			
		||||
 | 
			
		||||
export default queue;
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user