Merge branch 'main' of https://git.cc24.dev/HSMW_CC24/forensic-pathways
This commit is contained in:
commit
3973479ae4
84
.env.example
84
.env.example
@ -2,34 +2,74 @@
|
|||||||
# ForensicPathways Environment Configuration
|
# ForensicPathways Environment Configuration
|
||||||
# ===========================================
|
# ===========================================
|
||||||
|
|
||||||
# Authentication & OIDC (Required)
|
# === Authentication Configuration ===
|
||||||
AUTH_SECRET=change-this-to-a-strong-secret-key-in-production
|
AUTHENTICATION_NECESSARY=false
|
||||||
|
AUTHENTICATION_NECESSARY_CONTRIBUTIONS=false
|
||||||
|
AUTHENTICATION_NECESSARY_AI=false
|
||||||
|
AUTH_SECRET=your-secret-key-change-in-production
|
||||||
|
|
||||||
|
# OIDC Configuration (if authentication enabled)
|
||||||
OIDC_ENDPOINT=https://your-oidc-provider.com
|
OIDC_ENDPOINT=https://your-oidc-provider.com
|
||||||
OIDC_CLIENT_ID=your-oidc-client-id
|
OIDC_CLIENT_ID=your-client-id
|
||||||
OIDC_CLIENT_SECRET=your-oidc-client-secret
|
OIDC_CLIENT_SECRET=your-client-secret
|
||||||
|
|
||||||
# Auth Scopes - set to true in prod
|
# ===================================================================
|
||||||
AUTHENTICATION_NECESSARY_CONTRIBUTIONS=true
|
# AI CONFIGURATION - Complete Reference for Improved Pipeline
|
||||||
AUTHENTICATION_NECESSARY_AI=true
|
# ===================================================================
|
||||||
|
|
||||||
# Application Configuration (Required)
|
# === CORE AI ENDPOINTS & MODELS ===
|
||||||
PUBLIC_BASE_URL=https://your-domain.com
|
AI_API_ENDPOINT=https://llm.mikoshi.de
|
||||||
NODE_ENV=production
|
AI_API_KEY=sREDACTED3w
|
||||||
|
AI_MODEL='mistral/mistral-small-latest'
|
||||||
|
|
||||||
# AI Service Configuration (Required for AI features)
|
# === IMPROVED PIPELINE: Use separate analyzer model (mistral-small is fine) ===
|
||||||
AI_MODEL=mistral-large-latest
|
AI_ANALYZER_ENDPOINT=https://llm.mikoshi.de
|
||||||
AI_API_ENDPOINT=https://api.mistral.ai
|
AI_ANALYZER_API_KEY=skREDACTEDw3w
|
||||||
AI_API_KEY=your-mistral-api-key
|
AI_ANALYZER_MODEL='mistral/mistral-small-latest'
|
||||||
AI_RATE_LIMIT_DELAY_MS=1000
|
|
||||||
|
|
||||||
# Git Integration (Required for contributions)
|
# === EMBEDDINGS CONFIGURATION ===
|
||||||
GIT_REPO_URL=https://git.cc24.dev/mstoeck3/forensic-pathways
|
AI_EMBEDDINGS_ENABLED=true
|
||||||
GIT_PROVIDER=gitea
|
AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
|
||||||
GIT_API_ENDPOINT=https://git.cc24.dev/api/v1
|
AI_EMBEDDINGS_API_KEY=ZREDACTED3wL
|
||||||
GIT_API_TOKEN=your-git-api-token
|
AI_EMBEDDINGS_MODEL=mistral-embed
|
||||||
|
AI_EMBEDDINGS_BATCH_SIZE=20
|
||||||
|
AI_EMBEDDINGS_BATCH_DELAY_MS=1000
|
||||||
|
|
||||||
# File Upload Configuration (Optional)
|
# === PIPELINE: VectorIndex (HNSW) Configuration ===
|
||||||
LOCAL_UPLOAD_PATH=./public/uploads
|
AI_MAX_SELECTED_ITEMS=60 # Tools visible to each micro-task
|
||||||
|
AI_EMBEDDING_CANDIDATES=60 # VectorIndex candidates (HNSW is more efficient)
|
||||||
|
AI_SIMILARITY_THRESHOLD=0.3 # Not used by VectorIndex (uses cosine distance internally)
|
||||||
|
|
||||||
|
# === MICRO-TASK CONFIGURATION ===
|
||||||
|
AI_MICRO_TASK_DELAY_MS=500 # Delay between micro-tasks
|
||||||
|
AI_MICRO_TASK_TIMEOUT_MS=25000 # Timeout per micro-task (increased for full context)
|
||||||
|
|
||||||
|
# === RATE LIMITING ===
|
||||||
|
AI_RATE_LIMIT_DELAY_MS=3000 # Main rate limit delay
|
||||||
|
AI_RATE_LIMIT_MAX_REQUESTS=6 # Main requests per minute (reduced - fewer but richer calls)
|
||||||
|
AI_MICRO_TASK_RATE_LIMIT=15 # Micro-task requests per minute (was 30)
|
||||||
|
|
||||||
|
# === QUEUE MANAGEMENT ===
|
||||||
|
AI_QUEUE_MAX_SIZE=50
|
||||||
|
AI_QUEUE_CLEANUP_INTERVAL_MS=300000
|
||||||
|
|
||||||
|
# === PERFORMANCE & MONITORING ===
|
||||||
|
AI_MICRO_TASK_DEBUG=true
|
||||||
|
AI_PERFORMANCE_METRICS=true
|
||||||
|
AI_RESPONSE_CACHE_TTL_MS=3600000
|
||||||
|
|
||||||
|
# ===================================================================
|
||||||
|
# LEGACY VARIABLES (still used but less important)
|
||||||
|
# ===================================================================
|
||||||
|
|
||||||
|
# These are still used by other parts of the system:
|
||||||
|
AI_RESPONSE_CACHE_TTL_MS=3600000 # For caching responses
|
||||||
|
AI_QUEUE_MAX_SIZE=50 # Queue management
|
||||||
|
AI_QUEUE_CLEANUP_INTERVAL_MS=300000 # Queue cleanup
|
||||||
|
|
||||||
|
# === Application Configuration ===
|
||||||
|
PUBLIC_BASE_URL=http://localhost:4321
|
||||||
|
NODE_ENV=development
|
||||||
|
|
||||||
# Nextcloud Integration (Optional)
|
# Nextcloud Integration (Optional)
|
||||||
NEXTCLOUD_ENDPOINT=https://your-nextcloud.com
|
NEXTCLOUD_ENDPOINT=https://your-nextcloud.com
|
||||||
|
358
RAG-Roadmap.md
Normal file
358
RAG-Roadmap.md
Normal file
@ -0,0 +1,358 @@
|
|||||||
|
# Forensic-Grade RAG Implementation Roadmap
|
||||||
|
|
||||||
|
## Context & Current State Analysis
|
||||||
|
|
||||||
|
You have access to a forensic tools recommendation system built with:
|
||||||
|
- **Embeddings-based retrieval** (src/utils/embeddings.ts)
|
||||||
|
- **Multi-stage AI pipeline** (src/utils/aiPipeline.ts)
|
||||||
|
- **Micro-task processing** for detailed analysis
|
||||||
|
- **Rate limiting and queue management** (src/utils/rateLimitedQueue.ts)
|
||||||
|
- **YAML-based tool database** (src/data/tools.yaml)
|
||||||
|
|
||||||
|
**Current Architecture**: Basic RAG (Retrieve → AI Selection → Micro-task Generation)
|
||||||
|
|
||||||
|
**Target Architecture**: Forensic-Grade RAG with transparency, objectivity, and reproducibility
|
||||||
|
|
||||||
|
## Implementation Roadmap
|
||||||
|
|
||||||
|
### PHASE 1: Configuration Externalization & AI Architecture Enhancement (Weeks 1-2)
|
||||||
|
|
||||||
|
#### 1.1 Complete Configuration Externalization
|
||||||
|
**Objective**: Remove all hard-coded values from codebase (except AI prompts)
|
||||||
|
|
||||||
|
**Tasks**:
|
||||||
|
1. **Create comprehensive configuration schema** in `src/config/`
|
||||||
|
- `forensic-scoring.yaml` - All scoring criteria, weights, thresholds
|
||||||
|
- `ai-models.yaml` - AI model configurations and routing
|
||||||
|
- `system-parameters.yaml` - Rate limits, queue settings, processing parameters
|
||||||
|
- `validation-criteria.yaml` - Expert validation rules, bias detection parameters
|
||||||
|
|
||||||
|
2. **Implement configuration loader** (`src/utils/configLoader.ts`)
|
||||||
|
- Hot-reload capability for configuration changes
|
||||||
|
- Environment-specific overrides (dev/staging/prod)
|
||||||
|
- Configuration validation and schema enforcement
|
||||||
|
- Default fallbacks for missing values
|
||||||
|
|
||||||
|
3. **Audit existing codebase** for hard-coded values:
|
||||||
|
- Search for literal numbers, strings, arrays in TypeScript files
|
||||||
|
- Extract to configuration files with meaningful names
|
||||||
|
- Ensure all thresholds (similarity scores, rate limits, token counts) are configurable
|
||||||
|
|
||||||
|
#### 1.2 Dual AI Model Architecture Implementation
|
||||||
|
**Objective**: Implement large + small model strategy for optimal cost/performance
|
||||||
|
|
||||||
|
**Tasks**:
|
||||||
|
1. **Extend environment configuration**:
|
||||||
|
```
|
||||||
|
# Strategic Analysis Model (Large, Few Tokens)
|
||||||
|
AI_STRATEGIC_ENDPOINT=
|
||||||
|
AI_STRATEGIC_API_KEY=
|
||||||
|
AI_STRATEGIC_MODEL=mistral-large-latest
|
||||||
|
AI_STRATEGIC_MAX_TOKENS=500
|
||||||
|
AI_STRATEGIC_CONTEXT_WINDOW=32000
|
||||||
|
|
||||||
|
# Content Generation Model (Small, Many Tokens)
|
||||||
|
AI_CONTENT_ENDPOINT=
|
||||||
|
AI_CONTENT_API_KEY=
|
||||||
|
AI_CONTENT_MODEL=mistral-small-latest
|
||||||
|
AI_CONTENT_MAX_TOKENS=2000
|
||||||
|
AI_CONTENT_CONTEXT_WINDOW=8000
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Create AI router** (`src/utils/aiRouter.ts`):
|
||||||
|
- Route different task types to appropriate models
|
||||||
|
- **Strategic tasks** → Large model: tool selection, bias analysis, methodology decisions
|
||||||
|
- **Content tasks** → Small model: descriptions, explanations, micro-task outputs
|
||||||
|
- Automatic fallback logic if primary model fails
|
||||||
|
- Usage tracking and cost optimization
|
||||||
|
|
||||||
|
3. **Update aiPipeline.ts**:
|
||||||
|
- Replace single `callAI()` method with task-specific methods
|
||||||
|
- Implement intelligent routing based on task complexity
|
||||||
|
- Add token estimation for optimal model selection
|
||||||
|
|
||||||
|
### PHASE 2: Evidence-Based Scoring Framework (Weeks 3-5)
|
||||||
|
|
||||||
|
#### 2.1 Forensic Scoring Engine Implementation
|
||||||
|
**Objective**: Replace subjective AI selection with objective, measurable criteria
|
||||||
|
|
||||||
|
**Tasks**:
|
||||||
|
1. **Create scoring framework** (`src/scoring/ForensicScorer.ts`):
|
||||||
|
```typescript
|
||||||
|
interface ScoringCriterion {
|
||||||
|
name: string;
|
||||||
|
weight: number;
|
||||||
|
methodology: string;
|
||||||
|
dataSources: string[];
|
||||||
|
calculator: (tool: Tool, scenario: Scenario) => Promise<CriterionScore>;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface CriterionScore {
|
||||||
|
value: number; // 0-100
|
||||||
|
confidence: number; // 0-100
|
||||||
|
evidence: Evidence[];
|
||||||
|
lastUpdated: Date;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Implement core scoring criteria**:
|
||||||
|
- **Court Admissibility Scorer**: Based on legal precedent database
|
||||||
|
- **Scientific Validity Scorer**: Based on peer-reviewed research citations
|
||||||
|
- **Methodology Alignment Scorer**: NIST SP 800-86 compliance assessment
|
||||||
|
- **Expert Consensus Scorer**: Practitioner survey data integration
|
||||||
|
- **Error Rate Scorer**: Known false positive/negative rates
|
||||||
|
|
||||||
|
3. **Build evidence provenance system**:
|
||||||
|
- Track source of every score component
|
||||||
|
- Maintain citation database for all claims
|
||||||
|
- Version control for scoring methodologies
|
||||||
|
- Automatic staleness detection for outdated evidence
|
||||||
|
|
||||||
|
#### 2.2 Deterministic Core Implementation
|
||||||
|
**Objective**: Ensure reproducible results for identical inputs
|
||||||
|
|
||||||
|
**Tasks**:
|
||||||
|
1. **Implement deterministic pipeline** (`src/analysis/DeterministicAnalyzer.ts`):
|
||||||
|
- Rule-based scenario classification (SCADA/Mobile/Network/etc.)
|
||||||
|
- Mathematical scoring combination (weighted averages, not AI decisions)
|
||||||
|
- Consistent tool ranking algorithms
|
||||||
|
- Reproducibility validation tests
|
||||||
|
|
||||||
|
2. **Add AI enhancement layer**:
|
||||||
|
- AI provides explanations, NOT decisions
|
||||||
|
- AI generates workflow descriptions based on deterministic selections
|
||||||
|
- AI creates contextual advice around objective tool choices
|
||||||
|
|
||||||
|
### PHASE 3: Transparency & Audit Trail System (Weeks 4-6)
|
||||||
|
|
||||||
|
#### 3.1 Complete Audit Trail Implementation
|
||||||
|
**Objective**: Track every decision with forensic-grade documentation
|
||||||
|
|
||||||
|
**Tasks**:
|
||||||
|
1. **Create audit framework** (`src/audit/AuditTrail.ts`):
|
||||||
|
```typescript
|
||||||
|
interface ForensicAuditTrail {
|
||||||
|
queryId: string;
|
||||||
|
userQuery: string;
|
||||||
|
processingSteps: AuditStep[];
|
||||||
|
finalRecommendation: RecommendationWithEvidence;
|
||||||
|
reproducibilityHash: string;
|
||||||
|
validationStatus: ValidationStatus;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface AuditStep {
|
||||||
|
stepName: string;
|
||||||
|
input: any;
|
||||||
|
methodology: string;
|
||||||
|
output: any;
|
||||||
|
evidence: Evidence[];
|
||||||
|
confidence: number;
|
||||||
|
processingTime: number;
|
||||||
|
modelUsed?: string;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Implement evidence citation system**:
|
||||||
|
- Automatic citation generation for all claims
|
||||||
|
- Link to source standards (NIST, ISO, RFC)
|
||||||
|
- Reference scientific papers for methodology choices
|
||||||
|
- Track expert validation contributors
|
||||||
|
|
||||||
|
3. **Build explanation generator**:
|
||||||
|
- Human-readable reasoning for every recommendation
|
||||||
|
- "Why this tool" and "Why not alternatives" explanations
|
||||||
|
- Confidence level communication
|
||||||
|
- Uncertainty quantification
|
||||||
|
|
||||||
|
#### 3.2 Bias Detection & Mitigation System
|
||||||
|
**Objective**: Actively detect and correct recommendation biases
|
||||||
|
|
||||||
|
**Tasks**:
|
||||||
|
1. **Implement bias detection** (`src/bias/BiasDetector.ts`):
|
||||||
|
- **Popularity bias**: Over-recommendation of well-known tools
|
||||||
|
- **Availability bias**: Preference for easily accessible tools
|
||||||
|
- **Recency bias**: Over-weighting of newest tools
|
||||||
|
- **Cultural bias**: Platform or methodology preferences
|
||||||
|
|
||||||
|
2. **Create mitigation strategies**:
|
||||||
|
- Automatic bias adjustment algorithms
|
||||||
|
- Diversity requirements for recommendations
|
||||||
|
- Fairness metrics across tool categories
|
||||||
|
- Bias reporting in audit trails
|
||||||
|
|
||||||
|
### PHASE 4: Expert Validation & Learning System (Weeks 6-8)
|
||||||
|
|
||||||
|
#### 4.1 Expert Review Integration
|
||||||
|
**Objective**: Enable forensic experts to validate and improve recommendations
|
||||||
|
|
||||||
|
**Tasks**:
|
||||||
|
1. **Build expert validation interface** (`src/validation/ExpertReview.ts`):
|
||||||
|
- Structured feedback collection from forensic practitioners
|
||||||
|
- Agreement/disagreement tracking with detailed reasoning
|
||||||
|
- Expert consensus building over time
|
||||||
|
- Minority opinion preservation
|
||||||
|
|
||||||
|
2. **Implement validation loop**:
|
||||||
|
- Flag recommendations requiring expert review
|
||||||
|
- Track expert validation rates and patterns
|
||||||
|
- Update scoring based on real-world feedback
|
||||||
|
- Methodology improvement based on expert input
|
||||||
|
|
||||||
|
#### 4.2 Real-World Case Learning
|
||||||
|
**Objective**: Learn from actual forensic investigations
|
||||||
|
|
||||||
|
**Tasks**:
|
||||||
|
1. **Create case study integration** (`src/learning/CaseStudyLearner.ts`):
|
||||||
|
- Anonymous case outcome tracking
|
||||||
|
- Tool effectiveness measurement in real scenarios
|
||||||
|
- Methodology success/failure analysis
|
||||||
|
- Continuous improvement based on field results
|
||||||
|
|
||||||
|
2. **Implement feedback loops**:
|
||||||
|
- Post-case recommendation validation
|
||||||
|
- Tool performance tracking in actual investigations
|
||||||
|
- Methodology refinement based on outcomes
|
||||||
|
- Success rate improvement over time
|
||||||
|
|
||||||
|
### PHASE 5: Advanced Features & Scientific Rigor (Weeks 7-10)
|
||||||
|
|
||||||
|
#### 5.1 Confidence & Uncertainty Quantification
|
||||||
|
**Objective**: Provide scientific confidence levels for all recommendations
|
||||||
|
|
||||||
|
**Tasks**:
|
||||||
|
1. **Implement uncertainty quantification** (`src/uncertainty/ConfidenceCalculator.ts`):
|
||||||
|
- Statistical confidence intervals for scores
|
||||||
|
- Uncertainty propagation through scoring pipeline
|
||||||
|
- Risk assessment for recommendation reliability
|
||||||
|
- Alternative recommendation ranking
|
||||||
|
|
||||||
|
2. **Add fallback recommendation system**:
|
||||||
|
- Multiple ranked alternatives for each recommendation
|
||||||
|
- Contingency planning for tool failures
|
||||||
|
- Risk-based recommendation portfolios
|
||||||
|
- Sensitivity analysis for critical decisions
|
||||||
|
|
||||||
|
#### 5.2 Reproducibility Testing Framework
|
||||||
|
**Objective**: Ensure consistent results across time and implementations
|
||||||
|
|
||||||
|
**Tasks**:
|
||||||
|
1. **Build reproducibility testing** (`src/testing/ReproducibilityTester.ts`):
|
||||||
|
- Automated consistency validation
|
||||||
|
- Inter-rater reliability testing
|
||||||
|
- Cross-temporal stability analysis
|
||||||
|
- Version control for methodology changes
|
||||||
|
|
||||||
|
2. **Implement quality assurance**:
|
||||||
|
- Continuous integration for reproducibility
|
||||||
|
- Regression testing for methodology changes
|
||||||
|
- Performance monitoring for consistency
|
||||||
|
- Alert system for unexpected variations
|
||||||
|
|
||||||
|
### PHASE 6: Integration & Production Readiness (Weeks 9-12)
|
||||||
|
|
||||||
|
#### 6.1 System Integration
|
||||||
|
**Objective**: Integrate all forensic-grade components seamlessly
|
||||||
|
|
||||||
|
**Tasks**:
|
||||||
|
1. **Update existing components**:
|
||||||
|
- Modify `aiPipeline.ts` to use new scoring framework
|
||||||
|
- Update `embeddings.ts` with evidence tracking
|
||||||
|
- Enhance `rateLimitedQueue.ts` with audit capabilities
|
||||||
|
- Refactor `query.ts` API to return audit trails
|
||||||
|
|
||||||
|
2. **Performance optimization**:
|
||||||
|
- Caching strategies for expensive evidence lookups
|
||||||
|
- Parallel processing for scoring criteria
|
||||||
|
- Efficient storage for audit trails
|
||||||
|
- Load balancing for dual AI models
|
||||||
|
|
||||||
|
#### 6.2 Production Features
|
||||||
|
**Objective**: Make system ready for professional forensic use
|
||||||
|
|
||||||
|
**Tasks**:
|
||||||
|
1. **Add professional features**:
|
||||||
|
- Export recommendations to forensic report formats
|
||||||
|
- Integration with existing forensic workflows
|
||||||
|
- Batch processing for multiple scenarios
|
||||||
|
- API endpoints for external tool integration
|
||||||
|
|
||||||
|
2. **Implement monitoring & maintenance**:
|
||||||
|
- Health checks for all system components
|
||||||
|
- Performance monitoring for response times
|
||||||
|
- Error tracking and alerting
|
||||||
|
- Automatic system updates for new evidence
|
||||||
|
|
||||||
|
## Technical Implementation Guidelines
|
||||||
|
|
||||||
|
### Configuration Management
|
||||||
|
- Use YAML files for human-readable configuration
|
||||||
|
- Implement JSON Schema validation for all config files
|
||||||
|
- Support environment variable overrides
|
||||||
|
- Hot-reload for development, restart for production changes
|
||||||
|
|
||||||
|
### AI Model Routing Strategy
|
||||||
|
```typescript
|
||||||
|
// Task Classification for Model Selection
|
||||||
|
const AI_TASK_ROUTING = {
|
||||||
|
strategic: ['tool-selection', 'bias-analysis', 'methodology-decisions'],
|
||||||
|
content: ['descriptions', 'explanations', 'micro-tasks', 'workflows']
|
||||||
|
};
|
||||||
|
|
||||||
|
// Cost Optimization Logic
|
||||||
|
if (taskComplexity === 'high' && responseTokens < 500) {
|
||||||
|
useModel = 'large';
|
||||||
|
} else if (taskComplexity === 'low' && responseTokens > 1000) {
|
||||||
|
useModel = 'small';
|
||||||
|
} else {
|
||||||
|
useModel = config.defaultModel;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Evidence Database Structure
|
||||||
|
```typescript
|
||||||
|
interface EvidenceSource {
|
||||||
|
type: 'standard' | 'paper' | 'case-law' | 'expert-survey';
|
||||||
|
citation: string;
|
||||||
|
reliability: number;
|
||||||
|
lastValidated: Date;
|
||||||
|
content: string;
|
||||||
|
metadata: Record<string, any>;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Quality Assurance Requirements
|
||||||
|
- All scoring criteria must have documented methodologies
|
||||||
|
- Every recommendation must include confidence levels
|
||||||
|
- All AI-generated content must be marked as such
|
||||||
|
- Reproducibility tests must pass with >95% consistency
|
||||||
|
- Expert validation rate must exceed 80% for production use
|
||||||
|
|
||||||
|
## Success Metrics
|
||||||
|
|
||||||
|
### Forensic Quality Metrics
|
||||||
|
- **Transparency**: 100% of decisions traceable to evidence
|
||||||
|
- **Objectivity**: <5% variance in scoring between runs
|
||||||
|
- **Reproducibility**: >95% identical results for identical inputs
|
||||||
|
- **Expert Agreement**: >80% expert validation rate
|
||||||
|
- **Bias Reduction**: <10% bias score across all categories
|
||||||
|
|
||||||
|
### Performance Metrics
|
||||||
|
- **Response Time**: <30 seconds for workflow recommendations
|
||||||
|
- **Accuracy**: >90% real-world case validation success
|
||||||
|
- **Coverage**: Support for >95% of common forensic scenarios
|
||||||
|
- **Reliability**: <1% system error rate
|
||||||
|
- **Cost Efficiency**: <50% cost reduction vs. single large model
|
||||||
|
|
||||||
|
## Risk Mitigation
|
||||||
|
|
||||||
|
### Technical Risks
|
||||||
|
- **AI Model Failures**: Implement robust fallback mechanisms
|
||||||
|
- **Configuration Errors**: Comprehensive validation and testing
|
||||||
|
- **Performance Issues**: Load testing and optimization
|
||||||
|
- **Data Corruption**: Backup and recovery procedures
|
||||||
|
|
||||||
|
### Forensic Risks
|
||||||
|
- **Bias Introduction**: Continuous monitoring and expert validation
|
||||||
|
- **Methodology Errors**: Peer review and scientific validation
|
||||||
|
- **Legal Challenges**: Ensure compliance with admissibility standards
|
||||||
|
- **Expert Disagreement**: Transparent uncertainty communication
|
File diff suppressed because it is too large
Load Diff
@ -91,19 +91,27 @@ const sortedTags = Object.entries(tagFrequency)
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<!-- Advanced Filters Section -->
|
<!-- Advanced Filters Section - COLLAPSIBLE -->
|
||||||
<div class="filter-section">
|
<div class="filter-section">
|
||||||
<div class="filter-card-compact">
|
<div class="filter-card-compact">
|
||||||
<div class="filter-header-compact">
|
<div class="filter-header-compact">
|
||||||
<h3>⚙️ Erweiterte Filter</h3>
|
<h3>⚙️ Erweiterte Filter</h3>
|
||||||
|
<div class="filter-header-controls">
|
||||||
<button class="filter-reset" id="reset-advanced" title="Erweiterte Filter zurücksetzen">
|
<button class="filter-reset" id="reset-advanced" title="Erweiterte Filter zurücksetzen">
|
||||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||||
<polyline points="1 4 1 10 7 10"/>
|
<polyline points="1 4 1 10 7 10"/>
|
||||||
<path d="M3.51 15a9 9 0 1 0 2.13-9.36L1 10"/>
|
<path d="M3.51 15a9 9 0 1 0 2.13-9.36L1 10"/>
|
||||||
</svg>
|
</svg>
|
||||||
</button>
|
</button>
|
||||||
|
<button class="collapse-toggle" id="toggle-advanced" data-collapsed="true" title="Erweiterte Filter ein/ausblenden">
|
||||||
|
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||||
|
<polyline points="6 9 12 15 18 9"></polyline>
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="collapsible-content hidden" id="advanced-filters-content">
|
||||||
<div class="advanced-filters-compact">
|
<div class="advanced-filters-compact">
|
||||||
<div class="filter-grid-compact">
|
<div class="filter-grid-compact">
|
||||||
<div class="filter-group">
|
<div class="filter-group">
|
||||||
@ -171,27 +179,36 @@ const sortedTags = Object.entries(tagFrequency)
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Tag Filters Section -->
|
<!-- Tag Filters Section - COLLAPSIBLE -->
|
||||||
<div class="filter-section">
|
<div class="filter-section">
|
||||||
<div class="filter-card-compact">
|
<div class="filter-card-compact">
|
||||||
<div class="filter-header-compact">
|
<div class="filter-header-compact">
|
||||||
<h3>🏷️ Tag-Filter</h3>
|
<h3>🏷️ Tag-Filter</h3>
|
||||||
<div class="tag-controls">
|
<div class="filter-header-controls">
|
||||||
<button class="filter-reset" id="reset-tags" title="Tags zurücksetzen">
|
<button class="filter-reset" id="reset-tags" title="Tags zurücksetzen">
|
||||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||||
<polyline points="1 4 1 10 7 10"/>
|
<polyline points="1 4 1 10 7 10"/>
|
||||||
<path d="M3.51 15a9 9 0 1 0 2.13-9.36L1 10"/>
|
<path d="M3.51 15a9 9 0 1 0 2.13-9.36L1 10"/>
|
||||||
</svg>
|
</svg>
|
||||||
</button>
|
</button>
|
||||||
<button id="tag-cloud-toggle" class="tag-toggle" data-expanded="false">
|
<button class="collapse-toggle" id="toggle-tags" data-collapsed="true" title="Tag-Filter ein/ausblenden">
|
||||||
Mehr zeigen
|
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||||
|
<polyline points="6 9 12 15 18 9"></polyline>
|
||||||
|
</svg>
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="collapsible-content hidden" id="tag-filters-content">
|
||||||
<div class="tag-section">
|
<div class="tag-section">
|
||||||
<div class="selected-tags" id="selected-tags"></div>
|
<div class="selected-tags" id="selected-tags"></div>
|
||||||
|
<div class="tag-controls">
|
||||||
|
<button id="tag-cloud-toggle" class="tag-toggle" data-expanded="false">
|
||||||
|
Mehr zeigen
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
<div class="tag-cloud" id="tag-cloud">
|
<div class="tag-cloud" id="tag-cloud">
|
||||||
{sortedTags.map((tag, index) => (
|
{sortedTags.map((tag, index) => (
|
||||||
<button
|
<button
|
||||||
@ -208,6 +225,7 @@ const sortedTags = Object.entries(tagFrequency)
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- View Controls Section -->
|
<!-- View Controls Section -->
|
||||||
<div class="filter-section">
|
<div class="filter-section">
|
||||||
@ -293,7 +311,12 @@ const sortedTags = Object.entries(tagFrequency)
|
|||||||
advanced: document.getElementById('reset-advanced'),
|
advanced: document.getElementById('reset-advanced'),
|
||||||
tags: document.getElementById('reset-tags'),
|
tags: document.getElementById('reset-tags'),
|
||||||
all: document.getElementById('reset-all-filters')
|
all: document.getElementById('reset-all-filters')
|
||||||
}
|
},
|
||||||
|
// Collapsible elements
|
||||||
|
toggleAdvanced: document.getElementById('toggle-advanced'),
|
||||||
|
toggleTags: document.getElementById('toggle-tags'),
|
||||||
|
advancedContent: document.getElementById('advanced-filters-content'),
|
||||||
|
tagContent: document.getElementById('tag-filters-content')
|
||||||
};
|
};
|
||||||
|
|
||||||
// Verify critical elements exist
|
// Verify critical elements exist
|
||||||
@ -307,6 +330,52 @@ const sortedTags = Object.entries(tagFrequency)
|
|||||||
let selectedPhase = '';
|
let selectedPhase = '';
|
||||||
let isTagCloudExpanded = false;
|
let isTagCloudExpanded = false;
|
||||||
|
|
||||||
|
// Collapsible functionality
|
||||||
|
function toggleCollapsible(toggleBtn, content, storageKey) {
|
||||||
|
const isCollapsed = toggleBtn.getAttribute('data-collapsed') === 'true';
|
||||||
|
const newState = !isCollapsed;
|
||||||
|
|
||||||
|
toggleBtn.setAttribute('data-collapsed', newState.toString());
|
||||||
|
|
||||||
|
if (newState) {
|
||||||
|
// Collapse
|
||||||
|
content.classList.add('hidden');
|
||||||
|
toggleBtn.style.transform = 'rotate(0deg)';
|
||||||
|
} else {
|
||||||
|
// Expand
|
||||||
|
content.classList.remove('hidden');
|
||||||
|
toggleBtn.style.transform = 'rotate(180deg)';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store state in sessionStorage
|
||||||
|
sessionStorage.setItem(storageKey, newState.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize collapsible sections (collapsed by default)
|
||||||
|
function initializeCollapsible() {
|
||||||
|
// Advanced filters
|
||||||
|
const advancedCollapsed = sessionStorage.getItem('advanced-collapsed') !== 'false';
|
||||||
|
elements.toggleAdvanced.setAttribute('data-collapsed', advancedCollapsed.toString());
|
||||||
|
if (advancedCollapsed) {
|
||||||
|
elements.advancedContent.classList.add('hidden');
|
||||||
|
elements.toggleAdvanced.style.transform = 'rotate(0deg)';
|
||||||
|
} else {
|
||||||
|
elements.advancedContent.classList.remove('hidden');
|
||||||
|
elements.toggleAdvanced.style.transform = 'rotate(180deg)';
|
||||||
|
}
|
||||||
|
|
||||||
|
// Tag filters
|
||||||
|
const tagsCollapsed = sessionStorage.getItem('tags-collapsed') !== 'false';
|
||||||
|
elements.toggleTags.setAttribute('data-collapsed', tagsCollapsed.toString());
|
||||||
|
if (tagsCollapsed) {
|
||||||
|
elements.tagContent.classList.add('hidden');
|
||||||
|
elements.toggleTags.style.transform = 'rotate(0deg)';
|
||||||
|
} else {
|
||||||
|
elements.tagContent.classList.remove('hidden');
|
||||||
|
elements.toggleTags.style.transform = 'rotate(180deg)';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Helper function to check if tool is hosted
|
// Helper function to check if tool is hosted
|
||||||
function isToolHosted(tool) {
|
function isToolHosted(tool) {
|
||||||
return tool.projectUrl !== undefined &&
|
return tool.projectUrl !== undefined &&
|
||||||
@ -418,18 +487,23 @@ const sortedTags = Object.entries(tagFrequency)
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add/remove tags
|
// Add/remove tags - FIXED: Update ALL matching elements
|
||||||
function addTag(tag) {
|
function addTag(tag) {
|
||||||
selectedTags.add(tag);
|
selectedTags.add(tag);
|
||||||
document.querySelector(`[data-tag="${tag}"]`).classList.add('active');
|
// FIXED: Use querySelectorAll to update ALL matching tag elements
|
||||||
|
document.querySelectorAll(`[data-tag="${tag}"]`).forEach(element => {
|
||||||
|
element.classList.add('active');
|
||||||
|
});
|
||||||
updateSelectedTags();
|
updateSelectedTags();
|
||||||
filterTools();
|
filterTools();
|
||||||
}
|
}
|
||||||
|
|
||||||
function removeTag(tag) {
|
function removeTag(tag) {
|
||||||
selectedTags.delete(tag);
|
selectedTags.delete(tag);
|
||||||
const tagElement = document.querySelector(`[data-tag="${tag}"]`);
|
// FIXED: Use querySelectorAll to update ALL matching tag elements
|
||||||
if (tagElement) tagElement.classList.remove('active');
|
document.querySelectorAll(`[data-tag="${tag}"]`).forEach(element => {
|
||||||
|
element.classList.remove('active');
|
||||||
|
});
|
||||||
updateSelectedTags();
|
updateSelectedTags();
|
||||||
filterTools();
|
filterTools();
|
||||||
}
|
}
|
||||||
@ -553,7 +627,10 @@ const sortedTags = Object.entries(tagFrequency)
|
|||||||
|
|
||||||
function resetTags() {
|
function resetTags() {
|
||||||
selectedTags.clear();
|
selectedTags.clear();
|
||||||
elements.tagCloudItems.forEach(item => item.classList.remove('active'));
|
// FIXED: Update ALL tag elements
|
||||||
|
document.querySelectorAll('.tag-cloud-item').forEach(item => {
|
||||||
|
item.classList.remove('active');
|
||||||
|
});
|
||||||
updateSelectedTags();
|
updateSelectedTags();
|
||||||
filterTools();
|
filterTools();
|
||||||
}
|
}
|
||||||
@ -630,11 +707,21 @@ const sortedTags = Object.entries(tagFrequency)
|
|||||||
elements.resetButtons.tags.addEventListener('click', resetTags);
|
elements.resetButtons.tags.addEventListener('click', resetTags);
|
||||||
elements.resetButtons.all.addEventListener('click', resetAllFilters);
|
elements.resetButtons.all.addEventListener('click', resetAllFilters);
|
||||||
|
|
||||||
|
// Collapsible toggle listeners
|
||||||
|
elements.toggleAdvanced.addEventListener('click', () => {
|
||||||
|
toggleCollapsible(elements.toggleAdvanced, elements.advancedContent, 'advanced-collapsed');
|
||||||
|
});
|
||||||
|
|
||||||
|
elements.toggleTags.addEventListener('click', () => {
|
||||||
|
toggleCollapsible(elements.toggleTags, elements.tagContent, 'tags-collapsed');
|
||||||
|
});
|
||||||
|
|
||||||
// Expose functions globally for backwards compatibility
|
// Expose functions globally for backwards compatibility
|
||||||
window.clearTagFilters = resetTags;
|
window.clearTagFilters = resetTags;
|
||||||
window.clearAllFilters = resetAllFilters;
|
window.clearAllFilters = resetAllFilters;
|
||||||
|
|
||||||
// Initialize
|
// Initialize
|
||||||
|
initializeCollapsible();
|
||||||
initTagCloud();
|
initTagCloud();
|
||||||
filterTagCloud();
|
filterTagCloud();
|
||||||
updateSelectedTags();
|
updateSelectedTags();
|
||||||
|
22
src/pages/api/ai/embeddings.status.ts
Normal file
22
src/pages/api/ai/embeddings.status.ts
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
// src/pages/api/ai/embeddings-status.ts
|
||||||
|
import type { APIRoute } from 'astro';
|
||||||
|
import { embeddingsService } from '../../../utils/embeddings.js';
|
||||||
|
import { apiResponse, apiServerError } from '../../../utils/api.js';
|
||||||
|
|
||||||
|
export const prerender = false;
|
||||||
|
|
||||||
|
export const GET: APIRoute = async () => {
|
||||||
|
try {
|
||||||
|
const stats = embeddingsService.getStats();
|
||||||
|
|
||||||
|
return apiResponse.success({
|
||||||
|
embeddings: stats,
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
status: stats.enabled && stats.initialized ? 'ready' :
|
||||||
|
stats.enabled && !stats.initialized ? 'initializing' : 'disabled'
|
||||||
|
});
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Embeddings status error:', error);
|
||||||
|
return apiServerError.internal('Failed to get embeddings status');
|
||||||
|
}
|
||||||
|
};
|
@ -1,4 +1,4 @@
|
|||||||
// src/pages/api/ai/enhance-input.ts
|
// src/pages/api/ai/enhance-input.ts - ENHANCED with forensics methodology
|
||||||
import type { APIRoute } from 'astro';
|
import type { APIRoute } from 'astro';
|
||||||
import { withAPIAuth } from '../../../utils/auth.js';
|
import { withAPIAuth } from '../../../utils/auth.js';
|
||||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
||||||
@ -14,7 +14,11 @@ function getEnv(key: string): string {
|
|||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
const AI_MODEL = getEnv('AI_MODEL');
|
// Use the analyzer AI for smart prompting (smaller, faster model)
|
||||||
|
const AI_ENDPOINT = getEnv('AI_ANALYZER_ENDPOINT');
|
||||||
|
const AI_API_KEY = getEnv('AI_ANALYZER_API_KEY');
|
||||||
|
const AI_MODEL = getEnv('AI_ANALYZER_MODEL');
|
||||||
|
|
||||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
|
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
|
||||||
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
|
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
|
||||||
const RATE_LIMIT_MAX = 5; // 5 enhancement requests per minute per user
|
const RATE_LIMIT_MAX = 5; // 5 enhancement requests per minute per user
|
||||||
@ -59,29 +63,38 @@ function cleanupExpiredRateLimits() {
|
|||||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
||||||
|
|
||||||
function createEnhancementPrompt(input: string): string {
|
function createEnhancementPrompt(input: string): string {
|
||||||
return `
|
return `Sie sind ein DFIR-Experte mit Spezialisierung auf forensische Methodik. Ein Nutzer beschreibt ein forensisches Szenario oder Problem. Analysieren Sie die Eingabe auf Vollständigkeit für eine wissenschaftlich fundierte forensische Untersuchung.
|
||||||
Du bist eine KI für digitale Forensik. Der Nutzer beschreibt ein forensisches Szenario. Analysiere die Eingabe.
|
|
||||||
|
|
||||||
Wenn die Beschreibung unvollständig oder vage ist, stelle bis zu drei präzise Rückfragen im JSON-Array-Format, um wichtige Details zu klären (z. B. Vorfalltyp, System, Ziel, Datenquellen, Zeit, Beteiligte, rechtlicher Rahmen).
|
ANALYSIEREN SIE DIESE FORENSISCHEN KATEGORIEN:
|
||||||
|
1. **Incident Context**: Was ist passiert? Welche Angriffsvektoren oder technischen Probleme liegen vor?
|
||||||
|
2. **Affected Systems**: Welche spezifischen Technologien/Plattformen sind betroffen? (Windows/Linux/ICS/SCADA/Mobile/Cloud/Network Infrastructure)
|
||||||
|
3. **Available Evidence**: Welche forensischen Datenquellen stehen zur Verfügung? (RAM-Dumps, Disk-Images, Log-Files, Network-Captures, Registry-Hives)
|
||||||
|
4. **Investigation Objectives**: Was soll erreicht werden? (IOC-Extraktion, Timeline-Rekonstruktion, Attribution, Impact-Assessment)
|
||||||
|
5. **Timeline Constraints**: Wie zeitkritisch ist die Untersuchung?
|
||||||
|
6. **Legal & Compliance**: Rechtliche Anforderungen, Chain of Custody, Compliance-Rahmen (DSGVO, sector-specific regulations)
|
||||||
|
7. **Technical Constraints**: Verfügbare Ressourcen, Skills, Infrastrukturbeschränkungen
|
||||||
|
|
||||||
Wenn die Eingabe bereits klar, spezifisch und vollständig ist, gib stattdessen nur eine leere Liste [] zurück.
|
WENN die Beschreibung alle kritischen forensischen Aspekte abdeckt: Geben Sie eine leere Liste [] zurück.
|
||||||
|
|
||||||
Antwortformat strikt:
|
WENN wichtige forensische Details fehlen: Formulieren Sie 2-3 präzise Fragen, die die kritischsten Lücken für eine wissenschaftlich fundierte forensische Analyse schließen.
|
||||||
|
|
||||||
\`\`\`json
|
QUALITÄTSKRITERIEN FÜR FRAGEN:
|
||||||
|
- Forensisch spezifisch, nicht allgemein (❌ "Mehr Details?" ✅ "Welche forensischen Artefakte (RAM-Dumps, Disk-Images, Logs) stehen zur Verfügung?")
|
||||||
|
- Methodisch relevant (❌ "Wann passierte das?" ✅ "Liegen Log-Dateien aus dem Incident-Zeitraum vor, und welche Retention-Policy gilt?")
|
||||||
|
- Priorisiert nach Auswirkung auf die forensische Untersuchungsqualität
|
||||||
|
|
||||||
|
ANTWORTFORMAT (NUR JSON, KEIN ZUSÄTZLICHER TEXT):
|
||||||
[
|
[
|
||||||
"Frage 1?",
|
"Forensisch spezifische Frage 1?",
|
||||||
"Frage 2?",
|
"Forensisch spezifische Frage 2?",
|
||||||
"Frage 3?"
|
"Forensisch spezifische Frage 3?"
|
||||||
]
|
]
|
||||||
\`\`\`
|
|
||||||
|
|
||||||
Nutzer-Eingabe:
|
NUTZER-EINGABE:
|
||||||
${input}
|
${input}
|
||||||
`.trim();
|
`.trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
export const POST: APIRoute = async ({ request }) => {
|
export const POST: APIRoute = async ({ request }) => {
|
||||||
try {
|
try {
|
||||||
const authResult = await withAPIAuth(request, 'ai');
|
const authResult = await withAPIAuth(request, 'ai');
|
||||||
@ -98,12 +111,12 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
const body = await request.json();
|
const body = await request.json();
|
||||||
const { input } = body;
|
const { input } = body;
|
||||||
|
|
||||||
if (!input || typeof input !== 'string' || input.length < 20) {
|
if (!input || typeof input !== 'string' || input.length < 40) {
|
||||||
return apiError.badRequest('Input too short for enhancement');
|
return apiError.badRequest('Input too short for enhancement (minimum 40 characters)');
|
||||||
}
|
}
|
||||||
|
|
||||||
const sanitizedInput = sanitizeInput(input);
|
const sanitizedInput = sanitizeInput(input);
|
||||||
if (sanitizedInput.length < 20) {
|
if (sanitizedInput.length < 40) {
|
||||||
return apiError.badRequest('Input too short after sanitization');
|
return apiError.badRequest('Input too short after sanitization');
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -111,11 +124,11 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
|
const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
|
||||||
|
|
||||||
const aiResponse = await enqueueApiCall(() =>
|
const aiResponse = await enqueueApiCall(() =>
|
||||||
fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
|
fetch(`${AI_ENDPOINT}/v1/chat/completions`, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
headers: {
|
headers: {
|
||||||
'Content-Type': 'application/json',
|
'Content-Type': 'application/json',
|
||||||
'Authorization': `Bearer ${process.env.AI_API_KEY}`
|
'Authorization': `Bearer ${AI_API_KEY}`
|
||||||
},
|
},
|
||||||
body: JSON.stringify({
|
body: JSON.stringify({
|
||||||
model: AI_MODEL,
|
model: AI_MODEL,
|
||||||
@ -125,8 +138,12 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
content: systemPrompt
|
content: systemPrompt
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
max_tokens: 200,
|
max_tokens: 300,
|
||||||
temperature: 0.7
|
temperature: 0.7,
|
||||||
|
// Enhanced: Better parameters for consistent forensics output
|
||||||
|
top_p: 0.9,
|
||||||
|
frequency_penalty: 0.2,
|
||||||
|
presence_penalty: 0.1
|
||||||
})
|
})
|
||||||
}), taskId);
|
}), taskId);
|
||||||
|
|
||||||
@ -150,30 +167,41 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
.trim();
|
.trim();
|
||||||
questions = JSON.parse(cleanedContent);
|
questions = JSON.parse(cleanedContent);
|
||||||
|
|
||||||
if (!Array.isArray(questions) || questions.length === 0) {
|
if (!Array.isArray(questions)) {
|
||||||
throw new Error('Invalid questions format');
|
throw new Error('Response is not an array');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate and clean questions
|
// Enhanced validation and cleaning for forensics context
|
||||||
questions = questions
|
questions = questions
|
||||||
.filter(q => typeof q === 'string' && q.length > 5 && q.length < 120)
|
.filter(q => typeof q === 'string' && q.length > 20 && q.length < 200) // More appropriate length for forensics questions
|
||||||
.slice(0, 3);
|
.filter(q => q.includes('?')) // Must be a question
|
||||||
|
.filter(q => {
|
||||||
|
// Enhanced: Filter for forensics-relevant questions
|
||||||
|
const forensicsTerms = ['forensisch', 'log', 'dump', 'image', 'artefakt', 'evidence', 'incident', 'system', 'netzwerk', 'zeitraum', 'verfügbar'];
|
||||||
|
const lowerQ = q.toLowerCase();
|
||||||
|
return forensicsTerms.some(term => lowerQ.includes(term));
|
||||||
|
})
|
||||||
|
.map(q => q.trim())
|
||||||
|
.slice(0, 3); // Max 3 questions
|
||||||
|
|
||||||
|
// If no valid forensics questions, return empty array (means input is complete)
|
||||||
if (questions.length === 0) {
|
if (questions.length === 0) {
|
||||||
throw new Error('No valid questions found');
|
questions = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('Failed to parse enhancement response:', aiContent);
|
console.error('Failed to parse enhancement response:', aiContent);
|
||||||
return apiServerError.unavailable('Invalid enhancement response format');
|
// If parsing fails, assume input is complete enough
|
||||||
|
questions = [];
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`[AI Enhancement] User: ${userId}, Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
|
console.log(`[AI Enhancement] User: ${userId}, Forensics Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
|
||||||
|
|
||||||
return new Response(JSON.stringify({
|
return new Response(JSON.stringify({
|
||||||
success: true,
|
success: true,
|
||||||
questions,
|
questions,
|
||||||
taskId
|
taskId,
|
||||||
|
inputComplete: questions.length === 0 // Flag to indicate if input seems complete
|
||||||
}), {
|
}), {
|
||||||
status: 200,
|
status: 200,
|
||||||
headers: { 'Content-Type': 'application/json' }
|
headers: { 'Content-Type': 'application/json' }
|
||||||
|
@ -1,276 +1,106 @@
|
|||||||
// src/pages/api/ai/query.ts
|
// src/pages/api/ai/query.ts - FIXED: Rate limiting for micro-task pipeline
|
||||||
|
|
||||||
import type { APIRoute } from 'astro';
|
import type { APIRoute } from 'astro';
|
||||||
import { withAPIAuth } from '../../../utils/auth.js';
|
import { withAPIAuth } from '../../../utils/auth.js';
|
||||||
import { getCompressedToolsDataForAI } from '../../../utils/dataService.js';
|
|
||||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
||||||
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';
|
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';
|
||||||
|
import { aiPipeline } from '../../../utils/aiPipeline.js';
|
||||||
|
|
||||||
export const prerender = false;
|
export const prerender = false;
|
||||||
|
|
||||||
function getEnv(key: string): string {
|
interface RateLimitData {
|
||||||
const value = process.env[key];
|
count: number;
|
||||||
if (!value) {
|
resetTime: number;
|
||||||
throw new Error(`Missing environment variable: ${key}`);
|
microTaskCount: number;
|
||||||
}
|
|
||||||
return value;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const AI_MODEL = getEnv('AI_MODEL');
|
const rateLimitStore = new Map<string, RateLimitData>();
|
||||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
|
|
||||||
const RATE_LIMIT_WINDOW = 60 * 1000;
|
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
|
||||||
const RATE_LIMIT_MAX = 10;
|
const MAIN_RATE_LIMIT_MAX = parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS || '4', 10);
|
||||||
|
const MICRO_TASK_TOTAL_LIMIT = parseInt(process.env.AI_MICRO_TASK_TOTAL_LIMIT || '50', 10);
|
||||||
|
|
||||||
function sanitizeInput(input: string): string {
|
function sanitizeInput(input: string): string {
|
||||||
let sanitized = input
|
let sanitized = input
|
||||||
.replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]') // Remove code blocks
|
.replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
|
||||||
.replace(/\<\/?[^>]+(>|$)/g, '') // Remove HTML tags
|
.replace(/\<\/?[^>]+(>|$)/g, '')
|
||||||
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
|
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
|
||||||
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
|
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
sanitized = sanitized.slice(0, 2000).replace(/\s+/g, ' ');
|
sanitized = sanitized.slice(0, 2000).replace(/\s+/g, ' ');
|
||||||
|
|
||||||
return sanitized;
|
return sanitized;
|
||||||
}
|
}
|
||||||
|
|
||||||
function stripMarkdownJson(content: string): string {
|
function checkRateLimit(userId: string): { allowed: boolean; reason?: string; microTasksRemaining?: number } {
|
||||||
return content
|
|
||||||
.replace(/^```json\s*/i, '')
|
|
||||||
.replace(/\s*```\s*$/, '')
|
|
||||||
.trim();
|
|
||||||
}
|
|
||||||
|
|
||||||
function checkRateLimit(userId: string): boolean {
|
|
||||||
const now = Date.now();
|
const now = Date.now();
|
||||||
const userLimit = rateLimitStore.get(userId);
|
const userLimit = rateLimitStore.get(userId);
|
||||||
|
|
||||||
if (!userLimit || now > userLimit.resetTime) {
|
if (!userLimit || now > userLimit.resetTime) {
|
||||||
rateLimitStore.set(userId, { count: 1, resetTime: now + RATE_LIMIT_WINDOW });
|
rateLimitStore.set(userId, {
|
||||||
return true;
|
count: 1,
|
||||||
|
resetTime: now + RATE_LIMIT_WINDOW,
|
||||||
|
microTaskCount: 0
|
||||||
|
});
|
||||||
|
return {
|
||||||
|
allowed: true,
|
||||||
|
microTasksRemaining: MICRO_TASK_TOTAL_LIMIT
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
if (userLimit.count >= RATE_LIMIT_MAX) {
|
if (userLimit.count >= MAIN_RATE_LIMIT_MAX) {
|
||||||
return false;
|
return {
|
||||||
|
allowed: false,
|
||||||
|
reason: `Main rate limit exceeded. Max ${MAIN_RATE_LIMIT_MAX} requests per minute.`
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
if (userLimit.microTaskCount >= MICRO_TASK_TOTAL_LIMIT) {
|
||||||
|
return {
|
||||||
|
allowed: false,
|
||||||
|
reason: `Micro-task limit exceeded. Max ${MICRO_TASK_TOTAL_LIMIT} AI calls per minute.`
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
userLimit.count++;
|
userLimit.count++;
|
||||||
return true;
|
|
||||||
|
return {
|
||||||
|
allowed: true,
|
||||||
|
microTasksRemaining: MICRO_TASK_TOTAL_LIMIT - userLimit.microTaskCount
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function incrementMicroTaskCount(userId: string, aiCallsMade: number): void {
|
||||||
|
const userLimit = rateLimitStore.get(userId);
|
||||||
|
if (userLimit) {
|
||||||
|
userLimit.microTaskCount += aiCallsMade;
|
||||||
|
console.log(`[RATE LIMIT] User ${userId} now at ${userLimit.microTaskCount}/${MICRO_TASK_TOTAL_LIMIT} micro-task calls`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function cleanupExpiredRateLimits() {
|
function cleanupExpiredRateLimits() {
|
||||||
const now = Date.now();
|
const now = Date.now();
|
||||||
|
const maxStoreSize = 1000;
|
||||||
|
|
||||||
for (const [userId, limit] of rateLimitStore.entries()) {
|
for (const [userId, limit] of rateLimitStore.entries()) {
|
||||||
if (now > limit.resetTime) {
|
if (now > limit.resetTime) {
|
||||||
rateLimitStore.delete(userId);
|
rateLimitStore.delete(userId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rateLimitStore.size > maxStoreSize) {
|
||||||
|
const entries = Array.from(rateLimitStore.entries());
|
||||||
|
entries.sort((a, b) => a[1].resetTime - b[1].resetTime);
|
||||||
|
|
||||||
|
const toRemove = entries.slice(0, entries.length - maxStoreSize);
|
||||||
|
toRemove.forEach(([userId]) => rateLimitStore.delete(userId));
|
||||||
|
|
||||||
|
console.log(`[RATE LIMIT] Cleanup: removed ${toRemove.length} old entries`);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
||||||
|
|
||||||
async function loadToolsDatabase() {
|
|
||||||
try {
|
|
||||||
return await getCompressedToolsDataForAI();
|
|
||||||
} catch (error) {
|
|
||||||
console.error('Failed to load tools database:', error);
|
|
||||||
throw new Error('Database unavailable');
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
function createWorkflowSystemPrompt(toolsData: any): string {
|
|
||||||
const toolsList = toolsData.tools.map((tool: any) => ({
|
|
||||||
name: tool.name,
|
|
||||||
description: tool.description,
|
|
||||||
domains: tool.domains,
|
|
||||||
phases: tool.phases,
|
|
||||||
domainAgnostic: tool['domain-agnostic-software'],
|
|
||||||
platforms: tool.platforms,
|
|
||||||
skillLevel: tool.skillLevel,
|
|
||||||
license: tool.license,
|
|
||||||
tags: tool.tags,
|
|
||||||
related_concepts: tool.related_concepts || []
|
|
||||||
}));
|
|
||||||
|
|
||||||
const conceptsList = toolsData.concepts.map((concept: any) => ({
|
|
||||||
name: concept.name,
|
|
||||||
description: concept.description,
|
|
||||||
domains: concept.domains,
|
|
||||||
phases: concept.phases,
|
|
||||||
skillLevel: concept.skillLevel,
|
|
||||||
tags: concept.tags
|
|
||||||
}));
|
|
||||||
|
|
||||||
const regularPhases = toolsData.phases || [];
|
|
||||||
|
|
||||||
const domainAgnosticSoftware = toolsData['domain-agnostic-software'] || [];
|
|
||||||
|
|
||||||
const allPhaseItems = [
|
|
||||||
...regularPhases,
|
|
||||||
...domainAgnosticSoftware
|
|
||||||
];
|
|
||||||
|
|
||||||
const phasesDescription = allPhaseItems.map((phase: any) =>
|
|
||||||
`- ${phase.id}: ${phase.name}`
|
|
||||||
).join('\n');
|
|
||||||
|
|
||||||
const domainsDescription = toolsData.domains.map((domain: any) =>
|
|
||||||
`- ${domain.id}: ${domain.name}`
|
|
||||||
).join('\n');
|
|
||||||
|
|
||||||
const phaseDescriptions = regularPhases.map((phase: any) =>
|
|
||||||
`- ${phase.name}: ${phase.description || 'Tools/Methods for this phase'}`
|
|
||||||
).join('\n');
|
|
||||||
|
|
||||||
const domainAgnosticDescriptions = domainAgnosticSoftware.map((section: any) =>
|
|
||||||
`- ${section.name}: ${section.description || 'Cross-cutting software and platforms'}`
|
|
||||||
).join('\n');
|
|
||||||
|
|
||||||
const validPhases = [
|
|
||||||
...regularPhases.map((p: any) => p.id),
|
|
||||||
...domainAgnosticSoftware.map((s: any) => s.id)
|
|
||||||
].join('|');
|
|
||||||
|
|
||||||
return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte, der Ermittlern bei der Auswahl von Software und Methoden hilft.
|
|
||||||
|
|
||||||
VERFÜGBARE TOOLS/METHODEN:
|
|
||||||
${JSON.stringify(toolsList, null, 2)}
|
|
||||||
|
|
||||||
VERFÜGBARE HINTERGRUNDWISSEN-KONZEPTE:
|
|
||||||
${JSON.stringify(conceptsList, null, 2)}
|
|
||||||
|
|
||||||
UNTERSUCHUNGSPHASEN (NIST Framework):
|
|
||||||
${phasesDescription}
|
|
||||||
|
|
||||||
FORENSISCHE DOMÄNEN:
|
|
||||||
${domainsDescription}
|
|
||||||
|
|
||||||
WICHTIGE REGELN:
|
|
||||||
1. Pro Phase 2-3 Tools/Methoden empfehlen (immer mindestens 2 wenn verfügbar)
|
|
||||||
2. Tools/Methoden können in MEHREREN Phasen empfohlen werden wenn sinnvoll - versuche ein Tool/Methode für jede Phase zu empfehlen, selbst wenn die Priorität "low" ist.
|
|
||||||
3. Für Reporting-Phase: Visualisierungs- und Dokumentationssoftware einschließen
|
|
||||||
4. Gib stets dem spezieller für den Fall geeigneten Werkzeug den Vorzug.
|
|
||||||
5. Deutsche Antworten für deutsche Anfragen, English for English queries
|
|
||||||
6. Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software.
|
|
||||||
7. Bevorzuge alles, was nicht proprietär ist (license != "Proprietary"), aber erkenne an, wenn proprietäre Software besser geeignet ist.
|
|
||||||
8. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
|
|
||||||
9. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
|
|
||||||
|
|
||||||
ENHANCED CONTEXTUAL ANALYSIS:
|
|
||||||
10. Analysiere das Szenario detailliert und identifiziere Schlüsselelemente, Bedrohungen und forensische Herausforderungen
|
|
||||||
11. Entwickle einen strategischen Untersuchungsansatz basierend auf dem spezifischen Szenario
|
|
||||||
12. Identifiziere zeitkritische oder besonders wichtige Faktoren für diesen Fall
|
|
||||||
|
|
||||||
SOFTWARE/METHODEN-AUSWAHL NACH PHASE:
|
|
||||||
${phaseDescriptions}
|
|
||||||
|
|
||||||
DOMÄNENAGNOSTISCHE SOFTWARE/METHODEN:
|
|
||||||
${domainAgnosticDescriptions}
|
|
||||||
|
|
||||||
ANTWORT-FORMAT (strict JSON):
|
|
||||||
{
|
|
||||||
"scenario_analysis": "Detaillierte Analyse des Szenarios: Erkannte Schlüsselelemente, Art des Vorfalls, betroffene Systeme, potentielle Bedrohungen und forensische Herausforderungen",
|
|
||||||
"investigation_approach": "Strategischer Untersuchungsansatz für dieses spezifische Szenario: Prioritäten, Reihenfolge der Phasen, besondere Überlegungen",
|
|
||||||
"critical_considerations": "Zeitkritische Faktoren, wichtige Sicherheitsaspekte oder besondere Vorsichtsmaßnahmen für diesen Fall",
|
|
||||||
"recommended_tools": [
|
|
||||||
{
|
|
||||||
"name": "EXAKTER Name aus der Tools-Database",
|
|
||||||
"priority": "high|medium|low",
|
|
||||||
"phase": "${validPhases}",
|
|
||||||
"justification": "Warum diese Methode für diese Phase und dieses spezifische Szenario geeignet ist - mit Bezug zu den erkannten Schlüsselelementen"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"workflow_suggestion": "Vorgeschlagener Untersuchungsablauf mit konkreten Schritten für dieses Szenario",
|
|
||||||
"background_knowledge": [
|
|
||||||
{
|
|
||||||
"concept_name": "EXAKTER Name aus der Konzepte-Database",
|
|
||||||
"relevance": "Warum dieses Konzept für das Szenario relevant ist, und bei welchen der empfohlenen Methoden/Tools."
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"additional_notes": "Wichtige Überlegungen und Hinweise"
|
|
||||||
}
|
|
||||||
|
|
||||||
Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
|
|
||||||
}
|
|
||||||
|
|
||||||
function createToolSystemPrompt(toolsData: any): string {
|
|
||||||
const toolsList = toolsData.tools.map((tool: any) => ({
|
|
||||||
name: tool.name,
|
|
||||||
description: tool.description,
|
|
||||||
domains: tool.domains,
|
|
||||||
phases: tool.phases,
|
|
||||||
platforms: tool.platforms,
|
|
||||||
skillLevel: tool.skillLevel,
|
|
||||||
license: tool.license,
|
|
||||||
tags: tool.tags,
|
|
||||||
url: tool.url,
|
|
||||||
projectUrl: tool.projectUrl,
|
|
||||||
related_concepts: tool.related_concepts || []
|
|
||||||
}));
|
|
||||||
|
|
||||||
const conceptsList = toolsData.concepts.map((concept: any) => ({
|
|
||||||
name: concept.name,
|
|
||||||
description: concept.description,
|
|
||||||
domains: concept.domains,
|
|
||||||
phases: concept.phases,
|
|
||||||
skillLevel: concept.skillLevel,
|
|
||||||
tags: concept.tags
|
|
||||||
}));
|
|
||||||
|
|
||||||
return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte, der bei der Auswahl spezifischer Software/Methoden für konkrete Probleme hilft.
|
|
||||||
|
|
||||||
VERFÜGBARE TOOLS/METHODEN:
|
|
||||||
${JSON.stringify(toolsList, null, 2)}
|
|
||||||
|
|
||||||
VERFÜGBARE HINTERGRUNDWISSEN-KONZEPTE:
|
|
||||||
${JSON.stringify(conceptsList, null, 2)}
|
|
||||||
|
|
||||||
WICHTIGE REGELN:
|
|
||||||
1. Analysiere das spezifische Problem/die Anforderung sorgfältig
|
|
||||||
2. Empfehle 1-3 Methoden/Tools, sortiert nach Eignung (beste Empfehlung zuerst)
|
|
||||||
3. Gib detaillierte Erklärungen, WARUM und WIE jede Methode/Tool das Problem löst
|
|
||||||
4. Berücksichtige praktische Aspekte: Skill Level, Plattformen, Verfügbarkeit
|
|
||||||
5. Deutsche Antworten für deutsche Anfragen, English for English queries
|
|
||||||
6. Gib konkrete Anwendungshinweise, nicht nur allgemeine Beschreibungen - Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software.
|
|
||||||
7. Erwähne sowohl Stärken als auch Schwächen/Limitationen
|
|
||||||
8. Schlage alternative Ansätze vor, wenn sinnvoll
|
|
||||||
9. Gib grundsätzliche Hinweise, WIE die Methode/Tool konkret eingesetzt wird
|
|
||||||
10. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
|
|
||||||
11. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
|
|
||||||
|
|
||||||
ENHANCED CONTEXTUAL ANALYSIS:
|
|
||||||
12. Analysiere das Problem detailliert und identifiziere technische Anforderungen, Herausforderungen und Erfolgsfaktoren
|
|
||||||
13. Entwickle einen strategischen Lösungsansatz basierend auf dem spezifischen Problem
|
|
||||||
14. Identifiziere wichtige Voraussetzungen oder Warnungen für die Anwendung
|
|
||||||
|
|
||||||
ANTWORT-FORMAT (strict JSON):
|
|
||||||
{
|
|
||||||
"problem_analysis": "Detaillierte Analyse des Problems: Erkannte technische Anforderungen, Herausforderungen, benötigte Fähigkeiten und Erfolgsfaktoren",
|
|
||||||
"investigation_approach": "Strategischer Lösungsansatz für dieses spezifische Problem: Herangehensweise, Prioritäten, optimale Anwendungsreihenfolge",
|
|
||||||
"critical_considerations": "Wichtige Voraussetzungen, potentielle Fallstricke oder Warnungen für die Anwendung der empfohlenen Lösungen",
|
|
||||||
"recommended_tools": [
|
|
||||||
{
|
|
||||||
"name": "EXAKTER Name aus der Tools-Database",
|
|
||||||
"rank": 1,
|
|
||||||
"suitability_score": "high|medium|low",
|
|
||||||
"detailed_explanation": "Detaillierte Erklärung, warum dieses Tool/diese Methode das spezifische Problem löst - mit Bezug zu den erkannten Anforderungen",
|
|
||||||
"implementation_approach": "Konkrete Schritte/Ansatz zur Anwendung für dieses spezifische Problem",
|
|
||||||
"pros": ["Spezifische Vorteile für diesen Anwendungsfall", "Weitere Vorteile"],
|
|
||||||
"cons": ["Potentielle Nachteile oder Limitationen", "Weitere Einschränkungen"],
|
|
||||||
"alternatives": "Alternative Ansätze oder ergänzende Tools/Methoden, falls relevant"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"background_knowledge": [
|
|
||||||
{
|
|
||||||
"concept_name": "EXAKTER Name aus der Konzepte-Database",
|
|
||||||
"relevance": "Warum dieses Konzept für die empfohlenen Tools/das Problem relevant ist, und für welche der empfohlenen Methoden/Tools."
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"additional_considerations": "Wichtige Überlegungen, Voraussetzungen oder Warnungen"
|
|
||||||
}
|
|
||||||
|
|
||||||
Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
|
|
||||||
}
|
|
||||||
|
|
||||||
export const POST: APIRoute = async ({ request }) => {
|
export const POST: APIRoute = async ({ request }) => {
|
||||||
try {
|
try {
|
||||||
const authResult = await withAPIAuth(request, 'ai');
|
const authResult = await withAPIAuth(request, 'ai');
|
||||||
@ -280,161 +110,100 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
|
|
||||||
const userId = authResult.userId;
|
const userId = authResult.userId;
|
||||||
|
|
||||||
if (!checkRateLimit(userId)) {
|
const rateLimitResult = checkRateLimit(userId);
|
||||||
return apiError.rateLimit('Rate limit exceeded');
|
if (!rateLimitResult.allowed) {
|
||||||
|
return apiError.rateLimit(rateLimitResult.reason || 'Rate limit exceeded');
|
||||||
}
|
}
|
||||||
|
|
||||||
const body = await request.json();
|
const body = await request.json();
|
||||||
const { query, mode = 'workflow', taskId: clientTaskId } = body;
|
const { query, mode = 'workflow', taskId: clientTaskId } = body;
|
||||||
|
|
||||||
// ADD THIS DEBUG LOGGING
|
console.log(`[MICRO-TASK API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
|
||||||
console.log(`[AI API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
|
console.log(`[MICRO-TASK API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);
|
||||||
|
|
||||||
if (!query || typeof query !== 'string') {
|
if (!query || typeof query !== 'string') {
|
||||||
console.log(`[AI API] Invalid query for task ${clientTaskId}`);
|
console.log(`[MICRO-TASK API] Invalid query for task ${clientTaskId}`);
|
||||||
return apiError.badRequest('Query required');
|
return apiError.badRequest('Query required');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!['workflow', 'tool'].includes(mode)) {
|
if (!['workflow', 'tool'].includes(mode)) {
|
||||||
console.log(`[AI API] Invalid mode for task ${clientTaskId}: ${mode}`);
|
console.log(`[MICRO-TASK API] Invalid mode for task ${clientTaskId}: ${mode}`);
|
||||||
return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
|
return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
|
||||||
}
|
}
|
||||||
|
|
||||||
const sanitizedQuery = sanitizeInput(query);
|
const sanitizedQuery = sanitizeInput(query);
|
||||||
if (sanitizedQuery.includes('[FILTERED]')) {
|
if (sanitizedQuery.includes('[FILTERED]')) {
|
||||||
console.log(`[AI API] Filtered input detected for task ${clientTaskId}`);
|
console.log(`[MICRO-TASK API] Filtered input detected for task ${clientTaskId}`);
|
||||||
return apiError.badRequest('Invalid input detected');
|
return apiError.badRequest('Invalid input detected');
|
||||||
}
|
}
|
||||||
|
|
||||||
const toolsData = await loadToolsDatabase();
|
|
||||||
|
|
||||||
const systemPrompt = mode === 'workflow'
|
|
||||||
? createWorkflowSystemPrompt(toolsData)
|
|
||||||
: createToolSystemPrompt(toolsData);
|
|
||||||
|
|
||||||
const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
|
const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
|
||||||
|
|
||||||
console.log(`[AI API] About to enqueue task ${taskId}`);
|
console.log(`[MICRO-TASK API] About to enqueue micro-task pipeline ${taskId}`);
|
||||||
|
|
||||||
|
const result = await enqueueApiCall(() =>
|
||||||
const aiResponse = await enqueueApiCall(() =>
|
aiPipeline.processQuery(sanitizedQuery, mode)
|
||||||
fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
|
|
||||||
method: 'POST',
|
|
||||||
headers: {
|
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'Authorization': `Bearer ${process.env.AI_API_KEY}`
|
|
||||||
},
|
|
||||||
body: JSON.stringify({
|
|
||||||
model: AI_MODEL,
|
|
||||||
messages: [
|
|
||||||
{
|
|
||||||
role: 'system',
|
|
||||||
content: systemPrompt
|
|
||||||
},
|
|
||||||
{
|
|
||||||
role: 'user',
|
|
||||||
content: sanitizedQuery
|
|
||||||
}
|
|
||||||
],
|
|
||||||
max_tokens: 3500,
|
|
||||||
temperature: 0.3
|
|
||||||
})
|
|
||||||
})
|
|
||||||
, taskId);
|
, taskId);
|
||||||
|
|
||||||
if (!aiResponse.ok) {
|
if (!result || !result.recommendation) {
|
||||||
console.error('AI API error:', await aiResponse.text());
|
return apiServerError.unavailable('No response from micro-task AI pipeline');
|
||||||
return apiServerError.unavailable('AI service unavailable');
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const aiData = await aiResponse.json();
|
const stats = result.processingStats;
|
||||||
const aiContent = aiData.choices?.[0]?.message?.content;
|
const estimatedAICallsMade = stats.microTasksCompleted + stats.microTasksFailed;
|
||||||
|
incrementMicroTaskCount(userId, estimatedAICallsMade);
|
||||||
|
|
||||||
if (!aiContent) {
|
console.log(`[MICRO-TASK API] Pipeline completed for ${taskId}:`);
|
||||||
return apiServerError.unavailable('No response from AI');
|
console.log(` - Mode: ${mode}`);
|
||||||
}
|
console.log(` - User: ${userId}`);
|
||||||
|
console.log(` - Query length: ${sanitizedQuery.length}`);
|
||||||
|
console.log(` - Processing time: ${stats.processingTimeMs}ms`);
|
||||||
|
console.log(` - Micro-tasks completed: ${stats.microTasksCompleted}`);
|
||||||
|
console.log(` - Micro-tasks failed: ${stats.microTasksFailed}`);
|
||||||
|
console.log(` - Estimated AI calls: ${estimatedAICallsMade}`);
|
||||||
|
console.log(` - Embeddings used: ${stats.embeddingsUsed}`);
|
||||||
|
console.log(` - Final items: ${stats.finalSelectedItems}`);
|
||||||
|
|
||||||
let recommendation;
|
const currentLimit = rateLimitStore.get(userId);
|
||||||
try {
|
const remainingMicroTasks = currentLimit ?
|
||||||
const cleanedContent = stripMarkdownJson(aiContent);
|
MICRO_TASK_TOTAL_LIMIT - currentLimit.microTaskCount : MICRO_TASK_TOTAL_LIMIT;
|
||||||
recommendation = JSON.parse(cleanedContent);
|
|
||||||
} catch (error) {
|
|
||||||
console.error('Failed to parse AI response:', aiContent);
|
|
||||||
return apiServerError.unavailable('Invalid AI response format');
|
|
||||||
}
|
|
||||||
|
|
||||||
const validToolNames = new Set(toolsData.tools.map((t: any) => t.name));
|
|
||||||
const validConceptNames = new Set(toolsData.concepts.map((c: any) => c.name));
|
|
||||||
|
|
||||||
let validatedRecommendation;
|
|
||||||
|
|
||||||
if (mode === 'workflow') {
|
|
||||||
validatedRecommendation = {
|
|
||||||
...recommendation,
|
|
||||||
// Ensure all new fields are included with fallbacks
|
|
||||||
scenario_analysis: recommendation.scenario_analysis || recommendation.problem_analysis || '',
|
|
||||||
investigation_approach: recommendation.investigation_approach || '',
|
|
||||||
critical_considerations: recommendation.critical_considerations || '',
|
|
||||||
recommended_tools: recommendation.recommended_tools?.filter((tool: any) => {
|
|
||||||
if (!validToolNames.has(tool.name)) {
|
|
||||||
console.warn(`AI recommended unknown tool: ${tool.name}`);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}) || [],
|
|
||||||
background_knowledge: recommendation.background_knowledge?.filter((concept: any) => {
|
|
||||||
if (!validConceptNames.has(concept.concept_name)) {
|
|
||||||
console.warn(`AI referenced unknown concept: ${concept.concept_name}`);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}) || []
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
validatedRecommendation = {
|
|
||||||
...recommendation,
|
|
||||||
// Ensure all new fields are included with fallbacks
|
|
||||||
problem_analysis: recommendation.problem_analysis || recommendation.scenario_analysis || '',
|
|
||||||
investigation_approach: recommendation.investigation_approach || '',
|
|
||||||
critical_considerations: recommendation.critical_considerations || '',
|
|
||||||
recommended_tools: recommendation.recommended_tools?.filter((tool: any) => {
|
|
||||||
if (!validToolNames.has(tool.name)) {
|
|
||||||
console.warn(`AI recommended unknown tool: ${tool.name}`);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}).map((tool: any, index: number) => ({
|
|
||||||
...tool,
|
|
||||||
rank: tool.rank || (index + 1),
|
|
||||||
suitability_score: tool.suitability_score || 'medium',
|
|
||||||
pros: Array.isArray(tool.pros) ? tool.pros : [],
|
|
||||||
cons: Array.isArray(tool.cons) ? tool.cons : []
|
|
||||||
})) || [],
|
|
||||||
background_knowledge: recommendation.background_knowledge?.filter((concept: any) => {
|
|
||||||
if (!validConceptNames.has(concept.concept_name)) {
|
|
||||||
console.warn(`AI referenced unknown concept: ${concept.concept_name}`);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}) || []
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(`[AI Query] Mode: ${mode}, User: ${userId}, Query length: ${sanitizedQuery.length}, Tools: ${validatedRecommendation.recommended_tools.length}, Concepts: ${validatedRecommendation.background_knowledge?.length || 0}`);
|
|
||||||
|
|
||||||
return new Response(JSON.stringify({
|
return new Response(JSON.stringify({
|
||||||
success: true,
|
success: true,
|
||||||
mode,
|
mode,
|
||||||
taskId,
|
taskId,
|
||||||
recommendation: validatedRecommendation,
|
recommendation: result.recommendation,
|
||||||
query: sanitizedQuery
|
query: sanitizedQuery,
|
||||||
|
processingStats: {
|
||||||
|
...result.processingStats,
|
||||||
|
pipelineType: 'micro-task',
|
||||||
|
microTasksSuccessRate: stats.microTasksCompleted / (stats.microTasksCompleted + stats.microTasksFailed),
|
||||||
|
averageTaskTime: stats.processingTimeMs / (stats.microTasksCompleted + stats.microTasksFailed),
|
||||||
|
estimatedAICallsMade
|
||||||
|
},
|
||||||
|
rateLimitInfo: {
|
||||||
|
mainRequestsRemaining: MAIN_RATE_LIMIT_MAX - (currentLimit?.count || 0),
|
||||||
|
microTaskCallsRemaining: remainingMicroTasks,
|
||||||
|
resetTime: Date.now() + RATE_LIMIT_WINDOW
|
||||||
|
}
|
||||||
}), {
|
}), {
|
||||||
status: 200,
|
status: 200,
|
||||||
headers: { 'Content-Type': 'application/json' }
|
headers: { 'Content-Type': 'application/json' }
|
||||||
});
|
});
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('AI query error:', error);
|
console.error('[MICRO-TASK API] Pipeline error:', error);
|
||||||
return apiServerError.internal('Internal server error');
|
|
||||||
|
if (error.message.includes('embeddings')) {
|
||||||
|
return apiServerError.unavailable('Embeddings service error - using AI fallback');
|
||||||
|
} else if (error.message.includes('micro-task')) {
|
||||||
|
return apiServerError.unavailable('Micro-task pipeline error - some analysis steps failed');
|
||||||
|
} else if (error.message.includes('selector')) {
|
||||||
|
return apiServerError.unavailable('AI selector service error');
|
||||||
|
} else if (error.message.includes('rate limit')) {
|
||||||
|
return apiError.rateLimit('AI service rate limits exceeded during micro-task processing');
|
||||||
|
} else {
|
||||||
|
return apiServerError.internal('Micro-task AI pipeline error');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
@ -1263,6 +1263,12 @@ input[type="checkbox"] {
|
|||||||
gap: 0.5rem;
|
gap: 0.5rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.filter-header-controls {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
/* Search Components */
|
/* Search Components */
|
||||||
.search-wrapper {
|
.search-wrapper {
|
||||||
position: relative;
|
position: relative;
|
||||||
@ -1315,6 +1321,64 @@ input[type="checkbox"] {
|
|||||||
color: var(--color-text);
|
color: var(--color-text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.collapse-toggle {
|
||||||
|
background: none;
|
||||||
|
border: 1px solid var(--color-border);
|
||||||
|
border-radius: 0.375rem;
|
||||||
|
color: var(--color-text-secondary);
|
||||||
|
cursor: pointer;
|
||||||
|
padding: 0.375rem;
|
||||||
|
transition: var(--transition-fast);
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
width: 32px;
|
||||||
|
height: 32px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.collapse-toggle:hover {
|
||||||
|
background-color: var(--color-bg-secondary);
|
||||||
|
border-color: var(--color-primary);
|
||||||
|
color: var(--color-text);
|
||||||
|
}
|
||||||
|
|
||||||
|
.collapse-toggle svg {
|
||||||
|
transition: transform var(--transition-medium);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* When expanded, rotate the chevron */
|
||||||
|
.collapse-toggle[data-collapsed="false"] svg {
|
||||||
|
transform: rotate(180deg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Collapsible Content */
|
||||||
|
.collapsible-content {
|
||||||
|
overflow: hidden;
|
||||||
|
transition: all var(--transition-medium);
|
||||||
|
opacity: 1;
|
||||||
|
max-height: 1000px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.collapsible-content.hidden {
|
||||||
|
opacity: 0;
|
||||||
|
max-height: 0;
|
||||||
|
padding-top: 0;
|
||||||
|
padding-bottom: 0;
|
||||||
|
margin-top: 0;
|
||||||
|
margin-bottom: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Smooth animation for expanding content */
|
||||||
|
.collapsible-content:not(.hidden) {
|
||||||
|
animation: expandContent 0.3s ease-out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Content spacing when expanded */
|
||||||
|
.collapsible-content:not(.hidden) .advanced-filters-compact,
|
||||||
|
.collapsible-content:not(.hidden) .tag-section {
|
||||||
|
padding-top: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
/* Filter Grids & Groups */
|
/* Filter Grids & Groups */
|
||||||
.filter-grid-compact {
|
.filter-grid-compact {
|
||||||
display: grid;
|
display: grid;
|
||||||
@ -1429,11 +1493,9 @@ input[type="checkbox"] {
|
|||||||
user-select: none;
|
user-select: none;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Tag System */
|
.tag-section .tag-controls {
|
||||||
.tag-section {
|
order: -1;
|
||||||
display: flex;
|
margin-bottom: 0.75rem;
|
||||||
flex-direction: column;
|
|
||||||
gap: 1rem;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.selected-tags {
|
.selected-tags {
|
||||||
@ -1574,6 +1636,14 @@ input[type="checkbox"] {
|
|||||||
transition: var(--transition-fast);
|
transition: var(--transition-fast);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.filter-reset {
|
||||||
|
width: 32px;
|
||||||
|
height: 32px;
|
||||||
|
display: inline-flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
}
|
||||||
|
|
||||||
.filter-reset:hover {
|
.filter-reset:hover {
|
||||||
background-color: var(--color-bg-secondary);
|
background-color: var(--color-bg-secondary);
|
||||||
border-color: var(--color-warning);
|
border-color: var(--color-warning);
|
||||||
@ -1591,13 +1661,6 @@ input[type="checkbox"] {
|
|||||||
opacity: 0.9;
|
opacity: 0.9;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Tag Controls */
|
|
||||||
.tag-controls {
|
|
||||||
display: flex;
|
|
||||||
align-items: center;
|
|
||||||
gap: 0.75rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
.tag-toggle {
|
.tag-toggle {
|
||||||
padding: 0.375rem 0.75rem;
|
padding: 0.375rem 0.75rem;
|
||||||
border: 1px solid var(--color-border);
|
border: 1px solid var(--color-border);
|
||||||
@ -1818,6 +1881,130 @@ input[type="checkbox"] {
|
|||||||
border-left-color: var(--color-warning);
|
border-left-color: var(--color-warning);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Add to src/styles/global.css - Micro-Task Progress Styles */
|
||||||
|
|
||||||
|
/* Micro-task progress indicator */
|
||||||
|
.micro-task-progress {
|
||||||
|
background-color: var(--color-bg-secondary);
|
||||||
|
border: 1px solid var(--color-border);
|
||||||
|
border-radius: 0.5rem;
|
||||||
|
padding: 1rem;
|
||||||
|
margin: 1rem 0;
|
||||||
|
transition: var(--transition-fast);
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-task-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
margin-bottom: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-task-label {
|
||||||
|
font-weight: 600;
|
||||||
|
color: var(--color-primary);
|
||||||
|
font-size: 0.875rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-task-counter {
|
||||||
|
background-color: var(--color-primary);
|
||||||
|
color: white;
|
||||||
|
padding: 0.25rem 0.5rem;
|
||||||
|
border-radius: 1rem;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-task-steps {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-step {
|
||||||
|
background-color: var(--color-bg);
|
||||||
|
border: 1px solid var(--color-border);
|
||||||
|
border-radius: 0.375rem;
|
||||||
|
padding: 0.5rem;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
text-align: center;
|
||||||
|
transition: var(--transition-fast);
|
||||||
|
opacity: 0.6;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-step.active {
|
||||||
|
background-color: var(--color-primary);
|
||||||
|
color: white;
|
||||||
|
border-color: var(--color-primary);
|
||||||
|
opacity: 1;
|
||||||
|
transform: scale(1.05);
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-step.completed {
|
||||||
|
background-color: var(--color-accent);
|
||||||
|
color: white;
|
||||||
|
border-color: var(--color-accent);
|
||||||
|
opacity: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-step.failed {
|
||||||
|
background-color: var(--color-error);
|
||||||
|
color: white;
|
||||||
|
border-color: var(--color-error);
|
||||||
|
opacity: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Enhanced queue status for micro-tasks */
|
||||||
|
.queue-status-card.micro-task-mode {
|
||||||
|
border-left: 4px solid var(--color-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.queue-status-card.micro-task-mode .queue-header {
|
||||||
|
background: linear-gradient(135deg, var(--color-primary) 0%, var(--color-accent) 100%);
|
||||||
|
color: white;
|
||||||
|
margin: -1rem -1rem 1rem -1rem;
|
||||||
|
padding: 1rem;
|
||||||
|
border-radius: 0.5rem 0.5rem 0 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Mobile responsive adjustments */
|
||||||
|
@media (max-width: 768px) {
|
||||||
|
.micro-task-steps {
|
||||||
|
grid-template-columns: repeat(2, 1fr);
|
||||||
|
gap: 0.375rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-step {
|
||||||
|
font-size: 0.6875rem;
|
||||||
|
padding: 0.375rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-task-header {
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Animation for micro-task progress */
|
||||||
|
@keyframes micro-task-pulse {
|
||||||
|
0%, 100% { opacity: 1; }
|
||||||
|
50% { opacity: 0.7; }
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-step.active {
|
||||||
|
animation: micro-task-pulse 2s ease-in-out infinite;
|
||||||
|
}
|
||||||
|
|
||||||
|
@keyframes micro-task-complete {
|
||||||
|
0% { transform: scale(1); }
|
||||||
|
50% { transform: scale(1.1); }
|
||||||
|
100% { transform: scale(1); }
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-step.completed {
|
||||||
|
animation: micro-task-complete 0.6s ease-out;
|
||||||
|
}
|
||||||
|
|
||||||
/* ===================================================================
|
/* ===================================================================
|
||||||
17. WORKFLOW SYSTEM (CONSOLIDATED)
|
17. WORKFLOW SYSTEM (CONSOLIDATED)
|
||||||
================================================================= */
|
================================================================= */
|
||||||
@ -2267,6 +2454,17 @@ footer {
|
|||||||
to { opacity: 1; }
|
to { opacity: 1; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@keyframes expandContent {
|
||||||
|
from {
|
||||||
|
opacity: 0;
|
||||||
|
transform: translateY(-10px);
|
||||||
|
}
|
||||||
|
to {
|
||||||
|
opacity: 1;
|
||||||
|
transform: translateY(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@keyframes fadeInUp {
|
@keyframes fadeInUp {
|
||||||
from {
|
from {
|
||||||
opacity: 0;
|
opacity: 0;
|
||||||
@ -3261,6 +3459,23 @@ footer {
|
|||||||
.view-toggle {
|
.view-toggle {
|
||||||
justify-content: center;
|
justify-content: center;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.filter-header-controls {
|
||||||
|
gap: 0.375rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.collapse-toggle,
|
||||||
|
.filter-reset {
|
||||||
|
width: 28px;
|
||||||
|
height: 28px;
|
||||||
|
padding: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.collapse-toggle svg,
|
||||||
|
.filter-reset svg {
|
||||||
|
width: 14px;
|
||||||
|
height: 14px;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@media (width <= 640px) {
|
@media (width <= 640px) {
|
||||||
@ -3395,6 +3610,21 @@ footer {
|
|||||||
.filter-card-compact {
|
.filter-card-compact {
|
||||||
padding: 0.5rem;
|
padding: 0.5rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.filter-header-compact {
|
||||||
|
flex-wrap: wrap;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.filter-header-compact h3 {
|
||||||
|
flex: 1 1 100%;
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.filter-header-controls {
|
||||||
|
flex: 1 1 100%;
|
||||||
|
justify-content: flex-end;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
882
src/utils/aiPipeline.ts
Normal file
882
src/utils/aiPipeline.ts
Normal file
@ -0,0 +1,882 @@
|
|||||||
|
// src/utils/aiPipeline.ts - FIXED: Critical error corrections
|
||||||
|
|
||||||
|
import { getCompressedToolsDataForAI } from './dataService.js';
|
||||||
|
import { embeddingsService, type EmbeddingData } from './embeddings.js';
|
||||||
|
|
||||||
|
interface AIConfig {
|
||||||
|
endpoint: string;
|
||||||
|
apiKey: string;
|
||||||
|
model: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface MicroTaskResult {
|
||||||
|
taskType: string;
|
||||||
|
content: string;
|
||||||
|
processingTimeMs: number;
|
||||||
|
success: boolean;
|
||||||
|
error?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface AnalysisResult {
|
||||||
|
recommendation: any;
|
||||||
|
processingStats: {
|
||||||
|
embeddingsUsed: boolean;
|
||||||
|
candidatesFromEmbeddings: number;
|
||||||
|
finalSelectedItems: number;
|
||||||
|
processingTimeMs: number;
|
||||||
|
microTasksCompleted: number;
|
||||||
|
microTasksFailed: number;
|
||||||
|
contextContinuityUsed: boolean;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
interface AnalysisContext {
|
||||||
|
userQuery: string;
|
||||||
|
mode: string;
|
||||||
|
filteredData: any;
|
||||||
|
contextHistory: string[];
|
||||||
|
|
||||||
|
// FIXED: Add max context length tracking
|
||||||
|
maxContextLength: number;
|
||||||
|
currentContextLength: number;
|
||||||
|
|
||||||
|
scenarioAnalysis?: string;
|
||||||
|
problemAnalysis?: string;
|
||||||
|
investigationApproach?: string;
|
||||||
|
criticalConsiderations?: string;
|
||||||
|
selectedTools?: Array<{tool: any, phase: string, priority: string, justification?: string}>;
|
||||||
|
backgroundKnowledge?: Array<{concept: any, relevance: string}>;
|
||||||
|
|
||||||
|
// FIXED: Add seen tools tracking to prevent duplicates
|
||||||
|
seenToolNames: Set<string>;
|
||||||
|
}
|
||||||
|
|
||||||
|
class ImprovedMicroTaskAIPipeline {
|
||||||
|
private config: AIConfig;
|
||||||
|
private maxSelectedItems: number;
|
||||||
|
private embeddingCandidates: number;
|
||||||
|
private similarityThreshold: number;
|
||||||
|
private microTaskDelay: number;
|
||||||
|
|
||||||
|
// FIXED: Add proper token management
|
||||||
|
private maxContextTokens: number;
|
||||||
|
private maxPromptTokens: number;
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
this.config = {
|
||||||
|
endpoint: this.getEnv('AI_ANALYZER_ENDPOINT'),
|
||||||
|
apiKey: this.getEnv('AI_ANALYZER_API_KEY'),
|
||||||
|
model: this.getEnv('AI_ANALYZER_MODEL')
|
||||||
|
};
|
||||||
|
|
||||||
|
this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '60', 10);
|
||||||
|
this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '60', 10);
|
||||||
|
this.similarityThreshold = 0.3;
|
||||||
|
this.microTaskDelay = parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10);
|
||||||
|
|
||||||
|
// FIXED: Token management
|
||||||
|
this.maxContextTokens = parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10);
|
||||||
|
this.maxPromptTokens = parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
private getEnv(key: string): string {
|
||||||
|
const value = process.env[key];
|
||||||
|
if (!value) {
|
||||||
|
throw new Error(`Missing environment variable: ${key}`);
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXED: Estimate token count (rough approximation)
|
||||||
|
private estimateTokens(text: string): number {
|
||||||
|
return Math.ceil(text.length / 4); // Rough estimate: 4 chars per token
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXED: Manage context history with token limits
|
||||||
|
private addToContextHistory(context: AnalysisContext, newEntry: string): void {
|
||||||
|
const entryTokens = this.estimateTokens(newEntry);
|
||||||
|
|
||||||
|
// Add new entry
|
||||||
|
context.contextHistory.push(newEntry);
|
||||||
|
context.currentContextLength += entryTokens;
|
||||||
|
|
||||||
|
// Prune old entries if exceeding limits
|
||||||
|
while (context.currentContextLength > this.maxContextTokens && context.contextHistory.length > 1) {
|
||||||
|
const removed = context.contextHistory.shift()!;
|
||||||
|
context.currentContextLength -= this.estimateTokens(removed);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXED: Safe JSON parsing with validation
|
||||||
|
private safeParseJSON(jsonString: string, fallback: any = null): any {
|
||||||
|
try {
|
||||||
|
const cleaned = jsonString
|
||||||
|
.replace(/^```json\s*/i, '')
|
||||||
|
.replace(/\s*```\s*$/g, '')
|
||||||
|
.trim();
|
||||||
|
|
||||||
|
const parsed = JSON.parse(cleaned);
|
||||||
|
return parsed;
|
||||||
|
} catch (error) {
|
||||||
|
console.warn('[AI PIPELINE] JSON parsing failed:', error.message);
|
||||||
|
console.warn('[AI PIPELINE] Raw content:', jsonString.slice(0, 200));
|
||||||
|
return fallback;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXED: Add tool deduplication
|
||||||
|
private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string): boolean {
|
||||||
|
if (context.seenToolNames.has(tool.name)) {
|
||||||
|
console.log(`[AI PIPELINE] Skipping duplicate tool: ${tool.name}`);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
context.seenToolNames.add(tool.name);
|
||||||
|
if (!context.selectedTools) context.selectedTools = [];
|
||||||
|
|
||||||
|
context.selectedTools.push({
|
||||||
|
tool,
|
||||||
|
phase,
|
||||||
|
priority,
|
||||||
|
justification
|
||||||
|
});
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async getIntelligentCandidates(userQuery: string, toolsData: any, mode: string) {
|
||||||
|
let candidateTools: any[] = [];
|
||||||
|
let candidateConcepts: any[] = [];
|
||||||
|
let selectionMethod = 'unknown';
|
||||||
|
|
||||||
|
if (embeddingsService.isEnabled()) {
|
||||||
|
const similarItems = await embeddingsService.findSimilar(
|
||||||
|
userQuery,
|
||||||
|
this.embeddingCandidates,
|
||||||
|
this.similarityThreshold
|
||||||
|
);
|
||||||
|
|
||||||
|
const toolNames = new Set<string>();
|
||||||
|
const conceptNames = new Set<string>();
|
||||||
|
|
||||||
|
similarItems.forEach(item => {
|
||||||
|
if (item.type === 'tool') toolNames.add(item.name);
|
||||||
|
if (item.type === 'concept') conceptNames.add(item.name);
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log(`[IMPROVED PIPELINE] Embeddings found: ${toolNames.size} tools, ${conceptNames.size} concepts`);
|
||||||
|
|
||||||
|
// FIXED: Use your expected flow - get full data of embeddings results
|
||||||
|
if (toolNames.size >= 15) { // Reasonable threshold for quality
|
||||||
|
candidateTools = toolsData.tools.filter((tool: any) => toolNames.has(tool.name));
|
||||||
|
candidateConcepts = toolsData.concepts.filter((concept: any) => conceptNames.has(concept.name));
|
||||||
|
selectionMethod = 'embeddings_candidates';
|
||||||
|
|
||||||
|
console.log(`[IMPROVED PIPELINE] Using embeddings candidates: ${candidateTools.length} tools`);
|
||||||
|
} else {
|
||||||
|
console.log(`[IMPROVED PIPELINE] Embeddings insufficient (${toolNames.size} < 15), using full dataset`);
|
||||||
|
candidateTools = toolsData.tools;
|
||||||
|
candidateConcepts = toolsData.concepts;
|
||||||
|
selectionMethod = 'full_dataset';
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
console.log(`[IMPROVED PIPELINE] Embeddings disabled, using full dataset`);
|
||||||
|
candidateTools = toolsData.tools;
|
||||||
|
candidateConcepts = toolsData.concepts;
|
||||||
|
selectionMethod = 'full_dataset';
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXED: NOW AI ANALYZES FULL DATA of the candidates
|
||||||
|
console.log(`[IMPROVED PIPELINE] AI will analyze FULL DATA of ${candidateTools.length} candidate tools`);
|
||||||
|
const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod);
|
||||||
|
|
||||||
|
return {
|
||||||
|
tools: finalSelection.selectedTools,
|
||||||
|
concepts: finalSelection.selectedConcepts,
|
||||||
|
domains: toolsData.domains,
|
||||||
|
phases: toolsData.phases,
|
||||||
|
'domain-agnostic-software': toolsData['domain-agnostic-software']
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
// src/utils/aiPipeline.ts - FIXED: De-biased AI selection prompt
|
||||||
|
|
||||||
|
private async aiSelectionWithFullData(
|
||||||
|
userQuery: string,
|
||||||
|
candidateTools: any[],
|
||||||
|
candidateConcepts: any[],
|
||||||
|
mode: string,
|
||||||
|
selectionMethod: string
|
||||||
|
) {
|
||||||
|
const modeInstruction = mode === 'workflow'
|
||||||
|
? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases. Select 15-25 tools that cover the full investigation lifecycle.'
|
||||||
|
: 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem. Select 3-8 tools that are most relevant and effective.';
|
||||||
|
|
||||||
|
// FIXED: Give AI the COMPLETE tool data, not truncated
|
||||||
|
const toolsWithFullData = candidateTools.map((tool: any) => ({
|
||||||
|
name: tool.name,
|
||||||
|
type: tool.type,
|
||||||
|
description: tool.description,
|
||||||
|
domains: tool.domains,
|
||||||
|
phases: tool.phases,
|
||||||
|
platforms: tool.platforms || [],
|
||||||
|
tags: tool.tags || [],
|
||||||
|
skillLevel: tool.skillLevel,
|
||||||
|
license: tool.license,
|
||||||
|
accessType: tool.accessType,
|
||||||
|
projectUrl: tool.projectUrl,
|
||||||
|
knowledgebase: tool.knowledgebase,
|
||||||
|
related_concepts: tool.related_concepts || [],
|
||||||
|
related_software: tool.related_software || []
|
||||||
|
}));
|
||||||
|
|
||||||
|
const conceptsWithFullData = candidateConcepts.map((concept: any) => ({
|
||||||
|
name: concept.name,
|
||||||
|
type: 'concept',
|
||||||
|
description: concept.description,
|
||||||
|
domains: concept.domains,
|
||||||
|
phases: concept.phases,
|
||||||
|
tags: concept.tags || [],
|
||||||
|
skillLevel: concept.skillLevel,
|
||||||
|
related_concepts: concept.related_concepts || [],
|
||||||
|
related_software: concept.related_software || []
|
||||||
|
}));
|
||||||
|
|
||||||
|
const prompt = `You are a DFIR expert with access to the complete forensics tool database. You need to select the most relevant tools and concepts for this specific query.
|
||||||
|
|
||||||
|
SELECTION METHOD: ${selectionMethod}
|
||||||
|
${selectionMethod === 'embeddings_candidates' ?
|
||||||
|
'These tools were pre-filtered by vector similarity, so they are already relevant. Your job is to select the BEST ones from this relevant set.' :
|
||||||
|
'You have access to the full tool database. Select the most relevant tools for the query.'}
|
||||||
|
|
||||||
|
${modeInstruction}
|
||||||
|
|
||||||
|
USER QUERY: "${userQuery}"
|
||||||
|
|
||||||
|
CRITICAL SELECTION PRINCIPLES:
|
||||||
|
1. **CONTEXT OVER POPULARITY**: Don't default to "famous" tools like Volatility, Wireshark, or Autopsy just because they're well-known. Choose based on SPECIFIC scenario needs.
|
||||||
|
|
||||||
|
2. **METHODOLOGY vs SOFTWARE**:
|
||||||
|
- For RAPID/URGENT scenarios → Prioritize METHODS and rapid response approaches
|
||||||
|
- For TIME-CRITICAL incidents → Choose triage methods over deep analysis tools
|
||||||
|
- For COMPREHENSIVE analysis → Then consider detailed software tools
|
||||||
|
- METHODS (type: "method") are often better than SOFTWARE for procedural guidance
|
||||||
|
|
||||||
|
3. **SCENARIO-SPECIFIC LOGIC**:
|
||||||
|
- "Rapid/Quick/Urgent/Triage" scenarios → Rapid Incident Response and Triage METHOD > Volatility
|
||||||
|
- "Industrial/SCADA/ICS" scenarios → Specialized ICS tools > generic network tools
|
||||||
|
- "Mobile/Android/iOS" scenarios → Mobile-specific tools > desktop forensics tools
|
||||||
|
- "Memory analysis needed urgently" → Quick memory tools/methods > comprehensive Volatility analysis
|
||||||
|
|
||||||
|
4. **AVOID TOOL BIAS**:
|
||||||
|
- Volatility is NOT always the answer for memory analysis
|
||||||
|
- Wireshark is NOT always the answer for network analysis
|
||||||
|
- Autopsy is NOT always the answer for disk analysis
|
||||||
|
- Consider lighter, faster, more appropriate alternatives
|
||||||
|
|
||||||
|
AVAILABLE TOOLS (with complete data):
|
||||||
|
${JSON.stringify(toolsWithFullData.slice(0, 30), null, 2)}
|
||||||
|
|
||||||
|
AVAILABLE CONCEPTS (with complete data):
|
||||||
|
${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}
|
||||||
|
|
||||||
|
ANALYSIS INSTRUCTIONS:
|
||||||
|
1. Read the FULL description of each tool/concept
|
||||||
|
2. Consider ALL tags, platforms, related tools, and metadata
|
||||||
|
3. **MATCH URGENCY LEVEL**: Rapid scenarios need rapid methods, not deep analysis tools
|
||||||
|
4. **MATCH SPECIFICITY**: Specialized scenarios need specialized tools, not generic ones
|
||||||
|
5. **CONSIDER TYPE**: Methods provide procedural guidance, software provides technical capability
|
||||||
|
6. For SCADA/ICS queries: prioritize specialized ICS tools over generic network tools
|
||||||
|
7. For mobile queries: prioritize mobile-specific tools over desktop tools
|
||||||
|
8. For rapid/urgent queries: prioritize methodology and triage approaches
|
||||||
|
|
||||||
|
BIAS PREVENTION:
|
||||||
|
- If query mentions "rapid", "quick", "urgent", "triage" → Strongly favor METHODS over deep analysis SOFTWARE
|
||||||
|
- If query mentions specific technologies (SCADA, Android, etc.) → Strongly favor specialized tools
|
||||||
|
- Don't recommend Volatility unless deep memory analysis is specifically needed AND time allows
|
||||||
|
- Don't recommend generic tools when specialized ones are available
|
||||||
|
- Consider the SKILL LEVEL and TIME CONSTRAINTS implied by the query
|
||||||
|
|
||||||
|
Select the most relevant items (max ${this.maxSelectedItems} total).
|
||||||
|
|
||||||
|
Respond with ONLY this JSON format:
|
||||||
|
{
|
||||||
|
"selectedTools": ["Tool Name 1", "Tool Name 2", ...],
|
||||||
|
"selectedConcepts": ["Concept Name 1", "Concept Name 2", ...],
|
||||||
|
"reasoning": "Detailed explanation of why these specific tools were selected for this query, addressing why certain popular tools were NOT selected if they were inappropriate for the scenario context"
|
||||||
|
}`;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await this.callAI(prompt, 2500); // More tokens for bias prevention logic
|
||||||
|
|
||||||
|
const result = this.safeParseJSON(response, null);
|
||||||
|
|
||||||
|
if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
|
||||||
|
console.error('[IMPROVED PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
|
||||||
|
throw new Error('AI selection failed to return valid tool selection');
|
||||||
|
}
|
||||||
|
|
||||||
|
const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
|
||||||
|
if (totalSelected === 0) {
|
||||||
|
console.error('[IMPROVED PIPELINE] AI selection returned no tools');
|
||||||
|
throw new Error('AI selection returned empty selection');
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[IMPROVED PIPELINE] AI selected: ${result.selectedTools.length} tools, ${result.selectedConcepts.length} concepts`);
|
||||||
|
console.log(`[IMPROVED PIPELINE] AI reasoning: ${result.reasoning}`);
|
||||||
|
|
||||||
|
// Return the actual tool/concept objects
|
||||||
|
const selectedTools = candidateTools.filter(tool => result.selectedTools.includes(tool.name));
|
||||||
|
const selectedConcepts = candidateConcepts.filter(concept => result.selectedConcepts.includes(concept.name));
|
||||||
|
|
||||||
|
console.log(`[IMPROVED PIPELINE] Final selection: ${selectedTools.length} tools with bias prevention applied`);
|
||||||
|
|
||||||
|
return {
|
||||||
|
selectedTools,
|
||||||
|
selectedConcepts
|
||||||
|
};
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[IMPROVED PIPELINE] AI selection failed:', error);
|
||||||
|
|
||||||
|
// Emergency fallback with bias awareness
|
||||||
|
console.log('[IMPROVED PIPELINE] Using emergency keyword-based selection');
|
||||||
|
return this.emergencyKeywordSelection(userQuery, candidateTools, candidateConcepts, mode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Fallback selection used when the AI selection call fails: scores each
 * candidate tool by naive keyword overlap between the query and the tool's
 * name/description/tags/platforms/domains, keeping only tools with at least
 * one match. Returns up to 20 tools in workflow mode (8 otherwise) plus the
 * first 3 candidate concepts unchanged.
 */
private emergencyKeywordSelection(userQuery: string, candidateTools: any[], candidateConcepts: any[], mode: string) {
  const queryLower = userQuery.toLowerCase();
  // Drop short stop-word-like tokens (<= 3 chars) before matching.
  const keywords = queryLower.split(/\s+/).filter(word => word.length > 3);

  // Score tools based on keyword matches in their full metadata.
  const scoredTools = candidateTools.map(tool => {
    // FIXED: missing fields previously concatenated as the literal string
    // "undefined", which spuriously matched queries containing that word.
    const toolText = [
      tool.name,
      tool.description,
      ...(tool.tags || []),
      ...(tool.platforms || []),
      ...(tool.domains || [])
    ].filter(Boolean).join(' ').toLowerCase();

    const score = keywords.reduce((acc, keyword) => {
      return acc + (toolText.includes(keyword) ? 1 : 0);
    }, 0);

    return { tool, score };
  }).filter(item => item.score > 0)
    .sort((a, b) => b.score - a.score);

  const maxTools = mode === 'workflow' ? 20 : 8;
  const selectedTools = scoredTools.slice(0, maxTools).map(item => item.tool);

  console.log(`[IMPROVED PIPELINE] Emergency selection: ${selectedTools.length} tools, keywords: ${keywords.slice(0, 5).join(', ')}`);

  return {
    selectedTools,
    selectedConcepts: candidateConcepts.slice(0, 3)
  };
}
|
||||||
|
|
||||||
|
private async delay(ms: number): Promise<void> {
|
||||||
|
return new Promise(resolve => setTimeout(resolve, ms));
|
||||||
|
}
|
||||||
|
|
||||||
|
private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens: number = 300): Promise<MicroTaskResult> {
|
||||||
|
const startTime = Date.now();
|
||||||
|
|
||||||
|
// FIXED: Build context prompt with token management
|
||||||
|
let contextPrompt = prompt;
|
||||||
|
if (context.contextHistory.length > 0) {
|
||||||
|
const contextSection = `BISHERIGE ANALYSE:\n${context.contextHistory.join('\n\n')}\n\nAKTUELLE AUFGABE:\n`;
|
||||||
|
const combinedPrompt = contextSection + prompt;
|
||||||
|
|
||||||
|
// Check if combined prompt exceeds limits
|
||||||
|
if (this.estimateTokens(combinedPrompt) <= this.maxPromptTokens) {
|
||||||
|
contextPrompt = combinedPrompt;
|
||||||
|
} else {
|
||||||
|
console.warn('[AI PIPELINE] Context too long, using prompt only');
|
||||||
|
// Could implement smarter context truncation here
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const response = await this.callAI(contextPrompt, maxTokens);
|
||||||
|
|
||||||
|
return {
|
||||||
|
taskType: 'micro-task',
|
||||||
|
content: response.trim(),
|
||||||
|
processingTimeMs: Date.now() - startTime,
|
||||||
|
success: true
|
||||||
|
};
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
return {
|
||||||
|
taskType: 'micro-task',
|
||||||
|
content: '',
|
||||||
|
processingTimeMs: Date.now() - startTime,
|
||||||
|
success: false,
|
||||||
|
error: error.message
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
|
||||||
|
const isWorkflow = context.mode === 'workflow';
|
||||||
|
|
||||||
|
const prompt = `Sie sind ein erfahrener DFIR-Experte. Analysieren Sie das folgende ${isWorkflow ? 'forensische Szenario' : 'technische Problem'}.
|
||||||
|
|
||||||
|
${isWorkflow ? 'FORENSISCHES SZENARIO' : 'TECHNISCHES PROBLEM'}: "${context.userQuery}"
|
||||||
|
|
||||||
|
Führen Sie eine systematische ${isWorkflow ? 'Szenario-Analyse' : 'Problem-Analyse'} durch und berücksichtigen Sie dabei:
|
||||||
|
|
||||||
|
${isWorkflow ?
|
||||||
|
`- Angriffsvektoren und Bedrohungsmodellierung nach MITRE ATT&CK
|
||||||
|
- Betroffene Systeme und kritische Infrastrukturen
|
||||||
|
- Zeitkritische Faktoren und Beweiserhaltung
|
||||||
|
- Forensische Artefakte und Datenquellen` :
|
||||||
|
`- Spezifische forensische Herausforderungen
|
||||||
|
- Verfügbare Datenquellen und deren Integrität
|
||||||
|
- Methodische Anforderungen für rechtssichere Analyse`
|
||||||
|
}
|
||||||
|
|
||||||
|
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählungen oder Markdown-Formatierung. Maximum 150 Wörter.`;
|
||||||
|
|
||||||
|
const result = await this.callMicroTaskAI(prompt, context, 220);
|
||||||
|
|
||||||
|
if (result.success) {
|
||||||
|
if (isWorkflow) {
|
||||||
|
context.scenarioAnalysis = result.content;
|
||||||
|
} else {
|
||||||
|
context.problemAnalysis = result.content;
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXED: Use new context management
|
||||||
|
this.addToContextHistory(context, `${isWorkflow ? 'Szenario' : 'Problem'}-Analyse: ${result.content.slice(0, 200)}...`);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async generateApproach(context: AnalysisContext): Promise<MicroTaskResult> {
|
||||||
|
const isWorkflow = context.mode === 'workflow';
|
||||||
|
|
||||||
|
const prompt = `Basierend auf der Analyse entwickeln Sie einen fundierten ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} nach NIST SP 800-86 Methodik.
|
||||||
|
|
||||||
|
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
|
||||||
|
|
||||||
|
Entwickeln Sie einen systematischen ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} unter Berücksichtigung von:
|
||||||
|
|
||||||
|
${isWorkflow ?
|
||||||
|
`- Triage-Prioritäten nach forensischer Dringlichkeit
|
||||||
|
- Phasenabfolge nach NIST-Methodik
|
||||||
|
- Kontaminationsvermeidung und forensische Isolierung` :
|
||||||
|
`- Methodik-Auswahl nach wissenschaftlichen Kriterien
|
||||||
|
- Validierung und Verifizierung der gewählten Ansätze
|
||||||
|
- Integration in bestehende forensische Workflows`
|
||||||
|
}
|
||||||
|
|
||||||
|
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 150 Wörter.`;
|
||||||
|
|
||||||
|
const result = await this.callMicroTaskAI(prompt, context, 220);
|
||||||
|
|
||||||
|
if (result.success) {
|
||||||
|
context.investigationApproach = result.content;
|
||||||
|
this.addToContextHistory(context, `${isWorkflow ? 'Untersuchungs' : 'Lösungs'}ansatz: ${result.content.slice(0, 200)}...`);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async generateCriticalConsiderations(context: AnalysisContext): Promise<MicroTaskResult> {
|
||||||
|
const isWorkflow = context.mode === 'workflow';
|
||||||
|
|
||||||
|
const prompt = `Identifizieren Sie ${isWorkflow ? 'kritische forensische Überlegungen' : 'wichtige methodische Voraussetzungen'} für diesen Fall.
|
||||||
|
|
||||||
|
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
|
||||||
|
|
||||||
|
Berücksichtigen Sie folgende forensische Aspekte:
|
||||||
|
|
||||||
|
${isWorkflow ?
|
||||||
|
`- Time-sensitive evidence preservation
|
||||||
|
- Chain of custody requirements und rechtliche Verwertbarkeit
|
||||||
|
- Incident containment vs. evidence preservation Dilemma
|
||||||
|
- Privacy- und Compliance-Anforderungen` :
|
||||||
|
`- Tool-Validierung und Nachvollziehbarkeit
|
||||||
|
- False positive/negative Risiken bei der gewählten Methodik
|
||||||
|
- Qualifikationsanforderungen für die Durchführung
|
||||||
|
- Dokumentations- und Reporting-Standards`
|
||||||
|
}
|
||||||
|
|
||||||
|
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.`;
|
||||||
|
|
||||||
|
const result = await this.callMicroTaskAI(prompt, context, 180);
|
||||||
|
|
||||||
|
if (result.success) {
|
||||||
|
context.criticalConsiderations = result.content;
|
||||||
|
this.addToContextHistory(context, `Kritische Überlegungen: ${result.content.slice(0, 200)}...`);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async selectToolsForPhase(context: AnalysisContext, phase: any): Promise<MicroTaskResult> {
|
||||||
|
const phaseTools = context.filteredData.tools.filter((tool: any) =>
|
||||||
|
tool.phases && tool.phases.includes(phase.id)
|
||||||
|
);
|
||||||
|
|
||||||
|
if (phaseTools.length === 0) {
|
||||||
|
return {
|
||||||
|
taskType: 'tool-selection',
|
||||||
|
content: JSON.stringify([]),
|
||||||
|
processingTimeMs: 0,
|
||||||
|
success: true
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const prompt = `Wählen Sie 2-3 Methoden/Tools für die Phase "${phase.name}" basierend auf objektiven, fallbezogenen Kriterien.
|
||||||
|
|
||||||
|
SZENARIO: "${context.userQuery}"
|
||||||
|
|
||||||
|
VERFÜGBARE TOOLS FÜR ${phase.name.toUpperCase()}:
|
||||||
|
${phaseTools.map((tool: any) => `- ${tool.name}: ${tool.description.slice(0, 100)}...`).join('\n')}
|
||||||
|
|
||||||
|
Wählen Sie Methoden/Tools nach forensischen Kriterien aus:
|
||||||
|
- Court admissibility und Chain of Custody Kompatibilität
|
||||||
|
- Integration in forensische Standard-Workflows
|
||||||
|
- Reproduzierbarkeit und Dokumentationsqualität
|
||||||
|
- Objektivität
|
||||||
|
|
||||||
|
Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"toolName": "Exakter Methoden/Tool-Name",
|
||||||
|
"priority": "high|medium|low",
|
||||||
|
"justification": "Objektive Begründung warum diese Methode/Tool für das spezifische Szenario besser geeignet ist"
|
||||||
|
}
|
||||||
|
]`;
|
||||||
|
|
||||||
|
const result = await this.callMicroTaskAI(prompt, context, 450);
|
||||||
|
|
||||||
|
if (result.success) {
|
||||||
|
// FIXED: Safe JSON parsing with validation
|
||||||
|
const selections = this.safeParseJSON(result.content, []);
|
||||||
|
|
||||||
|
if (Array.isArray(selections)) {
|
||||||
|
const validSelections = selections.filter((sel: any) =>
|
||||||
|
sel.toolName && phaseTools.some((tool: any) => tool.name === sel.toolName)
|
||||||
|
);
|
||||||
|
|
||||||
|
validSelections.forEach((sel: any) => {
|
||||||
|
const tool = phaseTools.find((t: any) => t.name === sel.toolName);
|
||||||
|
if (tool) {
|
||||||
|
// FIXED: Use deduplication helper
|
||||||
|
this.addToolToSelection(context, tool, phase.id, sel.priority, sel.justification);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
|
||||||
|
const prompt = `Bewerten Sie diese Methode/Tool fallbezogen für das spezifische Problem nach forensischen Qualitätskriterien.
|
||||||
|
|
||||||
|
PROBLEM: "${context.userQuery}"
|
||||||
|
|
||||||
|
TOOL: ${tool.name}
|
||||||
|
BESCHREIBUNG: ${tool.description}
|
||||||
|
PLATTFORMEN: ${tool.platforms?.join(', ') || 'N/A'}
|
||||||
|
SKILL LEVEL: ${tool.skillLevel}
|
||||||
|
|
||||||
|
Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
|
||||||
|
{
|
||||||
|
"suitability_score": "high|medium|low",
|
||||||
|
"detailed_explanation": "Detaillierte forensische Begründung warum diese Methode/Tool das Problem löst",
|
||||||
|
"implementation_approach": "Konkrete methodische Schritte zur korrekten Anwendung für dieses spezifische Problem",
|
||||||
|
"pros": ["Forensischer Vorteil 1", "Validierter Vorteil 2"],
|
||||||
|
"cons": ["Methodische Limitation 1", "Potenzielle Schwäche 2"],
|
||||||
|
"alternatives": "Alternative Ansätze falls diese Methode/Tool nicht optimal ist"
|
||||||
|
}`;
|
||||||
|
|
||||||
|
const result = await this.callMicroTaskAI(prompt, context, 650);
|
||||||
|
|
||||||
|
if (result.success) {
|
||||||
|
// FIXED: Safe JSON parsing
|
||||||
|
const evaluation = this.safeParseJSON(result.content, {
|
||||||
|
suitability_score: 'medium',
|
||||||
|
detailed_explanation: 'Evaluation failed',
|
||||||
|
implementation_approach: '',
|
||||||
|
pros: [],
|
||||||
|
cons: [],
|
||||||
|
alternatives: ''
|
||||||
|
});
|
||||||
|
|
||||||
|
// FIXED: Use deduplication helper
|
||||||
|
this.addToolToSelection(context, {
|
||||||
|
...tool,
|
||||||
|
evaluation: {
|
||||||
|
...evaluation,
|
||||||
|
rank
|
||||||
|
}
|
||||||
|
}, 'evaluation', evaluation.suitability_score);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async selectBackgroundKnowledge(context: AnalysisContext): Promise<MicroTaskResult> {
|
||||||
|
const availableConcepts = context.filteredData.concepts;
|
||||||
|
|
||||||
|
if (availableConcepts.length === 0) {
|
||||||
|
return {
|
||||||
|
taskType: 'background-knowledge',
|
||||||
|
content: JSON.stringify([]),
|
||||||
|
processingTimeMs: 0,
|
||||||
|
success: true
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
const selectedToolNames = context.selectedTools?.map(st => st.tool.name) || [];
|
||||||
|
|
||||||
|
const prompt = `Wählen Sie relevante forensische Konzepte für das Verständnis der empfohlenen Methodik.
|
||||||
|
|
||||||
|
${context.mode === 'workflow' ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
|
||||||
|
EMPFOHLENE TOOLS: ${selectedToolNames.join(', ')}
|
||||||
|
|
||||||
|
VERFÜGBARE KONZEPTE:
|
||||||
|
${availableConcepts.slice(0, 15).map((concept: any) => `- ${concept.name}: ${concept.description.slice(0, 80)}...`).join('\n')}
|
||||||
|
|
||||||
|
Wählen Sie 2-4 Konzepte aus, die für das Verständnis der forensischen Methodik essentiell sind.
|
||||||
|
|
||||||
|
Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"conceptName": "Exakter Konzept-Name",
|
||||||
|
"relevance": "Forensische Relevanz: Warum dieses Konzept für das Verständnis der Methodik kritisch ist"
|
||||||
|
}
|
||||||
|
]`;
|
||||||
|
|
||||||
|
const result = await this.callMicroTaskAI(prompt, context, 400);
|
||||||
|
|
||||||
|
if (result.success) {
|
||||||
|
// FIXED: Safe JSON parsing
|
||||||
|
const selections = this.safeParseJSON(result.content, []);
|
||||||
|
|
||||||
|
if (Array.isArray(selections)) {
|
||||||
|
context.backgroundKnowledge = selections.filter((sel: any) =>
|
||||||
|
sel.conceptName && availableConcepts.some((concept: any) => concept.name === sel.conceptName)
|
||||||
|
).map((sel: any) => ({
|
||||||
|
concept: availableConcepts.find((c: any) => c.name === sel.conceptName),
|
||||||
|
relevance: sel.relevance
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async generateFinalRecommendations(context: AnalysisContext): Promise<MicroTaskResult> {
|
||||||
|
const isWorkflow = context.mode === 'workflow';
|
||||||
|
|
||||||
|
const prompt = isWorkflow ?
|
||||||
|
`Erstellen Sie eine forensisch fundierte Workflow-Empfehlung basierend auf DFIR-Prinzipien.
|
||||||
|
|
||||||
|
SZENARIO: "${context.userQuery}"
|
||||||
|
AUSGEWÄHLTE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Tools ausgewählt'}
|
||||||
|
|
||||||
|
Erstellen Sie konkrete methodische Workflow-Schritte für dieses spezifische Szenario unter Berücksichtigung forensischer Best Practices, Objektivität und rechtlicher Verwertbarkeit.
|
||||||
|
|
||||||
|
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.` :
|
||||||
|
|
||||||
|
`Erstellen Sie wichtige methodische Überlegungen für die korrekte Methoden-/Tool-Anwendung.
|
||||||
|
|
||||||
|
PROBLEM: "${context.userQuery}"
|
||||||
|
EMPFOHLENE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Methoden/Tools ausgewählt'}
|
||||||
|
|
||||||
|
Geben Sie kritische methodische Überlegungen, Validierungsanforderungen und Qualitätssicherungsmaßnahmen für die korrekte Anwendung der empfohlenen Methoden/Tools.
|
||||||
|
|
||||||
|
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 100 Wörter.`;
|
||||||
|
|
||||||
|
const result = await this.callMicroTaskAI(prompt, context, 180);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private async callAI(prompt: string, maxTokens: number = 1000): Promise<string> {
|
||||||
|
const response = await fetch(`${this.config.endpoint}/v1/chat/completions`, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': `Bearer ${this.config.apiKey}`
|
||||||
|
},
|
||||||
|
body: JSON.stringify({
|
||||||
|
model: this.config.model,
|
||||||
|
messages: [{ role: 'user', content: prompt }],
|
||||||
|
max_tokens: maxTokens,
|
||||||
|
temperature: 0.3
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
const errorText = await response.text();
|
||||||
|
throw new Error(`AI API error: ${response.status} - ${errorText}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = await response.json();
|
||||||
|
const content = data.choices?.[0]?.message?.content;
|
||||||
|
|
||||||
|
if (!content) {
|
||||||
|
throw new Error('No response from AI model');
|
||||||
|
}
|
||||||
|
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {
|
||||||
|
const startTime = Date.now();
|
||||||
|
let completedTasks = 0;
|
||||||
|
let failedTasks = 0;
|
||||||
|
|
||||||
|
console.log(`[IMPROVED PIPELINE] Starting ${mode} query processing with context continuity`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Stage 1: Get intelligent candidates (embeddings + AI selection)
|
||||||
|
const toolsData = await getCompressedToolsDataForAI();
|
||||||
|
const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode);
|
||||||
|
|
||||||
|
// FIXED: Initialize context with proper state management
|
||||||
|
const context: AnalysisContext = {
|
||||||
|
userQuery,
|
||||||
|
mode,
|
||||||
|
filteredData,
|
||||||
|
contextHistory: [],
|
||||||
|
maxContextLength: this.maxContextTokens,
|
||||||
|
currentContextLength: 0,
|
||||||
|
seenToolNames: new Set<string>() // FIXED: Add deduplication tracking
|
||||||
|
};
|
||||||
|
|
||||||
|
console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
|
||||||
|
|
||||||
|
// MICRO-TASK SEQUENCE
|
||||||
|
|
||||||
|
// Task 1: Scenario/Problem Analysis
|
||||||
|
const analysisResult = await this.analyzeScenario(context);
|
||||||
|
if (analysisResult.success) completedTasks++; else failedTasks++;
|
||||||
|
await this.delay(this.microTaskDelay);
|
||||||
|
|
||||||
|
// Task 2: Investigation/Solution Approach
|
||||||
|
const approachResult = await this.generateApproach(context);
|
||||||
|
if (approachResult.success) completedTasks++; else failedTasks++;
|
||||||
|
await this.delay(this.microTaskDelay);
|
||||||
|
|
||||||
|
// Task 3: Critical Considerations
|
||||||
|
const considerationsResult = await this.generateCriticalConsiderations(context);
|
||||||
|
if (considerationsResult.success) completedTasks++; else failedTasks++;
|
||||||
|
await this.delay(this.microTaskDelay);
|
||||||
|
|
||||||
|
// Task 4: Tool Selection/Evaluation (mode-dependent)
|
||||||
|
if (mode === 'workflow') {
|
||||||
|
// Select tools for each phase
|
||||||
|
const phases = toolsData.phases || [];
|
||||||
|
for (const phase of phases) {
|
||||||
|
const toolSelectionResult = await this.selectToolsForPhase(context, phase);
|
||||||
|
if (toolSelectionResult.success) completedTasks++; else failedTasks++;
|
||||||
|
await this.delay(this.microTaskDelay);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Evaluate top 3 tools for specific problem
|
||||||
|
const topTools = filteredData.tools.slice(0, 3);
|
||||||
|
for (let i = 0; i < topTools.length; i++) {
|
||||||
|
const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1);
|
||||||
|
if (evaluationResult.success) completedTasks++; else failedTasks++;
|
||||||
|
await this.delay(this.microTaskDelay);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Task 5: Background Knowledge Selection
|
||||||
|
const knowledgeResult = await this.selectBackgroundKnowledge(context);
|
||||||
|
if (knowledgeResult.success) completedTasks++; else failedTasks++;
|
||||||
|
await this.delay(this.microTaskDelay);
|
||||||
|
|
||||||
|
// Task 6: Final Recommendations
|
||||||
|
const finalResult = await this.generateFinalRecommendations(context);
|
||||||
|
if (finalResult.success) completedTasks++; else failedTasks++;
|
||||||
|
|
||||||
|
// Build final recommendation
|
||||||
|
const recommendation = this.buildRecommendation(context, mode, finalResult.content);
|
||||||
|
|
||||||
|
const processingStats = {
|
||||||
|
embeddingsUsed: embeddingsService.isEnabled(),
|
||||||
|
candidatesFromEmbeddings: filteredData.tools.length,
|
||||||
|
finalSelectedItems: (context.selectedTools?.length || 0) +
|
||||||
|
(context.backgroundKnowledge?.length || 0),
|
||||||
|
processingTimeMs: Date.now() - startTime,
|
||||||
|
microTasksCompleted: completedTasks,
|
||||||
|
microTasksFailed: failedTasks,
|
||||||
|
contextContinuityUsed: true
|
||||||
|
};
|
||||||
|
|
||||||
|
console.log(`[IMPROVED PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
|
||||||
|
console.log(`[IMPROVED PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);
|
||||||
|
|
||||||
|
return {
|
||||||
|
recommendation,
|
||||||
|
processingStats
|
||||||
|
};
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[IMPROVED PIPELINE] Processing failed:', error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build recommendation (same structure but using fixed context)
|
||||||
|
/**
 * Assembles the final recommendation object from the accumulated analysis
 * context. Workflow mode yields phase-oriented tool entries plus a workflow
 * suggestion; tool mode yields ranked per-tool evaluations plus additional
 * considerations. `finalContent` is the text from the last micro-task.
 */
private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any {
  const isWorkflow = mode === 'workflow';

  // Mode-specific analysis field name and value.
  const analysisKey = isWorkflow ? 'scenario_analysis' : 'problem_analysis';
  const analysisValue = isWorkflow ? context.scenarioAnalysis : context.problemAnalysis;

  const base = {
    [analysisKey]: analysisValue,
    investigation_approach: context.investigationApproach,
    critical_considerations: context.criticalConsiderations,
    background_knowledge: (context.backgroundKnowledge || []).map(bk => ({
      concept_name: bk.concept.name,
      relevance: bk.relevance
    }))
  };

  const selected = context.selectedTools || [];

  if (isWorkflow) {
    return {
      ...base,
      recommended_tools: selected.map(st => ({
        name: st.tool.name,
        phase: st.phase,
        priority: st.priority,
        justification: st.justification || `Empfohlen für ${st.phase}`
      })),
      workflow_suggestion: finalContent
    };
  }

  return {
    ...base,
    recommended_tools: selected.map(st => ({
      name: st.tool.name,
      rank: st.tool.evaluation?.rank || 1,
      suitability_score: st.priority,
      detailed_explanation: st.tool.evaluation?.detailed_explanation || '',
      implementation_approach: st.tool.evaluation?.implementation_approach || '',
      pros: st.tool.evaluation?.pros || [],
      cons: st.tool.evaluation?.cons || [],
      alternatives: st.tool.evaluation?.alternatives || ''
    })),
    additional_considerations: finalContent
  };
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Module-level singleton: every importer shares this one pipeline instance.
const aiPipeline = new ImprovedMicroTaskAIPipeline();

export { aiPipeline, type AnalysisResult };
|
@ -1,3 +1,4 @@
|
|||||||
|
// src/utils/dataService.ts - Enhanced for micro-task AI pipeline
|
||||||
import { promises as fs } from 'fs';
|
import { promises as fs } from 'fs';
|
||||||
import { load } from 'js-yaml';
|
import { load } from 'js-yaml';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
@ -21,30 +22,44 @@ const ToolSchema = z.object({
|
|||||||
accessType: z.string().optional().nullable(),
|
accessType: z.string().optional().nullable(),
|
||||||
'domain-agnostic-software': z.array(z.string()).optional().nullable(),
|
'domain-agnostic-software': z.array(z.string()).optional().nullable(),
|
||||||
related_concepts: z.array(z.string()).optional().nullable().default([]),
|
related_concepts: z.array(z.string()).optional().nullable().default([]),
|
||||||
related_software: z.array(z.string()).optional().nullable().default([]), // Added this line
|
related_software: z.array(z.string()).optional().nullable().default([]),
|
||||||
});
|
});
|
||||||
|
|
||||||
const ToolsDataSchema = z.object({
|
const ToolsDataSchema = z.object({
|
||||||
tools: z.array(ToolSchema),
|
tools: z.array(ToolSchema),
|
||||||
domains: z.array(z.object({
|
domains: z.array(z.object({
|
||||||
id: z.string(),
|
id: z.string(),
|
||||||
name: z.string()
|
name: z.string(),
|
||||||
|
description: z.string().optional()
|
||||||
})),
|
})),
|
||||||
phases: z.array(z.object({
|
phases: z.array(z.object({
|
||||||
id: z.string(),
|
id: z.string(),
|
||||||
name: z.string(),
|
name: z.string(),
|
||||||
description: z.string().optional()
|
description: z.string().optional(),
|
||||||
|
typical_tools: z.array(z.string()).optional().default([]),
|
||||||
|
key_activities: z.array(z.string()).optional().default([])
|
||||||
})),
|
})),
|
||||||
'domain-agnostic-software': z.array(z.object({
|
'domain-agnostic-software': z.array(z.object({
|
||||||
id: z.string(),
|
id: z.string(),
|
||||||
name: z.string(),
|
name: z.string(),
|
||||||
description: z.string().optional()
|
description: z.string().optional(),
|
||||||
|
use_cases: z.array(z.string()).optional().default([])
|
||||||
})).optional().default([]),
|
})).optional().default([]),
|
||||||
scenarios: z.array(z.object({
|
scenarios: z.array(z.object({
|
||||||
id: z.string(),
|
id: z.string(),
|
||||||
icon: z.string(),
|
icon: z.string(),
|
||||||
friendly_name: z.string()
|
friendly_name: z.string(),
|
||||||
|
description: z.string().optional(),
|
||||||
|
typical_phases: z.array(z.string()).optional().default([]),
|
||||||
|
complexity: z.enum(['low', 'medium', 'high']).optional()
|
||||||
})).optional().default([]),
|
})).optional().default([]),
|
||||||
|
skill_levels: z.object({
|
||||||
|
novice: z.string().optional(),
|
||||||
|
beginner: z.string().optional(),
|
||||||
|
intermediate: z.string().optional(),
|
||||||
|
advanced: z.string().optional(),
|
||||||
|
expert: z.string().optional()
|
||||||
|
}).optional().default({})
|
||||||
});
|
});
|
||||||
|
|
||||||
interface ToolsData {
|
interface ToolsData {
|
||||||
@ -53,20 +68,49 @@ interface ToolsData {
|
|||||||
phases: any[];
|
phases: any[];
|
||||||
'domain-agnostic-software': any[];
|
'domain-agnostic-software': any[];
|
||||||
scenarios: any[];
|
scenarios: any[];
|
||||||
|
skill_levels?: any;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface CompressedToolsData {
|
interface EnhancedCompressedToolsData {
|
||||||
tools: any[];
|
tools: any[];
|
||||||
concepts: any[];
|
concepts: any[];
|
||||||
domains: any[];
|
domains: any[];
|
||||||
phases: any[];
|
phases: any[];
|
||||||
'domain-agnostic-software': any[];
|
'domain-agnostic-software': any[];
|
||||||
|
scenarios?: any[]; // Optional for AI processing
|
||||||
|
skill_levels: any;
|
||||||
|
// Enhanced context for micro-tasks
|
||||||
|
domain_relationships: DomainRelationship[];
|
||||||
|
phase_dependencies: PhaseDependency[];
|
||||||
|
tool_compatibility_matrix: CompatibilityMatrix[];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Aggregated per-domain statistics handed to the AI pipeline
// (see generateDomainRelationships).
interface DomainRelationship {
  domain_id: string;
  // Number of tools whose `domains` list includes this domain.
  tool_count: number;
  // Up to five most frequent tags across the domain's tools.
  common_tags: string[];
  // Map of skill level -> number of tools at that level.
  skill_distribution: Record<string, number>;
}
|
||||||
|
|
||||||
|
// Ordering metadata for one investigation phase, derived from the
// sequential phase list (see generatePhaseDependencies).
interface PhaseDependency {
  phase_id: string;
  // 1-based position within the phase sequence.
  order: number;
  // Preceding phase id, or null for the first phase.
  depends_on: string | null;
  // Following phase id, or null for the last phase.
  enables: string | null;
  // True for phases that may overlap with others ('examination', 'analysis').
  is_parallel_capable: boolean;
  // Coarse duration estimate, e.g. 'hours-days'.
  typical_duration: string;
}
|
||||||
|
|
||||||
|
// One compatibility dimension for tools.
// NOTE(review): `groups` appears to map a group name to tool identifiers —
// confirm against generateToolCompatibilityMatrix (not fully visible here).
interface CompatibilityMatrix {
  type: string;
  groups: Record<string, string[]>;
}
|
||||||
|
|
||||||
let cachedData: ToolsData | null = null;
|
let cachedData: ToolsData | null = null;
|
||||||
let cachedRandomizedData: ToolsData | null = null;
|
let cachedRandomizedData: ToolsData | null = null;
|
||||||
let cachedCompressedData: CompressedToolsData | null = null;
|
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
|
||||||
let lastRandomizationDate: string | null = null;
|
let lastRandomizationDate: string | null = null;
|
||||||
|
let dataVersion: string | null = null;
|
||||||
|
|
||||||
function seededRandom(seed: number): () => number {
|
function seededRandom(seed: number): () => number {
|
||||||
let x = Math.sin(seed) * 10000;
|
let x = Math.sin(seed) * 10000;
|
||||||
@ -91,6 +135,115 @@ function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
|
|||||||
return shuffled;
|
return shuffled;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function generateDataVersion(data: any): string {
|
||||||
|
const str = JSON.stringify(data, Object.keys(data).sort());
|
||||||
|
let hash = 0;
|
||||||
|
for (let i = 0; i < str.length; i++) {
|
||||||
|
const char = str.charCodeAt(i);
|
||||||
|
hash = ((hash << 5) - hash) + char;
|
||||||
|
hash = hash & hash;
|
||||||
|
}
|
||||||
|
return Math.abs(hash).toString(36);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enhanced: Generate domain relationships for better AI understanding
|
||||||
|
function generateDomainRelationships(domains: any[], tools: any[]): DomainRelationship[] {
|
||||||
|
const relationships: DomainRelationship[] = [];
|
||||||
|
|
||||||
|
for (const domain of domains) {
|
||||||
|
const domainTools = tools.filter(tool =>
|
||||||
|
tool.domains && tool.domains.includes(domain.id)
|
||||||
|
);
|
||||||
|
|
||||||
|
const commonTags = domainTools
|
||||||
|
.flatMap(tool => tool.tags || [])
|
||||||
|
.reduce((acc: any, tag: string) => {
|
||||||
|
acc[tag] = (acc[tag] || 0) + 1;
|
||||||
|
return acc;
|
||||||
|
}, {});
|
||||||
|
|
||||||
|
const topTags = Object.entries(commonTags)
|
||||||
|
.sort(([,a], [,b]) => (b as number) - (a as number))
|
||||||
|
.slice(0, 5)
|
||||||
|
.map(([tag]) => tag);
|
||||||
|
|
||||||
|
relationships.push({
|
||||||
|
domain_id: domain.id,
|
||||||
|
tool_count: domainTools.length,
|
||||||
|
common_tags: topTags,
|
||||||
|
skill_distribution: domainTools.reduce((acc: any, tool: any) => {
|
||||||
|
acc[tool.skillLevel] = (acc[tool.skillLevel] || 0) + 1;
|
||||||
|
return acc;
|
||||||
|
}, {})
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return relationships;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enhanced: Generate phase dependencies
|
||||||
|
function generatePhaseDependencies(phases: any[]): PhaseDependency[] {
|
||||||
|
const dependencies: PhaseDependency[] = [];
|
||||||
|
|
||||||
|
for (let i = 0; i < phases.length; i++) {
|
||||||
|
const phase = phases[i];
|
||||||
|
const nextPhase = phases[i + 1];
|
||||||
|
const prevPhase = phases[i - 1];
|
||||||
|
|
||||||
|
dependencies.push({
|
||||||
|
phase_id: phase.id,
|
||||||
|
order: i + 1,
|
||||||
|
depends_on: prevPhase?.id || null,
|
||||||
|
enables: nextPhase?.id || null,
|
||||||
|
is_parallel_capable: ['examination', 'analysis'].includes(phase.id), // Some phases can run in parallel
|
||||||
|
typical_duration: phase.id === 'data-collection' ? 'hours-days' :
|
||||||
|
phase.id === 'examination' ? 'hours-weeks' :
|
||||||
|
phase.id === 'analysis' ? 'days-weeks' :
|
||||||
|
'hours-days'
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return dependencies;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enhanced: Generate tool compatibility matrix
|
||||||
|
function generateToolCompatibilityMatrix(tools: any[]): CompatibilityMatrix[] {
|
||||||
|
const matrix: CompatibilityMatrix[] = [];
|
||||||
|
|
||||||
|
// Group tools by common characteristics
|
||||||
|
const platformGroups = tools.reduce((acc: any, tool: any) => {
|
||||||
|
if (tool.platforms) {
|
||||||
|
tool.platforms.forEach((platform: string) => {
|
||||||
|
if (!acc[platform]) acc[platform] = [];
|
||||||
|
acc[platform].push(tool.name);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return acc;
|
||||||
|
}, {});
|
||||||
|
|
||||||
|
const phaseGroups = tools.reduce((acc: any, tool: any) => {
|
||||||
|
if (tool.phases) {
|
||||||
|
tool.phases.forEach((phase: string) => {
|
||||||
|
if (!acc[phase]) acc[phase] = [];
|
||||||
|
acc[phase].push(tool.name);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return acc;
|
||||||
|
}, {});
|
||||||
|
|
||||||
|
matrix.push({
|
||||||
|
type: 'platform_compatibility',
|
||||||
|
groups: platformGroups
|
||||||
|
});
|
||||||
|
|
||||||
|
matrix.push({
|
||||||
|
type: 'phase_synergy',
|
||||||
|
groups: phaseGroups
|
||||||
|
});
|
||||||
|
|
||||||
|
return matrix;
|
||||||
|
}
|
||||||
|
|
||||||
async function loadRawData(): Promise<ToolsData> {
|
async function loadRawData(): Promise<ToolsData> {
|
||||||
if (!cachedData) {
|
if (!cachedData) {
|
||||||
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
|
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
|
||||||
@ -99,6 +252,21 @@ async function loadRawData(): Promise<ToolsData> {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
cachedData = ToolsDataSchema.parse(rawData);
|
cachedData = ToolsDataSchema.parse(rawData);
|
||||||
|
|
||||||
|
// Enhanced: Add default skill level descriptions if not provided
|
||||||
|
if (!cachedData.skill_levels || Object.keys(cachedData.skill_levels).length === 0) {
|
||||||
|
cachedData.skill_levels = {
|
||||||
|
novice: "Minimal technical background required, guided interfaces",
|
||||||
|
beginner: "Basic IT knowledge, some command-line familiarity helpful",
|
||||||
|
intermediate: "Solid technical foundation, comfortable with various tools",
|
||||||
|
advanced: "Extensive experience, deep technical understanding required",
|
||||||
|
expert: "Specialist knowledge, cutting-edge techniques and complex scenarios"
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
dataVersion = generateDataVersion(cachedData);
|
||||||
|
console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('YAML validation failed:', error);
|
console.error('YAML validation failed:', error);
|
||||||
throw new Error('Invalid tools.yaml structure');
|
throw new Error('Invalid tools.yaml structure');
|
||||||
@ -123,47 +291,88 @@ export async function getToolsData(): Promise<ToolsData> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
lastRandomizationDate = today;
|
lastRandomizationDate = today;
|
||||||
|
|
||||||
cachedCompressedData = null;
|
cachedCompressedData = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return cachedRandomizedData;
|
return cachedRandomizedData;
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function getCompressedToolsDataForAI(): Promise<CompressedToolsData> {
|
export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedToolsData> {
|
||||||
if (!cachedCompressedData) {
|
if (!cachedCompressedData) {
|
||||||
const data = await getToolsData();
|
const data = await getToolsData();
|
||||||
|
|
||||||
|
// Enhanced: More detailed tool information for micro-tasks
|
||||||
const compressedTools = data.tools
|
const compressedTools = data.tools
|
||||||
.filter(tool => tool.type !== 'concept')
|
.filter(tool => tool.type !== 'concept')
|
||||||
.map(tool => {
|
.map(tool => {
|
||||||
const { projectUrl, statusUrl, ...compressedTool } = tool;
|
const { projectUrl, statusUrl, ...compressedTool } = tool;
|
||||||
return compressedTool;
|
return {
|
||||||
|
...compressedTool,
|
||||||
|
// Enhanced: Add computed fields for AI
|
||||||
|
is_hosted: projectUrl !== undefined && projectUrl !== null && projectUrl !== "" && projectUrl.trim() !== "",
|
||||||
|
is_open_source: tool.license && tool.license !== 'Proprietary',
|
||||||
|
complexity_score: tool.skillLevel === 'expert' ? 5 :
|
||||||
|
tool.skillLevel === 'advanced' ? 4 :
|
||||||
|
tool.skillLevel === 'intermediate' ? 3 :
|
||||||
|
tool.skillLevel === 'beginner' ? 2 : 1,
|
||||||
|
// Enhanced: Phase-specific suitability hints
|
||||||
|
phase_suitability: tool.phases?.map(phase => ({
|
||||||
|
phase,
|
||||||
|
primary_use: tool.tags?.find(tag => tag.includes(phase)) ? 'primary' : 'secondary'
|
||||||
|
})) || []
|
||||||
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
const concepts = data.tools
|
const concepts = data.tools
|
||||||
.filter(tool => tool.type === 'concept')
|
.filter(tool => tool.type === 'concept')
|
||||||
.map(concept => {
|
.map(concept => {
|
||||||
const { projectUrl, statusUrl, platforms, accessType, license, ...compressedConcept } = concept;
|
const { projectUrl, statusUrl, platforms, accessType, license, ...compressedConcept } = concept;
|
||||||
return compressedConcept;
|
return {
|
||||||
|
...compressedConcept,
|
||||||
|
// Enhanced: Learning difficulty indicator
|
||||||
|
learning_complexity: concept.skillLevel === 'expert' ? 'very_high' :
|
||||||
|
concept.skillLevel === 'advanced' ? 'high' :
|
||||||
|
concept.skillLevel === 'intermediate' ? 'medium' :
|
||||||
|
'low'
|
||||||
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Enhanced: Add rich context data
|
||||||
|
const domainRelationships = generateDomainRelationships(data.domains, compressedTools);
|
||||||
|
const phaseDependencies = generatePhaseDependencies(data.phases);
|
||||||
|
const toolCompatibilityMatrix = generateToolCompatibilityMatrix(compressedTools);
|
||||||
|
|
||||||
cachedCompressedData = {
|
cachedCompressedData = {
|
||||||
tools: compressedTools,
|
tools: compressedTools,
|
||||||
concepts: concepts,
|
concepts: concepts,
|
||||||
domains: data.domains,
|
domains: data.domains,
|
||||||
phases: data.phases,
|
phases: data.phases,
|
||||||
'domain-agnostic-software': data['domain-agnostic-software']
|
'domain-agnostic-software': data['domain-agnostic-software'],
|
||||||
// scenarios intentionally excluded from AI data
|
scenarios: data.scenarios, // Include scenarios for context
|
||||||
|
skill_levels: data.skill_levels || {},
|
||||||
|
// Enhanced context for micro-tasks
|
||||||
|
domain_relationships: domainRelationships,
|
||||||
|
phase_dependencies: phaseDependencies,
|
||||||
|
tool_compatibility_matrix: toolCompatibilityMatrix
|
||||||
};
|
};
|
||||||
|
|
||||||
|
console.log(`[DATA SERVICE] Generated enhanced compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
|
||||||
|
console.log(`[DATA SERVICE] Added context: ${domainRelationships.length} domain relationships, ${phaseDependencies.length} phase dependencies`);
|
||||||
}
|
}
|
||||||
|
|
||||||
return cachedCompressedData;
|
return cachedCompressedData;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function getDataVersion(): string | null {
|
||||||
|
return dataVersion;
|
||||||
|
}
|
||||||
|
|
||||||
export function clearCache(): void {
|
export function clearCache(): void {
|
||||||
cachedData = null;
|
cachedData = null;
|
||||||
cachedRandomizedData = null;
|
cachedRandomizedData = null;
|
||||||
cachedCompressedData = null;
|
cachedCompressedData = null;
|
||||||
lastRandomizationDate = null;
|
lastRandomizationDate = null;
|
||||||
|
dataVersion = null;
|
||||||
|
|
||||||
|
console.log('[DATA SERVICE] Enhanced cache cleared');
|
||||||
}
|
}
|
267
src/utils/embeddings.ts
Normal file
267
src/utils/embeddings.ts
Normal file
@ -0,0 +1,267 @@
|
|||||||
|
// src/utils/embeddings.ts
|
||||||
|
import { promises as fs } from 'fs';
|
||||||
|
import path from 'path';
|
||||||
|
import { getCompressedToolsDataForAI } from './dataService.js';
|
||||||
|
|
||||||
|
interface EmbeddingData {
|
||||||
|
id: string;
|
||||||
|
type: 'tool' | 'concept';
|
||||||
|
name: string;
|
||||||
|
content: string;
|
||||||
|
embedding: number[];
|
||||||
|
metadata: {
|
||||||
|
domains?: string[];
|
||||||
|
phases?: string[];
|
||||||
|
tags?: string[];
|
||||||
|
skillLevel?: string;
|
||||||
|
type?: string;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
interface EmbeddingsDatabase {
|
||||||
|
version: string;
|
||||||
|
lastUpdated: number;
|
||||||
|
embeddings: EmbeddingData[];
|
||||||
|
}
|
||||||
|
|
||||||
|
class EmbeddingsService {
|
||||||
|
private embeddings: EmbeddingData[] = [];
|
||||||
|
private isInitialized = false;
|
||||||
|
private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
|
||||||
|
private readonly batchSize: number;
|
||||||
|
private readonly batchDelay: number;
|
||||||
|
private readonly enabled: boolean;
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
this.enabled = process.env.AI_EMBEDDINGS_ENABLED === 'true';
|
||||||
|
this.batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20', 10);
|
||||||
|
this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
async initialize(): Promise<void> {
|
||||||
|
if (!this.enabled) {
|
||||||
|
console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
console.log('[EMBEDDINGS] Initializing embeddings system...');
|
||||||
|
|
||||||
|
// Create data directory if it doesn't exist
|
||||||
|
await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true });
|
||||||
|
|
||||||
|
const toolsData = await getCompressedToolsDataForAI();
|
||||||
|
const currentDataHash = this.hashData(toolsData);
|
||||||
|
|
||||||
|
// Try to load existing embeddings
|
||||||
|
const existingEmbeddings = await this.loadEmbeddings();
|
||||||
|
|
||||||
|
if (existingEmbeddings && existingEmbeddings.version === currentDataHash) {
|
||||||
|
console.log('[EMBEDDINGS] Using cached embeddings');
|
||||||
|
this.embeddings = existingEmbeddings.embeddings;
|
||||||
|
} else {
|
||||||
|
console.log('[EMBEDDINGS] Generating new embeddings...');
|
||||||
|
await this.generateEmbeddings(toolsData, currentDataHash);
|
||||||
|
}
|
||||||
|
|
||||||
|
this.isInitialized = true;
|
||||||
|
console.log(`[EMBEDDINGS] Initialized with ${this.embeddings.length} embeddings`);
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[EMBEDDINGS] Failed to initialize:', error);
|
||||||
|
this.isInitialized = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private hashData(data: any): string {
|
||||||
|
return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> {
|
||||||
|
try {
|
||||||
|
const data = await fs.readFile(this.embeddingsPath, 'utf8');
|
||||||
|
return JSON.parse(data);
|
||||||
|
} catch (error) {
|
||||||
|
console.log('[EMBEDDINGS] No existing embeddings found');
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private async saveEmbeddings(version: string): Promise<void> {
|
||||||
|
const database: EmbeddingsDatabase = {
|
||||||
|
version,
|
||||||
|
lastUpdated: Date.now(),
|
||||||
|
embeddings: this.embeddings
|
||||||
|
};
|
||||||
|
|
||||||
|
await fs.writeFile(this.embeddingsPath, JSON.stringify(database, null, 2));
|
||||||
|
console.log(`[EMBEDDINGS] Saved ${this.embeddings.length} embeddings to disk`);
|
||||||
|
}
|
||||||
|
|
||||||
|
private createContentString(item: any): string {
|
||||||
|
const parts = [
|
||||||
|
item.name,
|
||||||
|
item.description || '',
|
||||||
|
...(item.tags || []),
|
||||||
|
...(item.domains || []),
|
||||||
|
...(item.phases || [])
|
||||||
|
];
|
||||||
|
|
||||||
|
return parts.filter(Boolean).join(' ').toLowerCase();
|
||||||
|
}
|
||||||
|
|
||||||
|
private async generateEmbeddingsBatch(contents: string[]): Promise<number[][]> {
|
||||||
|
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
|
||||||
|
const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
|
||||||
|
const model = process.env.AI_EMBEDDINGS_MODEL;
|
||||||
|
|
||||||
|
if (!endpoint || !apiKey || !model) {
|
||||||
|
throw new Error('Missing embeddings API configuration');
|
||||||
|
}
|
||||||
|
|
||||||
|
const response = await fetch(endpoint, {
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
'Authorization': `Bearer ${apiKey}`
|
||||||
|
},
|
||||||
|
body: JSON.stringify({
|
||||||
|
model,
|
||||||
|
input: contents
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!response.ok) {
|
||||||
|
const error = await response.text();
|
||||||
|
throw new Error(`Embeddings API error: ${response.status} - ${error}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const data = await response.json();
|
||||||
|
return data.data.map((item: any) => item.embedding);
|
||||||
|
}
|
||||||
|
|
||||||
|
private async generateEmbeddings(toolsData: any, version: string): Promise<void> {
|
||||||
|
const allItems = [
|
||||||
|
...toolsData.tools.map((tool: any) => ({ ...tool, type: 'tool' })),
|
||||||
|
...toolsData.concepts.map((concept: any) => ({ ...concept, type: 'concept' }))
|
||||||
|
];
|
||||||
|
|
||||||
|
const contents = allItems.map(item => this.createContentString(item));
|
||||||
|
this.embeddings = [];
|
||||||
|
|
||||||
|
// Process in batches to respect rate limits
|
||||||
|
for (let i = 0; i < contents.length; i += this.batchSize) {
|
||||||
|
const batch = contents.slice(i, i + this.batchSize);
|
||||||
|
const batchItems = allItems.slice(i, i + this.batchSize);
|
||||||
|
|
||||||
|
console.log(`[EMBEDDINGS] Processing batch ${Math.ceil((i + 1) / this.batchSize)} of ${Math.ceil(contents.length / this.batchSize)}`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
const embeddings = await this.generateEmbeddingsBatch(batch);
|
||||||
|
|
||||||
|
embeddings.forEach((embedding, index) => {
|
||||||
|
const item = batchItems[index];
|
||||||
|
this.embeddings.push({
|
||||||
|
id: `${item.type}_${item.name.replace(/[^a-zA-Z0-9]/g, '_')}`,
|
||||||
|
type: item.type,
|
||||||
|
name: item.name,
|
||||||
|
content: batch[index],
|
||||||
|
embedding,
|
||||||
|
metadata: {
|
||||||
|
domains: item.domains,
|
||||||
|
phases: item.phases,
|
||||||
|
tags: item.tags,
|
||||||
|
skillLevel: item.skillLevel,
|
||||||
|
type: item.type
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// Rate limiting delay between batches
|
||||||
|
if (i + this.batchSize < contents.length) {
|
||||||
|
await new Promise(resolve => setTimeout(resolve, this.batchDelay));
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
console.error(`[EMBEDDINGS] Failed to process batch ${Math.ceil((i + 1) / this.batchSize)}:`, error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
await this.saveEmbeddings(version);
|
||||||
|
}
|
||||||
|
|
||||||
|
public async embedText(text: string): Promise<number[]> {
|
||||||
|
// Re‑use the private batch helper to avoid auth duplication
|
||||||
|
const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
|
||||||
|
return embedding;
|
||||||
|
}
|
||||||
|
|
||||||
|
private cosineSimilarity(a: number[], b: number[]): number {
|
||||||
|
let dotProduct = 0;
|
||||||
|
let normA = 0;
|
||||||
|
let normB = 0;
|
||||||
|
|
||||||
|
for (let i = 0; i < a.length; i++) {
|
||||||
|
dotProduct += a[i] * b[i];
|
||||||
|
normA += a[i] * a[i];
|
||||||
|
normB += b[i] * b[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
|
||||||
|
}
|
||||||
|
|
||||||
|
async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<EmbeddingData[]> {
|
||||||
|
if (!this.enabled || !this.isInitialized || this.embeddings.length === 0) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Generate embedding for query
|
||||||
|
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
|
||||||
|
const queryEmbedding = queryEmbeddings[0];
|
||||||
|
|
||||||
|
// Calculate similarities
|
||||||
|
const similarities = this.embeddings.map(item => ({
|
||||||
|
...item,
|
||||||
|
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
|
||||||
|
}));
|
||||||
|
|
||||||
|
// Filter by threshold and sort by similarity
|
||||||
|
return similarities
|
||||||
|
.filter(item => item.similarity >= threshold)
|
||||||
|
.sort((a, b) => b.similarity - a.similarity)
|
||||||
|
.slice(0, maxResults);
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
console.error('[EMBEDDINGS] Failed to find similar items:', error);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
isEnabled(): boolean {
|
||||||
|
return this.enabled && this.isInitialized;
|
||||||
|
}
|
||||||
|
|
||||||
|
getStats(): { enabled: boolean; initialized: boolean; count: number } {
|
||||||
|
return {
|
||||||
|
enabled: this.enabled,
|
||||||
|
initialized: this.isInitialized,
|
||||||
|
count: this.embeddings.length
|
||||||
|
};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Global instance
|
||||||
|
const embeddingsService = new EmbeddingsService();
|
||||||
|
|
||||||
|
export { embeddingsService, type EmbeddingData };
|
||||||
|
|
||||||
|
// Auto-initialize on import in server environment
|
||||||
|
if (typeof window === 'undefined' && process.env.NODE_ENV !== 'test') {
|
||||||
|
embeddingsService.initialize().catch(error => {
|
||||||
|
console.error('[EMBEDDINGS] Auto-initialization failed:', error);
|
||||||
|
});
|
||||||
|
}
|
@ -1,4 +1,4 @@
|
|||||||
// src/utils/rateLimitedQueue.ts
|
// src/utils/rateLimitedQueue.ts - FIXED: Memory leak and better cleanup
|
||||||
|
|
||||||
import dotenv from "dotenv";
|
import dotenv from "dotenv";
|
||||||
|
|
||||||
@ -32,6 +32,43 @@ class RateLimitedQueue {
|
|||||||
private lastProcessedAt = 0;
|
private lastProcessedAt = 0;
|
||||||
private currentlyProcessingTaskId: string | null = null;
|
private currentlyProcessingTaskId: string | null = null;
|
||||||
|
|
||||||
|
private cleanupInterval: NodeJS.Timeout;
|
||||||
|
private readonly TASK_RETENTION_MS = 30000;
|
||||||
|
|
||||||
|
constructor() {
|
||||||
|
this.cleanupInterval = setInterval(() => {
|
||||||
|
this.cleanupOldTasks();
|
||||||
|
}, 30000);
|
||||||
|
}
|
||||||
|
|
||||||
|
private cleanupOldTasks(): void {
|
||||||
|
const now = Date.now();
|
||||||
|
const initialLength = this.tasks.length;
|
||||||
|
|
||||||
|
this.tasks = this.tasks.filter(task => {
|
||||||
|
if (task.status === 'queued' || task.status === 'processing') {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (task.completedAt && (now - task.completedAt) > this.TASK_RETENTION_MS) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
});
|
||||||
|
|
||||||
|
const cleaned = initialLength - this.tasks.length;
|
||||||
|
if (cleaned > 0) {
|
||||||
|
console.log(`[QUEUE] Cleaned up ${cleaned} old tasks, ${this.tasks.length} remaining`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public shutdown(): void {
|
||||||
|
if (this.cleanupInterval) {
|
||||||
|
clearInterval(this.cleanupInterval);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
add<T>(task: Task<T>, taskId?: string): Promise<T> {
|
add<T>(task: Task<T>, taskId?: string): Promise<T> {
|
||||||
const id = taskId || this.generateTaskId();
|
const id = taskId || this.generateTaskId();
|
||||||
|
|
||||||
@ -103,7 +140,6 @@ class RateLimitedQueue {
|
|||||||
const processingOffset = processingTasks.length > 0 ? 1 : 0;
|
const processingOffset = processingTasks.length > 0 ? 1 : 0;
|
||||||
status.currentPosition = processingOffset + positionInQueue + 1;
|
status.currentPosition = processingOffset + positionInQueue + 1;
|
||||||
}
|
}
|
||||||
} else if (task.status === 'completed' || task.status === 'failed') {
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const taskTimestamp = taskId.match(/ai_(\d+)_/)?.[1];
|
const taskTimestamp = taskId.match(/ai_(\d+)_/)?.[1];
|
||||||
@ -152,7 +188,6 @@ class RateLimitedQueue {
|
|||||||
this.currentlyProcessingTaskId = nextTask.id;
|
this.currentlyProcessingTaskId = nextTask.id;
|
||||||
this.lastProcessedAt = Date.now();
|
this.lastProcessedAt = Date.now();
|
||||||
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await nextTask.task();
|
await nextTask.task();
|
||||||
nextTask.status = 'completed';
|
nextTask.status = 'completed';
|
||||||
@ -166,14 +201,6 @@ class RateLimitedQueue {
|
|||||||
|
|
||||||
this.currentlyProcessingTaskId = null;
|
this.currentlyProcessingTaskId = null;
|
||||||
|
|
||||||
setTimeout(() => {
|
|
||||||
const index = this.tasks.findIndex(t => t.id === nextTask.id);
|
|
||||||
if (index >= 0) {
|
|
||||||
console.log(`[QUEUE] Removing completed task ${nextTask.id}`);
|
|
||||||
this.tasks.splice(index, 1);
|
|
||||||
}
|
|
||||||
}, 10000);
|
|
||||||
|
|
||||||
const hasMoreQueued = this.tasks.some(t => t.status === 'queued');
|
const hasMoreQueued = this.tasks.some(t => t.status === 'queued');
|
||||||
if (hasMoreQueued) {
|
if (hasMoreQueued) {
|
||||||
console.log(`[QUEUE] Waiting ${this.delayMs}ms before next task`);
|
console.log(`[QUEUE] Waiting ${this.delayMs}ms before next task`);
|
||||||
@ -201,4 +228,8 @@ export function getQueueStatus(taskId?: string): QueueStatus {
|
|||||||
return queue.getStatus(taskId);
|
return queue.getStatus(taskId);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export function shutdownQueue(): void {
|
||||||
|
queue.shutdown();
|
||||||
|
}
|
||||||
|
|
||||||
export default queue;
|
export default queue;
|
Loading…
x
Reference in New Issue
Block a user