Compare commits
12 Commits
61d94ef956
...
f329955c62
Author | SHA1 | Date | |
---|---|---|---|
f329955c62 | |||
![]() |
d3c4d7ccc4 | ||
![]() |
37edc1549e | ||
![]() |
0e66c6e32f | ||
![]() |
8693cd87d4 | ||
![]() |
1b9d9b437b | ||
![]() |
8c9bdf0710 | ||
![]() |
224f717ba8 | ||
![]() |
680a2c311d | ||
![]() |
c96aa70413 | ||
![]() |
895c476476 | ||
![]() |
78779fc8da |
84
.env.example
84
.env.example
@@ -2,34 +2,74 @@
|
||||
# ForensicPathways Environment Configuration
|
||||
# ===========================================
|
||||
|
||||
# Authentication & OIDC (Required)
|
||||
AUTH_SECRET=change-this-to-a-strong-secret-key-in-production
|
||||
# === Authentication Configuration ===
|
||||
AUTHENTICATION_NECESSARY=false
|
||||
AUTHENTICATION_NECESSARY_CONTRIBUTIONS=false
|
||||
AUTHENTICATION_NECESSARY_AI=false
|
||||
AUTH_SECRET=your-secret-key-change-in-production
|
||||
|
||||
# OIDC Configuration (if authentication enabled)
|
||||
OIDC_ENDPOINT=https://your-oidc-provider.com
|
||||
OIDC_CLIENT_ID=your-oidc-client-id
|
||||
OIDC_CLIENT_SECRET=your-oidc-client-secret
|
||||
OIDC_CLIENT_ID=your-client-id
|
||||
OIDC_CLIENT_SECRET=your-client-secret
|
||||
|
||||
# Auth Scopes - set to true in prod
|
||||
AUTHENTICATION_NECESSARY_CONTRIBUTIONS=true
|
||||
AUTHENTICATION_NECESSARY_AI=true
|
||||
# ===================================================================
|
||||
# AI CONFIGURATION - Complete Reference for Improved Pipeline
|
||||
# ===================================================================
|
||||
|
||||
# Application Configuration (Required)
|
||||
PUBLIC_BASE_URL=https://your-domain.com
|
||||
NODE_ENV=production
|
||||
# === CORE AI ENDPOINTS & MODELS ===
|
||||
AI_API_ENDPOINT=https://llm.mikoshi.de
|
||||
AI_API_KEY=your-api-key
|
||||
AI_MODEL='mistral/mistral-small-latest'
|
||||
|
||||
# AI Service Configuration (Required for AI features)
|
||||
AI_MODEL=mistral-large-latest
|
||||
AI_API_ENDPOINT=https://api.mistral.ai
|
||||
AI_API_KEY=your-mistral-api-key
|
||||
AI_RATE_LIMIT_DELAY_MS=1000
|
||||
# === IMPROVED PIPELINE: Use separate analyzer model (mistral-small is fine) ===
|
||||
AI_ANALYZER_ENDPOINT=https://llm.mikoshi.de
|
||||
AI_ANALYZER_API_KEY=your-analyzer-api-key
|
||||
AI_ANALYZER_MODEL='mistral/mistral-small-latest'
|
||||
|
||||
# Git Integration (Required for contributions)
|
||||
GIT_REPO_URL=https://git.cc24.dev/mstoeck3/forensic-pathways
|
||||
GIT_PROVIDER=gitea
|
||||
GIT_API_ENDPOINT=https://git.cc24.dev/api/v1
|
||||
GIT_API_TOKEN=your-git-api-token
|
||||
# === EMBEDDINGS CONFIGURATION ===
|
||||
AI_EMBEDDINGS_ENABLED=true
|
||||
AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
|
||||
AI_EMBEDDINGS_API_KEY=your-embeddings-api-key
|
||||
AI_EMBEDDINGS_MODEL=mistral-embed
|
||||
AI_EMBEDDINGS_BATCH_SIZE=20
|
||||
AI_EMBEDDINGS_BATCH_DELAY_MS=1000
|
||||
|
||||
# File Upload Configuration (Optional)
|
||||
LOCAL_UPLOAD_PATH=./public/uploads
|
||||
# === PIPELINE: VectorIndex (HNSW) Configuration ===
|
||||
AI_MAX_SELECTED_ITEMS=60 # Tools visible to each micro-task
|
||||
AI_EMBEDDING_CANDIDATES=60 # VectorIndex candidates (HNSW is more efficient)
|
||||
AI_SIMILARITY_THRESHOLD=0.3 # Not used by VectorIndex (uses cosine distance internally)
|
||||
|
||||
# === MICRO-TASK CONFIGURATION ===
|
||||
AI_MICRO_TASK_DELAY_MS=500 # Delay between micro-tasks
|
||||
AI_MICRO_TASK_TIMEOUT_MS=25000 # Timeout per micro-task (increased for full context)
|
||||
|
||||
# === RATE LIMITING ===
|
||||
AI_RATE_LIMIT_DELAY_MS=3000 # Main rate limit delay
|
||||
AI_RATE_LIMIT_MAX_REQUESTS=6 # Main requests per minute (reduced - fewer but richer calls)
|
||||
AI_MICRO_TASK_RATE_LIMIT=15 # Micro-task requests per minute (was 30)
|
||||
|
||||
# === QUEUE MANAGEMENT ===
|
||||
AI_QUEUE_MAX_SIZE=50
|
||||
AI_QUEUE_CLEANUP_INTERVAL_MS=300000
|
||||
|
||||
# === PERFORMANCE & MONITORING ===
|
||||
AI_MICRO_TASK_DEBUG=true
|
||||
AI_PERFORMANCE_METRICS=true
|
||||
AI_RESPONSE_CACHE_TTL_MS=3600000
|
||||
|
||||
# ===================================================================
|
||||
# LEGACY VARIABLES (still used but less important)
|
||||
# ===================================================================
|
||||
|
||||
# These are still used by other parts of the system:
|
||||
AI_RESPONSE_CACHE_TTL_MS=3600000 # For caching responses
|
||||
AI_QUEUE_MAX_SIZE=50 # Queue management
|
||||
AI_QUEUE_CLEANUP_INTERVAL_MS=300000 # Queue cleanup
|
||||
|
||||
# === Application Configuration ===
|
||||
PUBLIC_BASE_URL=http://localhost:4321
|
||||
NODE_ENV=development
|
||||
|
||||
# Nextcloud Integration (Optional)
|
||||
NEXTCLOUD_ENDPOINT=https://your-nextcloud.com
|
||||
|
1
.gitignore
vendored
1
.gitignore
vendored
@@ -85,3 +85,4 @@ temp/
|
||||
.astro/data-store.json
|
||||
.astro/content.d.ts
|
||||
prompt.md
|
||||
data/embeddings.json
|
||||
|
358
RAG-Roadmap.md
Normal file
358
RAG-Roadmap.md
Normal file
@@ -0,0 +1,358 @@
|
||||
# Forensic-Grade RAG Implementation Roadmap
|
||||
|
||||
## Context & Current State Analysis
|
||||
|
||||
You have access to a forensic tools recommendation system built with:
|
||||
- **Embeddings-based retrieval** (src/utils/embeddings.ts)
|
||||
- **Multi-stage AI pipeline** (src/utils/aiPipeline.ts)
|
||||
- **Micro-task processing** for detailed analysis
|
||||
- **Rate limiting and queue management** (src/utils/rateLimitedQueue.ts)
|
||||
- **YAML-based tool database** (src/data/tools.yaml)
|
||||
|
||||
**Current Architecture**: Basic RAG (Retrieve → AI Selection → Micro-task Generation)
|
||||
|
||||
**Target Architecture**: Forensic-Grade RAG with transparency, objectivity, and reproducibility
|
||||
|
||||
## Implementation Roadmap
|
||||
|
||||
### PHASE 1: Configuration Externalization & AI Architecture Enhancement (Weeks 1-2)
|
||||
|
||||
#### 1.1 Complete Configuration Externalization
|
||||
**Objective**: Remove all hard-coded values from codebase (except AI prompts)
|
||||
|
||||
**Tasks**:
|
||||
1. **Create comprehensive configuration schema** in `src/config/`
|
||||
- `forensic-scoring.yaml` - All scoring criteria, weights, thresholds
|
||||
- `ai-models.yaml` - AI model configurations and routing
|
||||
- `system-parameters.yaml` - Rate limits, queue settings, processing parameters
|
||||
- `validation-criteria.yaml` - Expert validation rules, bias detection parameters
|
||||
|
||||
2. **Implement configuration loader** (`src/utils/configLoader.ts`)
|
||||
- Hot-reload capability for configuration changes
|
||||
- Environment-specific overrides (dev/staging/prod)
|
||||
- Configuration validation and schema enforcement
|
||||
- Default fallbacks for missing values
|
||||
|
||||
3. **Audit existing codebase** for hard-coded values:
|
||||
- Search for literal numbers, strings, arrays in TypeScript files
|
||||
- Extract to configuration files with meaningful names
|
||||
- Ensure all thresholds (similarity scores, rate limits, token counts) are configurable
|
||||
|
||||
#### 1.2 Dual AI Model Architecture Implementation
|
||||
**Objective**: Implement large + small model strategy for optimal cost/performance
|
||||
|
||||
**Tasks**:
|
||||
1. **Extend environment configuration**:
|
||||
```
|
||||
# Strategic Analysis Model (Large, Few Tokens)
|
||||
AI_STRATEGIC_ENDPOINT=
|
||||
AI_STRATEGIC_API_KEY=
|
||||
AI_STRATEGIC_MODEL=mistral-large-latest
|
||||
AI_STRATEGIC_MAX_TOKENS=500
|
||||
AI_STRATEGIC_CONTEXT_WINDOW=32000
|
||||
|
||||
# Content Generation Model (Small, Many Tokens)
|
||||
AI_CONTENT_ENDPOINT=
|
||||
AI_CONTENT_API_KEY=
|
||||
AI_CONTENT_MODEL=mistral-small-latest
|
||||
AI_CONTENT_MAX_TOKENS=2000
|
||||
AI_CONTENT_CONTEXT_WINDOW=8000
|
||||
```
|
||||
|
||||
2. **Create AI router** (`src/utils/aiRouter.ts`):
|
||||
- Route different task types to appropriate models
|
||||
- **Strategic tasks** → Large model: tool selection, bias analysis, methodology decisions
|
||||
- **Content tasks** → Small model: descriptions, explanations, micro-task outputs
|
||||
- Automatic fallback logic if primary model fails
|
||||
- Usage tracking and cost optimization
|
||||
|
||||
3. **Update aiPipeline.ts**:
|
||||
- Replace single `callAI()` method with task-specific methods
|
||||
- Implement intelligent routing based on task complexity
|
||||
- Add token estimation for optimal model selection
|
||||
|
||||
### PHASE 2: Evidence-Based Scoring Framework (Weeks 3-5)
|
||||
|
||||
#### 2.1 Forensic Scoring Engine Implementation
|
||||
**Objective**: Replace subjective AI selection with objective, measurable criteria
|
||||
|
||||
**Tasks**:
|
||||
1. **Create scoring framework** (`src/scoring/ForensicScorer.ts`):
|
||||
```typescript
|
||||
interface ScoringCriterion {
|
||||
name: string;
|
||||
weight: number;
|
||||
methodology: string;
|
||||
dataSources: string[];
|
||||
calculator: (tool: Tool, scenario: Scenario) => Promise<CriterionScore>;
|
||||
}
|
||||
|
||||
interface CriterionScore {
|
||||
value: number; // 0-100
|
||||
confidence: number; // 0-100
|
||||
evidence: Evidence[];
|
||||
lastUpdated: Date;
|
||||
}
|
||||
```
|
||||
|
||||
2. **Implement core scoring criteria**:
|
||||
- **Court Admissibility Scorer**: Based on legal precedent database
|
||||
- **Scientific Validity Scorer**: Based on peer-reviewed research citations
|
||||
- **Methodology Alignment Scorer**: NIST SP 800-86 compliance assessment
|
||||
- **Expert Consensus Scorer**: Practitioner survey data integration
|
||||
- **Error Rate Scorer**: Known false positive/negative rates
|
||||
|
||||
3. **Build evidence provenance system**:
|
||||
- Track source of every score component
|
||||
- Maintain citation database for all claims
|
||||
- Version control for scoring methodologies
|
||||
- Automatic staleness detection for outdated evidence
|
||||
|
||||
#### 2.2 Deterministic Core Implementation
|
||||
**Objective**: Ensure reproducible results for identical inputs
|
||||
|
||||
**Tasks**:
|
||||
1. **Implement deterministic pipeline** (`src/analysis/DeterministicAnalyzer.ts`):
|
||||
- Rule-based scenario classification (SCADA/Mobile/Network/etc.)
|
||||
- Mathematical scoring combination (weighted averages, not AI decisions)
|
||||
- Consistent tool ranking algorithms
|
||||
- Reproducibility validation tests
|
||||
|
||||
2. **Add AI enhancement layer**:
|
||||
- AI provides explanations, NOT decisions
|
||||
- AI generates workflow descriptions based on deterministic selections
|
||||
- AI creates contextual advice around objective tool choices
|
||||
|
||||
### PHASE 3: Transparency & Audit Trail System (Weeks 4-6)
|
||||
|
||||
#### 3.1 Complete Audit Trail Implementation
|
||||
**Objective**: Track every decision with forensic-grade documentation
|
||||
|
||||
**Tasks**:
|
||||
1. **Create audit framework** (`src/audit/AuditTrail.ts`):
|
||||
```typescript
|
||||
interface ForensicAuditTrail {
|
||||
queryId: string;
|
||||
userQuery: string;
|
||||
processingSteps: AuditStep[];
|
||||
finalRecommendation: RecommendationWithEvidence;
|
||||
reproducibilityHash: string;
|
||||
validationStatus: ValidationStatus;
|
||||
}
|
||||
|
||||
interface AuditStep {
|
||||
stepName: string;
|
||||
input: any;
|
||||
methodology: string;
|
||||
output: any;
|
||||
evidence: Evidence[];
|
||||
confidence: number;
|
||||
processingTime: number;
|
||||
modelUsed?: string;
|
||||
}
|
||||
```
|
||||
|
||||
2. **Implement evidence citation system**:
|
||||
- Automatic citation generation for all claims
|
||||
- Link to source standards (NIST, ISO, RFC)
|
||||
- Reference scientific papers for methodology choices
|
||||
- Track expert validation contributors
|
||||
|
||||
3. **Build explanation generator**:
|
||||
- Human-readable reasoning for every recommendation
|
||||
- "Why this tool" and "Why not alternatives" explanations
|
||||
- Confidence level communication
|
||||
- Uncertainty quantification
|
||||
|
||||
#### 3.2 Bias Detection & Mitigation System
|
||||
**Objective**: Actively detect and correct recommendation biases
|
||||
|
||||
**Tasks**:
|
||||
1. **Implement bias detection** (`src/bias/BiasDetector.ts`):
|
||||
- **Popularity bias**: Over-recommendation of well-known tools
|
||||
- **Availability bias**: Preference for easily accessible tools
|
||||
- **Recency bias**: Over-weighting of newest tools
|
||||
- **Cultural bias**: Platform or methodology preferences
|
||||
|
||||
2. **Create mitigation strategies**:
|
||||
- Automatic bias adjustment algorithms
|
||||
- Diversity requirements for recommendations
|
||||
- Fairness metrics across tool categories
|
||||
- Bias reporting in audit trails
|
||||
|
||||
### PHASE 4: Expert Validation & Learning System (Weeks 6-8)
|
||||
|
||||
#### 4.1 Expert Review Integration
|
||||
**Objective**: Enable forensic experts to validate and improve recommendations
|
||||
|
||||
**Tasks**:
|
||||
1. **Build expert validation interface** (`src/validation/ExpertReview.ts`):
|
||||
- Structured feedback collection from forensic practitioners
|
||||
- Agreement/disagreement tracking with detailed reasoning
|
||||
- Expert consensus building over time
|
||||
- Minority opinion preservation
|
||||
|
||||
2. **Implement validation loop**:
|
||||
- Flag recommendations requiring expert review
|
||||
- Track expert validation rates and patterns
|
||||
- Update scoring based on real-world feedback
|
||||
- Methodology improvement based on expert input
|
||||
|
||||
#### 4.2 Real-World Case Learning
|
||||
**Objective**: Learn from actual forensic investigations
|
||||
|
||||
**Tasks**:
|
||||
1. **Create case study integration** (`src/learning/CaseStudyLearner.ts`):
|
||||
- Anonymous case outcome tracking
|
||||
- Tool effectiveness measurement in real scenarios
|
||||
- Methodology success/failure analysis
|
||||
- Continuous improvement based on field results
|
||||
|
||||
2. **Implement feedback loops**:
|
||||
- Post-case recommendation validation
|
||||
- Tool performance tracking in actual investigations
|
||||
- Methodology refinement based on outcomes
|
||||
- Success rate improvement over time
|
||||
|
||||
### PHASE 5: Advanced Features & Scientific Rigor (Weeks 7-10)
|
||||
|
||||
#### 5.1 Confidence & Uncertainty Quantification
|
||||
**Objective**: Provide scientific confidence levels for all recommendations
|
||||
|
||||
**Tasks**:
|
||||
1. **Implement uncertainty quantification** (`src/uncertainty/ConfidenceCalculator.ts`):
|
||||
- Statistical confidence intervals for scores
|
||||
- Uncertainty propagation through scoring pipeline
|
||||
- Risk assessment for recommendation reliability
|
||||
- Alternative recommendation ranking
|
||||
|
||||
2. **Add fallback recommendation system**:
|
||||
- Multiple ranked alternatives for each recommendation
|
||||
- Contingency planning for tool failures
|
||||
- Risk-based recommendation portfolios
|
||||
- Sensitivity analysis for critical decisions
|
||||
|
||||
#### 5.2 Reproducibility Testing Framework
|
||||
**Objective**: Ensure consistent results across time and implementations
|
||||
|
||||
**Tasks**:
|
||||
1. **Build reproducibility testing** (`src/testing/ReproducibilityTester.ts`):
|
||||
- Automated consistency validation
|
||||
- Inter-rater reliability testing
|
||||
- Cross-temporal stability analysis
|
||||
- Version control for methodology changes
|
||||
|
||||
2. **Implement quality assurance**:
|
||||
- Continuous integration for reproducibility
|
||||
- Regression testing for methodology changes
|
||||
- Performance monitoring for consistency
|
||||
- Alert system for unexpected variations
|
||||
|
||||
### PHASE 6: Integration & Production Readiness (Weeks 9-12)
|
||||
|
||||
#### 6.1 System Integration
|
||||
**Objective**: Integrate all forensic-grade components seamlessly
|
||||
|
||||
**Tasks**:
|
||||
1. **Update existing components**:
|
||||
- Modify `aiPipeline.ts` to use new scoring framework
|
||||
- Update `embeddings.ts` with evidence tracking
|
||||
- Enhance `rateLimitedQueue.ts` with audit capabilities
|
||||
- Refactor `query.ts` API to return audit trails
|
||||
|
||||
2. **Performance optimization**:
|
||||
- Caching strategies for expensive evidence lookups
|
||||
- Parallel processing for scoring criteria
|
||||
- Efficient storage for audit trails
|
||||
- Load balancing for dual AI models
|
||||
|
||||
#### 6.2 Production Features
|
||||
**Objective**: Make system ready for professional forensic use
|
||||
|
||||
**Tasks**:
|
||||
1. **Add professional features**:
|
||||
- Export recommendations to forensic report formats
|
||||
- Integration with existing forensic workflows
|
||||
- Batch processing for multiple scenarios
|
||||
- API endpoints for external tool integration
|
||||
|
||||
2. **Implement monitoring & maintenance**:
|
||||
- Health checks for all system components
|
||||
- Performance monitoring for response times
|
||||
- Error tracking and alerting
|
||||
- Automatic system updates for new evidence
|
||||
|
||||
## Technical Implementation Guidelines
|
||||
|
||||
### Configuration Management
|
||||
- Use YAML files for human-readable configuration
|
||||
- Implement JSON Schema validation for all config files
|
||||
- Support environment variable overrides
|
||||
- Hot-reload for development, restart for production changes
|
||||
|
||||
### AI Model Routing Strategy
|
||||
```typescript
|
||||
// Task Classification for Model Selection
|
||||
const AI_TASK_ROUTING = {
|
||||
strategic: ['tool-selection', 'bias-analysis', 'methodology-decisions'],
|
||||
content: ['descriptions', 'explanations', 'micro-tasks', 'workflows']
|
||||
};
|
||||
|
||||
// Cost Optimization Logic
|
||||
if (taskComplexity === 'high' && responseTokens < 500) {
|
||||
useModel = 'large';
|
||||
} else if (taskComplexity === 'low' && responseTokens > 1000) {
|
||||
useModel = 'small';
|
||||
} else {
|
||||
useModel = config.defaultModel;
|
||||
}
|
||||
```
|
||||
|
||||
### Evidence Database Structure
|
||||
```typescript
|
||||
interface EvidenceSource {
|
||||
type: 'standard' | 'paper' | 'case-law' | 'expert-survey';
|
||||
citation: string;
|
||||
reliability: number;
|
||||
lastValidated: Date;
|
||||
content: string;
|
||||
metadata: Record<string, any>;
|
||||
}
|
||||
```
|
||||
|
||||
### Quality Assurance Requirements
|
||||
- All scoring criteria must have documented methodologies
|
||||
- Every recommendation must include confidence levels
|
||||
- All AI-generated content must be marked as such
|
||||
- Reproducibility tests must pass with >95% consistency
|
||||
- Expert validation rate must exceed 80% for production use
|
||||
|
||||
## Success Metrics
|
||||
|
||||
### Forensic Quality Metrics
|
||||
- **Transparency**: 100% of decisions traceable to evidence
|
||||
- **Objectivity**: <5% variance in scoring between runs
|
||||
- **Reproducibility**: >95% identical results for identical inputs
|
||||
- **Expert Agreement**: >80% expert validation rate
|
||||
- **Bias Reduction**: <10% bias score across all categories
|
||||
|
||||
### Performance Metrics
|
||||
- **Response Time**: <30 seconds for workflow recommendations
|
||||
- **Accuracy**: >90% real-world case validation success
|
||||
- **Coverage**: Support for >95% of common forensic scenarios
|
||||
- **Reliability**: <1% system error rate
|
||||
- **Cost Efficiency**: <50% cost reduction vs. single large model
|
||||
|
||||
## Risk Mitigation
|
||||
|
||||
### Technical Risks
|
||||
- **AI Model Failures**: Implement robust fallback mechanisms
|
||||
- **Configuration Errors**: Comprehensive validation and testing
|
||||
- **Performance Issues**: Load testing and optimization
|
||||
- **Data Corruption**: Backup and recovery procedures
|
||||
|
||||
### Forensic Risks
|
||||
- **Bias Introduction**: Continuous monitoring and expert validation
|
||||
- **Methodology Errors**: Peer review and scientific validation
|
||||
- **Legal Challenges**: Ensure compliance with admissibility standards
|
||||
- **Expert Disagreement**: Transparent uncertainty communication
|
File diff suppressed because it is too large
Load Diff
@@ -91,119 +91,137 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Advanced Filters Section -->
|
||||
<!-- Advanced Filters Section - COLLAPSIBLE -->
|
||||
<div class="filter-section">
|
||||
<div class="filter-card-compact">
|
||||
<div class="filter-header-compact">
|
||||
<h3>⚙️ Erweiterte Filter</h3>
|
||||
<button class="filter-reset" id="reset-advanced" title="Erweiterte Filter zurücksetzen">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<polyline points="1 4 1 10 7 10"/>
|
||||
<path d="M3.51 15a9 9 0 1 0 2.13-9.36L1 10"/>
|
||||
</svg>
|
||||
</button>
|
||||
<div class="filter-header-controls">
|
||||
<button class="filter-reset" id="reset-advanced" title="Erweiterte Filter zurücksetzen">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<polyline points="1 4 1 10 7 10"/>
|
||||
<path d="M3.51 15a9 9 0 1 0 2.13-9.36L1 10"/>
|
||||
</svg>
|
||||
</button>
|
||||
<button class="collapse-toggle" id="toggle-advanced" data-collapsed="true" title="Erweiterte Filter ein/ausblenden">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<polyline points="6 9 12 15 18 9"></polyline>
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="advanced-filters-compact">
|
||||
<div class="filter-grid-compact">
|
||||
<div class="filter-group">
|
||||
<label class="filter-label">Tool-Typ</label>
|
||||
<select id="type-select" class="filter-select">
|
||||
<option value="">Alle Typen</option>
|
||||
{toolTypes.map((type: string) => (
|
||||
<option value={type}>{type}</option>
|
||||
))}
|
||||
</select>
|
||||
<div class="collapsible-content hidden" id="advanced-filters-content">
|
||||
<div class="advanced-filters-compact">
|
||||
<div class="filter-grid-compact">
|
||||
<div class="filter-group">
|
||||
<label class="filter-label">Tool-Typ</label>
|
||||
<select id="type-select" class="filter-select">
|
||||
<option value="">Alle Typen</option>
|
||||
{toolTypes.map((type: string) => (
|
||||
<option value={type}>{type}</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="filter-group">
|
||||
<label class="filter-label">Skill Level</label>
|
||||
<select id="skill-select" class="filter-select">
|
||||
<option value="">Alle Level</option>
|
||||
{skillLevels.map((level: string) => (
|
||||
<option value={level}>{level}</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="filter-group">
|
||||
<label class="filter-label">Plattform</label>
|
||||
<select id="platform-select" class="filter-select">
|
||||
<option value="">Alle Plattformen</option>
|
||||
{platforms.map((platform: string) => (
|
||||
<option value={platform}>{platform}</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="filter-group">
|
||||
<label class="filter-label">Lizenztyp</label>
|
||||
<select id="license-select" class="filter-select">
|
||||
<option value="">Alle Lizenzen</option>
|
||||
{licenses.map((license: string) => (
|
||||
<option value={license}>{license}</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="filter-group">
|
||||
<label class="filter-label">Zugangsart</label>
|
||||
<select id="access-select" class="filter-select">
|
||||
<option value="">Alle Zugangsarten</option>
|
||||
{accessTypes.map((access: string) => (
|
||||
<option value={access}>{access}</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="filter-group">
|
||||
<label class="filter-label">Skill Level</label>
|
||||
<select id="skill-select" class="filter-select">
|
||||
<option value="">Alle Level</option>
|
||||
{skillLevels.map((level: string) => (
|
||||
<option value={level}>{level}</option>
|
||||
))}
|
||||
</select>
|
||||
<div class="filter-toggles-compact">
|
||||
<label class="toggle-wrapper">
|
||||
<input type="checkbox" id="hosted-only" />
|
||||
<span class="toggle-label">🟣 Nur CC24-Server Tools</span>
|
||||
</label>
|
||||
|
||||
<label class="toggle-wrapper">
|
||||
<input type="checkbox" id="knowledgebase-only" />
|
||||
<span class="toggle-label">📖 Nur Tools mit Knowledgebase</span>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<div class="filter-group">
|
||||
<label class="filter-label">Plattform</label>
|
||||
<select id="platform-select" class="filter-select">
|
||||
<option value="">Alle Plattformen</option>
|
||||
{platforms.map((platform: string) => (
|
||||
<option value={platform}>{platform}</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="filter-group">
|
||||
<label class="filter-label">Lizenztyp</label>
|
||||
<select id="license-select" class="filter-select">
|
||||
<option value="">Alle Lizenzen</option>
|
||||
{licenses.map((license: string) => (
|
||||
<option value={license}>{license}</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
|
||||
<div class="filter-group">
|
||||
<label class="filter-label">Zugangsart</label>
|
||||
<select id="access-select" class="filter-select">
|
||||
<option value="">Alle Zugangsarten</option>
|
||||
{accessTypes.map((access: string) => (
|
||||
<option value={access}>{access}</option>
|
||||
))}
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="filter-toggles-compact">
|
||||
<label class="toggle-wrapper">
|
||||
<input type="checkbox" id="hosted-only" />
|
||||
<span class="toggle-label">🟣 Nur CC24-Server Tools</span>
|
||||
</label>
|
||||
|
||||
<label class="toggle-wrapper">
|
||||
<input type="checkbox" id="knowledgebase-only" />
|
||||
<span class="toggle-label">📖 Nur Tools mit Knowledgebase</span>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Tag Filters Section -->
|
||||
<!-- Tag Filters Section - COLLAPSIBLE -->
|
||||
<div class="filter-section">
|
||||
<div class="filter-card-compact">
|
||||
<div class="filter-header-compact">
|
||||
<h3>🏷️ Tag-Filter</h3>
|
||||
<div class="tag-controls">
|
||||
<div class="filter-header-controls">
|
||||
<button class="filter-reset" id="reset-tags" title="Tags zurücksetzen">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<polyline points="1 4 1 10 7 10"/>
|
||||
<path d="M3.51 15a9 9 0 1 0 2.13-9.36L1 10"/>
|
||||
</svg>
|
||||
</button>
|
||||
<button id="tag-cloud-toggle" class="tag-toggle" data-expanded="false">
|
||||
Mehr zeigen
|
||||
<button class="collapse-toggle" id="toggle-tags" data-collapsed="true" title="Tag-Filter ein/ausblenden">
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<polyline points="6 9 12 15 18 9"></polyline>
|
||||
</svg>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="tag-section">
|
||||
<div class="selected-tags" id="selected-tags"></div>
|
||||
<div class="tag-cloud" id="tag-cloud">
|
||||
{sortedTags.map((tag, index) => (
|
||||
<button
|
||||
class="tag-cloud-item"
|
||||
data-tag={tag}
|
||||
data-frequency={tagFrequency[tag]}
|
||||
data-index={index}
|
||||
>
|
||||
{tag}
|
||||
<span class="tag-frequency">({tagFrequency[tag]})</span>
|
||||
<div class="collapsible-content hidden" id="tag-filters-content">
|
||||
<div class="tag-section">
|
||||
<div class="selected-tags" id="selected-tags"></div>
|
||||
<div class="tag-controls">
|
||||
<button id="tag-cloud-toggle" class="tag-toggle" data-expanded="false">
|
||||
Mehr zeigen
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
<div class="tag-cloud" id="tag-cloud">
|
||||
{sortedTags.map((tag, index) => (
|
||||
<button
|
||||
class="tag-cloud-item"
|
||||
data-tag={tag}
|
||||
data-frequency={tagFrequency[tag]}
|
||||
data-index={index}
|
||||
>
|
||||
{tag}
|
||||
<span class="tag-frequency">({tagFrequency[tag]})</span>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -293,7 +311,12 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
advanced: document.getElementById('reset-advanced'),
|
||||
tags: document.getElementById('reset-tags'),
|
||||
all: document.getElementById('reset-all-filters')
|
||||
}
|
||||
},
|
||||
// Collapsible elements
|
||||
toggleAdvanced: document.getElementById('toggle-advanced'),
|
||||
toggleTags: document.getElementById('toggle-tags'),
|
||||
advancedContent: document.getElementById('advanced-filters-content'),
|
||||
tagContent: document.getElementById('tag-filters-content')
|
||||
};
|
||||
|
||||
// Verify critical elements exist
|
||||
@@ -307,6 +330,52 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
let selectedPhase = '';
|
||||
let isTagCloudExpanded = false;
|
||||
|
||||
// Collapsible functionality
|
||||
function toggleCollapsible(toggleBtn, content, storageKey) {
|
||||
const isCollapsed = toggleBtn.getAttribute('data-collapsed') === 'true';
|
||||
const newState = !isCollapsed;
|
||||
|
||||
toggleBtn.setAttribute('data-collapsed', newState.toString());
|
||||
|
||||
if (newState) {
|
||||
// Collapse
|
||||
content.classList.add('hidden');
|
||||
toggleBtn.style.transform = 'rotate(0deg)';
|
||||
} else {
|
||||
// Expand
|
||||
content.classList.remove('hidden');
|
||||
toggleBtn.style.transform = 'rotate(180deg)';
|
||||
}
|
||||
|
||||
// Store state in sessionStorage
|
||||
sessionStorage.setItem(storageKey, newState.toString());
|
||||
}
|
||||
|
||||
// Initialize collapsible sections (collapsed by default)
|
||||
function initializeCollapsible() {
|
||||
// Advanced filters
|
||||
const advancedCollapsed = sessionStorage.getItem('advanced-collapsed') !== 'false';
|
||||
elements.toggleAdvanced.setAttribute('data-collapsed', advancedCollapsed.toString());
|
||||
if (advancedCollapsed) {
|
||||
elements.advancedContent.classList.add('hidden');
|
||||
elements.toggleAdvanced.style.transform = 'rotate(0deg)';
|
||||
} else {
|
||||
elements.advancedContent.classList.remove('hidden');
|
||||
elements.toggleAdvanced.style.transform = 'rotate(180deg)';
|
||||
}
|
||||
|
||||
// Tag filters
|
||||
const tagsCollapsed = sessionStorage.getItem('tags-collapsed') !== 'false';
|
||||
elements.toggleTags.setAttribute('data-collapsed', tagsCollapsed.toString());
|
||||
if (tagsCollapsed) {
|
||||
elements.tagContent.classList.add('hidden');
|
||||
elements.toggleTags.style.transform = 'rotate(0deg)';
|
||||
} else {
|
||||
elements.tagContent.classList.remove('hidden');
|
||||
elements.toggleTags.style.transform = 'rotate(180deg)';
|
||||
}
|
||||
}
|
||||
|
||||
// Helper function to check if tool is hosted
|
||||
function isToolHosted(tool) {
|
||||
return tool.projectUrl !== undefined &&
|
||||
@@ -418,18 +487,23 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
});
|
||||
}
|
||||
|
||||
// Add/remove tags
|
||||
// Add/remove tags - FIXED: Update ALL matching elements
|
||||
function addTag(tag) {
|
||||
selectedTags.add(tag);
|
||||
document.querySelector(`[data-tag="${tag}"]`).classList.add('active');
|
||||
// FIXED: Use querySelectorAll to update ALL matching tag elements
|
||||
document.querySelectorAll(`[data-tag="${tag}"]`).forEach(element => {
|
||||
element.classList.add('active');
|
||||
});
|
||||
updateSelectedTags();
|
||||
filterTools();
|
||||
}
|
||||
|
||||
function removeTag(tag) {
|
||||
selectedTags.delete(tag);
|
||||
const tagElement = document.querySelector(`[data-tag="${tag}"]`);
|
||||
if (tagElement) tagElement.classList.remove('active');
|
||||
// FIXED: Use querySelectorAll to update ALL matching tag elements
|
||||
document.querySelectorAll(`[data-tag="${tag}"]`).forEach(element => {
|
||||
element.classList.remove('active');
|
||||
});
|
||||
updateSelectedTags();
|
||||
filterTools();
|
||||
}
|
||||
@@ -553,7 +627,10 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
|
||||
function resetTags() {
|
||||
selectedTags.clear();
|
||||
elements.tagCloudItems.forEach(item => item.classList.remove('active'));
|
||||
// FIXED: Update ALL tag elements
|
||||
document.querySelectorAll('.tag-cloud-item').forEach(item => {
|
||||
item.classList.remove('active');
|
||||
});
|
||||
updateSelectedTags();
|
||||
filterTools();
|
||||
}
|
||||
@ -630,11 +707,21 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
elements.resetButtons.tags.addEventListener('click', resetTags);
|
||||
elements.resetButtons.all.addEventListener('click', resetAllFilters);
|
||||
|
||||
// Collapsible toggle listeners
|
||||
elements.toggleAdvanced.addEventListener('click', () => {
|
||||
toggleCollapsible(elements.toggleAdvanced, elements.advancedContent, 'advanced-collapsed');
|
||||
});
|
||||
|
||||
elements.toggleTags.addEventListener('click', () => {
|
||||
toggleCollapsible(elements.toggleTags, elements.tagContent, 'tags-collapsed');
|
||||
});
|
||||
|
||||
// Expose functions globally for backwards compatibility
|
||||
window.clearTagFilters = resetTags;
|
||||
window.clearAllFilters = resetAllFilters;
|
||||
|
||||
// Initialize
|
||||
initializeCollapsible();
|
||||
initTagCloud();
|
||||
filterTagCloud();
|
||||
updateSelectedTags();
|
||||
|
22
src/pages/api/ai/embeddings.status.ts
Normal file
22
src/pages/api/ai/embeddings.status.ts
Normal file
@ -0,0 +1,22 @@
|
||||
// src/pages/api/ai/embeddings-status.ts
|
||||
import type { APIRoute } from 'astro';
|
||||
import { embeddingsService } from '../../../utils/embeddings.js';
|
||||
import { apiResponse, apiServerError } from '../../../utils/api.js';
|
||||
|
||||
export const prerender = false;
|
||||
|
||||
export const GET: APIRoute = async () => {
|
||||
try {
|
||||
const stats = embeddingsService.getStats();
|
||||
|
||||
return apiResponse.success({
|
||||
embeddings: stats,
|
||||
timestamp: new Date().toISOString(),
|
||||
status: stats.enabled && stats.initialized ? 'ready' :
|
||||
stats.enabled && !stats.initialized ? 'initializing' : 'disabled'
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Embeddings status error:', error);
|
||||
return apiServerError.internal('Failed to get embeddings status');
|
||||
}
|
||||
};
|
@ -1,4 +1,4 @@
|
||||
// src/pages/api/ai/enhance-input.ts
|
||||
// src/pages/api/ai/enhance-input.ts - ENHANCED with forensics methodology
|
||||
import type { APIRoute } from 'astro';
|
||||
import { withAPIAuth } from '../../../utils/auth.js';
|
||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
||||
@ -14,7 +14,11 @@ function getEnv(key: string): string {
|
||||
return value;
|
||||
}
|
||||
|
||||
const AI_MODEL = getEnv('AI_MODEL');
|
||||
// Use the analyzer AI for smart prompting (smaller, faster model)
|
||||
const AI_ENDPOINT = getEnv('AI_ANALYZER_ENDPOINT');
|
||||
const AI_API_KEY = getEnv('AI_ANALYZER_API_KEY');
|
||||
const AI_MODEL = getEnv('AI_ANALYZER_MODEL');
|
||||
|
||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
|
||||
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
|
||||
const RATE_LIMIT_MAX = 5; // 5 enhancement requests per minute per user
|
||||
@ -59,29 +63,38 @@ function cleanupExpiredRateLimits() {
|
||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
||||
|
||||
function createEnhancementPrompt(input: string): string {
|
||||
return `
|
||||
Du bist eine KI für digitale Forensik. Der Nutzer beschreibt ein forensisches Szenario. Analysiere die Eingabe.
|
||||
return `Sie sind ein DFIR-Experte mit Spezialisierung auf forensische Methodik. Ein Nutzer beschreibt ein forensisches Szenario oder Problem. Analysieren Sie die Eingabe auf Vollständigkeit für eine wissenschaftlich fundierte forensische Untersuchung.
|
||||
|
||||
Wenn die Beschreibung unvollständig oder vage ist, stelle bis zu drei präzise Rückfragen im JSON-Array-Format, um wichtige Details zu klären (z. B. Vorfalltyp, System, Ziel, Datenquellen, Zeit, Beteiligte, rechtlicher Rahmen).
|
||||
ANALYSIEREN SIE DIESE FORENSISCHEN KATEGORIEN:
|
||||
1. **Incident Context**: Was ist passiert? Welche Angriffsvektoren oder technischen Probleme liegen vor?
|
||||
2. **Affected Systems**: Welche spezifischen Technologien/Plattformen sind betroffen? (Windows/Linux/ICS/SCADA/Mobile/Cloud/Network Infrastructure)
|
||||
3. **Available Evidence**: Welche forensischen Datenquellen stehen zur Verfügung? (RAM-Dumps, Disk-Images, Log-Files, Network-Captures, Registry-Hives)
|
||||
4. **Investigation Objectives**: Was soll erreicht werden? (IOC-Extraktion, Timeline-Rekonstruktion, Attribution, Impact-Assessment)
|
||||
5. **Timeline Constraints**: Wie zeitkritisch ist die Untersuchung?
|
||||
6. **Legal & Compliance**: Rechtliche Anforderungen, Chain of Custody, Compliance-Rahmen (DSGVO, sector-specific regulations)
|
||||
7. **Technical Constraints**: Verfügbare Ressourcen, Skills, Infrastrukturbeschränkungen
|
||||
|
||||
Wenn die Eingabe bereits klar, spezifisch und vollständig ist, gib stattdessen nur eine leere Liste [] zurück.
|
||||
WENN die Beschreibung alle kritischen forensischen Aspekte abdeckt: Geben Sie eine leere Liste [] zurück.
|
||||
|
||||
Antwortformat strikt:
|
||||
WENN wichtige forensische Details fehlen: Formulieren Sie 2-3 präzise Fragen, die die kritischsten Lücken für eine wissenschaftlich fundierte forensische Analyse schließen.
|
||||
|
||||
\`\`\`json
|
||||
QUALITÄTSKRITERIEN FÜR FRAGEN:
|
||||
- Forensisch spezifisch, nicht allgemein (❌ "Mehr Details?" ✅ "Welche forensischen Artefakte (RAM-Dumps, Disk-Images, Logs) stehen zur Verfügung?")
|
||||
- Methodisch relevant (❌ "Wann passierte das?" ✅ "Liegen Log-Dateien aus dem Incident-Zeitraum vor, und welche Retention-Policy gilt?")
|
||||
- Priorisiert nach Auswirkung auf die forensische Untersuchungsqualität
|
||||
|
||||
ANTWORTFORMAT (NUR JSON, KEIN ZUSÄTZLICHER TEXT):
|
||||
[
|
||||
"Frage 1?",
|
||||
"Frage 2?",
|
||||
"Frage 3?"
|
||||
"Forensisch spezifische Frage 1?",
|
||||
"Forensisch spezifische Frage 2?",
|
||||
"Forensisch spezifische Frage 3?"
|
||||
]
|
||||
\`\`\`
|
||||
|
||||
Nutzer-Eingabe:
|
||||
NUTZER-EINGABE:
|
||||
${input}
|
||||
`.trim();
|
||||
}
|
||||
|
||||
|
||||
export const POST: APIRoute = async ({ request }) => {
|
||||
try {
|
||||
const authResult = await withAPIAuth(request, 'ai');
|
||||
@ -98,12 +111,12 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
const body = await request.json();
|
||||
const { input } = body;
|
||||
|
||||
if (!input || typeof input !== 'string' || input.length < 20) {
|
||||
return apiError.badRequest('Input too short for enhancement');
|
||||
if (!input || typeof input !== 'string' || input.length < 40) {
|
||||
return apiError.badRequest('Input too short for enhancement (minimum 40 characters)');
|
||||
}
|
||||
|
||||
const sanitizedInput = sanitizeInput(input);
|
||||
if (sanitizedInput.length < 20) {
|
||||
if (sanitizedInput.length < 40) {
|
||||
return apiError.badRequest('Input too short after sanitization');
|
||||
}
|
||||
|
||||
@ -111,11 +124,11 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
|
||||
|
||||
const aiResponse = await enqueueApiCall(() =>
|
||||
fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
|
||||
fetch(`${AI_ENDPOINT}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${process.env.AI_API_KEY}`
|
||||
'Authorization': `Bearer ${AI_API_KEY}`
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: AI_MODEL,
|
||||
@ -125,8 +138,12 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
content: systemPrompt
|
||||
}
|
||||
],
|
||||
max_tokens: 200,
|
||||
temperature: 0.7
|
||||
max_tokens: 300,
|
||||
temperature: 0.7,
|
||||
// Enhanced: Better parameters for consistent forensics output
|
||||
top_p: 0.9,
|
||||
frequency_penalty: 0.2,
|
||||
presence_penalty: 0.1
|
||||
})
|
||||
}), taskId);
|
||||
|
||||
@ -144,36 +161,47 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
|
||||
let questions;
|
||||
try {
|
||||
const cleanedContent = aiContent
|
||||
const cleanedContent = aiContent
|
||||
.replace(/^```json\s*/i, '')
|
||||
.replace(/\s*```\s*$/, '')
|
||||
.trim();
|
||||
questions = JSON.parse(cleanedContent);
|
||||
questions = JSON.parse(cleanedContent);
|
||||
|
||||
if (!Array.isArray(questions) || questions.length === 0) {
|
||||
throw new Error('Invalid questions format');
|
||||
if (!Array.isArray(questions)) {
|
||||
throw new Error('Response is not an array');
|
||||
}
|
||||
|
||||
// Validate and clean questions
|
||||
// Enhanced validation and cleaning for forensics context
|
||||
questions = questions
|
||||
.filter(q => typeof q === 'string' && q.length > 5 && q.length < 120)
|
||||
.slice(0, 3);
|
||||
.filter(q => typeof q === 'string' && q.length > 20 && q.length < 200) // More appropriate length for forensics questions
|
||||
.filter(q => q.includes('?')) // Must be a question
|
||||
.filter(q => {
|
||||
// Enhanced: Filter for forensics-relevant questions
|
||||
const forensicsTerms = ['forensisch', 'log', 'dump', 'image', 'artefakt', 'evidence', 'incident', 'system', 'netzwerk', 'zeitraum', 'verfügbar'];
|
||||
const lowerQ = q.toLowerCase();
|
||||
return forensicsTerms.some(term => lowerQ.includes(term));
|
||||
})
|
||||
.map(q => q.trim())
|
||||
.slice(0, 3); // Max 3 questions
|
||||
|
||||
// If no valid forensics questions, return empty array (means input is complete)
|
||||
if (questions.length === 0) {
|
||||
throw new Error('No valid questions found');
|
||||
questions = [];
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.error('Failed to parse enhancement response:', aiContent);
|
||||
return apiServerError.unavailable('Invalid enhancement response format');
|
||||
// If parsing fails, assume input is complete enough
|
||||
questions = [];
|
||||
}
|
||||
|
||||
console.log(`[AI Enhancement] User: ${userId}, Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
|
||||
console.log(`[AI Enhancement] User: ${userId}, Forensics Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
|
||||
|
||||
return new Response(JSON.stringify({
|
||||
success: true,
|
||||
questions,
|
||||
taskId
|
||||
taskId,
|
||||
inputComplete: questions.length === 0 // Flag to indicate if input seems complete
|
||||
}), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
|
@ -1,275 +1,105 @@
|
||||
// src/pages/api/ai/query.ts
|
||||
// src/pages/api/ai/query.ts - FIXED: Rate limiting for micro-task pipeline
|
||||
|
||||
import type { APIRoute } from 'astro';
|
||||
import { withAPIAuth } from '../../../utils/auth.js';
|
||||
import { getCompressedToolsDataForAI } from '../../../utils/dataService.js';
|
||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
||||
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';
|
||||
import { aiPipeline } from '../../../utils/aiPipeline.js';
|
||||
|
||||
export const prerender = false;
|
||||
|
||||
function getEnv(key: string): string {
|
||||
const value = process.env[key];
|
||||
if (!value) {
|
||||
throw new Error(`Missing environment variable: ${key}`);
|
||||
}
|
||||
return value;
|
||||
interface RateLimitData {
|
||||
count: number;
|
||||
resetTime: number;
|
||||
microTaskCount: number;
|
||||
}
|
||||
|
||||
const AI_MODEL = getEnv('AI_MODEL');
|
||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
|
||||
const RATE_LIMIT_WINDOW = 60 * 1000;
|
||||
const RATE_LIMIT_MAX = 10;
|
||||
const rateLimitStore = new Map<string, RateLimitData>();
|
||||
|
||||
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
|
||||
const MAIN_RATE_LIMIT_MAX = parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS || '4', 10);
|
||||
const MICRO_TASK_TOTAL_LIMIT = parseInt(process.env.AI_MICRO_TASK_TOTAL_LIMIT || '50', 10);
|
||||
|
||||
function sanitizeInput(input: string): string {
|
||||
let sanitized = input
|
||||
.replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]') // Remove code blocks
|
||||
.replace(/\<\/?[^>]+(>|$)/g, '') // Remove HTML tags
|
||||
.replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
|
||||
.replace(/\<\/?[^>]+(>|$)/g, '')
|
||||
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
|
||||
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
|
||||
.trim();
|
||||
|
||||
sanitized = sanitized.slice(0, 2000).replace(/\s+/g, ' ');
|
||||
|
||||
return sanitized;
|
||||
}
|
||||
|
||||
function stripMarkdownJson(content: string): string {
|
||||
return content
|
||||
.replace(/^```json\s*/i, '')
|
||||
.replace(/\s*```\s*$/, '')
|
||||
.trim();
|
||||
}
|
||||
|
||||
function checkRateLimit(userId: string): boolean {
|
||||
function checkRateLimit(userId: string): { allowed: boolean; reason?: string; microTasksRemaining?: number } {
|
||||
const now = Date.now();
|
||||
const userLimit = rateLimitStore.get(userId);
|
||||
|
||||
if (!userLimit || now > userLimit.resetTime) {
|
||||
rateLimitStore.set(userId, { count: 1, resetTime: now + RATE_LIMIT_WINDOW });
|
||||
return true;
|
||||
rateLimitStore.set(userId, {
|
||||
count: 1,
|
||||
resetTime: now + RATE_LIMIT_WINDOW,
|
||||
microTaskCount: 0
|
||||
});
|
||||
return {
|
||||
allowed: true,
|
||||
microTasksRemaining: MICRO_TASK_TOTAL_LIMIT
|
||||
};
|
||||
}
|
||||
|
||||
if (userLimit.count >= RATE_LIMIT_MAX) {
|
||||
return false;
|
||||
if (userLimit.count >= MAIN_RATE_LIMIT_MAX) {
|
||||
return {
|
||||
allowed: false,
|
||||
reason: `Main rate limit exceeded. Max ${MAIN_RATE_LIMIT_MAX} requests per minute.`
|
||||
};
|
||||
}
|
||||
|
||||
if (userLimit.microTaskCount >= MICRO_TASK_TOTAL_LIMIT) {
|
||||
return {
|
||||
allowed: false,
|
||||
reason: `Micro-task limit exceeded. Max ${MICRO_TASK_TOTAL_LIMIT} AI calls per minute.`
|
||||
};
|
||||
}
|
||||
|
||||
userLimit.count++;
|
||||
return true;
|
||||
|
||||
return {
|
||||
allowed: true,
|
||||
microTasksRemaining: MICRO_TASK_TOTAL_LIMIT - userLimit.microTaskCount
|
||||
};
|
||||
}
|
||||
|
||||
function incrementMicroTaskCount(userId: string, aiCallsMade: number): void {
|
||||
const userLimit = rateLimitStore.get(userId);
|
||||
if (userLimit) {
|
||||
userLimit.microTaskCount += aiCallsMade;
|
||||
console.log(`[RATE LIMIT] User ${userId} now at ${userLimit.microTaskCount}/${MICRO_TASK_TOTAL_LIMIT} micro-task calls`);
|
||||
}
|
||||
}
|
||||
|
||||
function cleanupExpiredRateLimits() {
|
||||
const now = Date.now();
|
||||
const maxStoreSize = 1000;
|
||||
|
||||
for (const [userId, limit] of rateLimitStore.entries()) {
|
||||
if (now > limit.resetTime) {
|
||||
rateLimitStore.delete(userId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
||||
if (rateLimitStore.size > maxStoreSize) {
|
||||
const entries = Array.from(rateLimitStore.entries());
|
||||
entries.sort((a, b) => a[1].resetTime - b[1].resetTime);
|
||||
|
||||
async function loadToolsDatabase() {
|
||||
try {
|
||||
return await getCompressedToolsDataForAI();
|
||||
} catch (error) {
|
||||
console.error('Failed to load tools database:', error);
|
||||
throw new Error('Database unavailable');
|
||||
const toRemove = entries.slice(0, entries.length - maxStoreSize);
|
||||
toRemove.forEach(([userId]) => rateLimitStore.delete(userId));
|
||||
|
||||
console.log(`[RATE LIMIT] Cleanup: removed ${toRemove.length} old entries`);
|
||||
}
|
||||
}
|
||||
|
||||
function createWorkflowSystemPrompt(toolsData: any): string {
|
||||
const toolsList = toolsData.tools.map((tool: any) => ({
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
domains: tool.domains,
|
||||
phases: tool.phases,
|
||||
domainAgnostic: tool['domain-agnostic-software'],
|
||||
platforms: tool.platforms,
|
||||
skillLevel: tool.skillLevel,
|
||||
license: tool.license,
|
||||
tags: tool.tags,
|
||||
related_concepts: tool.related_concepts || []
|
||||
}));
|
||||
|
||||
const conceptsList = toolsData.concepts.map((concept: any) => ({
|
||||
name: concept.name,
|
||||
description: concept.description,
|
||||
domains: concept.domains,
|
||||
phases: concept.phases,
|
||||
skillLevel: concept.skillLevel,
|
||||
tags: concept.tags
|
||||
}));
|
||||
|
||||
const regularPhases = toolsData.phases || [];
|
||||
|
||||
const domainAgnosticSoftware = toolsData['domain-agnostic-software'] || [];
|
||||
|
||||
const allPhaseItems = [
|
||||
...regularPhases,
|
||||
...domainAgnosticSoftware
|
||||
];
|
||||
|
||||
const phasesDescription = allPhaseItems.map((phase: any) =>
|
||||
`- ${phase.id}: ${phase.name}`
|
||||
).join('\n');
|
||||
|
||||
const domainsDescription = toolsData.domains.map((domain: any) =>
|
||||
`- ${domain.id}: ${domain.name}`
|
||||
).join('\n');
|
||||
|
||||
const phaseDescriptions = regularPhases.map((phase: any) =>
|
||||
`- ${phase.name}: ${phase.description || 'Tools/Methods for this phase'}`
|
||||
).join('\n');
|
||||
|
||||
const domainAgnosticDescriptions = domainAgnosticSoftware.map((section: any) =>
|
||||
`- ${section.name}: ${section.description || 'Cross-cutting software and platforms'}`
|
||||
).join('\n');
|
||||
|
||||
const validPhases = [
|
||||
...regularPhases.map((p: any) => p.id),
|
||||
...domainAgnosticSoftware.map((s: any) => s.id)
|
||||
].join('|');
|
||||
|
||||
return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte, der Ermittlern bei der Auswahl von Software und Methoden hilft.
|
||||
|
||||
VERFÜGBARE TOOLS/METHODEN:
|
||||
${JSON.stringify(toolsList, null, 2)}
|
||||
|
||||
VERFÜGBARE HINTERGRUNDWISSEN-KONZEPTE:
|
||||
${JSON.stringify(conceptsList, null, 2)}
|
||||
|
||||
UNTERSUCHUNGSPHASEN (NIST Framework):
|
||||
${phasesDescription}
|
||||
|
||||
FORENSISCHE DOMÄNEN:
|
||||
${domainsDescription}
|
||||
|
||||
WICHTIGE REGELN:
|
||||
1. Pro Phase 2-3 Tools/Methoden empfehlen (immer mindestens 2 wenn verfügbar)
|
||||
2. Tools/Methoden können in MEHREREN Phasen empfohlen werden wenn sinnvoll - versuche ein Tool/Methode für jede Phase zu empfehlen, selbst wenn die Priorität "low" ist.
|
||||
3. Für Reporting-Phase: Visualisierungs- und Dokumentationssoftware einschließen
|
||||
4. Gib stets dem spezieller für den Fall geeigneten Werkzeug den Vorzug.
|
||||
5. Deutsche Antworten für deutsche Anfragen, English for English queries
|
||||
6. Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software.
|
||||
7. Bevorzuge alles, was nicht proprietär ist (license != "Proprietary"), aber erkenne an, wenn proprietäre Software besser geeignet ist.
|
||||
8. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
|
||||
9. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
|
||||
|
||||
ENHANCED CONTEXTUAL ANALYSIS:
|
||||
10. Analysiere das Szenario detailliert und identifiziere Schlüsselelemente, Bedrohungen und forensische Herausforderungen
|
||||
11. Entwickle einen strategischen Untersuchungsansatz basierend auf dem spezifischen Szenario
|
||||
12. Identifiziere zeitkritische oder besonders wichtige Faktoren für diesen Fall
|
||||
|
||||
SOFTWARE/METHODEN-AUSWAHL NACH PHASE:
|
||||
${phaseDescriptions}
|
||||
|
||||
DOMÄNENAGNOSTISCHE SOFTWARE/METHODEN:
|
||||
${domainAgnosticDescriptions}
|
||||
|
||||
ANTWORT-FORMAT (strict JSON):
|
||||
{
|
||||
"scenario_analysis": "Detaillierte Analyse des Szenarios: Erkannte Schlüsselelemente, Art des Vorfalls, betroffene Systeme, potentielle Bedrohungen und forensische Herausforderungen",
|
||||
"investigation_approach": "Strategischer Untersuchungsansatz für dieses spezifische Szenario: Prioritäten, Reihenfolge der Phasen, besondere Überlegungen",
|
||||
"critical_considerations": "Zeitkritische Faktoren, wichtige Sicherheitsaspekte oder besondere Vorsichtsmaßnahmen für diesen Fall",
|
||||
"recommended_tools": [
|
||||
{
|
||||
"name": "EXAKTER Name aus der Tools-Database",
|
||||
"priority": "high|medium|low",
|
||||
"phase": "${validPhases}",
|
||||
"justification": "Warum diese Methode für diese Phase und dieses spezifische Szenario geeignet ist - mit Bezug zu den erkannten Schlüsselelementen"
|
||||
}
|
||||
],
|
||||
"workflow_suggestion": "Vorgeschlagener Untersuchungsablauf mit konkreten Schritten für dieses Szenario",
|
||||
"background_knowledge": [
|
||||
{
|
||||
"concept_name": "EXAKTER Name aus der Konzepte-Database",
|
||||
"relevance": "Warum dieses Konzept für das Szenario relevant ist, und bei welchen der empfohlenen Methoden/Tools."
|
||||
}
|
||||
],
|
||||
"additional_notes": "Wichtige Überlegungen und Hinweise"
|
||||
}
|
||||
|
||||
Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
|
||||
}
|
||||
|
||||
function createToolSystemPrompt(toolsData: any): string {
|
||||
const toolsList = toolsData.tools.map((tool: any) => ({
|
||||
name: tool.name,
|
||||
description: tool.description,
|
||||
domains: tool.domains,
|
||||
phases: tool.phases,
|
||||
platforms: tool.platforms,
|
||||
skillLevel: tool.skillLevel,
|
||||
license: tool.license,
|
||||
tags: tool.tags,
|
||||
url: tool.url,
|
||||
projectUrl: tool.projectUrl,
|
||||
related_concepts: tool.related_concepts || []
|
||||
}));
|
||||
|
||||
const conceptsList = toolsData.concepts.map((concept: any) => ({
|
||||
name: concept.name,
|
||||
description: concept.description,
|
||||
domains: concept.domains,
|
||||
phases: concept.phases,
|
||||
skillLevel: concept.skillLevel,
|
||||
tags: concept.tags
|
||||
}));
|
||||
|
||||
return `Du bist ein DFIR (Digital Forensics and Incident Response) Experte, der bei der Auswahl spezifischer Software/Methoden für konkrete Probleme hilft.
|
||||
|
||||
VERFÜGBARE TOOLS/METHODEN:
|
||||
${JSON.stringify(toolsList, null, 2)}
|
||||
|
||||
VERFÜGBARE HINTERGRUNDWISSEN-KONZEPTE:
|
||||
${JSON.stringify(conceptsList, null, 2)}
|
||||
|
||||
WICHTIGE REGELN:
|
||||
1. Analysiere das spezifische Problem/die Anforderung sorgfältig
|
||||
2. Empfehle 1-3 Methoden/Tools, sortiert nach Eignung (beste Empfehlung zuerst)
|
||||
3. Gib detaillierte Erklärungen, WARUM und WIE jede Methode/Tool das Problem löst
|
||||
4. Berücksichtige praktische Aspekte: Skill Level, Plattformen, Verfügbarkeit
|
||||
5. Deutsche Antworten für deutsche Anfragen, English for English queries
|
||||
6. Gib konkrete Anwendungshinweise, nicht nur allgemeine Beschreibungen - Methoden haben, sofern für das SZENARIO passend, IMMER Vorrang vor Software.
|
||||
7. Erwähne sowohl Stärken als auch Schwächen/Limitationen
|
||||
8. Schlage alternative Ansätze vor, wenn sinnvoll
|
||||
9. Gib grundsätzliche Hinweise, WIE die Methode/Tool konkret eingesetzt wird
|
||||
10. WICHTIG: Erwähne relevante Hintergrundwissen-Konzepte wenn Tools verwendet werden, die related_concepts haben
|
||||
11. Konzepte sind NICHT Tools - empfehle sie nicht als actionable Schritte, sondern als Wissensbasis
|
||||
|
||||
ENHANCED CONTEXTUAL ANALYSIS:
|
||||
12. Analysiere das Problem detailliert und identifiziere technische Anforderungen, Herausforderungen und Erfolgsfaktoren
|
||||
13. Entwickle einen strategischen Lösungsansatz basierend auf dem spezifischen Problem
|
||||
14. Identifiziere wichtige Voraussetzungen oder Warnungen für die Anwendung
|
||||
|
||||
ANTWORT-FORMAT (strict JSON):
|
||||
{
|
||||
"problem_analysis": "Detaillierte Analyse des Problems: Erkannte technische Anforderungen, Herausforderungen, benötigte Fähigkeiten und Erfolgsfaktoren",
|
||||
"investigation_approach": "Strategischer Lösungsansatz für dieses spezifische Problem: Herangehensweise, Prioritäten, optimale Anwendungsreihenfolge",
|
||||
"critical_considerations": "Wichtige Voraussetzungen, potentielle Fallstricke oder Warnungen für die Anwendung der empfohlenen Lösungen",
|
||||
"recommended_tools": [
|
||||
{
|
||||
"name": "EXAKTER Name aus der Tools-Database",
|
||||
"rank": 1,
|
||||
"suitability_score": "high|medium|low",
|
||||
"detailed_explanation": "Detaillierte Erklärung, warum dieses Tool/diese Methode das spezifische Problem löst - mit Bezug zu den erkannten Anforderungen",
|
||||
"implementation_approach": "Konkrete Schritte/Ansatz zur Anwendung für dieses spezifische Problem",
|
||||
"pros": ["Spezifische Vorteile für diesen Anwendungsfall", "Weitere Vorteile"],
|
||||
"cons": ["Potentielle Nachteile oder Limitationen", "Weitere Einschränkungen"],
|
||||
"alternatives": "Alternative Ansätze oder ergänzende Tools/Methoden, falls relevant"
|
||||
}
|
||||
],
|
||||
"background_knowledge": [
|
||||
{
|
||||
"concept_name": "EXAKTER Name aus der Konzepte-Database",
|
||||
"relevance": "Warum dieses Konzept für die empfohlenen Tools/das Problem relevant ist, und für welche der empfohlenen Methoden/Tools."
|
||||
}
|
||||
],
|
||||
"additional_considerations": "Wichtige Überlegungen, Voraussetzungen oder Warnungen"
|
||||
}
|
||||
|
||||
Antworte NUR mit validen JSON. Keine zusätzlichen Erklärungen außerhalb des JSON.`;
|
||||
}
|
||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
||||
|
||||
export const POST: APIRoute = async ({ request }) => {
|
||||
try {
|
||||
@ -280,161 +110,100 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
|
||||
const userId = authResult.userId;
|
||||
|
||||
if (!checkRateLimit(userId)) {
|
||||
return apiError.rateLimit('Rate limit exceeded');
|
||||
const rateLimitResult = checkRateLimit(userId);
|
||||
if (!rateLimitResult.allowed) {
|
||||
return apiError.rateLimit(rateLimitResult.reason || 'Rate limit exceeded');
|
||||
}
|
||||
|
||||
const body = await request.json();
|
||||
const { query, mode = 'workflow', taskId: clientTaskId } = body;
|
||||
|
||||
// ADD THIS DEBUG LOGGING
|
||||
console.log(`[AI API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
|
||||
console.log(`[MICRO-TASK API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
|
||||
console.log(`[MICRO-TASK API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);
|
||||
|
||||
if (!query || typeof query !== 'string') {
|
||||
console.log(`[AI API] Invalid query for task ${clientTaskId}`);
|
||||
console.log(`[MICRO-TASK API] Invalid query for task ${clientTaskId}`);
|
||||
return apiError.badRequest('Query required');
|
||||
}
|
||||
|
||||
if (!['workflow', 'tool'].includes(mode)) {
|
||||
console.log(`[AI API] Invalid mode for task ${clientTaskId}: ${mode}`);
|
||||
console.log(`[MICRO-TASK API] Invalid mode for task ${clientTaskId}: ${mode}`);
|
||||
return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
|
||||
}
|
||||
|
||||
const sanitizedQuery = sanitizeInput(query);
|
||||
if (sanitizedQuery.includes('[FILTERED]')) {
|
||||
console.log(`[AI API] Filtered input detected for task ${clientTaskId}`);
|
||||
console.log(`[MICRO-TASK API] Filtered input detected for task ${clientTaskId}`);
|
||||
return apiError.badRequest('Invalid input detected');
|
||||
}
|
||||
|
||||
const toolsData = await loadToolsDatabase();
|
||||
|
||||
const systemPrompt = mode === 'workflow'
|
||||
? createWorkflowSystemPrompt(toolsData)
|
||||
: createToolSystemPrompt(toolsData);
|
||||
|
||||
const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
|
||||
|
||||
console.log(`[AI API] About to enqueue task ${taskId}`);
|
||||
console.log(`[MICRO-TASK API] About to enqueue micro-task pipeline ${taskId}`);
|
||||
|
||||
|
||||
const aiResponse = await enqueueApiCall(() =>
|
||||
fetch(process.env.AI_API_ENDPOINT + '/v1/chat/completions', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${process.env.AI_API_KEY}`
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: AI_MODEL,
|
||||
messages: [
|
||||
{
|
||||
role: 'system',
|
||||
content: systemPrompt
|
||||
},
|
||||
{
|
||||
role: 'user',
|
||||
content: sanitizedQuery
|
||||
}
|
||||
],
|
||||
max_tokens: 3500,
|
||||
temperature: 0.3
|
||||
})
|
||||
})
|
||||
const result = await enqueueApiCall(() =>
|
||||
aiPipeline.processQuery(sanitizedQuery, mode)
|
||||
, taskId);
|
||||
|
||||
if (!aiResponse.ok) {
|
||||
console.error('AI API error:', await aiResponse.text());
|
||||
return apiServerError.unavailable('AI service unavailable');
|
||||
if (!result || !result.recommendation) {
|
||||
return apiServerError.unavailable('No response from micro-task AI pipeline');
|
||||
}
|
||||
|
||||
const aiData = await aiResponse.json();
|
||||
const aiContent = aiData.choices?.[0]?.message?.content;
|
||||
const stats = result.processingStats;
|
||||
const estimatedAICallsMade = stats.microTasksCompleted + stats.microTasksFailed;
|
||||
incrementMicroTaskCount(userId, estimatedAICallsMade);
|
||||
|
||||
if (!aiContent) {
|
||||
return apiServerError.unavailable('No response from AI');
|
||||
}
|
||||
console.log(`[MICRO-TASK API] Pipeline completed for ${taskId}:`);
|
||||
console.log(` - Mode: ${mode}`);
|
||||
console.log(` - User: ${userId}`);
|
||||
console.log(` - Query length: ${sanitizedQuery.length}`);
|
||||
console.log(` - Processing time: ${stats.processingTimeMs}ms`);
|
||||
console.log(` - Micro-tasks completed: ${stats.microTasksCompleted}`);
|
||||
console.log(` - Micro-tasks failed: ${stats.microTasksFailed}`);
|
||||
console.log(` - Estimated AI calls: ${estimatedAICallsMade}`);
|
||||
console.log(` - Embeddings used: ${stats.embeddingsUsed}`);
|
||||
console.log(` - Final items: ${stats.finalSelectedItems}`);
|
||||
|
||||
let recommendation;
|
||||
try {
|
||||
const cleanedContent = stripMarkdownJson(aiContent);
|
||||
recommendation = JSON.parse(cleanedContent);
|
||||
} catch (error) {
|
||||
console.error('Failed to parse AI response:', aiContent);
|
||||
return apiServerError.unavailable('Invalid AI response format');
|
||||
}
|
||||
|
||||
const validToolNames = new Set(toolsData.tools.map((t: any) => t.name));
|
||||
const validConceptNames = new Set(toolsData.concepts.map((c: any) => c.name));
|
||||
|
||||
let validatedRecommendation;
|
||||
|
||||
if (mode === 'workflow') {
|
||||
validatedRecommendation = {
|
||||
...recommendation,
|
||||
// Ensure all new fields are included with fallbacks
|
||||
scenario_analysis: recommendation.scenario_analysis || recommendation.problem_analysis || '',
|
||||
investigation_approach: recommendation.investigation_approach || '',
|
||||
critical_considerations: recommendation.critical_considerations || '',
|
||||
recommended_tools: recommendation.recommended_tools?.filter((tool: any) => {
|
||||
if (!validToolNames.has(tool.name)) {
|
||||
console.warn(`AI recommended unknown tool: ${tool.name}`);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}) || [],
|
||||
background_knowledge: recommendation.background_knowledge?.filter((concept: any) => {
|
||||
if (!validConceptNames.has(concept.concept_name)) {
|
||||
console.warn(`AI referenced unknown concept: ${concept.concept_name}`);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}) || []
|
||||
};
|
||||
} else {
|
||||
validatedRecommendation = {
|
||||
...recommendation,
|
||||
// Ensure all new fields are included with fallbacks
|
||||
problem_analysis: recommendation.problem_analysis || recommendation.scenario_analysis || '',
|
||||
investigation_approach: recommendation.investigation_approach || '',
|
||||
critical_considerations: recommendation.critical_considerations || '',
|
||||
recommended_tools: recommendation.recommended_tools?.filter((tool: any) => {
|
||||
if (!validToolNames.has(tool.name)) {
|
||||
console.warn(`AI recommended unknown tool: ${tool.name}`);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}).map((tool: any, index: number) => ({
|
||||
...tool,
|
||||
rank: tool.rank || (index + 1),
|
||||
suitability_score: tool.suitability_score || 'medium',
|
||||
pros: Array.isArray(tool.pros) ? tool.pros : [],
|
||||
cons: Array.isArray(tool.cons) ? tool.cons : []
|
||||
})) || [],
|
||||
background_knowledge: recommendation.background_knowledge?.filter((concept: any) => {
|
||||
if (!validConceptNames.has(concept.concept_name)) {
|
||||
console.warn(`AI referenced unknown concept: ${concept.concept_name}`);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}) || []
|
||||
};
|
||||
}
|
||||
|
||||
console.log(`[AI Query] Mode: ${mode}, User: ${userId}, Query length: ${sanitizedQuery.length}, Tools: ${validatedRecommendation.recommended_tools.length}, Concepts: ${validatedRecommendation.background_knowledge?.length || 0}`);
|
||||
const currentLimit = rateLimitStore.get(userId);
|
||||
const remainingMicroTasks = currentLimit ?
|
||||
MICRO_TASK_TOTAL_LIMIT - currentLimit.microTaskCount : MICRO_TASK_TOTAL_LIMIT;
|
||||
|
||||
return new Response(JSON.stringify({
|
||||
success: true,
|
||||
mode,
|
||||
taskId,
|
||||
recommendation: validatedRecommendation,
|
||||
query: sanitizedQuery
|
||||
recommendation: result.recommendation,
|
||||
query: sanitizedQuery,
|
||||
processingStats: {
|
||||
...result.processingStats,
|
||||
pipelineType: 'micro-task',
|
||||
microTasksSuccessRate: stats.microTasksCompleted / (stats.microTasksCompleted + stats.microTasksFailed),
|
||||
averageTaskTime: stats.processingTimeMs / (stats.microTasksCompleted + stats.microTasksFailed),
|
||||
estimatedAICallsMade
|
||||
},
|
||||
rateLimitInfo: {
|
||||
mainRequestsRemaining: MAIN_RATE_LIMIT_MAX - (currentLimit?.count || 0),
|
||||
microTaskCallsRemaining: remainingMicroTasks,
|
||||
resetTime: Date.now() + RATE_LIMIT_WINDOW
|
||||
}
|
||||
}), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
console.error('AI query error:', error);
|
||||
return apiServerError.internal('Internal server error');
|
||||
console.error('[MICRO-TASK API] Pipeline error:', error);
|
||||
|
||||
if (error.message.includes('embeddings')) {
|
||||
return apiServerError.unavailable('Embeddings service error - using AI fallback');
|
||||
} else if (error.message.includes('micro-task')) {
|
||||
return apiServerError.unavailable('Micro-task pipeline error - some analysis steps failed');
|
||||
} else if (error.message.includes('selector')) {
|
||||
return apiServerError.unavailable('AI selector service error');
|
||||
} else if (error.message.includes('rate limit')) {
|
||||
return apiError.rateLimit('AI service rate limits exceeded during micro-task processing');
|
||||
} else {
|
||||
return apiServerError.internal('Micro-task AI pipeline error');
|
||||
}
|
||||
}
|
||||
};
|
@ -1263,6 +1263,12 @@ input[type="checkbox"] {
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.filter-header-controls {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
/* Search Components */
|
||||
.search-wrapper {
|
||||
position: relative;
|
||||
@ -1315,6 +1321,64 @@ input[type="checkbox"] {
|
||||
color: var(--color-text);
|
||||
}
|
||||
|
||||
.collapse-toggle {
|
||||
background: none;
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: 0.375rem;
|
||||
color: var(--color-text-secondary);
|
||||
cursor: pointer;
|
||||
padding: 0.375rem;
|
||||
transition: var(--transition-fast);
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
width: 32px;
|
||||
height: 32px;
|
||||
}
|
||||
|
||||
.collapse-toggle:hover {
|
||||
background-color: var(--color-bg-secondary);
|
||||
border-color: var(--color-primary);
|
||||
color: var(--color-text);
|
||||
}
|
||||
|
||||
.collapse-toggle svg {
|
||||
transition: transform var(--transition-medium);
|
||||
}
|
||||
|
||||
/* When expanded, rotate the chevron */
|
||||
.collapse-toggle[data-collapsed="false"] svg {
|
||||
transform: rotate(180deg);
|
||||
}
|
||||
|
||||
/* Collapsible Content */
|
||||
.collapsible-content {
|
||||
overflow: hidden;
|
||||
transition: all var(--transition-medium);
|
||||
opacity: 1;
|
||||
max-height: 1000px;
|
||||
}
|
||||
|
||||
.collapsible-content.hidden {
|
||||
opacity: 0;
|
||||
max-height: 0;
|
||||
padding-top: 0;
|
||||
padding-bottom: 0;
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
/* Smooth animation for expanding content */
|
||||
.collapsible-content:not(.hidden) {
|
||||
animation: expandContent 0.3s ease-out;
|
||||
}
|
||||
|
||||
/* Content spacing when expanded */
|
||||
.collapsible-content:not(.hidden) .advanced-filters-compact,
|
||||
.collapsible-content:not(.hidden) .tag-section {
|
||||
padding-top: 0.75rem;
|
||||
}
|
||||
|
||||
/* Filter Grids & Groups */
|
||||
.filter-grid-compact {
|
||||
display: grid;
|
||||
@ -1429,11 +1493,9 @@ input[type="checkbox"] {
|
||||
user-select: none;
|
||||
}
|
||||
|
||||
/* Tag System */
|
||||
.tag-section {
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 1rem;
|
||||
.tag-section .tag-controls {
|
||||
order: -1;
|
||||
margin-bottom: 0.75rem;
|
||||
}
|
||||
|
||||
.selected-tags {
|
||||
@ -1574,6 +1636,14 @@ input[type="checkbox"] {
|
||||
transition: var(--transition-fast);
|
||||
}
|
||||
|
||||
.filter-reset {
|
||||
width: 32px;
|
||||
height: 32px;
|
||||
display: inline-flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.filter-reset:hover {
|
||||
background-color: var(--color-bg-secondary);
|
||||
border-color: var(--color-warning);
|
||||
@ -1591,13 +1661,6 @@ input[type="checkbox"] {
|
||||
opacity: 0.9;
|
||||
}
|
||||
|
||||
/* Tag Controls */
|
||||
.tag-controls {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.75rem;
|
||||
}
|
||||
|
||||
.tag-toggle {
|
||||
padding: 0.375rem 0.75rem;
|
||||
border: 1px solid var(--color-border);
|
||||
@ -1818,6 +1881,130 @@ input[type="checkbox"] {
|
||||
border-left-color: var(--color-warning);
|
||||
}
|
||||
|
||||
/* Add to src/styles/global.css - Micro-Task Progress Styles */
|
||||
|
||||
/* Micro-task progress indicator */
|
||||
.micro-task-progress {
|
||||
background-color: var(--color-bg-secondary);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: 0.5rem;
|
||||
padding: 1rem;
|
||||
margin: 1rem 0;
|
||||
transition: var(--transition-fast);
|
||||
}
|
||||
|
||||
.micro-task-header {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
margin-bottom: 0.75rem;
|
||||
}
|
||||
|
||||
.micro-task-label {
|
||||
font-weight: 600;
|
||||
color: var(--color-primary);
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
.micro-task-counter {
|
||||
background-color: var(--color-primary);
|
||||
color: white;
|
||||
padding: 0.25rem 0.5rem;
|
||||
border-radius: 1rem;
|
||||
font-size: 0.75rem;
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
.micro-task-steps {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.micro-step {
|
||||
background-color: var(--color-bg);
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: 0.375rem;
|
||||
padding: 0.5rem;
|
||||
font-size: 0.75rem;
|
||||
text-align: center;
|
||||
transition: var(--transition-fast);
|
||||
opacity: 0.6;
|
||||
}
|
||||
|
||||
.micro-step.active {
|
||||
background-color: var(--color-primary);
|
||||
color: white;
|
||||
border-color: var(--color-primary);
|
||||
opacity: 1;
|
||||
transform: scale(1.05);
|
||||
}
|
||||
|
||||
.micro-step.completed {
|
||||
background-color: var(--color-accent);
|
||||
color: white;
|
||||
border-color: var(--color-accent);
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.micro-step.failed {
|
||||
background-color: var(--color-error);
|
||||
color: white;
|
||||
border-color: var(--color-error);
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
/* Enhanced queue status for micro-tasks */
|
||||
.queue-status-card.micro-task-mode {
|
||||
border-left: 4px solid var(--color-primary);
|
||||
}
|
||||
|
||||
.queue-status-card.micro-task-mode .queue-header {
|
||||
background: linear-gradient(135deg, var(--color-primary) 0%, var(--color-accent) 100%);
|
||||
color: white;
|
||||
margin: -1rem -1rem 1rem -1rem;
|
||||
padding: 1rem;
|
||||
border-radius: 0.5rem 0.5rem 0 0;
|
||||
}
|
||||
|
||||
/* Mobile responsive adjustments */
|
||||
@media (max-width: 768px) {
|
||||
.micro-task-steps {
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
gap: 0.375rem;
|
||||
}
|
||||
|
||||
.micro-step {
|
||||
font-size: 0.6875rem;
|
||||
padding: 0.375rem;
|
||||
}
|
||||
|
||||
.micro-task-header {
|
||||
flex-direction: column;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
}
|
||||
|
||||
/* Animation for micro-task progress */
|
||||
@keyframes micro-task-pulse {
|
||||
0%, 100% { opacity: 1; }
|
||||
50% { opacity: 0.7; }
|
||||
}
|
||||
|
||||
.micro-step.active {
|
||||
animation: micro-task-pulse 2s ease-in-out infinite;
|
||||
}
|
||||
|
||||
@keyframes micro-task-complete {
|
||||
0% { transform: scale(1); }
|
||||
50% { transform: scale(1.1); }
|
||||
100% { transform: scale(1); }
|
||||
}
|
||||
|
||||
.micro-step.completed {
|
||||
animation: micro-task-complete 0.6s ease-out;
|
||||
}
|
||||
|
||||
/* ===================================================================
|
||||
17. WORKFLOW SYSTEM (CONSOLIDATED)
|
||||
================================================================= */
|
||||
@ -2267,6 +2454,17 @@ footer {
|
||||
to { opacity: 1; }
|
||||
}
|
||||
|
||||
@keyframes expandContent {
|
||||
from {
|
||||
opacity: 0;
|
||||
transform: translateY(-10px);
|
||||
}
|
||||
to {
|
||||
opacity: 1;
|
||||
transform: translateY(0);
|
||||
}
|
||||
}
|
||||
|
||||
@keyframes fadeInUp {
|
||||
from {
|
||||
opacity: 0;
|
||||
@ -3261,6 +3459,23 @@ footer {
|
||||
.view-toggle {
|
||||
justify-content: center;
|
||||
}
|
||||
|
||||
.filter-header-controls {
|
||||
gap: 0.375rem;
|
||||
}
|
||||
|
||||
.collapse-toggle,
|
||||
.filter-reset {
|
||||
width: 28px;
|
||||
height: 28px;
|
||||
padding: 0.25rem;
|
||||
}
|
||||
|
||||
.collapse-toggle svg,
|
||||
.filter-reset svg {
|
||||
width: 14px;
|
||||
height: 14px;
|
||||
}
|
||||
}
|
||||
|
||||
@media (width <= 640px) {
|
||||
@ -3395,6 +3610,21 @@ footer {
|
||||
.filter-card-compact {
|
||||
padding: 0.5rem;
|
||||
}
|
||||
|
||||
.filter-header-compact {
|
||||
flex-wrap: wrap;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.filter-header-compact h3 {
|
||||
flex: 1 1 100%;
|
||||
margin-bottom: 0.25rem;
|
||||
}
|
||||
|
||||
.filter-header-controls {
|
||||
flex: 1 1 100%;
|
||||
justify-content: flex-end;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
882
src/utils/aiPipeline.ts
Normal file
882
src/utils/aiPipeline.ts
Normal file
@ -0,0 +1,882 @@
|
||||
// src/utils/aiPipeline.ts - FIXED: Critical error corrections
|
||||
|
||||
import { getCompressedToolsDataForAI } from './dataService.js';
|
||||
import { embeddingsService, type EmbeddingData } from './embeddings.js';
|
||||
|
||||
/** Connection settings for the external AI (LLM) service. */
interface AIConfig {
  endpoint: string; // Base URL of the AI API (read from AI_ANALYZER_ENDPOINT)
  apiKey: string;   // API key for the service (read from AI_ANALYZER_API_KEY)
  model: string;    // Model identifier to request (read from AI_ANALYZER_MODEL)
}
|
||||
|
||||
/** Outcome of a single micro-task AI call (produced by callMicroTaskAI). */
interface MicroTaskResult {
  taskType: string;         // Category label; callMicroTaskAI always sets 'micro-task'
  content: string;          // Trimmed AI response text; '' when the call failed
  processingTimeMs: number; // Wall-clock duration of the call
  success: boolean;         // false when the underlying AI call threw
  error?: string;           // Error message, present only on failure
}
|
||||
|
||||
/** Final pipeline output: the recommendation plus statistics about the run. */
interface AnalysisResult {
  recommendation: any; // Shape depends on mode (workflow vs. tool recommendation)
  processingStats: {
    embeddingsUsed: boolean;          // Whether vector-similarity pre-filtering was applied
    candidatesFromEmbeddings: number; // Candidate count produced by the embeddings stage
    finalSelectedItems: number;       // Number of items in the final selection
    processingTimeMs: number;         // Total pipeline duration
    microTasksCompleted: number;      // Micro-tasks that succeeded
    microTasksFailed: number;         // Micro-tasks that failed
    contextContinuityUsed: boolean;   // Whether shared context history was threaded through tasks
  };
}
|
||||
|
||||
/**
 * Mutable state threaded through all micro-tasks of one analysis run.
 * Accumulates intermediate results plus a token-bounded context history.
 */
interface AnalysisContext {
  userQuery: string;        // Sanitized user query driving the analysis
  mode: string;             // 'workflow' or tool-recommendation mode
  filteredData: any;        // Candidate tools/concepts selected for this run
  contextHistory: string[]; // Rolling summaries of prior micro-task results

  // Token budget for contextHistory; entries are pruned oldest-first when the
  // estimated total exceeds maxContextLength (see addToContextHistory).
  maxContextLength: number;
  currentContextLength: number;

  scenarioAnalysis?: string;       // Set by analyzeScenario in workflow mode
  problemAnalysis?: string;        // Set by analyzeScenario in non-workflow mode
  investigationApproach?: string;  // Set by generateApproach
  criticalConsiderations?: string; // Set by generateCriticalConsiderations
  selectedTools?: Array<{tool: any, phase: string, priority: string, justification?: string}>;
  backgroundKnowledge?: Array<{concept: any, relevance: string}>;

  // Names of tools already selected; addToolToSelection uses this to prevent duplicates.
  seenToolNames: Set<string>;
}
|
||||
|
||||
class ImprovedMicroTaskAIPipeline {
|
||||
private config: AIConfig;            // AI service connection settings (from env, see constructor)
private maxSelectedItems: number;    // Cap on total tools+concepts in the final selection
private embeddingCandidates: number; // How many similar items to request from the embeddings service
private similarityThreshold: number; // Minimum similarity score passed to embeddingsService.findSimilar
private microTaskDelay: number;      // Pause between micro-task AI calls, in milliseconds

// Token budgets (rough 4-chars-per-token estimates; see estimateTokens)
private maxContextTokens: number; // Limit for the accumulated context history
private maxPromptTokens: number;  // Limit for a single combined (context + task) prompt
|
||||
|
||||
/**
 * Reads the required AI service settings (getEnv throws when one is missing)
 * and the optional tuning knobs, applying documented defaults for the latter.
 */
constructor() {
  // Small local helper: integer env var with a string fallback, parsed base-10.
  const intFromEnv = (name: string, fallback: string): number =>
    parseInt(process.env[name] || fallback, 10);

  this.config = {
    endpoint: this.getEnv('AI_ANALYZER_ENDPOINT'),
    apiKey: this.getEnv('AI_ANALYZER_API_KEY'),
    model: this.getEnv('AI_ANALYZER_MODEL')
  };

  this.maxSelectedItems = intFromEnv('AI_MAX_SELECTED_ITEMS', '60');
  this.embeddingCandidates = intFromEnv('AI_EMBEDDING_CANDIDATES', '60');
  this.similarityThreshold = 0.3;
  this.microTaskDelay = intFromEnv('AI_MICRO_TASK_DELAY_MS', '500');

  // Token budgets for prompt/context management.
  this.maxContextTokens = intFromEnv('AI_MAX_CONTEXT_TOKENS', '4000');
  this.maxPromptTokens = intFromEnv('AI_MAX_PROMPT_TOKENS', '1500');
}
|
||||
|
||||
/**
 * Returns the value of a required environment variable.
 * @throws Error when the variable is unset or empty.
 */
private getEnv(key: string): string {
  const value = process.env[key];
  if (value) {
    return value;
  }
  throw new Error(`Missing environment variable: ${key}`);
}
|
||||
|
||||
// FIXED: Estimate token count (rough approximation)
|
||||
/** Rough token estimate for budget checks, assuming ~4 characters per token. */
private estimateTokens(text: string): number {
  const CHARS_PER_TOKEN = 4;
  return Math.ceil(text.length / CHARS_PER_TOKEN);
}
|
||||
|
||||
// FIXED: Manage context history with token limits
|
||||
/**
 * Appends a summary entry to the rolling context history and then evicts the
 * oldest entries until the estimated token total fits maxContextTokens.
 * Always keeps at least one entry, even if it alone exceeds the budget.
 */
private addToContextHistory(context: AnalysisContext, newEntry: string): void {
  context.contextHistory.push(newEntry);
  context.currentContextLength += this.estimateTokens(newEntry);

  // Evict oldest-first while over budget, but never drop the final entry.
  while (context.contextHistory.length > 1 && context.currentContextLength > this.maxContextTokens) {
    const evicted = context.contextHistory.shift()!;
    context.currentContextLength -= this.estimateTokens(evicted);
  }
}
|
||||
|
||||
// FIXED: Safe JSON parsing with validation
|
||||
/**
 * Parses an AI response that may be wrapped in a ```json fenced code block.
 * Returns `fallback` (default null) instead of throwing on malformed JSON.
 * @param jsonString Raw AI response text.
 * @param fallback Value returned when parsing fails.
 */
private safeParseJSON(jsonString: string, fallback: any = null): any {
  try {
    const cleaned = jsonString
      .replace(/^```json\s*/i, '')
      .replace(/\s*```\s*$/g, '')
      .trim();

    return JSON.parse(cleaned);
  } catch (error) {
    // FIX: `error` is `unknown` under strict TS (useUnknownInCatchVariables);
    // narrow before reading `.message`, and tolerate non-Error throws.
    const message = error instanceof Error ? error.message : String(error);
    console.warn('[AI PIPELINE] JSON parsing failed:', message);
    console.warn('[AI PIPELINE] Raw content:', jsonString.slice(0, 200));
    return fallback;
  }
}
|
||||
|
||||
// FIXED: Add tool deduplication
|
||||
/**
 * Records a tool in the final selection unless a tool with the same name was
 * already added (deduplication via context.seenToolNames).
 * @returns true when the tool was appended, false when it was a duplicate.
 */
private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string): boolean {
  const name = tool.name;

  if (context.seenToolNames.has(name)) {
    console.log(`[AI PIPELINE] Skipping duplicate tool: ${name}`);
    return false;
  }

  context.seenToolNames.add(name);
  if (!context.selectedTools) {
    context.selectedTools = [];
  }
  context.selectedTools.push({ tool, phase, priority, justification });

  return true;
}
|
||||
|
||||
/**
 * Stage 1 of the pipeline: builds the candidate tool/concept set for a query.
 * When the embeddings service is enabled and productive (>= 15 tool hits),
 * candidates come from vector similarity; otherwise the full dataset is used.
 * The candidates — with their FULL metadata — are then handed to
 * aiSelectionWithFullData for the final AI-driven selection.
 * @returns The selected tools/concepts plus pass-through reference data
 *          (domains, phases, 'domain-agnostic-software') from toolsData.
 */
private async getIntelligentCandidates(userQuery: string, toolsData: any, mode: string) {
  let candidateTools: any[] = [];
  let candidateConcepts: any[] = [];
  let selectionMethod = 'unknown';

  if (embeddingsService.isEnabled()) {
    const similarItems = await embeddingsService.findSimilar(
      userQuery,
      this.embeddingCandidates,
      this.similarityThreshold
    );

    // Partition the similarity hits by item type.
    const toolNames = new Set<string>();
    const conceptNames = new Set<string>();

    similarItems.forEach(item => {
      if (item.type === 'tool') toolNames.add(item.name);
      if (item.type === 'concept') conceptNames.add(item.name);
    });

    console.log(`[IMPROVED PIPELINE] Embeddings found: ${toolNames.size} tools, ${conceptNames.size} concepts`);

    // Map the embeddings hits back to full records from the dataset.
    if (toolNames.size >= 15) { // Quality threshold: fewer hits → distrust embeddings
      candidateTools = toolsData.tools.filter((tool: any) => toolNames.has(tool.name));
      candidateConcepts = toolsData.concepts.filter((concept: any) => conceptNames.has(concept.name));
      selectionMethod = 'embeddings_candidates';

      console.log(`[IMPROVED PIPELINE] Using embeddings candidates: ${candidateTools.length} tools`);
    } else {
      console.log(`[IMPROVED PIPELINE] Embeddings insufficient (${toolNames.size} < 15), using full dataset`);
      candidateTools = toolsData.tools;
      candidateConcepts = toolsData.concepts;
      selectionMethod = 'full_dataset';
    }
  } else {
    console.log(`[IMPROVED PIPELINE] Embeddings disabled, using full dataset`);
    candidateTools = toolsData.tools;
    candidateConcepts = toolsData.concepts;
    selectionMethod = 'full_dataset';
  }

  // The AI now analyzes the FULL data of every candidate (not truncated summaries).
  console.log(`[IMPROVED PIPELINE] AI will analyze FULL DATA of ${candidateTools.length} candidate tools`);
  const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod);

  return {
    tools: finalSelection.selectedTools,
    concepts: finalSelection.selectedConcepts,
    domains: toolsData.domains,
    phases: toolsData.phases,
    'domain-agnostic-software': toolsData['domain-agnostic-software']
  };
}
|
||||
|
||||
/**
 * Stage 2: asks the AI to pick the most relevant tools/concepts from the
 * candidate set, giving it each candidate's COMPLETE metadata. The prompt
 * contains explicit de-biasing instructions (don't default to famous tools).
 * Falls back to emergencyKeywordSelection when the AI call or its JSON
 * response is unusable.
 * @param selectionMethod 'embeddings_candidates' or 'full_dataset' — only
 *        changes the wording of the prompt, not the mechanics.
 */
private async aiSelectionWithFullData(
  userQuery: string,
  candidateTools: any[],
  candidateConcepts: any[],
  mode: string,
  selectionMethod: string
) {
  const modeInstruction = mode === 'workflow'
    ? 'The user wants a COMPREHENSIVE WORKFLOW with multiple tools/methods across different phases. Select 15-25 tools that cover the full investigation lifecycle.'
    : 'The user wants SPECIFIC TOOLS/METHODS that directly solve their particular problem. Select 3-8 tools that are most relevant and effective.';

  // Give the AI the complete tool data, not truncated summaries.
  const toolsWithFullData = candidateTools.map((tool: any) => ({
    name: tool.name,
    type: tool.type,
    description: tool.description,
    domains: tool.domains,
    phases: tool.phases,
    platforms: tool.platforms || [],
    tags: tool.tags || [],
    skillLevel: tool.skillLevel,
    license: tool.license,
    accessType: tool.accessType,
    projectUrl: tool.projectUrl,
    knowledgebase: tool.knowledgebase,
    related_concepts: tool.related_concepts || [],
    related_software: tool.related_software || []
  }));

  const conceptsWithFullData = candidateConcepts.map((concept: any) => ({
    name: concept.name,
    type: 'concept',
    description: concept.description,
    domains: concept.domains,
    phases: concept.phases,
    tags: concept.tags || [],
    skillLevel: concept.skillLevel,
    related_concepts: concept.related_concepts || [],
    related_software: concept.related_software || []
  }));

  const prompt = `You are a DFIR expert with access to the complete forensics tool database. You need to select the most relevant tools and concepts for this specific query.

SELECTION METHOD: ${selectionMethod}
${selectionMethod === 'embeddings_candidates' ?
'These tools were pre-filtered by vector similarity, so they are already relevant. Your job is to select the BEST ones from this relevant set.' :
'You have access to the full tool database. Select the most relevant tools for the query.'}

${modeInstruction}

USER QUERY: "${userQuery}"

CRITICAL SELECTION PRINCIPLES:
1. **CONTEXT OVER POPULARITY**: Don't default to "famous" tools like Volatility, Wireshark, or Autopsy just because they're well-known. Choose based on SPECIFIC scenario needs.

2. **METHODOLOGY vs SOFTWARE**:
- For RAPID/URGENT scenarios → Prioritize METHODS and rapid response approaches
- For TIME-CRITICAL incidents → Choose triage methods over deep analysis tools
- For COMPREHENSIVE analysis → Then consider detailed software tools
- METHODS (type: "method") are often better than SOFTWARE for procedural guidance

3. **SCENARIO-SPECIFIC LOGIC**:
- "Rapid/Quick/Urgent/Triage" scenarios → Rapid Incident Response and Triage METHOD > Volatility
- "Industrial/SCADA/ICS" scenarios → Specialized ICS tools > generic network tools
- "Mobile/Android/iOS" scenarios → Mobile-specific tools > desktop forensics tools
- "Memory analysis needed urgently" → Quick memory tools/methods > comprehensive Volatility analysis

4. **AVOID TOOL BIAS**:
- Volatility is NOT always the answer for memory analysis
- Wireshark is NOT always the answer for network analysis
- Autopsy is NOT always the answer for disk analysis
- Consider lighter, faster, more appropriate alternatives

AVAILABLE TOOLS (with complete data):
${JSON.stringify(toolsWithFullData.slice(0, 30), null, 2)}

AVAILABLE CONCEPTS (with complete data):
${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}

ANALYSIS INSTRUCTIONS:
1. Read the FULL description of each tool/concept
2. Consider ALL tags, platforms, related tools, and metadata
3. **MATCH URGENCY LEVEL**: Rapid scenarios need rapid methods, not deep analysis tools
4. **MATCH SPECIFICITY**: Specialized scenarios need specialized tools, not generic ones
5. **CONSIDER TYPE**: Methods provide procedural guidance, software provides technical capability
6. For SCADA/ICS queries: prioritize specialized ICS tools over generic network tools
7. For mobile queries: prioritize mobile-specific tools over desktop tools
8. For rapid/urgent queries: prioritize methodology and triage approaches

BIAS PREVENTION:
- If query mentions "rapid", "quick", "urgent", "triage" → Strongly favor METHODS over deep analysis SOFTWARE
- If query mentions specific technologies (SCADA, Android, etc.) → Strongly favor specialized tools
- Don't recommend Volatility unless deep memory analysis is specifically needed AND time allows
- Don't recommend generic tools when specialized ones are available
- Consider the SKILL LEVEL and TIME CONSTRAINTS implied by the query

Select the most relevant items (max ${this.maxSelectedItems} total).

Respond with ONLY this JSON format:
{
"selectedTools": ["Tool Name 1", "Tool Name 2", ...],
"selectedConcepts": ["Concept Name 1", "Concept Name 2", ...],
"reasoning": "Detailed explanation of why these specific tools were selected for this query, addressing why certain popular tools were NOT selected if they were inappropriate for the scenario context"
}`;

  try {
    const response = await this.callAI(prompt, 2500); // More tokens for bias prevention logic

    const result = this.safeParseJSON(response, null);

    // Validate the structure before trusting it.
    if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
      console.error('[IMPROVED PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
      throw new Error('AI selection failed to return valid tool selection');
    }

    const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
    if (totalSelected === 0) {
      console.error('[IMPROVED PIPELINE] AI selection returned no tools');
      throw new Error('AI selection returned empty selection');
    }

    console.log(`[IMPROVED PIPELINE] AI selected: ${result.selectedTools.length} tools, ${result.selectedConcepts.length} concepts`);
    console.log(`[IMPROVED PIPELINE] AI reasoning: ${result.reasoning}`);

    // Map the selected names back to the actual tool/concept objects.
    const selectedTools = candidateTools.filter(tool => result.selectedTools.includes(tool.name));
    const selectedConcepts = candidateConcepts.filter(concept => result.selectedConcepts.includes(concept.name));

    console.log(`[IMPROVED PIPELINE] Final selection: ${selectedTools.length} tools with bias prevention applied`);

    return {
      selectedTools,
      selectedConcepts
    };

  } catch (error) {
    console.error('[IMPROVED PIPELINE] AI selection failed:', error);

    // Emergency fallback: keyword scoring, no AI involved.
    console.log('[IMPROVED PIPELINE] Using emergency keyword-based selection');
    return this.emergencyKeywordSelection(userQuery, candidateTools, candidateConcepts, mode);
  }
}
|
||||
|
||||
/**
 * Fallback used when the AI selection call fails: scores every candidate tool
 * by counting query keywords (length > 3) that appear in its combined
 * name/description/tags/platforms/domains text, then returns the top-scoring
 * tools (20 in workflow mode, 8 otherwise) plus the first three concepts.
 */
private emergencyKeywordSelection(userQuery: string, candidateTools: any[], candidateConcepts: any[], mode: string) {
  const keywords = userQuery.toLowerCase().split(/\s+/).filter(word => word.length > 3);

  // Score each tool by keyword hits across its searchable text.
  const ranked: Array<{ tool: any; score: number }> = [];
  for (const tool of candidateTools) {
    const haystack = (
      tool.name + ' ' +
      tool.description + ' ' +
      (tool.tags || []).join(' ') + ' ' +
      (tool.platforms || []).join(' ') + ' ' +
      (tool.domains || []).join(' ')
    ).toLowerCase();

    let hits = 0;
    for (const keyword of keywords) {
      if (haystack.includes(keyword)) hits += 1;
    }

    if (hits > 0) {
      ranked.push({ tool, score: hits });
    }
  }
  ranked.sort((a, b) => b.score - a.score);

  const limit = mode === 'workflow' ? 20 : 8;
  const selectedTools = ranked.slice(0, limit).map(entry => entry.tool);

  console.log(`[IMPROVED PIPELINE] Emergency selection: ${selectedTools.length} tools, keywords: ${keywords.slice(0, 5).join(', ')}`);

  return {
    selectedTools,
    selectedConcepts: candidateConcepts.slice(0, 3)
  };
}
|
||||
|
||||
private async delay(ms: number): Promise<void> {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens: number = 300): Promise<MicroTaskResult> {
|
||||
const startTime = Date.now();
|
||||
|
||||
// FIXED: Build context prompt with token management
|
||||
let contextPrompt = prompt;
|
||||
if (context.contextHistory.length > 0) {
|
||||
const contextSection = `BISHERIGE ANALYSE:\n${context.contextHistory.join('\n\n')}\n\nAKTUELLE AUFGABE:\n`;
|
||||
const combinedPrompt = contextSection + prompt;
|
||||
|
||||
// Check if combined prompt exceeds limits
|
||||
if (this.estimateTokens(combinedPrompt) <= this.maxPromptTokens) {
|
||||
contextPrompt = combinedPrompt;
|
||||
} else {
|
||||
console.warn('[AI PIPELINE] Context too long, using prompt only');
|
||||
// Could implement smarter context truncation here
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await this.callAI(contextPrompt, maxTokens);
|
||||
|
||||
return {
|
||||
taskType: 'micro-task',
|
||||
content: response.trim(),
|
||||
processingTimeMs: Date.now() - startTime,
|
||||
success: true
|
||||
};
|
||||
|
||||
} catch (error) {
|
||||
return {
|
||||
taskType: 'micro-task',
|
||||
content: '',
|
||||
processingTimeMs: Date.now() - startTime,
|
||||
success: false,
|
||||
error: error.message
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Micro-task: produces a German free-text analysis of the user's forensic
 * scenario (workflow mode) or technical problem (non-workflow mode).
 * On success the text is stored on the context (scenarioAnalysis or
 * problemAnalysis) and a truncated summary is pushed into the shared
 * context history for later micro-tasks.
 */
private async analyzeScenario(context: AnalysisContext): Promise<MicroTaskResult> {
  const isWorkflow = context.mode === 'workflow';

  // Prompt text is German, matching the other micro-task prompts in this class.
  const prompt = `Sie sind ein erfahrener DFIR-Experte. Analysieren Sie das folgende ${isWorkflow ? 'forensische Szenario' : 'technische Problem'}.

${isWorkflow ? 'FORENSISCHES SZENARIO' : 'TECHNISCHES PROBLEM'}: "${context.userQuery}"

Führen Sie eine systematische ${isWorkflow ? 'Szenario-Analyse' : 'Problem-Analyse'} durch und berücksichtigen Sie dabei:

${isWorkflow ?
`- Angriffsvektoren und Bedrohungsmodellierung nach MITRE ATT&CK
- Betroffene Systeme und kritische Infrastrukturen
- Zeitkritische Faktoren und Beweiserhaltung
- Forensische Artefakte und Datenquellen` :
`- Spezifische forensische Herausforderungen
- Verfügbare Datenquellen und deren Integrität
- Methodische Anforderungen für rechtssichere Analyse`
}

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen, Aufzählungen oder Markdown-Formatierung. Maximum 150 Wörter.`;

  const result = await this.callMicroTaskAI(prompt, context, 220);

  if (result.success) {
    if (isWorkflow) {
      context.scenarioAnalysis = result.content;
    } else {
      context.problemAnalysis = result.content;
    }

    // Push a truncated summary into the token-bounded context history.
    this.addToContextHistory(context, `${isWorkflow ? 'Szenario' : 'Problem'}-Analyse: ${result.content.slice(0, 200)}...`);
  }

  return result;
}
|
||||
|
||||
/**
 * Micro-task: derives an investigation approach (workflow mode) or solution
 * approach (non-workflow mode) from the preceding analysis, framed around
 * NIST SP 800-86 methodology. On success the text is stored on
 * context.investigationApproach and summarized into the context history.
 */
private async generateApproach(context: AnalysisContext): Promise<MicroTaskResult> {
  const isWorkflow = context.mode === 'workflow';

  const prompt = `Basierend auf der Analyse entwickeln Sie einen fundierten ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} nach NIST SP 800-86 Methodik.

${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"

Entwickeln Sie einen systematischen ${isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz'} unter Berücksichtigung von:

${isWorkflow ?
`- Triage-Prioritäten nach forensischer Dringlichkeit
- Phasenabfolge nach NIST-Methodik
- Kontaminationsvermeidung und forensische Isolierung` :
`- Methodik-Auswahl nach wissenschaftlichen Kriterien
- Validierung und Verifizierung der gewählten Ansätze
- Integration in bestehende forensische Workflows`
}

WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 150 Wörter.`;

  const result = await this.callMicroTaskAI(prompt, context, 220);

  if (result.success) {
    context.investigationApproach = result.content;
    // Summarize into the shared, token-bounded context history.
    this.addToContextHistory(context, `${isWorkflow ? 'Untersuchungs' : 'Lösungs'}ansatz: ${result.content.slice(0, 200)}...`);
  }

  return result;
}
|
||||
|
||||
private async generateCriticalConsiderations(context: AnalysisContext): Promise<MicroTaskResult> {
|
||||
const isWorkflow = context.mode === 'workflow';
|
||||
|
||||
const prompt = `Identifizieren Sie ${isWorkflow ? 'kritische forensische Überlegungen' : 'wichtige methodische Voraussetzungen'} für diesen Fall.
|
||||
|
||||
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
|
||||
|
||||
Berücksichtigen Sie folgende forensische Aspekte:
|
||||
|
||||
${isWorkflow ?
|
||||
`- Time-sensitive evidence preservation
|
||||
- Chain of custody requirements und rechtliche Verwertbarkeit
|
||||
- Incident containment vs. evidence preservation Dilemma
|
||||
- Privacy- und Compliance-Anforderungen` :
|
||||
`- Tool-Validierung und Nachvollziehbarkeit
|
||||
- False positive/negative Risiken bei der gewählten Methodik
|
||||
- Qualifikationsanforderungen für die Durchführung
|
||||
- Dokumentations- und Reporting-Standards`
|
||||
}
|
||||
|
||||
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.`;
|
||||
|
||||
const result = await this.callMicroTaskAI(prompt, context, 180);
|
||||
|
||||
if (result.success) {
|
||||
context.criticalConsiderations = result.content;
|
||||
this.addToContextHistory(context, `Kritische Überlegungen: ${result.content.slice(0, 200)}...`);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private async selectToolsForPhase(context: AnalysisContext, phase: any): Promise<MicroTaskResult> {
|
||||
const phaseTools = context.filteredData.tools.filter((tool: any) =>
|
||||
tool.phases && tool.phases.includes(phase.id)
|
||||
);
|
||||
|
||||
if (phaseTools.length === 0) {
|
||||
return {
|
||||
taskType: 'tool-selection',
|
||||
content: JSON.stringify([]),
|
||||
processingTimeMs: 0,
|
||||
success: true
|
||||
};
|
||||
}
|
||||
|
||||
const prompt = `Wählen Sie 2-3 Methoden/Tools für die Phase "${phase.name}" basierend auf objektiven, fallbezogenen Kriterien.
|
||||
|
||||
SZENARIO: "${context.userQuery}"
|
||||
|
||||
VERFÜGBARE TOOLS FÜR ${phase.name.toUpperCase()}:
|
||||
${phaseTools.map((tool: any) => `- ${tool.name}: ${tool.description.slice(0, 100)}...`).join('\n')}
|
||||
|
||||
Wählen Sie Methoden/Tools nach forensischen Kriterien aus:
|
||||
- Court admissibility und Chain of Custody Kompatibilität
|
||||
- Integration in forensische Standard-Workflows
|
||||
- Reproduzierbarkeit und Dokumentationsqualität
|
||||
- Objektivität
|
||||
|
||||
Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format (kein zusätzlicher Text):
|
||||
[
|
||||
{
|
||||
"toolName": "Exakter Methoden/Tool-Name",
|
||||
"priority": "high|medium|low",
|
||||
"justification": "Objektive Begründung warum diese Methode/Tool für das spezifische Szenario besser geeignet ist"
|
||||
}
|
||||
]`;
|
||||
|
||||
const result = await this.callMicroTaskAI(prompt, context, 450);
|
||||
|
||||
if (result.success) {
|
||||
// FIXED: Safe JSON parsing with validation
|
||||
const selections = this.safeParseJSON(result.content, []);
|
||||
|
||||
if (Array.isArray(selections)) {
|
||||
const validSelections = selections.filter((sel: any) =>
|
||||
sel.toolName && phaseTools.some((tool: any) => tool.name === sel.toolName)
|
||||
);
|
||||
|
||||
validSelections.forEach((sel: any) => {
|
||||
const tool = phaseTools.find((t: any) => t.name === sel.toolName);
|
||||
if (tool) {
|
||||
// FIXED: Use deduplication helper
|
||||
this.addToolToSelection(context, tool, phase.id, sel.priority, sel.justification);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise<MicroTaskResult> {
|
||||
const prompt = `Bewerten Sie diese Methode/Tool fallbezogen für das spezifische Problem nach forensischen Qualitätskriterien.
|
||||
|
||||
PROBLEM: "${context.userQuery}"
|
||||
|
||||
TOOL: ${tool.name}
|
||||
BESCHREIBUNG: ${tool.description}
|
||||
PLATTFORMEN: ${tool.platforms?.join(', ') || 'N/A'}
|
||||
SKILL LEVEL: ${tool.skillLevel}
|
||||
|
||||
Bewerten Sie nach forensischen Standards und antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
|
||||
{
|
||||
"suitability_score": "high|medium|low",
|
||||
"detailed_explanation": "Detaillierte forensische Begründung warum diese Methode/Tool das Problem löst",
|
||||
"implementation_approach": "Konkrete methodische Schritte zur korrekten Anwendung für dieses spezifische Problem",
|
||||
"pros": ["Forensischer Vorteil 1", "Validierter Vorteil 2"],
|
||||
"cons": ["Methodische Limitation 1", "Potenzielle Schwäche 2"],
|
||||
"alternatives": "Alternative Ansätze falls diese Methode/Tool nicht optimal ist"
|
||||
}`;
|
||||
|
||||
const result = await this.callMicroTaskAI(prompt, context, 650);
|
||||
|
||||
if (result.success) {
|
||||
// FIXED: Safe JSON parsing
|
||||
const evaluation = this.safeParseJSON(result.content, {
|
||||
suitability_score: 'medium',
|
||||
detailed_explanation: 'Evaluation failed',
|
||||
implementation_approach: '',
|
||||
pros: [],
|
||||
cons: [],
|
||||
alternatives: ''
|
||||
});
|
||||
|
||||
// FIXED: Use deduplication helper
|
||||
this.addToolToSelection(context, {
|
||||
...tool,
|
||||
evaluation: {
|
||||
...evaluation,
|
||||
rank
|
||||
}
|
||||
}, 'evaluation', evaluation.suitability_score);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private async selectBackgroundKnowledge(context: AnalysisContext): Promise<MicroTaskResult> {
|
||||
const availableConcepts = context.filteredData.concepts;
|
||||
|
||||
if (availableConcepts.length === 0) {
|
||||
return {
|
||||
taskType: 'background-knowledge',
|
||||
content: JSON.stringify([]),
|
||||
processingTimeMs: 0,
|
||||
success: true
|
||||
};
|
||||
}
|
||||
|
||||
const selectedToolNames = context.selectedTools?.map(st => st.tool.name) || [];
|
||||
|
||||
const prompt = `Wählen Sie relevante forensische Konzepte für das Verständnis der empfohlenen Methodik.
|
||||
|
||||
${context.mode === 'workflow' ? 'SZENARIO' : 'PROBLEM'}: "${context.userQuery}"
|
||||
EMPFOHLENE TOOLS: ${selectedToolNames.join(', ')}
|
||||
|
||||
VERFÜGBARE KONZEPTE:
|
||||
${availableConcepts.slice(0, 15).map((concept: any) => `- ${concept.name}: ${concept.description.slice(0, 80)}...`).join('\n')}
|
||||
|
||||
Wählen Sie 2-4 Konzepte aus, die für das Verständnis der forensischen Methodik essentiell sind.
|
||||
|
||||
Antworten Sie AUSSCHLIESSLICH mit diesem JSON-Format:
|
||||
[
|
||||
{
|
||||
"conceptName": "Exakter Konzept-Name",
|
||||
"relevance": "Forensische Relevanz: Warum dieses Konzept für das Verständnis der Methodik kritisch ist"
|
||||
}
|
||||
]`;
|
||||
|
||||
const result = await this.callMicroTaskAI(prompt, context, 400);
|
||||
|
||||
if (result.success) {
|
||||
// FIXED: Safe JSON parsing
|
||||
const selections = this.safeParseJSON(result.content, []);
|
||||
|
||||
if (Array.isArray(selections)) {
|
||||
context.backgroundKnowledge = selections.filter((sel: any) =>
|
||||
sel.conceptName && availableConcepts.some((concept: any) => concept.name === sel.conceptName)
|
||||
).map((sel: any) => ({
|
||||
concept: availableConcepts.find((c: any) => c.name === sel.conceptName),
|
||||
relevance: sel.relevance
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private async generateFinalRecommendations(context: AnalysisContext): Promise<MicroTaskResult> {
|
||||
const isWorkflow = context.mode === 'workflow';
|
||||
|
||||
const prompt = isWorkflow ?
|
||||
`Erstellen Sie eine forensisch fundierte Workflow-Empfehlung basierend auf DFIR-Prinzipien.
|
||||
|
||||
SZENARIO: "${context.userQuery}"
|
||||
AUSGEWÄHLTE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Tools ausgewählt'}
|
||||
|
||||
Erstellen Sie konkrete methodische Workflow-Schritte für dieses spezifische Szenario unter Berücksichtigung forensischer Best Practices, Objektivität und rechtlicher Verwertbarkeit.
|
||||
|
||||
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 120 Wörter.` :
|
||||
|
||||
`Erstellen Sie wichtige methodische Überlegungen für die korrekte Methoden-/Tool-Anwendung.
|
||||
|
||||
PROBLEM: "${context.userQuery}"
|
||||
EMPFOHLENE TOOLS: ${context.selectedTools?.map(st => st.tool.name).join(', ') || 'Keine Methoden/Tools ausgewählt'}
|
||||
|
||||
Geben Sie kritische methodische Überlegungen, Validierungsanforderungen und Qualitätssicherungsmaßnahmen für die korrekte Anwendung der empfohlenen Methoden/Tools.
|
||||
|
||||
WICHTIG: Antworten Sie NUR in fließendem deutschen Text ohne Listen oder Markdown. Maximum 100 Wörter.`;
|
||||
|
||||
const result = await this.callMicroTaskAI(prompt, context, 180);
|
||||
return result;
|
||||
}
|
||||
|
||||
private async callAI(prompt: string, maxTokens: number = 1000): Promise<string> {
|
||||
const response = await fetch(`${this.config.endpoint}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${this.config.apiKey}`
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: this.config.model,
|
||||
messages: [{ role: 'user', content: prompt }],
|
||||
max_tokens: maxTokens,
|
||||
temperature: 0.3
|
||||
})
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
throw new Error(`AI API error: ${response.status} - ${errorText}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
const content = data.choices?.[0]?.message?.content;
|
||||
|
||||
if (!content) {
|
||||
throw new Error('No response from AI model');
|
||||
}
|
||||
|
||||
return content;
|
||||
}
|
||||
|
||||
async processQuery(userQuery: string, mode: string): Promise<AnalysisResult> {
|
||||
const startTime = Date.now();
|
||||
let completedTasks = 0;
|
||||
let failedTasks = 0;
|
||||
|
||||
console.log(`[IMPROVED PIPELINE] Starting ${mode} query processing with context continuity`);
|
||||
|
||||
try {
|
||||
// Stage 1: Get intelligent candidates (embeddings + AI selection)
|
||||
const toolsData = await getCompressedToolsDataForAI();
|
||||
const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode);
|
||||
|
||||
// FIXED: Initialize context with proper state management
|
||||
const context: AnalysisContext = {
|
||||
userQuery,
|
||||
mode,
|
||||
filteredData,
|
||||
contextHistory: [],
|
||||
maxContextLength: this.maxContextTokens,
|
||||
currentContextLength: 0,
|
||||
seenToolNames: new Set<string>() // FIXED: Add deduplication tracking
|
||||
};
|
||||
|
||||
console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
|
||||
|
||||
// MICRO-TASK SEQUENCE
|
||||
|
||||
// Task 1: Scenario/Problem Analysis
|
||||
const analysisResult = await this.analyzeScenario(context);
|
||||
if (analysisResult.success) completedTasks++; else failedTasks++;
|
||||
await this.delay(this.microTaskDelay);
|
||||
|
||||
// Task 2: Investigation/Solution Approach
|
||||
const approachResult = await this.generateApproach(context);
|
||||
if (approachResult.success) completedTasks++; else failedTasks++;
|
||||
await this.delay(this.microTaskDelay);
|
||||
|
||||
// Task 3: Critical Considerations
|
||||
const considerationsResult = await this.generateCriticalConsiderations(context);
|
||||
if (considerationsResult.success) completedTasks++; else failedTasks++;
|
||||
await this.delay(this.microTaskDelay);
|
||||
|
||||
// Task 4: Tool Selection/Evaluation (mode-dependent)
|
||||
if (mode === 'workflow') {
|
||||
// Select tools for each phase
|
||||
const phases = toolsData.phases || [];
|
||||
for (const phase of phases) {
|
||||
const toolSelectionResult = await this.selectToolsForPhase(context, phase);
|
||||
if (toolSelectionResult.success) completedTasks++; else failedTasks++;
|
||||
await this.delay(this.microTaskDelay);
|
||||
}
|
||||
} else {
|
||||
// Evaluate top 3 tools for specific problem
|
||||
const topTools = filteredData.tools.slice(0, 3);
|
||||
for (let i = 0; i < topTools.length; i++) {
|
||||
const evaluationResult = await this.evaluateSpecificTool(context, topTools[i], i + 1);
|
||||
if (evaluationResult.success) completedTasks++; else failedTasks++;
|
||||
await this.delay(this.microTaskDelay);
|
||||
}
|
||||
}
|
||||
|
||||
// Task 5: Background Knowledge Selection
|
||||
const knowledgeResult = await this.selectBackgroundKnowledge(context);
|
||||
if (knowledgeResult.success) completedTasks++; else failedTasks++;
|
||||
await this.delay(this.microTaskDelay);
|
||||
|
||||
// Task 6: Final Recommendations
|
||||
const finalResult = await this.generateFinalRecommendations(context);
|
||||
if (finalResult.success) completedTasks++; else failedTasks++;
|
||||
|
||||
// Build final recommendation
|
||||
const recommendation = this.buildRecommendation(context, mode, finalResult.content);
|
||||
|
||||
const processingStats = {
|
||||
embeddingsUsed: embeddingsService.isEnabled(),
|
||||
candidatesFromEmbeddings: filteredData.tools.length,
|
||||
finalSelectedItems: (context.selectedTools?.length || 0) +
|
||||
(context.backgroundKnowledge?.length || 0),
|
||||
processingTimeMs: Date.now() - startTime,
|
||||
microTasksCompleted: completedTasks,
|
||||
microTasksFailed: failedTasks,
|
||||
contextContinuityUsed: true
|
||||
};
|
||||
|
||||
console.log(`[IMPROVED PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
|
||||
console.log(`[IMPROVED PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);
|
||||
|
||||
return {
|
||||
recommendation,
|
||||
processingStats
|
||||
};
|
||||
|
||||
} catch (error) {
|
||||
console.error('[IMPROVED PIPELINE] Processing failed:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
// Build recommendation (same structure but using fixed context)
|
||||
private buildRecommendation(context: AnalysisContext, mode: string, finalContent: string): any {
|
||||
const isWorkflow = mode === 'workflow';
|
||||
|
||||
const base = {
|
||||
[isWorkflow ? 'scenario_analysis' : 'problem_analysis']:
|
||||
isWorkflow ? context.scenarioAnalysis : context.problemAnalysis,
|
||||
investigation_approach: context.investigationApproach,
|
||||
critical_considerations: context.criticalConsiderations,
|
||||
background_knowledge: context.backgroundKnowledge?.map(bk => ({
|
||||
concept_name: bk.concept.name,
|
||||
relevance: bk.relevance
|
||||
})) || []
|
||||
};
|
||||
|
||||
if (isWorkflow) {
|
||||
return {
|
||||
...base,
|
||||
recommended_tools: context.selectedTools?.map(st => ({
|
||||
name: st.tool.name,
|
||||
phase: st.phase,
|
||||
priority: st.priority,
|
||||
justification: st.justification || `Empfohlen für ${st.phase}`
|
||||
})) || [],
|
||||
workflow_suggestion: finalContent
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
...base,
|
||||
recommended_tools: context.selectedTools?.map(st => ({
|
||||
name: st.tool.name,
|
||||
rank: st.tool.evaluation?.rank || 1,
|
||||
suitability_score: st.priority,
|
||||
detailed_explanation: st.tool.evaluation?.detailed_explanation || '',
|
||||
implementation_approach: st.tool.evaluation?.implementation_approach || '',
|
||||
pros: st.tool.evaluation?.pros || [],
|
||||
cons: st.tool.evaluation?.cons || [],
|
||||
alternatives: st.tool.evaluation?.alternatives || ''
|
||||
})) || [],
|
||||
additional_considerations: finalContent
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Global instance
// Module-level singleton; consumers import `aiPipeline` rather than
// constructing their own pipeline.
const aiPipeline = new ImprovedMicroTaskAIPipeline();

export { aiPipeline, type AnalysisResult };
|
@ -1,3 +1,4 @@
|
||||
// src/utils/dataService.ts - Enhanced for micro-task AI pipeline
|
||||
import { promises as fs } from 'fs';
|
||||
import { load } from 'js-yaml';
|
||||
import path from 'path';
|
||||
@ -21,30 +22,44 @@ const ToolSchema = z.object({
|
||||
accessType: z.string().optional().nullable(),
|
||||
'domain-agnostic-software': z.array(z.string()).optional().nullable(),
|
||||
related_concepts: z.array(z.string()).optional().nullable().default([]),
|
||||
related_software: z.array(z.string()).optional().nullable().default([]), // Added this line
|
||||
related_software: z.array(z.string()).optional().nullable().default([]),
|
||||
});
|
||||
|
||||
const ToolsDataSchema = z.object({
|
||||
tools: z.array(ToolSchema),
|
||||
domains: z.array(z.object({
|
||||
id: z.string(),
|
||||
name: z.string()
|
||||
name: z.string(),
|
||||
description: z.string().optional()
|
||||
})),
|
||||
phases: z.array(z.object({
|
||||
id: z.string(),
|
||||
name: z.string(),
|
||||
description: z.string().optional()
|
||||
description: z.string().optional(),
|
||||
typical_tools: z.array(z.string()).optional().default([]),
|
||||
key_activities: z.array(z.string()).optional().default([])
|
||||
})),
|
||||
'domain-agnostic-software': z.array(z.object({
|
||||
id: z.string(),
|
||||
name: z.string(),
|
||||
description: z.string().optional()
|
||||
description: z.string().optional(),
|
||||
use_cases: z.array(z.string()).optional().default([])
|
||||
})).optional().default([]),
|
||||
scenarios: z.array(z.object({
|
||||
id: z.string(),
|
||||
icon: z.string(),
|
||||
friendly_name: z.string()
|
||||
friendly_name: z.string(),
|
||||
description: z.string().optional(),
|
||||
typical_phases: z.array(z.string()).optional().default([]),
|
||||
complexity: z.enum(['low', 'medium', 'high']).optional()
|
||||
})).optional().default([]),
|
||||
skill_levels: z.object({
|
||||
novice: z.string().optional(),
|
||||
beginner: z.string().optional(),
|
||||
intermediate: z.string().optional(),
|
||||
advanced: z.string().optional(),
|
||||
expert: z.string().optional()
|
||||
}).optional().default({})
|
||||
});
|
||||
|
||||
interface ToolsData {
|
||||
@ -53,20 +68,49 @@ interface ToolsData {
|
||||
phases: any[];
|
||||
'domain-agnostic-software': any[];
|
||||
scenarios: any[];
|
||||
skill_levels?: any;
|
||||
}
|
||||
|
||||
interface CompressedToolsData {
|
||||
interface EnhancedCompressedToolsData {
|
||||
tools: any[];
|
||||
concepts: any[];
|
||||
domains: any[];
|
||||
phases: any[];
|
||||
'domain-agnostic-software': any[];
|
||||
scenarios?: any[]; // Optional for AI processing
|
||||
skill_levels: any;
|
||||
// Enhanced context for micro-tasks
|
||||
domain_relationships: DomainRelationship[];
|
||||
phase_dependencies: PhaseDependency[];
|
||||
tool_compatibility_matrix: CompatibilityMatrix[];
|
||||
}
|
||||
|
||||
// Per-domain summary statistics derived from the tool catalogue
// (see generateDomainRelationships).
interface DomainRelationship {
  domain_id: string;
  tool_count: number;                          // tools tagged with this domain
  common_tags: string[];                       // up to 5 most frequent tags in the domain
  skill_distribution: Record<string, number>;  // skillLevel -> tool count
}

// Linear ordering information for one forensic phase
// (see generatePhaseDependencies).
interface PhaseDependency {
  phase_id: string;
  order: number;                // 1-based position in the phase list
  depends_on: string | null;    // previous phase id, null for the first phase
  enables: string | null;       // next phase id, null for the last phase
  is_parallel_capable: boolean;
  typical_duration: string;     // coarse duration hint, e.g. 'hours-days'
}

// Groups of tool names sharing a platform or phase
// (see generateToolCompatibilityMatrix).
interface CompatibilityMatrix {
  type: string;                       // 'platform_compatibility' | 'phase_synergy'
  groups: Record<string, string[]>;   // group key -> tool names
}
|
||||
|
||||
let cachedData: ToolsData | null = null;
|
||||
let cachedRandomizedData: ToolsData | null = null;
|
||||
let cachedCompressedData: CompressedToolsData | null = null;
|
||||
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
|
||||
let lastRandomizationDate: string | null = null;
|
||||
let dataVersion: string | null = null;
|
||||
|
||||
function seededRandom(seed: number): () => number {
|
||||
let x = Math.sin(seed) * 10000;
|
||||
@ -91,6 +135,115 @@ function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
|
||||
return shuffled;
|
||||
}
|
||||
|
||||
function generateDataVersion(data: any): string {
|
||||
const str = JSON.stringify(data, Object.keys(data).sort());
|
||||
let hash = 0;
|
||||
for (let i = 0; i < str.length; i++) {
|
||||
const char = str.charCodeAt(i);
|
||||
hash = ((hash << 5) - hash) + char;
|
||||
hash = hash & hash;
|
||||
}
|
||||
return Math.abs(hash).toString(36);
|
||||
}
|
||||
|
||||
// Enhanced: Generate domain relationships for better AI understanding
|
||||
function generateDomainRelationships(domains: any[], tools: any[]): DomainRelationship[] {
|
||||
const relationships: DomainRelationship[] = [];
|
||||
|
||||
for (const domain of domains) {
|
||||
const domainTools = tools.filter(tool =>
|
||||
tool.domains && tool.domains.includes(domain.id)
|
||||
);
|
||||
|
||||
const commonTags = domainTools
|
||||
.flatMap(tool => tool.tags || [])
|
||||
.reduce((acc: any, tag: string) => {
|
||||
acc[tag] = (acc[tag] || 0) + 1;
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
const topTags = Object.entries(commonTags)
|
||||
.sort(([,a], [,b]) => (b as number) - (a as number))
|
||||
.slice(0, 5)
|
||||
.map(([tag]) => tag);
|
||||
|
||||
relationships.push({
|
||||
domain_id: domain.id,
|
||||
tool_count: domainTools.length,
|
||||
common_tags: topTags,
|
||||
skill_distribution: domainTools.reduce((acc: any, tool: any) => {
|
||||
acc[tool.skillLevel] = (acc[tool.skillLevel] || 0) + 1;
|
||||
return acc;
|
||||
}, {})
|
||||
});
|
||||
}
|
||||
|
||||
return relationships;
|
||||
}
|
||||
|
||||
// Enhanced: Generate phase dependencies
|
||||
function generatePhaseDependencies(phases: any[]): PhaseDependency[] {
|
||||
const dependencies: PhaseDependency[] = [];
|
||||
|
||||
for (let i = 0; i < phases.length; i++) {
|
||||
const phase = phases[i];
|
||||
const nextPhase = phases[i + 1];
|
||||
const prevPhase = phases[i - 1];
|
||||
|
||||
dependencies.push({
|
||||
phase_id: phase.id,
|
||||
order: i + 1,
|
||||
depends_on: prevPhase?.id || null,
|
||||
enables: nextPhase?.id || null,
|
||||
is_parallel_capable: ['examination', 'analysis'].includes(phase.id), // Some phases can run in parallel
|
||||
typical_duration: phase.id === 'data-collection' ? 'hours-days' :
|
||||
phase.id === 'examination' ? 'hours-weeks' :
|
||||
phase.id === 'analysis' ? 'days-weeks' :
|
||||
'hours-days'
|
||||
});
|
||||
}
|
||||
|
||||
return dependencies;
|
||||
}
|
||||
|
||||
// Enhanced: Generate tool compatibility matrix
|
||||
function generateToolCompatibilityMatrix(tools: any[]): CompatibilityMatrix[] {
|
||||
const matrix: CompatibilityMatrix[] = [];
|
||||
|
||||
// Group tools by common characteristics
|
||||
const platformGroups = tools.reduce((acc: any, tool: any) => {
|
||||
if (tool.platforms) {
|
||||
tool.platforms.forEach((platform: string) => {
|
||||
if (!acc[platform]) acc[platform] = [];
|
||||
acc[platform].push(tool.name);
|
||||
});
|
||||
}
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
const phaseGroups = tools.reduce((acc: any, tool: any) => {
|
||||
if (tool.phases) {
|
||||
tool.phases.forEach((phase: string) => {
|
||||
if (!acc[phase]) acc[phase] = [];
|
||||
acc[phase].push(tool.name);
|
||||
});
|
||||
}
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
matrix.push({
|
||||
type: 'platform_compatibility',
|
||||
groups: platformGroups
|
||||
});
|
||||
|
||||
matrix.push({
|
||||
type: 'phase_synergy',
|
||||
groups: phaseGroups
|
||||
});
|
||||
|
||||
return matrix;
|
||||
}
|
||||
|
||||
async function loadRawData(): Promise<ToolsData> {
|
||||
if (!cachedData) {
|
||||
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
|
||||
@ -99,6 +252,21 @@ async function loadRawData(): Promise<ToolsData> {
|
||||
|
||||
try {
|
||||
cachedData = ToolsDataSchema.parse(rawData);
|
||||
|
||||
// Enhanced: Add default skill level descriptions if not provided
|
||||
if (!cachedData.skill_levels || Object.keys(cachedData.skill_levels).length === 0) {
|
||||
cachedData.skill_levels = {
|
||||
novice: "Minimal technical background required, guided interfaces",
|
||||
beginner: "Basic IT knowledge, some command-line familiarity helpful",
|
||||
intermediate: "Solid technical foundation, comfortable with various tools",
|
||||
advanced: "Extensive experience, deep technical understanding required",
|
||||
expert: "Specialist knowledge, cutting-edge techniques and complex scenarios"
|
||||
};
|
||||
}
|
||||
|
||||
dataVersion = generateDataVersion(cachedData);
|
||||
console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);
|
||||
|
||||
} catch (error) {
|
||||
console.error('YAML validation failed:', error);
|
||||
throw new Error('Invalid tools.yaml structure');
|
||||
@ -123,47 +291,88 @@ export async function getToolsData(): Promise<ToolsData> {
|
||||
};
|
||||
|
||||
lastRandomizationDate = today;
|
||||
|
||||
cachedCompressedData = null;
|
||||
}
|
||||
|
||||
return cachedRandomizedData;
|
||||
}
|
||||
|
||||
export async function getCompressedToolsDataForAI(): Promise<CompressedToolsData> {
|
||||
export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedToolsData> {
|
||||
if (!cachedCompressedData) {
|
||||
const data = await getToolsData();
|
||||
|
||||
// Enhanced: More detailed tool information for micro-tasks
|
||||
const compressedTools = data.tools
|
||||
.filter(tool => tool.type !== 'concept')
|
||||
.map(tool => {
|
||||
const { projectUrl, statusUrl, ...compressedTool } = tool;
|
||||
return compressedTool;
|
||||
return {
|
||||
...compressedTool,
|
||||
// Enhanced: Add computed fields for AI
|
||||
is_hosted: projectUrl !== undefined && projectUrl !== null && projectUrl !== "" && projectUrl.trim() !== "",
|
||||
is_open_source: tool.license && tool.license !== 'Proprietary',
|
||||
complexity_score: tool.skillLevel === 'expert' ? 5 :
|
||||
tool.skillLevel === 'advanced' ? 4 :
|
||||
tool.skillLevel === 'intermediate' ? 3 :
|
||||
tool.skillLevel === 'beginner' ? 2 : 1,
|
||||
// Enhanced: Phase-specific suitability hints
|
||||
phase_suitability: tool.phases?.map(phase => ({
|
||||
phase,
|
||||
primary_use: tool.tags?.find(tag => tag.includes(phase)) ? 'primary' : 'secondary'
|
||||
})) || []
|
||||
};
|
||||
});
|
||||
|
||||
const concepts = data.tools
|
||||
.filter(tool => tool.type === 'concept')
|
||||
.map(concept => {
|
||||
const { projectUrl, statusUrl, platforms, accessType, license, ...compressedConcept } = concept;
|
||||
return compressedConcept;
|
||||
return {
|
||||
...compressedConcept,
|
||||
// Enhanced: Learning difficulty indicator
|
||||
learning_complexity: concept.skillLevel === 'expert' ? 'very_high' :
|
||||
concept.skillLevel === 'advanced' ? 'high' :
|
||||
concept.skillLevel === 'intermediate' ? 'medium' :
|
||||
'low'
|
||||
};
|
||||
});
|
||||
|
||||
// Enhanced: Add rich context data
|
||||
const domainRelationships = generateDomainRelationships(data.domains, compressedTools);
|
||||
const phaseDependencies = generatePhaseDependencies(data.phases);
|
||||
const toolCompatibilityMatrix = generateToolCompatibilityMatrix(compressedTools);
|
||||
|
||||
cachedCompressedData = {
|
||||
tools: compressedTools,
|
||||
concepts: concepts,
|
||||
domains: data.domains,
|
||||
phases: data.phases,
|
||||
'domain-agnostic-software': data['domain-agnostic-software']
|
||||
// scenarios intentionally excluded from AI data
|
||||
'domain-agnostic-software': data['domain-agnostic-software'],
|
||||
scenarios: data.scenarios, // Include scenarios for context
|
||||
skill_levels: data.skill_levels || {},
|
||||
// Enhanced context for micro-tasks
|
||||
domain_relationships: domainRelationships,
|
||||
phase_dependencies: phaseDependencies,
|
||||
tool_compatibility_matrix: toolCompatibilityMatrix
|
||||
};
|
||||
|
||||
console.log(`[DATA SERVICE] Generated enhanced compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
|
||||
console.log(`[DATA SERVICE] Added context: ${domainRelationships.length} domain relationships, ${phaseDependencies.length} phase dependencies`);
|
||||
}
|
||||
|
||||
return cachedCompressedData;
|
||||
}
|
||||
|
||||
// Returns the fingerprint of the currently loaded data set, or null if no
// data has been loaded (or the cache was cleared) since module start.
export function getDataVersion(): string | null {
  return dataVersion;
}
|
||||
|
||||
// Drop every module-level cache (raw, randomized, compressed, randomization
// date, version) so the next data access reloads and revalidates from disk.
export function clearCache(): void {
  cachedData = null;
  cachedRandomizedData = null;
  cachedCompressedData = null;
  lastRandomizationDate = null;
  dataVersion = null;

  console.log('[DATA SERVICE] Enhanced cache cleared');
}
|
267
src/utils/embeddings.ts
Normal file
267
src/utils/embeddings.ts
Normal file
@ -0,0 +1,267 @@
|
||||
// src/utils/embeddings.ts
|
||||
import { promises as fs } from 'fs';
|
||||
import path from 'path';
|
||||
import { getCompressedToolsDataForAI } from './dataService.js';
|
||||
|
||||
// One embedded catalogue entry (a tool or a concept).
interface EmbeddingData {
  id: string;
  type: 'tool' | 'concept';
  name: string;
  // Flattened lowercase text the embedding was computed from
  // (see createContentString).
  content: string;
  embedding: number[];
  // Denormalised attributes carried alongside the vector.
  metadata: {
    domains?: string[];
    phases?: string[];
    tags?: string[];
    skillLevel?: string;
    type?: string;
  };
}

// On-disk cache format (data/embeddings.json).
interface EmbeddingsDatabase {
  version: string;       // hash of the source data; mismatch triggers regeneration
  lastUpdated: number;   // epoch milliseconds
  embeddings: EmbeddingData[];
}
|
||||
|
||||
class EmbeddingsService {
|
||||
private embeddings: EmbeddingData[] = [];
|
||||
private isInitialized = false;
|
||||
private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
|
||||
private readonly batchSize: number;
|
||||
private readonly batchDelay: number;
|
||||
private readonly enabled: boolean;
|
||||
|
||||
  constructor() {
    // Feature flag and batching knobs, all sourced from the environment.
    this.enabled = process.env.AI_EMBEDDINGS_ENABLED === 'true';
    // Batch size / inter-batch delay pace requests to the embeddings API.
    // NOTE(review): parseInt yields NaN for malformed env values — confirm
    // these variables are validated upstream.
    this.batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20', 10);
    this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
  }
|
||||
|
||||
  // Load cached embeddings from disk if their version matches the current
  // data set, otherwise regenerate them. Never throws: on any failure the
  // service simply stays uninitialized (isInitialized = false).
  async initialize(): Promise<void> {
    if (!this.enabled) {
      console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
      return;
    }

    try {
      console.log('[EMBEDDINGS] Initializing embeddings system...');

      // Create data directory if it doesn't exist
      await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true });

      const toolsData = await getCompressedToolsDataForAI();
      const currentDataHash = this.hashData(toolsData);

      // Try to load existing embeddings
      const existingEmbeddings = await this.loadEmbeddings();

      // Reuse the cache only if it was built from identical data.
      if (existingEmbeddings && existingEmbeddings.version === currentDataHash) {
        console.log('[EMBEDDINGS] Using cached embeddings');
        this.embeddings = existingEmbeddings.embeddings;
      } else {
        console.log('[EMBEDDINGS] Generating new embeddings...');
        await this.generateEmbeddings(toolsData, currentDataHash);
      }

      this.isInitialized = true;
      console.log(`[EMBEDDINGS] Initialized with ${this.embeddings.length} embeddings`);

    } catch (error) {
      // Degrade gracefully: callers should check initialization state.
      console.error('[EMBEDDINGS] Failed to initialize:', error);
      this.isInitialized = false;
    }
  }
|
||||
|
||||
private hashData(data: any): string {
|
||||
return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32);
|
||||
}
|
||||
|
||||
private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> {
|
||||
try {
|
||||
const data = await fs.readFile(this.embeddingsPath, 'utf8');
|
||||
return JSON.parse(data);
|
||||
} catch (error) {
|
||||
console.log('[EMBEDDINGS] No existing embeddings found');
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private async saveEmbeddings(version: string): Promise<void> {
|
||||
const database: EmbeddingsDatabase = {
|
||||
version,
|
||||
lastUpdated: Date.now(),
|
||||
embeddings: this.embeddings
|
||||
};
|
||||
|
||||
await fs.writeFile(this.embeddingsPath, JSON.stringify(database, null, 2));
|
||||
console.log(`[EMBEDDINGS] Saved ${this.embeddings.length} embeddings to disk`);
|
||||
}
|
||||
|
||||
/**
 * Builds the lower-cased text blob that gets embedded for a tool/concept:
 * its name, description, and every tag/domain/phase, space-joined.
 * Empty/missing pieces are dropped.
 */
private createContentString(item: any): string {
  const pieces: string[] = [item.name, item.description || ''];
  pieces.push(...(item.tags || []), ...(item.domains || []), ...(item.phases || []));

  const nonEmpty = pieces.filter(Boolean);
  return nonEmpty.join(' ').toLowerCase();
}
|
||||
|
||||
private async generateEmbeddingsBatch(contents: string[]): Promise<number[][]> {
|
||||
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
|
||||
const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
|
||||
const model = process.env.AI_EMBEDDINGS_MODEL;
|
||||
|
||||
if (!endpoint || !apiKey || !model) {
|
||||
throw new Error('Missing embeddings API configuration');
|
||||
}
|
||||
|
||||
const response = await fetch(endpoint, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'Authorization': `Bearer ${apiKey}`
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
input: contents
|
||||
})
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
const error = await response.text();
|
||||
throw new Error(`Embeddings API error: ${response.status} - ${error}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
return data.data.map((item: any) => item.embedding);
|
||||
}
|
||||
|
||||
private async generateEmbeddings(toolsData: any, version: string): Promise<void> {
|
||||
const allItems = [
|
||||
...toolsData.tools.map((tool: any) => ({ ...tool, type: 'tool' })),
|
||||
...toolsData.concepts.map((concept: any) => ({ ...concept, type: 'concept' }))
|
||||
];
|
||||
|
||||
const contents = allItems.map(item => this.createContentString(item));
|
||||
this.embeddings = [];
|
||||
|
||||
// Process in batches to respect rate limits
|
||||
for (let i = 0; i < contents.length; i += this.batchSize) {
|
||||
const batch = contents.slice(i, i + this.batchSize);
|
||||
const batchItems = allItems.slice(i, i + this.batchSize);
|
||||
|
||||
console.log(`[EMBEDDINGS] Processing batch ${Math.ceil((i + 1) / this.batchSize)} of ${Math.ceil(contents.length / this.batchSize)}`);
|
||||
|
||||
try {
|
||||
const embeddings = await this.generateEmbeddingsBatch(batch);
|
||||
|
||||
embeddings.forEach((embedding, index) => {
|
||||
const item = batchItems[index];
|
||||
this.embeddings.push({
|
||||
id: `${item.type}_${item.name.replace(/[^a-zA-Z0-9]/g, '_')}`,
|
||||
type: item.type,
|
||||
name: item.name,
|
||||
content: batch[index],
|
||||
embedding,
|
||||
metadata: {
|
||||
domains: item.domains,
|
||||
phases: item.phases,
|
||||
tags: item.tags,
|
||||
skillLevel: item.skillLevel,
|
||||
type: item.type
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// Rate limiting delay between batches
|
||||
if (i + this.batchSize < contents.length) {
|
||||
await new Promise(resolve => setTimeout(resolve, this.batchDelay));
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.error(`[EMBEDDINGS] Failed to process batch ${Math.ceil((i + 1) / this.batchSize)}:`, error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
await this.saveEmbeddings(version);
|
||||
}
|
||||
|
||||
public async embedText(text: string): Promise<number[]> {
|
||||
// Re‑use the private batch helper to avoid auth duplication
|
||||
const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
|
||||
return embedding;
|
||||
}
|
||||
|
||||
/**
 * Cosine similarity between two equal-length vectors, in [-1, 1].
 *
 * The original returned NaN (0/0) when either vector had zero magnitude;
 * NaN poisons the sort comparator in findSimilar. Returning 0 makes
 * "no signal" explicit and keeps comparisons well-behaved.
 */
private cosineSimilarity(a: number[], b: number[]): number {
  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  return denominator === 0 ? 0 : dotProduct / denominator;
}
|
||||
|
||||
async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<EmbeddingData[]> {
|
||||
if (!this.enabled || !this.isInitialized || this.embeddings.length === 0) {
|
||||
return [];
|
||||
}
|
||||
|
||||
try {
|
||||
// Generate embedding for query
|
||||
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
|
||||
const queryEmbedding = queryEmbeddings[0];
|
||||
|
||||
// Calculate similarities
|
||||
const similarities = this.embeddings.map(item => ({
|
||||
...item,
|
||||
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
|
||||
}));
|
||||
|
||||
// Filter by threshold and sort by similarity
|
||||
return similarities
|
||||
.filter(item => item.similarity >= threshold)
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, maxResults);
|
||||
|
||||
} catch (error) {
|
||||
console.error('[EMBEDDINGS] Failed to find similar items:', error);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
/** True only when semantic search is both configured on and fully initialized. */
isEnabled(): boolean {
  return this.enabled ? this.isInitialized : false;
}
|
||||
|
||||
/** Snapshot of service state, suitable for health/debug endpoints. */
getStats(): { enabled: boolean; initialized: boolean; count: number } {
  const { enabled, isInitialized: initialized } = this;
  return { enabled, initialized, count: this.embeddings.length };
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Global instance — one shared EmbeddingsService per server process.
const embeddingsService = new EmbeddingsService();

export { embeddingsService, type EmbeddingData };

// Auto-initialize on import in server environment.
// `typeof window === 'undefined'` excludes browser bundles; the NODE_ENV
// check keeps tests from kicking off network calls at import time. The
// rejection is caught here because nothing awaits this promise.
if (typeof window === 'undefined' && process.env.NODE_ENV !== 'test') {
  embeddingsService.initialize().catch(error => {
    console.error('[EMBEDDINGS] Auto-initialization failed:', error);
  });
}
|
@ -1,4 +1,4 @@
|
||||
// src/utils/rateLimitedQueue.ts
|
||||
// src/utils/rateLimitedQueue.ts - FIXED: Memory leak and better cleanup
|
||||
|
||||
import dotenv from "dotenv";
|
||||
|
||||
@ -32,6 +32,43 @@ class RateLimitedQueue {
|
||||
private lastProcessedAt = 0;
|
||||
private currentlyProcessingTaskId: string | null = null;
|
||||
|
||||
// Periodic sweep that evicts finished tasks so the task list cannot grow
// without bound.
private cleanupInterval: NodeJS.Timeout;
// How long completed/failed tasks remain queryable before eviction.
private readonly TASK_RETENTION_MS = 30000;

constructor() {
  this.cleanupInterval = setInterval(() => {
    this.cleanupOldTasks();
  }, 30000);
  // Don't let the housekeeping timer alone keep the Node process alive;
  // shutdown() still clears it explicitly on orderly teardown.
  this.cleanupInterval.unref?.();
}
|
||||
|
||||
private cleanupOldTasks(): void {
|
||||
const now = Date.now();
|
||||
const initialLength = this.tasks.length;
|
||||
|
||||
this.tasks = this.tasks.filter(task => {
|
||||
if (task.status === 'queued' || task.status === 'processing') {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (task.completedAt && (now - task.completedAt) > this.TASK_RETENTION_MS) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
});
|
||||
|
||||
const cleaned = initialLength - this.tasks.length;
|
||||
if (cleaned > 0) {
|
||||
console.log(`[QUEUE] Cleaned up ${cleaned} old tasks, ${this.tasks.length} remaining`);
|
||||
}
|
||||
}
|
||||
|
||||
public shutdown(): void {
|
||||
if (this.cleanupInterval) {
|
||||
clearInterval(this.cleanupInterval);
|
||||
}
|
||||
}
|
||||
|
||||
add<T>(task: Task<T>, taskId?: string): Promise<T> {
|
||||
const id = taskId || this.generateTaskId();
|
||||
|
||||
@ -103,7 +140,6 @@ class RateLimitedQueue {
|
||||
const processingOffset = processingTasks.length > 0 ? 1 : 0;
|
||||
status.currentPosition = processingOffset + positionInQueue + 1;
|
||||
}
|
||||
} else if (task.status === 'completed' || task.status === 'failed') {
|
||||
}
|
||||
} else {
|
||||
const taskTimestamp = taskId.match(/ai_(\d+)_/)?.[1];
|
||||
@ -152,7 +188,6 @@ class RateLimitedQueue {
|
||||
this.currentlyProcessingTaskId = nextTask.id;
|
||||
this.lastProcessedAt = Date.now();
|
||||
|
||||
|
||||
try {
|
||||
await nextTask.task();
|
||||
nextTask.status = 'completed';
|
||||
@ -166,14 +201,6 @@ class RateLimitedQueue {
|
||||
|
||||
this.currentlyProcessingTaskId = null;
|
||||
|
||||
setTimeout(() => {
|
||||
const index = this.tasks.findIndex(t => t.id === nextTask.id);
|
||||
if (index >= 0) {
|
||||
console.log(`[QUEUE] Removing completed task ${nextTask.id}`);
|
||||
this.tasks.splice(index, 1);
|
||||
}
|
||||
}, 10000);
|
||||
|
||||
const hasMoreQueued = this.tasks.some(t => t.status === 'queued');
|
||||
if (hasMoreQueued) {
|
||||
console.log(`[QUEUE] Waiting ${this.delayMs}ms before next task`);
|
||||
@ -201,4 +228,8 @@ export function getQueueStatus(taskId?: string): QueueStatus {
|
||||
return queue.getStatus(taskId);
|
||||
}
|
||||
|
||||
// Stops the module-level queue's background cleanup timer so the process
// can exit cleanly (e.g. from a SIGTERM handler).
export function shutdownQueue(): void {
  queue.shutdown();
}
|
||||
|
||||
export default queue;
|
Loading…
x
Reference in New Issue
Block a user