Bugfixing in the embeddings API

.env.example: 276 changed lines

@@ -1,17 +1,17 @@
# ============================================================================
-# ForensicPathways Environment Configuration
+# ForensicPathways Environment Configuration - COMPLETE
# ============================================================================
# Copy this file to .env and adjust the values below.
-# Settings are ordered by likelihood of needing adjustment during setup.
+# This file covers ALL environment variables used in the codebase.

# ============================================================================
-# 1. CORE APPLICATION SETTINGS (REQUIRED - ADJUST FOR YOUR SETUP)
+# 1. CORE APPLICATION SETTINGS (REQUIRED)
# ============================================================================

# Your application's public URL (used for redirects and links)
PUBLIC_BASE_URL=http://localhost:4321

-# Application environment (development, production, staging)
+# Application environment
NODE_ENV=development

# Secret key for session encryption (CHANGE IN PRODUCTION!)
@@ -22,19 +22,99 @@ AUTH_SECRET=your-secret-key-change-in-production-please
# ============================================================================

# Main AI Analysis Service (for query processing and recommendations)
-# Example uses Mistral AI - adjust endpoint/model as needed
-AI_ANALYZER_ENDPOINT=https://api.mistral.ai/v1
-AI_ANALYZER_API_KEY=your-mistral-api-key-here
-AI_ANALYZER_MODEL=mistral-small-latest
+# Examples: http://localhost:11434 (Ollama), https://api.mistral.ai, https://api.openai.com
+AI_ANALYZER_ENDPOINT=https://api.mistral.ai/v1/chat/completions
+AI_ANALYZER_API_KEY=
+AI_ANALYZER_MODEL=mistral/mistral-small-latest

-# Vector Embeddings Service (for semantic search - can use same provider)
+# Vector Embeddings Service (for semantic search)
+# Leave API_KEY empty for Ollama, use an actual key for cloud services
AI_EMBEDDINGS_ENABLED=true
AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
-AI_EMBEDDINGS_API_KEY=your-mistral-api-key-here
+AI_EMBEDDINGS_API_KEY=
AI_EMBEDDINGS_MODEL=mistral-embed
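
For a fully local setup, the same two services could point at Ollama's OpenAI-compatible API instead; a hedged sketch (the model names are placeholders for whatever is pulled locally, and whether they need a provider prefix like the mistral/ prefix above depends on the client library the app uses):

# AI_ANALYZER_ENDPOINT=http://localhost:11434/v1/chat/completions
# AI_ANALYZER_API_KEY=
# AI_ANALYZER_MODEL=llama3.1
# AI_EMBEDDINGS_ENDPOINT=http://localhost:11434/v1/embeddings
# AI_EMBEDDINGS_API_KEY=
# AI_EMBEDDINGS_MODEL=nomic-embed-text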

# ============================================================================
-# 3. AUTHENTICATION (OPTIONAL - SET TO 'true' IF NEEDED)
+# 3. AI PIPELINE CONFIGURATION (CONTEXT & PERFORMANCE TUNING)
# ============================================================================

+# === SIMILARITY SEARCH STAGE ===
+# How many similar tools/concepts the embeddings search returns as candidates
+# 🔍 This is the FIRST filter - vector similarity matching
+# Lower = faster, less comprehensive | Higher = slower, more comprehensive
+AI_EMBEDDING_CANDIDATES=40
+
+# Minimum similarity score threshold (0.0-1.0)
+# Lower = more results but less relevant | Higher = fewer but more relevant
+AI_SIMILARITY_THRESHOLD=0.3
+
+# === AI SELECTION STAGE ===
+# Maximum tools the AI can select from embedding candidates
+# 🤖 This is the SECOND filter - AI intelligent selection
+# Should be ≤ AI_EMBEDDING_CANDIDATES
+AI_MAX_SELECTED_ITEMS=25
+
+# Maximum tools sent to AI for detailed analysis (micro-tasks)
+# 📋 This is the FINAL context size sent to AI models
+# Lower = less AI context, faster responses | Higher = more context, slower
+AI_MAX_TOOLS_TO_ANALYZE=20
+
+# Maximum concepts sent to AI for background knowledge selection
+# 📚 Concepts are smaller than tools, so this can be higher
+AI_MAX_CONCEPTS_TO_ANALYZE=10
+
+# === CONTEXT FLOW SUMMARY ===
+# 1. Vector Search: 111 total tools → AI_EMBEDDING_CANDIDATES (40) most similar
+# 2. AI Selection: 40 candidates → AI_MAX_SELECTED_ITEMS (25) best matches
+# 3. AI Analysis: 25 selected → AI_MAX_TOOLS_TO_ANALYZE (20) for micro-tasks
+# 4. Final Output: Recommendations based on analyzed subset
+
+# ============================================================================
+# 4. AI PERFORMANCE & RATE LIMITING
+# ============================================================================
+
+# === USER RATE LIMITS (per minute) ===
+# Main queries per user per minute
+AI_RATE_LIMIT_MAX_REQUESTS=4
+
+# Total AI micro-task calls per user per minute (across all micro-tasks)
+AI_MICRO_TASK_TOTAL_LIMIT=30
+
+# === PIPELINE TIMING ===
+# Delay between micro-tasks within a single query (milliseconds)
+# Higher = gentler on AI service | Lower = faster responses
+AI_MICRO_TASK_DELAY_MS=500
+
+# Delay between queued requests (milliseconds)
+AI_RATE_LIMIT_DELAY_MS=2000
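
A rough feel for these timing defaults, assuming one micro-task per analyzed tool (an assumption, not stated here): a query that analyzes the full AI_MAX_TOOLS_TO_ANALYZE=20 set accumulates about 20 × 500 ms = 10 s of pacing delay before any model latency, and each queued request adds a further 2 s gap.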
+
+# === EMBEDDINGS BATCH PROCESSING ===
+# How many embeddings to generate per API call
+AI_EMBEDDINGS_BATCH_SIZE=10
+
+# Delay between embedding batches (milliseconds)
+AI_EMBEDDINGS_BATCH_DELAY_MS=1000
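
Worked example with the defaults, assuming the index covers the 111 tools cited in the context flow summary: 111 items at a batch size of 10 means 12 embedding calls, i.e. roughly 11 s of inter-batch delay per full re-index on top of the API calls themselves.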
+
+# ============================================================================
+# 5. AI CONTEXT & TOKEN MANAGEMENT
+# ============================================================================
+
+# Maximum context tokens to maintain across micro-tasks
+# Controls how much conversation history is preserved between AI calls
+AI_MAX_CONTEXT_TOKENS=3000
+
+# Maximum tokens per individual AI prompt
+# Larger = more context per call | Smaller = faster responses
+AI_MAX_PROMPT_TOKENS=1200
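
If the preserved context travels with each prompt (an assumption about how the pipeline assembles calls, not something stated here), a single micro-task carries at most about AI_MAX_PROMPT_TOKENS (1,200) + AI_MAX_CONTEXT_TOKENS (3,000) ≈ 4,200 tokens, which needs to fit within the analyzer model's context window.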
+
+# Timeout for individual micro-tasks (milliseconds)
+AI_MICRO_TASK_TIMEOUT_MS=25000
+
+# Maximum size of the processing queue
+AI_QUEUE_MAX_SIZE=50
+
+# ============================================================================
+# 6. AUTHENTICATION & AUTHORIZATION (OPTIONAL)
+# ============================================================================
+
# Enable authentication for different features
@@ -48,30 +128,47 @@ OIDC_CLIENT_ID=your-client-id
OIDC_CLIENT_SECRET=your-client-secret

# ============================================================================
-# 4. ADVANCED AI CONFIGURATION (FINE-TUNING - DEFAULT VALUES USUALLY WORK)
+# 7. FILE UPLOADS - NEXTCLOUD INTEGRATION (OPTIONAL)
# ============================================================================

-# Pipeline Performance Settings
-AI_MAX_SELECTED_ITEMS=60 # Tools analyzed per micro-task
-AI_EMBEDDING_CANDIDATES=60 # Vector search candidates
-AI_MICRO_TASK_DELAY_MS=500 # Delay between AI micro-tasks
+# Nextcloud server for file uploads (knowledgebase contributions)
+# Leave empty to disable file upload functionality
+NEXTCLOUD_ENDPOINT=https://your-nextcloud.com

-# Rate Limiting (requests per minute)
-AI_RATE_LIMIT_MAX_REQUESTS=6 # Main query rate limit
-AI_MICRO_TASK_RATE_LIMIT=15 # Micro-task rate limit
-AI_RATE_LIMIT_DELAY_MS=3000 # Delay between rate-limited calls
+# Nextcloud credentials (app password recommended)
+NEXTCLOUD_USERNAME=your-username
+NEXTCLOUD_PASSWORD=your-app-password

-# Embeddings Batch Processing
-AI_EMBEDDINGS_BATCH_SIZE=20 # Embeddings processed per batch
-AI_EMBEDDINGS_BATCH_DELAY_MS=1000 # Delay between embedding batches
+# Upload directory on Nextcloud (will be created if it doesn't exist)
+NEXTCLOUD_UPLOAD_PATH=/kb-media

-# Timeouts and Limits
-AI_MICRO_TASK_TIMEOUT_MS=25000 # Max time per micro-task
-AI_QUEUE_MAX_SIZE=50 # Max queued requests
-AI_SIMILARITY_THRESHOLD=0.3 # Vector similarity threshold
+# Public URL base for sharing uploaded files
+# Usually your Nextcloud base URL + share path
+NEXTCLOUD_PUBLIC_URL=https://your-nextcloud.com/s/
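
A resulting share link would then typically be NEXTCLOUD_PUBLIC_URL plus a share token, for example https://your-nextcloud.com/s/AbC123xyz (the token shown is illustrative).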

# ============================================================================
-# 5. FORENSIC AUDIT SYSTEM (OPTIONAL - FOR TRANSPARENCY AND DEBUGGING)
+# 8. GIT CONTRIBUTIONS - ISSUE CREATION (OPTIONAL)
# ============================================================================

+# Git provider: gitea, github, or gitlab
+GIT_PROVIDER=gitea
+
+# Repository URL (used to extract owner/name)
+# Example: https://git.example.com/owner/forensic-pathways.git
+GIT_REPO_URL=https://git.example.com/owner/forensic-pathways.git
+
+# API endpoint for your git provider
+# Gitea: https://git.example.com/api/v1
+# GitHub: https://api.github.com
+# GitLab: https://gitlab.example.com/api/v4
+GIT_API_ENDPOINT=https://git.example.com/api/v1
+
+# Personal access token or API token for creating issues
+# Generate this in your git provider's settings
+GIT_API_TOKEN=your-git-api-token
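
If the repository lives on GitHub instead of Gitea, the same block could look like this (owner and repository name are placeholders):

# GIT_PROVIDER=github
# GIT_REPO_URL=https://github.com/owner/forensic-pathways.git
# GIT_API_ENDPOINT=https://api.github.com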
+
+# ============================================================================
+# 9. AUDIT & DEBUGGING (OPTIONAL)
+# ============================================================================
+
# Enable detailed audit trail of AI decision-making
@@ -80,38 +177,49 @@ FORENSIC_AUDIT_ENABLED=false
# Audit detail level: minimal, standard, verbose
FORENSIC_AUDIT_DETAIL_LEVEL=standard

-# Audit retention and limits
-FORENSIC_AUDIT_RETENTION_HOURS=72 # Keep audit data for 3 days
-FORENSIC_AUDIT_MAX_ENTRIES=50 # Max entries per request
+# Audit retention time (hours)
+FORENSIC_AUDIT_RETENTION_HOURS=24
+
+# Maximum audit entries per request
+FORENSIC_AUDIT_MAX_ENTRIES=50

+# Enable detailed AI pipeline logging
+AI_PIPELINE_DEBUG=false
+
+# Enable performance metrics collection
+AI_PERFORMANCE_METRICS=false
+
+# Enable detailed micro-task debugging
+AI_MICRO_TASK_DEBUG=false
+
# ============================================================================
-# 6. QUALITY CONTROL AND BIAS DETECTION (OPTIONAL - ADVANCED FEATURES)
+# 10. QUALITY CONTROL & BIAS DETECTION (ADVANCED)
# ============================================================================

-# Confidence Scoring Weights (must sum to 1.0)
+# Confidence scoring weights (must sum to 1.0)
CONFIDENCE_EMBEDDINGS_WEIGHT=0.3
CONFIDENCE_CONSENSUS_WEIGHT=0.25
CONFIDENCE_DOMAIN_MATCH_WEIGHT=0.25
CONFIDENCE_FRESHNESS_WEIGHT=0.2
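
Quick sanity check of the defaults: 0.3 + 0.25 + 0.25 + 0.2 = 1.0, so they satisfy the "must sum to 1.0" constraint; adjust all four together if any single weight changes.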

-# Confidence Thresholds (0-100)
+# Confidence thresholds (0-100)
CONFIDENCE_MINIMUM_THRESHOLD=40
CONFIDENCE_MEDIUM_THRESHOLD=60
CONFIDENCE_HIGH_THRESHOLD=80

-# Bias Detection Settings
+# Bias detection settings
BIAS_DETECTION_ENABLED=false
-BIAS_POPULARITY_THRESHOLD=0.7 # Detect over-popular tools
-BIAS_DIVERSITY_MINIMUM=0.6 # Require recommendation diversity
-BIAS_CELEBRITY_TOOLS="Volatility 3,Wireshark,Autopsy,Maltego"
+BIAS_POPULARITY_THRESHOLD=0.7
+BIAS_DIVERSITY_MINIMUM=0.6
+BIAS_CELEBRITY_TOOLS=""

-# Quality Control Thresholds
-QUALITY_MIN_RESPONSE_LENGTH=50 # Minimum AI response length
-QUALITY_MIN_SELECTION_COUNT=1 # Minimum tools selected
-QUALITY_MAX_PROCESSING_TIME_MS=30000 # Max processing time
+# Quality control thresholds
+QUALITY_MIN_RESPONSE_LENGTH=50
+QUALITY_MIN_SELECTION_COUNT=1
+QUALITY_MAX_PROCESSING_TIME_MS=30000

# ============================================================================
-# 7. USER INTERFACE PREFERENCES (OPTIONAL - UI DEFAULTS)
+# 11. USER INTERFACE DEFAULTS (OPTIONAL)
# ============================================================================

# Default UI behavior (users can override)
@@ -121,34 +229,76 @@ UI_SHOW_BIAS_WARNINGS=true
UI_AUDIT_TRAIL_COLLAPSIBLE=true

# ============================================================================
-# 8. EXTERNAL INTEGRATIONS (OPTIONAL - ONLY IF USING THESE SERVICES)
+# 12. CACHING & PERFORMANCE (OPTIONAL)
# ============================================================================

-# Nextcloud Integration (for file uploads)
-# NEXTCLOUD_ENDPOINT=https://your-nextcloud.com
-# NEXTCLOUD_USERNAME=your-username
-# NEXTCLOUD_PASSWORD=your-password
-# NEXTCLOUD_UPLOAD_PATH=/kb-media
-# NEXTCLOUD_PUBLIC_URL=https://your-nextcloud.com/s/
+# Cache AI responses (milliseconds)
+AI_RESPONSE_CACHE_TTL_MS=3600000
+
+# Queue cleanup interval (milliseconds)
+AI_QUEUE_CLEANUP_INTERVAL_MS=300000
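
In human units, these defaults cache responses for one hour (3,600,000 ms) and sweep the queue every five minutes (300,000 ms).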

# ============================================================================
-# 9. PERFORMANCE AND MONITORING (OPTIONAL - FOR PRODUCTION OPTIMIZATION)
+# PERFORMANCE TUNING PRESETS
# ============================================================================

-# Caching and Queue Management
-AI_RESPONSE_CACHE_TTL_MS=3600000 # Cache responses for 1 hour
-AI_QUEUE_CLEANUP_INTERVAL_MS=300000 # Cleanup queue every 5 minutes
+# 🚀 FOR FASTER RESPONSES (less comprehensive):
+# AI_EMBEDDING_CANDIDATES=20
+# AI_MAX_SELECTED_ITEMS=15
+# AI_MAX_TOOLS_TO_ANALYZE=10
+# AI_MICRO_TASK_DELAY_MS=200
+# AI_MAX_CONTEXT_TOKENS=2000

-# Debug and Monitoring
-AI_MICRO_TASK_DEBUG=false # Enable detailed micro-task logging
-AI_PERFORMANCE_METRICS=false # Enable performance tracking
+# 🎯 FOR BETTER QUALITY (more comprehensive):
+# AI_EMBEDDING_CANDIDATES=60
+# AI_MAX_SELECTED_ITEMS=40
+# AI_MAX_TOOLS_TO_ANALYZE=30
+# AI_MICRO_TASK_DELAY_MS=800
+# AI_MAX_CONTEXT_TOKENS=4000
+
+# 🔋 FOR LOW-POWER SYSTEMS (minimal resources):
+# AI_EMBEDDING_CANDIDATES=15
+# AI_MAX_SELECTED_ITEMS=10
+# AI_MAX_TOOLS_TO_ANALYZE=8
+# AI_RATE_LIMIT_MAX_REQUESTS=2
+# AI_MICRO_TASK_DELAY_MS=1000

# ============================================================================
-# SETUP CHECKLIST:
+# FEATURE COMBINATIONS GUIDE
# ============================================================================
-# 1. Set PUBLIC_BASE_URL to your domain
-# 2. Change AUTH_SECRET to a secure random string
-# 3. Configure AI service endpoints and API keys
-# 4. Set authentication options if needed
-# 5. Test with default advanced settings before adjusting
+
+# 📝 BASIC SETUP (AI only):
+# - Configure AI_ANALYZER_* and AI_EMBEDDINGS_*
+# - Leave authentication, file uploads, and git disabled
+
+# 🔐 WITH AUTHENTICATION:
+# - Set AUTHENTICATION_NECESSARY_* to true
+# - Configure OIDC_* settings
+
+# 📁 WITH FILE UPLOADS:
+# - Configure all NEXTCLOUD_* settings
+# - Test connection before enabling in UI
+
+# 🔄 WITH CONTRIBUTIONS:
+# - Configure all GIT_* settings
+# - Test API token permissions for issue creation
+
+# 🔍 WITH FULL MONITORING:
+# - Enable FORENSIC_AUDIT_ENABLED=true
+# - Enable AI_PIPELINE_DEBUG=true
+# - Configure audit retention and detail level
+
+# ============================================================================
+# SETUP CHECKLIST
+# ============================================================================
+# ✅ 1. Set PUBLIC_BASE_URL to your domain
+# ✅ 2. Change AUTH_SECRET to a secure random string
+# ✅ 3. Configure AI endpoints (Ollama: leave API_KEY empty)
+# ✅ 4. Start with default AI values, tune based on performance
+# ✅ 5. Enable authentication if needed (configure OIDC)
+# ✅ 6. Configure Nextcloud if file uploads needed
+# ✅ 7. Configure Git provider if contributions needed
+# ✅ 8. Test with a simple query to verify pipeline works
+# ✅ 9. Enable audit trail for transparency if desired
+# ✅ 10. Tune performance settings based on usage patterns
# ============================================================================