enhancement 1: audit trail

2025-08-03 12:41:02 +02:00
parent 57c507915f
commit 6308c03709
6 changed files with 639 additions and 71 deletions
--- a/.env.example
+++ b/.env.example
@@ -1,79 +1,154 @@
-# ===========================================
+# ============================================================================
 # ForensicPathways Environment Configuration
-# ===========================================
+# ============================================================================
+# Copy this file to .env and adjust the values below.
+# Settings are ordered by likelihood of needing adjustment during setup.

-# === Authentication Configuration ===
+# ============================================================================
+# 1. CORE APPLICATION SETTINGS (REQUIRED - ADJUST FOR YOUR SETUP)
+# ============================================================================
+
+# Your application's public URL (used for redirects and links)
+PUBLIC_BASE_URL=http://localhost:4321
+
+# Application environment (development, production, staging)
+NODE_ENV=development
+
+# Secret key for session encryption (CHANGE IN PRODUCTION! Use a long random value, e.g. the output of: openssl rand -hex 32)
+AUTH_SECRET=your-secret-key-change-in-production-please
+
+# ============================================================================
+# 2. AI SERVICES CONFIGURATION (REQUIRED FOR AI FEATURES)
+# ============================================================================
+
+# Main AI Analysis Service (for query processing and recommendations)
+# Example uses Mistral AI - adjust endpoint/model as needed
+AI_ANALYZER_ENDPOINT=https://api.mistral.ai/v1
+AI_ANALYZER_API_KEY=your-mistral-api-key-here
+AI_ANALYZER_MODEL=mistral-small-latest
+
+# Vector Embeddings Service (for semantic search - can use same provider)
+AI_EMBEDDINGS_ENABLED=true
+AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
+AI_EMBEDDINGS_API_KEY=your-mistral-api-key-here
+AI_EMBEDDINGS_MODEL=mistral-embed
+
+# ============================================================================
+# 3. AUTHENTICATION (OPTIONAL - SET TO 'true' IF NEEDED)
+# ============================================================================
+
+# Enable authentication for different features
 AUTHENTICATION_NECESSARY=false
 AUTHENTICATION_NECESSARY_CONTRIBUTIONS=false
 AUTHENTICATION_NECESSARY_AI=false
-AUTH_SECRET=your-secret-key-change-in-production

-# OIDC Configuration (if authentication enabled)
+# OIDC Provider Settings (only needed if authentication enabled)
 OIDC_ENDPOINT=https://your-oidc-provider.com
 OIDC_CLIENT_ID=your-client-id
 OIDC_CLIENT_SECRET=your-client-secret

-# ===================================================================
-# AI CONFIGURATION - Complete Reference for Improved Pipeline
-# ===================================================================
+# ============================================================================
+# 4. ADVANCED AI CONFIGURATION (FINE-TUNING - DEFAULT VALUES USUALLY WORK)
+# ============================================================================

-# === CORE AI ENDPOINTS & MODELS ===
-AI_API_ENDPOINT=https://llm.mikoshi.de
-AI_API_KEY=sREDACTED3w
-AI_MODEL='mistral/mistral-small-latest'
+# Pipeline Performance Settings
+AI_MAX_SELECTED_ITEMS=60                    # Tools analyzed per micro-task
+AI_EMBEDDING_CANDIDATES=60                  # Vector search candidates
+AI_MICRO_TASK_DELAY_MS=500                  # Delay between AI micro-tasks

-# === IMPROVED PIPELINE: Use separate analyzer model (mistral-small is fine) ===
-AI_ANALYZER_ENDPOINT=https://llm.mikoshi.de
-AI_ANALYZER_API_KEY=skREDACTEDw3w  
-AI_ANALYZER_MODEL='mistral/mistral-small-latest'
+# Rate Limiting (limits are in requests per minute; the delay is in milliseconds)
+AI_RATE_LIMIT_MAX_REQUESTS=6               # Main query rate limit
+AI_MICRO_TASK_RATE_LIMIT=15                # Micro-task rate limit
+AI_RATE_LIMIT_DELAY_MS=3000                # Delay between rate-limited calls

-# === EMBEDDINGS CONFIGURATION ===
-AI_EMBEDDINGS_ENABLED=true
-AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
-AI_EMBEDDINGS_API_KEY=ZREDACTED3wL
-AI_EMBEDDINGS_MODEL=mistral-embed
-AI_EMBEDDINGS_BATCH_SIZE=20
-AI_EMBEDDINGS_BATCH_DELAY_MS=1000
+# Embeddings Batch Processing
+AI_EMBEDDINGS_BATCH_SIZE=20                 # Embeddings processed per batch
+AI_EMBEDDINGS_BATCH_DELAY_MS=1000          # Delay between embedding batches

-# === PIPELINE: VectorIndex (HNSW) Configuration ===
-AI_MAX_SELECTED_ITEMS=60                    # Tools visible to each micro-task 
-AI_EMBEDDING_CANDIDATES=60                  # VectorIndex candidates (HNSW is more efficient)
-AI_SIMILARITY_THRESHOLD=0.3                # Not used by VectorIndex (uses cosine distance internally)
+# Timeouts and Limits
+AI_MICRO_TASK_TIMEOUT_MS=25000             # Max time per micro-task
+AI_QUEUE_MAX_SIZE=50                       # Max queued requests
+AI_SIMILARITY_THRESHOLD=0.3                # Vector similarity threshold

-# === MICRO-TASK CONFIGURATION ===
-AI_MICRO_TASK_DELAY_MS=500                 # Delay between micro-tasks  
-AI_MICRO_TASK_TIMEOUT_MS=25000             # Timeout per micro-task (increased for full context)
+# ============================================================================
+# 5. FORENSIC AUDIT SYSTEM (OPTIONAL - FOR TRANSPARENCY AND DEBUGGING)
+# ============================================================================

-# === RATE LIMITING ===
-AI_RATE_LIMIT_DELAY_MS=3000                # Main rate limit delay
-AI_RATE_LIMIT_MAX_REQUESTS=6               # Main requests per minute (reduced - fewer but richer calls)
-AI_MICRO_TASK_RATE_LIMIT=15                # Micro-task requests per minute (was 30)
+# Enable detailed audit trail of AI decision-making
+FORENSIC_AUDIT_ENABLED=false

-# === QUEUE MANAGEMENT ===
-AI_QUEUE_MAX_SIZE=50
-AI_QUEUE_CLEANUP_INTERVAL_MS=300000
+# Audit detail level: minimal, standard, verbose
+FORENSIC_AUDIT_DETAIL_LEVEL=standard

-# === PERFORMANCE & MONITORING ===
-AI_MICRO_TASK_DEBUG=true
-AI_PERFORMANCE_METRICS=true
-AI_RESPONSE_CACHE_TTL_MS=3600000
+# Audit retention and limits
+FORENSIC_AUDIT_RETENTION_HOURS=72          # Keep audit data for 3 days
+FORENSIC_AUDIT_MAX_ENTRIES=50              # Max entries per request

-# ===================================================================
-# LEGACY VARIABLES (still used but less important)
-# ===================================================================
+# ============================================================================
+# 6. QUALITY CONTROL AND BIAS DETECTION (OPTIONAL - ADVANCED FEATURES)
+# ============================================================================

-# These are still used by other parts of the system:
-AI_RESPONSE_CACHE_TTL_MS=3600000           # For caching responses
-AI_QUEUE_MAX_SIZE=50                       # Queue management
-AI_QUEUE_CLEANUP_INTERVAL_MS=300000       # Queue cleanup
+# Confidence Scoring Weights (must sum to 1.0)
+CONFIDENCE_EMBEDDINGS_WEIGHT=0.3
+CONFIDENCE_CONSENSUS_WEIGHT=0.25
+CONFIDENCE_DOMAIN_MATCH_WEIGHT=0.25
+CONFIDENCE_FRESHNESS_WEIGHT=0.2

-# === Application Configuration ===
-PUBLIC_BASE_URL=http://localhost:4321
-NODE_ENV=development
+# Confidence Thresholds (0-100)
+CONFIDENCE_MINIMUM_THRESHOLD=40
+CONFIDENCE_MEDIUM_THRESHOLD=60
+CONFIDENCE_HIGH_THRESHOLD=80

-# Nextcloud Integration (Optional)
-NEXTCLOUD_ENDPOINT=https://your-nextcloud.com
-NEXTCLOUD_USERNAME=your-username
-NEXTCLOUD_PASSWORD=your-password
-NEXTCLOUD_UPLOAD_PATH=/kb-media
-NEXTCLOUD_PUBLIC_URL=https://your-nextcloud.com/s/
+# Bias Detection Settings
+BIAS_DETECTION_ENABLED=false
+BIAS_POPULARITY_THRESHOLD=0.7              # Detect over-popular tools
+BIAS_DIVERSITY_MINIMUM=0.6                 # Require recommendation diversity
+BIAS_CELEBRITY_TOOLS="Volatility 3,Wireshark,Autopsy,Maltego"
+
+# Quality Control Thresholds
+QUALITY_MIN_RESPONSE_LENGTH=50             # Minimum AI response length
+QUALITY_MIN_SELECTION_COUNT=1              # Minimum tools selected
+QUALITY_MAX_PROCESSING_TIME_MS=30000       # Max processing time
+
+# ============================================================================
+# 7. USER INTERFACE PREFERENCES (OPTIONAL - UI DEFAULTS)
+# ============================================================================
+
+# Default UI behavior (users can override)
+UI_SHOW_AUDIT_TRAIL_DEFAULT=false
+UI_SHOW_CONFIDENCE_SCORES=true
+UI_SHOW_BIAS_WARNINGS=true
+UI_AUDIT_TRAIL_COLLAPSIBLE=true
+
+# ============================================================================
+# 8. EXTERNAL INTEGRATIONS (OPTIONAL - ONLY IF USING THESE SERVICES)
+# ============================================================================
+
+# Nextcloud Integration (for file uploads) - uncomment and fill in the lines below to enable
+# NEXTCLOUD_ENDPOINT=https://your-nextcloud.com
+# NEXTCLOUD_USERNAME=your-username
+# NEXTCLOUD_PASSWORD=your-password
+# NEXTCLOUD_UPLOAD_PATH=/kb-media
+# NEXTCLOUD_PUBLIC_URL=https://your-nextcloud.com/s/
+
+# ============================================================================
+# 9. PERFORMANCE AND MONITORING (OPTIONAL - FOR PRODUCTION OPTIMIZATION)
+# ============================================================================
+
+# Caching and Queue Management
+AI_RESPONSE_CACHE_TTL_MS=3600000           # Cache responses for 1 hour
+AI_QUEUE_CLEANUP_INTERVAL_MS=300000        # Cleanup queue every 5 minutes
+
+# Debug and Monitoring
+AI_MICRO_TASK_DEBUG=false                  # Enable detailed micro-task logging
+AI_PERFORMANCE_METRICS=false               # Enable performance tracking
+
+# ============================================================================
+# SETUP CHECKLIST:
+# ============================================================================
+# 1. Set PUBLIC_BASE_URL to your domain
+# 2. Change AUTH_SECRET to a secure random string
+# 3. Configure AI service endpoints and API keys
+# 4. Set authentication options if needed
+# 5. Test with default advanced settings before adjusting
+# ============================================================================