diff --git a/.env.example b/.env.example
index b127600..00b00bc 100644
--- a/.env.example
+++ b/.env.example
@@ -42,32 +42,40 @@ AI_EMBEDDINGS_MODEL=mistral-embed
# How many similar tools/concepts embeddings search returns as candidates
# 🔍 This is the FIRST filter - vector similarity matching
# Lower = faster, less comprehensive | Higher = slower, more comprehensive
-AI_EMBEDDING_CANDIDATES=40
+AI_EMBEDDING_CANDIDATES=50
# Minimum similarity score threshold (0.0-1.0)
# Lower = more results but less relevant | Higher = fewer but more relevant
AI_SIMILARITY_THRESHOLD=0.3
+# === AI SELECTION FROM EMBEDDINGS ===
+# When embeddings are enabled, how many top tools to send with full context
+# 🎯 Applied between the vector search and the AI selection stage - take the best N from the embeddings results
+AI_EMBEDDING_SELECTION_LIMIT=30
+AI_EMBEDDING_CONCEPTS_LIMIT=15
+
+# Maximum tools/concepts sent to AI when embeddings are DISABLED
+# Set to 0 for no limit (WARNING: may cause token overflow with large datasets)
+AI_NO_EMBEDDINGS_TOOL_LIMIT=0
+AI_NO_EMBEDDINGS_CONCEPT_LIMIT=0
+
# === AI SELECTION STAGE ===
# Maximum tools the AI can select from embedding candidates
# 🤖 This is the SECOND filter - AI intelligent selection
# Should be ≤ AI_EMBEDDING_CANDIDATES
AI_MAX_SELECTED_ITEMS=25
-# Maximum tools sent to AI for detailed analysis (micro-tasks)
-# 📋 This is the FINAL context size sent to AI models
-# Lower = less AI context, faster responses | Higher = more context, slower
-AI_MAX_TOOLS_TO_ANALYZE=20
+# === EMBEDDINGS EFFICIENCY THRESHOLDS ===
+# Minimum tools required for embeddings to be considered useful
+AI_EMBEDDINGS_MIN_TOOLS=8
-# Maximum concepts sent to AI for background knowledge selection
-# 📚 Concepts are smaller than tools, so can be higher
-AI_MAX_CONCEPTS_TO_ANALYZE=10
+# Maximum fraction of all tools (0.0-1.0) that embeddings may return and still count as effective filtering
+AI_EMBEDDINGS_MAX_REDUCTION_RATIO=0.75
# === CONTEXT FLOW SUMMARY ===
-# 1. Vector Search: 111 total tools → AI_EMBEDDING_CANDIDATES (40) most similar
-# 2. AI Selection: 40 candidates → AI_MAX_SELECTED_ITEMS (25) best matches
-# 3. AI Analysis: 25 selected → AI_MAX_TOOLS_TO_ANALYZE (20) for micro-tasks
-# 4. Final Output: Recommendations based on analyzed subset
+# 1. Vector Search: 111 total tools → AI_EMBEDDING_CANDIDATES (50) most similar
+# 2. Embeddings Cut: 50 candidates → AI_EMBEDDING_SELECTION_LIMIT (30) sent to the AI
+# 3. AI Selection: 30 candidates → AI_MAX_SELECTED_ITEMS (25) best matches
+# 4. Final Output: Recommendations based on the selected subset
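The staged limits above only work as intended when each stage is no larger than the one that feeds it. A minimal sanity-check sketch in TypeScript (the variable names are the ones defined in this file; the check itself is illustrative and not part of the codebase):

// Hypothetical startup check: each filter stage should be no larger than the previous one.
const candidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '50', 10);
const selectionLimit = parseInt(process.env.AI_EMBEDDING_SELECTION_LIMIT || '30', 10);
const maxSelected = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '25', 10);

if (selectionLimit > candidates) {
  console.warn(`AI_EMBEDDING_SELECTION_LIMIT (${selectionLimit}) exceeds AI_EMBEDDING_CANDIDATES (${candidates})`);
}
if (maxSelected > selectionLimit) {
  console.warn(`AI_MAX_SELECTED_ITEMS (${maxSelected}) exceeds AI_EMBEDDING_SELECTION_LIMIT (${selectionLimit})`);
}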
# ============================================================================
# 4. AI PERFORMANCE & RATE LIMITING
@@ -95,23 +103,21 @@ AI_EMBEDDINGS_BATCH_SIZE=10
# Delay between embedding batches (milliseconds)
AI_EMBEDDINGS_BATCH_DELAY_MS=1000
+# Maximum tools and concepts sent to the AI for detailed analysis (micro-tasks)
+AI_MAX_TOOLS_TO_ANALYZE=20
+AI_MAX_CONCEPTS_TO_ANALYZE=10
+
# ============================================================================
# 5. AI CONTEXT & TOKEN MANAGEMENT
# ============================================================================
# Maximum context tokens to maintain across micro-tasks
# Controls how much conversation history is preserved between AI calls
-AI_MAX_CONTEXT_TOKENS=3000
+AI_MAX_CONTEXT_TOKENS=4000
# Maximum tokens per individual AI prompt
# Larger = more context per call | Smaller = faster responses
-AI_MAX_PROMPT_TOKENS=1200
-
-# Timeout for individual micro-tasks (milliseconds)
-AI_MICRO_TASK_TIMEOUT_MS=25000
-
-# Maximum size of the processing queue
-AI_QUEUE_MAX_SIZE=50
+AI_MAX_PROMPT_TOKENS=1500
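Both token budgets are enforced against a rough estimate rather than a real tokenizer (estimateTokens in src/utils/aiPipeline.ts, used further down in this diff; its implementation is not shown). A plausible sketch of such a heuristic, assuming the common ~4 characters per token rule of thumb:

// Assumed heuristic: ~4 characters per token; the real estimateTokens() may differ.
function estimateTokens(text: string): number {
  return Math.ceil(text.length / 4);
}

const maxPromptTokens = parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10);
const fitsPromptBudget = (prompt: string): boolean => estimateTokens(prompt) <= maxPromptTokens;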
# ============================================================================
# 6. AUTHENTICATION & AUTHORIZATION (OPTIONAL)
@@ -172,7 +178,7 @@ GIT_API_TOKEN=your-git-api-token
# ============================================================================
# Enable detailed audit trail of AI decision-making
-FORENSIC_AUDIT_ENABLED=false
+FORENSIC_AUDIT_ENABLED=true
# Audit detail level: minimal, standard, verbose
FORENSIC_AUDIT_DETAIL_LEVEL=standard
@@ -183,15 +189,6 @@ FORENSIC_AUDIT_RETENTION_HOURS=24
# Maximum audit entries per request
FORENSIC_AUDIT_MAX_ENTRIES=50
-# Enable detailed AI pipeline logging
-AI_PIPELINE_DEBUG=false
-
-# Enable performance metrics collection
-AI_PERFORMANCE_METRICS=false
-
-# Enable detailed micro-task debugging
-AI_MICRO_TASK_DEBUG=false
-
# ============================================================================
# 10. QUALITY CONTROL & BIAS DETECTION (ADVANCED)
# ============================================================================
@@ -207,61 +204,20 @@ CONFIDENCE_MINIMUM_THRESHOLD=40
CONFIDENCE_MEDIUM_THRESHOLD=60
CONFIDENCE_HIGH_THRESHOLD=80
-# Bias detection settings
-BIAS_DETECTION_ENABLED=false
-BIAS_POPULARITY_THRESHOLD=0.7
-BIAS_DIVERSITY_MINIMUM=0.6
-BIAS_CELEBRITY_TOOLS=""
-
-# Quality control thresholds
-QUALITY_MIN_RESPONSE_LENGTH=50
-QUALITY_MIN_SELECTION_COUNT=1
-QUALITY_MAX_PROCESSING_TIME_MS=30000
-
-# ============================================================================
-# 11. USER INTERFACE DEFAULTS (OPTIONAL)
-# ============================================================================
-
-# Default UI behavior (users can override)
-UI_SHOW_AUDIT_TRAIL_DEFAULT=false
-UI_SHOW_CONFIDENCE_SCORES=true
-UI_SHOW_BIAS_WARNINGS=true
-UI_AUDIT_TRAIL_COLLAPSIBLE=true
-
-# ============================================================================
-# 12. CACHING & PERFORMANCE (OPTIONAL)
-# ============================================================================
-
-# Cache AI responses (milliseconds)
-AI_RESPONSE_CACHE_TTL_MS=3600000
-
-# Queue cleanup interval (milliseconds)
-AI_QUEUE_CLEANUP_INTERVAL_MS=300000
-
# ============================================================================
# PERFORMANCE TUNING PRESETS
# ============================================================================
-# 🚀 FOR FASTER RESPONSES (less comprehensive):
-# AI_EMBEDDING_CANDIDATES=20
-# AI_MAX_SELECTED_ITEMS=15
-# AI_MAX_TOOLS_TO_ANALYZE=10
-# AI_MICRO_TASK_DELAY_MS=200
-# AI_MAX_CONTEXT_TOKENS=2000
+# 🚀 FOR FASTER RESPONSES (and to prevent token overflow):
+# AI_NO_EMBEDDINGS_TOOL_LIMIT=25
+# AI_NO_EMBEDDINGS_CONCEPT_LIMIT=10
-# 🎯 FOR BETTER QUALITY (more comprehensive):
-# AI_EMBEDDING_CANDIDATES=60
-# AI_MAX_SELECTED_ITEMS=40
-# AI_MAX_TOOLS_TO_ANALYZE=30
-# AI_MICRO_TASK_DELAY_MS=800
-# AI_MAX_CONTEXT_TOKENS=4000
+# 🎯 FOR FULL DATABASE ACCESS (risk of truncation):
+# AI_NO_EMBEDDINGS_TOOL_LIMIT=0
+# AI_NO_EMBEDDINGS_CONCEPT_LIMIT=0
-# 🔋 FOR LOW-POWER SYSTEMS (minimal resources):
-# AI_EMBEDDING_CANDIDATES=15
-# AI_MAX_SELECTED_ITEMS=10
-# AI_MAX_TOOLS_TO_ANALYZE=8
-# AI_RATE_LIMIT_MAX_REQUESTS=2
-# AI_MICRO_TASK_DELAY_MS=1000
+# 🔋 FOR LOW-POWER SYSTEMS:
+# AI_NO_EMBEDDINGS_TOOL_LIMIT=15
# ============================================================================
# FEATURE COMBINATIONS GUIDE
@@ -285,7 +241,6 @@ AI_QUEUE_CLEANUP_INTERVAL_MS=300000
# 🔍 WITH FULL MONITORING:
# - Enable FORENSIC_AUDIT_ENABLED=true
-# - Enable AI_PIPELINE_DEBUG=true
# - Configure audit retention and detail level
# ============================================================================
diff --git a/src/components/AIQueryInterface.astro b/src/components/AIQueryInterface.astro
index 0c8d86e..9b80036 100644
--- a/src/components/AIQueryInterface.astro
+++ b/src/components/AIQueryInterface.astro
@@ -15,7 +15,7 @@ const domainAgnosticSoftware = data['domain-agnostic-software'] || [];
- KI-gestützte Workflow-Empfehlungen
+ Forensic AI
Beschreiben Sie Ihr forensisches Szenario und erhalten Sie maßgeschneiderte Workflow-Empfehlungen
@@ -169,16 +169,16 @@ const domainAgnosticSoftware = data['domain-agnostic-software'] || [];
- ${this.renderHeader('Empfohlener DFIR-Workflow', originalQuery)}
+ ${this.renderHeader('Untersuchungsansatz', originalQuery)}
${this.renderContextualAnalysis(recommendation, 'workflow')}
${this.renderBackgroundKnowledge(recommendation.background_knowledge)}
${this.renderWorkflowPhases(toolsByPhase, phaseOrder, phaseNames)}
@@ -721,7 +721,7 @@ class AIQueryInterface {
displayToolResults(recommendation, originalQuery) {
const html = `
- ${this.renderHeader('Passende Empfehlungen', originalQuery)}
+ ${this.renderHeader('Handlungsempfehlung', originalQuery)}
${this.renderContextualAnalysis(recommendation, 'tool')}
${this.renderBackgroundKnowledge(recommendation.background_knowledge)}
${this.renderToolRecommendations(recommendation.recommended_tools)}
diff --git a/src/config/forensic.config.ts b/src/config/forensic.config.ts
deleted file mode 100644
index 5723854..0000000
--- a/src/config/forensic.config.ts
+++ /dev/null
@@ -1,126 +0,0 @@
-// src/config/forensic.config.ts
-// Centralized configuration for forensic RAG enhancements
-
-export const FORENSIC_CONFIG = {
- audit: {
- enabled: process.env.FORENSIC_AUDIT_ENABLED === 'true',
- detailLevel: (process.env.FORENSIC_AUDIT_DETAIL_LEVEL as 'minimal' | 'standard' | 'verbose') || 'standard',
- retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10),
- maxEntriesPerRequest: parseInt(process.env.FORENSIC_AUDIT_MAX_ENTRIES || '50', 10)
- },
- confidence: {
- embeddingsWeight: parseFloat(process.env.CONFIDENCE_EMBEDDINGS_WEIGHT || '0.3'),
- consensusWeight: parseFloat(process.env.CONFIDENCE_CONSENSUS_WEIGHT || '0.25'),
- domainMatchWeight: parseFloat(process.env.CONFIDENCE_DOMAIN_MATCH_WEIGHT || '0.25'),
- freshnessWeight: parseFloat(process.env.CONFIDENCE_FRESHNESS_WEIGHT || '0.2'),
- minimumThreshold: parseInt(process.env.CONFIDENCE_MINIMUM_THRESHOLD || '40', 10),
- highThreshold: parseInt(process.env.CONFIDENCE_HIGH_THRESHOLD || '80', 10),
- mediumThreshold: parseInt(process.env.CONFIDENCE_MEDIUM_THRESHOLD || '60', 10)
- },
- bias: {
- enabled: process.env.BIAS_DETECTION_ENABLED === 'true',
- popularityThreshold: parseFloat(process.env.BIAS_POPULARITY_THRESHOLD || '0.7'),
- diversityMinimum: parseFloat(process.env.BIAS_DIVERSITY_MINIMUM || '0.6'),
- domainMismatchThreshold: parseFloat(process.env.BIAS_DOMAIN_MISMATCH_THRESHOLD || '0.3'),
- warningThreshold: parseInt(process.env.BIAS_WARNING_THRESHOLD || '3', 10),
- celebrityTools: (process.env.BIAS_CELEBRITY_TOOLS || 'Volatility 3,Wireshark,Autopsy,Maltego').split(',').map(t => t.trim())
- },
- // Quality thresholds for various metrics
- quality: {
- minResponseLength: parseInt(process.env.QUALITY_MIN_RESPONSE_LENGTH || '50', 10),
- minSelectionCount: parseInt(process.env.QUALITY_MIN_SELECTION_COUNT || '1', 10),
- maxProcessingTime: parseInt(process.env.QUALITY_MAX_PROCESSING_TIME_MS || '30000', 10)
- },
- // Display preferences
- ui: {
- showAuditTrailByDefault: process.env.UI_SHOW_AUDIT_TRAIL_DEFAULT === 'true',
- showConfidenceScores: process.env.UI_SHOW_CONFIDENCE_SCORES !== 'false',
- showBiasWarnings: process.env.UI_SHOW_BIAS_WARNINGS !== 'false',
- auditTrailCollapsible: process.env.UI_AUDIT_TRAIL_COLLAPSIBLE !== 'false'
- }
-};
-
-// Validation function to ensure configuration is valid
-export function validateForensicConfig(): { valid: boolean; errors: string[] } {
- const errors: string[] = [];
-
- // Validate audit configuration
- if (FORENSIC_CONFIG.audit.retentionHours < 1 || FORENSIC_CONFIG.audit.retentionHours > 168) {
- errors.push('FORENSIC_AUDIT_RETENTION_HOURS must be between 1 and 168 (1 week)');
- }
-
- if (!['minimal', 'standard', 'verbose'].includes(FORENSIC_CONFIG.audit.detailLevel)) {
- errors.push('FORENSIC_AUDIT_DETAIL_LEVEL must be one of: minimal, standard, verbose');
- }
-
- // Validate confidence weights sum to approximately 1.0
- const weightSum = FORENSIC_CONFIG.confidence.embeddingsWeight +
- FORENSIC_CONFIG.confidence.consensusWeight +
- FORENSIC_CONFIG.confidence.domainMatchWeight +
- FORENSIC_CONFIG.confidence.freshnessWeight;
-
- if (Math.abs(weightSum - 1.0) > 0.05) {
- errors.push(`Confidence weights must sum to 1.0 (currently ${weightSum.toFixed(3)})`);
- }
-
- // Validate threshold ranges
- if (FORENSIC_CONFIG.confidence.minimumThreshold < 0 || FORENSIC_CONFIG.confidence.minimumThreshold > 100) {
- errors.push('CONFIDENCE_MINIMUM_THRESHOLD must be between 0 and 100');
- }
-
- if (FORENSIC_CONFIG.confidence.highThreshold <= FORENSIC_CONFIG.confidence.mediumThreshold) {
- errors.push('CONFIDENCE_HIGH_THRESHOLD must be greater than CONFIDENCE_MEDIUM_THRESHOLD');
- }
-
- // Validate bias thresholds
- if (FORENSIC_CONFIG.bias.popularityThreshold < 0 || FORENSIC_CONFIG.bias.popularityThreshold > 1) {
- errors.push('BIAS_POPULARITY_THRESHOLD must be between 0 and 1');
- }
-
- if (FORENSIC_CONFIG.bias.diversityMinimum < 0 || FORENSIC_CONFIG.bias.diversityMinimum > 1) {
- errors.push('BIAS_DIVERSITY_MINIMUM must be between 0 and 1');
- }
-
- return {
- valid: errors.length === 0,
- errors
- };
-}
-
-// Helper functions for configuration access
-export function isAuditEnabled(): boolean {
- return FORENSIC_CONFIG.audit.enabled;
-}
-
-export function getAuditDetailLevel(): 'minimal' | 'standard' | 'verbose' {
- return FORENSIC_CONFIG.audit.detailLevel;
-}
-
-export function getConfidenceThresholds() {
- return {
- minimum: FORENSIC_CONFIG.confidence.minimumThreshold,
- medium: FORENSIC_CONFIG.confidence.mediumThreshold,
- high: FORENSIC_CONFIG.confidence.highThreshold
- };
-}
-
-export function isBiasDetectionEnabled(): boolean {
- return FORENSIC_CONFIG.bias.enabled;
-}
-
-// Initialize and validate configuration on module load
-const configValidation = validateForensicConfig();
-if (!configValidation.valid) {
- console.warn('[FORENSIC CONFIG] Configuration validation failed:', configValidation.errors);
- // In development, we might want to throw an error
- if (process.env.NODE_ENV === 'development') {
- throw new Error(`Forensic configuration invalid: ${configValidation.errors.join(', ')}`);
- }
-}
-
-console.log('[FORENSIC CONFIG] Configuration loaded:', {
- auditEnabled: FORENSIC_CONFIG.audit.enabled,
- confidenceEnabled: true, // Always enabled
- biasDetectionEnabled: FORENSIC_CONFIG.bias.enabled,
- detailLevel: FORENSIC_CONFIG.audit.detailLevel
-});
\ No newline at end of file
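With forensic.config.ts deleted, the surviving FORENSIC_AUDIT_* variables are read directly in the pipeline constructor (see src/utils/aiPipeline.ts further down in this diff); the confidence-weight, bias, quality, and UI settings lose their reader entirely, and validateForensicConfig() has no replacement. A condensed sketch of the inlined audit configuration, mirroring the constructor code in this diff:

// Mirrors the auditConfig block added to ImprovedMicroTaskAIPipeline's constructor below.
const auditConfig = {
  enabled: process.env.FORENSIC_AUDIT_ENABLED === 'true',
  detailLevel: (process.env.FORENSIC_AUDIT_DETAIL_LEVEL as 'minimal' | 'standard' | 'verbose') || 'standard',
  retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10)
};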
diff --git a/src/data/tools.yaml b/src/data/tools.yaml
index fae358a..0c3a185 100644
--- a/src/data/tools.yaml
+++ b/src/data/tools.yaml
@@ -113,64 +113,6 @@ tools:
accessType: download
license: VSL
knowledgebase: false
- - name: TheHive 5
- icon: 🐝
- type: software
- description: >-
- Die zentrale Incident-Response-Plattform orchestriert komplexe
- Sicherheitsvorfälle vom ersten Alert bis zum Abschlussbericht. Jeder Case
- wird strukturiert durch Observables (IOCs), Tasks und Zeitleisten
- abgebildet. Die Cortex-Integration automatisiert Analysen durch Dutzende
- Analyzer - von VirusTotal-Checks bis Sandbox-Detonation.
- MISP-Synchronisation reichert Cases mit Threat-Intelligence an. Das
- ausgeklügelte Rollen- und Rechtesystem ermöglicht sichere Zusammenarbeit
- zwischen SOC-Analysten, Forensikern und Management. Templates
- standardisieren Response-Prozesse nach Incident-Typ. Die RESTful API
- integriert nahtlos mit SIEM, SOAR und Ticketing-Systemen. Metrics und
- KPIs messen die Team-Performance. Die Community Edition bleibt kostenlos
- für kleinere Teams, während Gold/Platinum-Lizenzen Enterprise-Features
- bieten.
- domains:
- - incident-response
- - static-investigations
- - malware-analysis
- - network-forensics
- - fraud-investigation
- phases:
- - data-collection
- - examination
- - analysis
- - reporting
- platforms:
- - Web
- related_software:
- - MISP
- - Cortex
- - Elasticsearch
- domain-agnostic-software:
- - collaboration-general
- skillLevel: intermediate
- accessType: server-based
- url: https://strangebee.com/thehive/
- projectUrl: ''
- license: Community Edition (Discontinued) / Commercial
- knowledgebase: false
- statusUrl: https://uptime.example.lab/api/badge/1/status
- tags:
- - web-interface
- - case-management
- - collaboration
- - api
- - workflow
- - multi-user-support
- - cortex-analyzer
- - misp-integration
- - playbooks
- - metrics
- - rbac
- - template-driven
- related_concepts:
- - Digital Evidence Chain of Custody
- name: MISP
icon: 🌐
type: software
@@ -223,7 +165,6 @@ tools:
related_concepts:
- Hash Functions & Digital Signatures
related_software:
- - TheHive 5
- Cortex
- OpenCTI
- name: DFIR-IRIS
@@ -260,7 +201,6 @@ tools:
platforms:
- Web
related_software:
- - TheHive 5
- MISP
- OpenCTI
domain-agnostic-software:
@@ -3427,6 +3367,244 @@ tools:
accessType: download
license: "MPL\_/ AGPL"
knowledgebase: false
+ - name: ShadowExplorer
+ icon: 🗂️
+ type: software
+ description: >-
+ Das schlanke Windows-Tool macht Volume-Shadow-Copy-Snapshots auch in Home-Editionen sichtbar und erlaubt das komfortable Durchstöbern sowie Wiederherstellen früherer Datei-Versionen. Damit lassen sich versehentlich gelöschte oder überschriebene Dateien in Sekunden zurückholen – geeignet für schnelle Triage und klassische Datenträgerforensik.
+ domains:
+ - static-investigations
+ - incident-response
+ phases:
+ - examination
+ - analysis
+ platforms:
+ - Windows
+ related_software:
+ - OSFMount
+ - PhotoRec
+ domain-agnostic-software: null
+ skillLevel: novice
+ accessType: download
+ url: https://www.shadowexplorer.com/
+ license: Freeware
+ knowledgebase: false
+ tags:
+ - gui
+ - shadow-copy
+ - snapshot-browsing
+ - file-recovery
+ - previous-versions
+ - scenario:file_recovery
+ - point-in-time-restore
+ related_concepts:
+ - Digital Evidence Chain of Custody
+
+
+ - name: Sonic Visualiser
+ icon: 🎵
+ type: software
+ description: >-
+ Die Open-Source-Audio-Analyse-Suite wird in der Forensik eingesetzt,
+ um Wave- und Kompressionsformate bis auf Sample-Ebene zu untersuchen.
+ Spektrogramm-Visualisierung, Zeit-/Frequenz-Annotationen und
+ Transkriptions-Plugins (Vamp) helfen, Manipulationen wie
+ Bandpass-Filter, Time-Stretching oder Insert-Edits nachzuweisen.
+ FFT- und Mel-Spectral-Views decken versteckte Audio-Watermarks oder
+ Steganografie auf. Export-Funktionen in CSV/JSON erlauben die
+ Weiterverarbeitung in Python-Notebooks oder SIEM-Pipelines.
+ Ideal für Voice-Authentication-Checks, Deep-Fake-Erkennung
+ und Beweisaufbereitung vor Gericht.
+ skillLevel: intermediate
+ url: https://www.sonicvisualiser.org/
+ domains:
+ - static-investigations
+ - fraud-investigation
+ phases:
+ - examination
+ - analysis
+ - reporting
+ platforms:
+ - Windows
+ - Linux
+ - macOS
+ accessType: download
+ license: GPL-2.0
+ knowledgebase: false
+ tags:
+ - gui
+ - audio-forensics
+ - spectrogram
+ - plugin-support
+ - annotation
+ - csv-export
+ related_concepts: []
+ related_software:
+ - Audacity
+
+ - name: Dissect
+ icon: 🧩
+ type: software
+ description: >-
+ Fox-ITs Python-Framework abstrahiert Windows- und Linux-Speicherabbilder
+ in virtuelle Objekte (Prozesse, Dateien, Registry, Kernel-Strukturen),
+ ohne zuvor ein Profil definieren zu müssen. Modularer
+ Hypervisor-Layer erlaubt das Mounten und gleichzeitige Analysieren
+ mehrerer Memory-Dumps – perfekt für großflächige Incident-Response.
+ Plugins dekodieren PTEs, handle tables, APC-Queues und liefern
+ YARA-kompatible Scans. Die Zero-Copy-Architektur beschleunigt Queries auf
+ Multi-GB-Images signifikant. Unterstützt Windows 11 24H2-Kernel sowie
+ Linux 6.x-Schichten ab Juli 2025.
+ skillLevel: advanced
+ url: https://github.com/fox-it/dissect
+ domains:
+ - incident-response
+ - malware-analysis
+ - static-investigations
+ phases:
+ - examination
+ - analysis
+ platforms:
+ - Windows
+ - Linux
+ - macOS
+ accessType: download
+ license: Apache 2.0
+ knowledgebase: false
+ tags:
+ - command-line
+ - memory-analysis
+ - plugin-support
+ - python-library
+ - zero-copy
+ - profile-less
+ related_concepts:
+ - Regular Expressions (Regex)
+ related_software:
+ - Volatility 3
+ - Rekall
+
+ - name: Docker Explorer
+ icon: 🐳
+ type: software
+ description: >-
+ Googles Forensik-Toolkit zerlegt Offline-Docker-Volumes und
+ Overlay-Dateisysteme ohne laufenden Daemon. Es extrahiert
+ Container-Config, Image-Layer, ENV-Variablen, Mounted-Secrets
+ und schreibt Timeline-fähige Metadata-JSONs. Unterstützt btrfs,
+ overlay2 und zfs Storage-Driver sowie Docker Desktop (macOS/Windows).
+ Perfekt, um bösartige Images nach Supply-Chain-Attacken zu enttarnen
+ oder flüchtige Container nach einem Incident nachträglich zu analysieren.
+ skillLevel: intermediate
+ url: https://github.com/google/docker-explorer
+ domains:
+ - cloud-forensics
+ - incident-response
+ - static-investigations
+ phases:
+ - data-collection
+ - examination
+ - analysis
+ platforms:
+ - Linux
+ - macOS
+ - Windows
+ accessType: download
+ license: Apache 2.0
+ knowledgebase: false
+ tags:
+ - command-line
+ - container-forensics
+ - docker
+ - timeline
+ - json-export
+ - supply-chain
+ related_concepts: []
+ related_software:
+ - Velociraptor
+ - osquery
+
+ - name: Ghiro
+ icon: 🖼️
+ type: software
+ description: >-
+ Die Web-basierte Bildforensik-Plattform automatisiert EXIF-Analyse,
+ Hash-Matching, Error-Level-Evaluation (ELA) und
+ Steganografie-Erkennung für große Dateibatches. Unterstützt
+ Gesichts- und NSFW-Detection sowie GPS-Reverse-Geocoding für
+ Bewegungsprofile. Reports sind gerichtsfest
+ versioniert, REST-API und Celery-Worker skalieren auf
+ Millionen Bilder – ideal für CSAM-Ermittlungen oder Fake-News-Prüfung.
+ skillLevel: intermediate
+ url: https://getghiro.org/
+ domains:
+ - static-investigations
+ - fraud-investigation
+ - mobile-forensics
+ phases:
+ - examination
+ - analysis
+ - reporting
+ platforms:
+ - Web
+ - Linux
+ accessType: server-based
+ license: GPL-2.0
+ knowledgebase: false
+ tags:
+ - web-interface
+ - image-forensics
+ - exif-analysis
+ - steganography
+ - nsfw-detection
+ - batch-processing
+ related_concepts:
+ - Hash Functions & Digital Signatures
+ related_software:
+ - ExifTool
+ - PhotoRec
+
+ - name: Sherloq
+ icon: 🔍
+ type: software
+ description: >-
+ Das Python-GUI-Toolkit für visuelle Datei-Analyse kombiniert
+ klassische Reverse-Steganografie-Techniken (LSB, Palette-Tweaking,
+ DCT-Coefficient-Scanning) mit modernen CV-Algorithmen.
+ Heatmaps und Histogramm-Diffs zeigen Manipulations-Hotspots,
+ während eine „Carve-All-Layers“-Funktion versteckte Daten in PNG,
+ JPEG, BMP, GIF und Audio-Spectra aufspürt. Plugins für zsteg,
+ binwalk und exiftool erweitern die Pipeline.
+ Eine Must-have-Ergänzung zu Ghidra & friends, wenn
+ Malware Dateien als Dead-Drop nutzt.
+ skillLevel: intermediate
+ url: https://github.com/GuidoBartoli/sherloq
+ domains:
+ - malware-analysis
+ - static-investigations
+ phases:
+ - examination
+ - analysis
+ platforms:
+ - Windows
+ - Linux
+ - macOS
+ accessType: download
+ license: MIT
+ knowledgebase: false
+ tags:
+ - gui
+ - image-forensics
+ - steganography
+ - lsb-extraction
+ - histogram-analysis
+ - plugin-support
+ related_concepts:
+ - Regular Expressions (Regex)
+ related_software:
+ - Ghiro
+ - CyberChef
+
- name: Cortex
type: software
description: >-
diff --git a/src/pages/api/ai/enhance-input.ts b/src/pages/api/ai/enhance-input.ts
index 86086e2..1ebb2a8 100644
--- a/src/pages/api/ai/enhance-input.ts
+++ b/src/pages/api/ai/enhance-input.ts
@@ -94,18 +94,15 @@ ${input}
`.trim();
}
-// Enhanced AI service call function
async function callAIService(prompt: string): Promise<Response> {
const endpoint = AI_ENDPOINT;
const apiKey = AI_ANALYZER_API_KEY;
const model = AI_ANALYZER_MODEL;
- // Simple headers - add auth only if API key exists
let headers: Record<string, string> = {
'Content-Type': 'application/json'
};
- // Add authentication if API key is provided
if (apiKey) {
headers['Authorization'] = `Bearer ${apiKey}`;
console.log('[ENHANCE API] Using API key authentication');
@@ -113,7 +110,6 @@ async function callAIService(prompt: string): Promise {
console.log('[ENHANCE API] No API key - making request without authentication');
}
- // Simple request body
const requestBody = {
model,
messages: [{ role: 'user', content: prompt }],
@@ -124,8 +120,6 @@ async function callAIService(prompt: string): Promise {
presence_penalty: 0.1
};
- // FIXED: This function is already being called through enqueueApiCall in the main handler
- // So we can use direct fetch here since the queuing happens at the caller level
return fetch(`${endpoint}/v1/chat/completions`, {
method: 'POST',
headers,
@@ -214,7 +208,7 @@ export const POST: APIRoute = async ({ request }) => {
success: true,
questions,
taskId,
- inputComplete: questions.length === 0 // Flag to indicate if input seems complete
+ inputComplete: questions.length === 0
}), {
status: 200,
headers: { 'Content-Type': 'application/json' }
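callAIService returns the raw fetch Response, so the caller still has to check the status and unwrap the completion. A minimal consumer sketch, assuming the endpoint answers in the usual OpenAI-compatible shape (choices[0].message.content); the actual caller is not part of this hunk:

// Hypothetical caller; assumes an OpenAI-compatible /v1/chat/completions response.
const response = await callAIService(prompt);
if (!response.ok) {
  throw new Error(`AI service returned ${response.status}`);
}
const data = await response.json();
const content: string = data?.choices?.[0]?.message?.content ?? '';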
diff --git a/src/styles/global.css b/src/styles/global.css
index 4c59ca9..b3540f5 100644
--- a/src/styles/global.css
+++ b/src/styles/global.css
@@ -2087,6 +2087,7 @@ input[type="checkbox"] {
gap: 1rem;
max-width: 1200px;
margin: 0 auto;
+ margin-top: 1rem;
}
.phase-header {
diff --git a/src/utils/aiPipeline.ts b/src/utils/aiPipeline.ts
index 003523d..5b14b4d 100644
--- a/src/utils/aiPipeline.ts
+++ b/src/utils/aiPipeline.ts
@@ -31,7 +31,6 @@ interface AnalysisResult {
};
}
-// NEW: Audit Trail Types
interface AuditEntry {
timestamp: number;
phase: string; // 'retrieval', 'selection', 'micro-task-N'
@@ -40,10 +39,9 @@ interface AuditEntry {
output: any; // What came out of this step
confidence: number; // 0-100: How confident we are in this step
processingTimeMs: number;
- metadata: Record<string, any>; // Additional context
+ metadata: Record<string, any>;
}
-// Enhanced AnalysisContext with Audit Trail
interface AnalysisContext {
userQuery: string;
mode: string;
@@ -62,7 +60,6 @@ interface AnalysisContext {
seenToolNames: Set<string>;
- // NEW: Audit Trail
auditTrail: AuditEntry[];
}
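For reference, a single audit entry built by addAuditEntry() might look like this (illustrative values only; the field names follow the AuditEntry interface above and the addAuditEntry calls later in this diff):

// Illustrative AuditEntry; every value here is made up.
const exampleEntry: AuditEntry = {
  timestamp: 1736160000000,
  phase: 'selection',
  action: 'ai-tool-selection',
  input: { candidateCount: 50, mode: 'workflow' },
  output: { selectedToolCount: 12, selectedConceptCount: 4 },
  confidence: 85,
  processingTimeMs: 2140,
  metadata: { aiModel: 'example-model', selectionMethod: 'embeddings_candidates' }
};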
@@ -78,17 +75,24 @@ class ImprovedMicroTaskAIPipeline {
private similarityThreshold: number;
private microTaskDelay: number;
+ private embeddingSelectionLimit: number;
+ private embeddingConceptsLimit: number;
+
+ private noEmbeddingsToolLimit: number;
+ private noEmbeddingsConceptLimit: number;
+
+ private embeddingsMinTools: number;
+ private embeddingsMaxReductionRatio: number;
+
private maxContextTokens: number;
private maxPromptTokens: number;
- // NEW: Audit Configuration
private auditConfig: {
enabled: boolean;
detailLevel: 'minimal' | 'standard' | 'verbose';
retentionHours: number;
};
- // NEW: Temporary audit storage for pre-context operations
private tempAuditEntries: AuditEntry[] = [];
constructor() {
@@ -98,20 +102,35 @@ class ImprovedMicroTaskAIPipeline {
model: this.getEnv('AI_ANALYZER_MODEL')
};
- this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '60', 10);
- this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '60', 10);
- this.similarityThreshold = 0.3;
+ this.maxSelectedItems = parseInt(process.env.AI_MAX_SELECTED_ITEMS || '25', 10);
+ this.embeddingCandidates = parseInt(process.env.AI_EMBEDDING_CANDIDATES || '50', 10);
+ this.similarityThreshold = parseFloat(process.env.AI_SIMILARITY_THRESHOLD || '0.3');
this.microTaskDelay = parseInt(process.env.AI_MICRO_TASK_DELAY_MS || '500', 10);
+ this.embeddingSelectionLimit = parseInt(process.env.AI_EMBEDDING_SELECTION_LIMIT || '30', 10);
+ this.embeddingConceptsLimit = parseInt(process.env.AI_EMBEDDING_CONCEPTS_LIMIT || '15', 10);
+
+ this.noEmbeddingsToolLimit = parseInt(process.env.AI_NO_EMBEDDINGS_TOOL_LIMIT || '0', 10);
+ this.noEmbeddingsConceptLimit = parseInt(process.env.AI_NO_EMBEDDINGS_CONCEPT_LIMIT || '0', 10);
+
+ this.embeddingsMinTools = parseInt(process.env.AI_EMBEDDINGS_MIN_TOOLS || '8', 10);
+ this.embeddingsMaxReductionRatio = parseFloat(process.env.AI_EMBEDDINGS_MAX_REDUCTION_RATIO || '0.75');
+
this.maxContextTokens = parseInt(process.env.AI_MAX_CONTEXT_TOKENS || '4000', 10);
this.maxPromptTokens = parseInt(process.env.AI_MAX_PROMPT_TOKENS || '1500', 10);
- // NEW: Initialize Audit Configuration
this.auditConfig = {
enabled: process.env.FORENSIC_AUDIT_ENABLED === 'true',
detailLevel: (process.env.FORENSIC_AUDIT_DETAIL_LEVEL as any) || 'standard',
retentionHours: parseInt(process.env.FORENSIC_AUDIT_RETENTION_HOURS || '72', 10)
};
+
+ console.log('[AI PIPELINE] Configuration loaded:', {
+ embeddingCandidates: this.embeddingCandidates,
+ embeddingSelection: `${this.embeddingSelectionLimit} tools, ${this.embeddingConceptsLimit} concepts`,
+ noEmbeddingsLimits: `${this.noEmbeddingsToolLimit || 'unlimited'} tools, ${this.noEmbeddingsConceptLimit || 'unlimited'} concepts`,
+ auditEnabled: this.auditConfig.enabled
+ });
}
private getEnv(key: string): string {
@@ -122,7 +141,6 @@ class ImprovedMicroTaskAIPipeline {
return value;
}
- // NEW: Audit Trail Utility Functions
private addAuditEntry(
context: AnalysisContext | null,
phase: string,
@@ -149,22 +167,18 @@ class ImprovedMicroTaskAIPipeline {
if (context) {
context.auditTrail.push(auditEntry);
} else {
- // Store in temporary array for later merging
this.tempAuditEntries.push(auditEntry);
}
- // Log for debugging when audit is enabled
console.log(`[AUDIT] ${phase}/${action}: ${confidence}% confidence, ${Date.now() - startTime}ms`);
}
- // NEW: Merge temporary audit entries into context
private mergeTemporaryAuditEntries(context: AnalysisContext): void {
if (!this.auditConfig.enabled || this.tempAuditEntries.length === 0) return;
const entryCount = this.tempAuditEntries.length;
- // Add temp entries to the beginning of the context audit trail
context.auditTrail.unshift(...this.tempAuditEntries);
- this.tempAuditEntries = []; // Clear temp storage
+ this.tempAuditEntries = [];
console.log(`[AUDIT] Merged ${entryCount} temporary audit entries into context`);
}
@@ -196,15 +210,12 @@ class ImprovedMicroTaskAIPipeline {
let confidence = 60; // Base confidence
- // Good selection ratio (not too many, not too few)
if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
else if (selectionRatio <= 0.05) confidence -= 10; // Too few
else confidence -= 15; // Too many
- // Has detailed reasoning
if (hasReasoning) confidence += 15;
- // Selected tools have good distribution
if (result.selectedConcepts && result.selectedConcepts.length > 0) confidence += 5;
return Math.min(95, Math.max(25, confidence));
@@ -228,26 +239,106 @@ class ImprovedMicroTaskAIPipeline {
private safeParseJSON(jsonString: string, fallback: any = null): any {
try {
- const cleaned = jsonString
+ let cleaned = jsonString
.replace(/^```json\s*/i, '')
.replace(/\s*```\s*$/g, '')
.trim();
+ if (!cleaned.endsWith('}') && !cleaned.endsWith(']')) {
+ console.warn('[AI PIPELINE] JSON appears truncated, attempting recovery...');
+
+ let lastCompleteStructure = '';
+ let braceCount = 0;
+ let bracketCount = 0;
+ let inString = false;
+ let escaped = false;
+
+ for (let i = 0; i < cleaned.length; i++) {
+ const char = cleaned[i];
+
+ if (escaped) {
+ escaped = false;
+ continue;
+ }
+
+ if (char === '\\') {
+ escaped = true;
+ continue;
+ }
+
+ if (char === '"' && !escaped) {
+ inString = !inString;
+ continue;
+ }
+
+ if (!inString) {
+ if (char === '{') braceCount++;
+ if (char === '}') braceCount--;
+ if (char === '[') bracketCount++;
+ if (char === ']') bracketCount--;
+
+ if (braceCount === 0 && bracketCount === 0 && (char === '}' || char === ']')) {
+ lastCompleteStructure = cleaned.substring(0, i + 1);
+ }
+ }
+ }
+
+ if (lastCompleteStructure) {
+ console.log('[AI PIPELINE] Attempting to parse recovered JSON structure...');
+ cleaned = lastCompleteStructure;
+ } else {
+ // Close any unterminated structures, innermost first: arrays before the enclosing object
+ if (bracketCount > 0) {
+ cleaned += ']'.repeat(bracketCount);
+ console.log('[AI PIPELINE] Added closing bracket(s) to truncated JSON');
+ }
+ if (braceCount > 0) {
+ cleaned += '}'.repeat(braceCount);
+ console.log('[AI PIPELINE] Added closing brace(s) to truncated JSON');
+ }
+ }
+ }
+
const parsed = JSON.parse(cleaned);
+
+ if (parsed && typeof parsed === 'object') {
+ if (parsed.selectedTools === undefined) parsed.selectedTools = [];
+ if (parsed.selectedConcepts === undefined) parsed.selectedConcepts = [];
+
+ if (!Array.isArray(parsed.selectedTools)) parsed.selectedTools = [];
+ if (!Array.isArray(parsed.selectedConcepts)) parsed.selectedConcepts = [];
+ }
+
return parsed;
} catch (error) {
console.warn('[AI PIPELINE] JSON parsing failed:', error.message);
- console.warn('[AI PIPELINE] Raw content:', jsonString.slice(0, 200));
+ console.warn('[AI PIPELINE] Raw content (first 300 chars):', jsonString.slice(0, 300));
+ console.warn('[AI PIPELINE] Raw content (last 300 chars):', jsonString.slice(-300));
+
+ if (jsonString.includes('selectedTools')) {
+ const toolMatches = jsonString.match(/"([^"]+)"/g);
+ if (toolMatches && toolMatches.length > 0) {
+ console.log('[AI PIPELINE] Attempting partial recovery from broken JSON...');
+ const possibleTools = toolMatches
+ .map(match => match.replace(/"/g, ''))
+ .filter(name => name.length > 2 && !['selectedTools', 'selectedConcepts', 'reasoning'].includes(name))
+ .slice(0, 15); // Reasonable limit
+
+ if (possibleTools.length > 0) {
+ console.log(`[AI PIPELINE] Recovered ${possibleTools.length} possible tool names from broken JSON`);
+ return {
+ selectedTools: possibleTools,
+ selectedConcepts: [],
+ reasoning: 'Recovered from truncated response'
+ };
+ }
+ }
+ }
+
return fallback;
}
}
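Taken together, this means a response that is cut off mid-structure can often still be used. An illustrative call from inside the class (safeParseJSON is private), assuming the bracket-then-brace closing shown above:

// A response truncated inside the "selectedConcepts" array: the recovery appends the
// missing "]" and "}", and the partial object still parses.
const truncated = '```json\n{"selectedTools": ["Volatility 3", "Autopsy"], "reasoning": "Memory focus", "selectedConcepts": [';
const parsed = this.safeParseJSON(truncated, { selectedTools: [], selectedConcepts: [] });
// parsed.selectedTools → ["Volatility 3", "Autopsy"]; parsed.selectedConcepts → []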
- private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string): boolean {
- if (context.seenToolNames.has(tool.name)) {
- console.log(`[AI PIPELINE] Skipping duplicate tool: ${tool.name}`);
- return false;
- }
-
+ private addToolToSelection(context: AnalysisContext, tool: any, phase: string, priority: string, justification?: string): boolean {
context.seenToolNames.add(tool.name);
if (!context.selectedTools) context.selectedTools = [];
@@ -266,56 +357,62 @@ class ImprovedMicroTaskAIPipeline {
let candidateConcepts: any[] = [];
let selectionMethod = 'unknown';
+ // WAIT for embeddings initialization if embeddings are enabled
+ if (process.env.AI_EMBEDDINGS_ENABLED === 'true') {
+ try {
+ console.log('[AI PIPELINE] Waiting for embeddings initialization...');
+ await embeddingsService.waitForInitialization();
+ console.log('[AI PIPELINE] Embeddings ready, proceeding with similarity search');
+ } catch (error) {
+ console.error('[AI PIPELINE] Embeddings initialization failed, falling back to full dataset:', error);
+ }
+ }
+
if (embeddingsService.isEnabled()) {
const embeddingsStart = Date.now();
const similarItems = await embeddingsService.findSimilar(
userQuery,
this.embeddingCandidates,
this.similarityThreshold
- ) as SimilarityResult[]; // Type assertion for similarity property
+ ) as SimilarityResult[];
- console.log(`[IMPROVED PIPELINE] Embeddings found ${similarItems.length} similar items`);
+ console.log(`[AI PIPELINE] Embeddings found ${similarItems.length} similar items`);
- // FIXED: Create lookup maps for O(1) access while preserving original data
const toolsMap = new Map(toolsData.tools.map((tool: any) => [tool.name, tool]));
const conceptsMap = new Map(toolsData.concepts.map((concept: any) => [concept.name, concept]));
- // FIXED: Process in similarity order, preserving the ranking
const similarTools = similarItems
.filter((item): item is SimilarityResult => item.type === 'tool')
.map(item => toolsMap.get(item.name))
- .filter((tool): tool is any => tool !== undefined); // Proper type guard
+ .filter((tool): tool is any => tool !== undefined);
const similarConcepts = similarItems
.filter((item): item is SimilarityResult => item.type === 'concept')
.map(item => conceptsMap.get(item.name))
- .filter((concept): concept is any => concept !== undefined); // Proper type guard
+ .filter((concept): concept is any => concept !== undefined);
- console.log(`[IMPROVED PIPELINE] Similarity-ordered results: ${similarTools.length} tools, ${similarConcepts.length} concepts`);
+ console.log(`[AI PIPELINE] Similarity-ordered results: ${similarTools.length} tools, ${similarConcepts.length} concepts`);
- // Log the first few tools to verify ordering is preserved
- if (similarTools.length > 0) {
- console.log(`[IMPROVED PIPELINE] Top similar tools (in similarity order):`);
- similarTools.slice(0, 5).forEach((tool, idx) => {
- const originalSimilarItem = similarItems.find(item => item.name === tool.name);
- console.log(` ${idx + 1}. ${tool.name} (similarity: ${originalSimilarItem?.similarity?.toFixed(4) || 'N/A'})`);
- });
- }
+ const totalAvailableTools = toolsData.tools.length;
+ const reductionRatio = similarTools.length / totalAvailableTools;
- if (similarTools.length >= 15) {
+ if (similarTools.length >= this.embeddingsMinTools && reductionRatio <= this.embeddingsMaxReductionRatio) {
candidateTools = similarTools;
candidateConcepts = similarConcepts;
selectionMethod = 'embeddings_candidates';
- console.log(`[IMPROVED PIPELINE] Using embeddings candidates in similarity order: ${candidateTools.length} tools`);
+ console.log(`[AI PIPELINE] Using embeddings filtering: ${totalAvailableTools} → ${similarTools.length} tools (${(reductionRatio * 100).toFixed(1)}% of the dataset retained)`);
} else {
- console.log(`[IMPROVED PIPELINE] Embeddings insufficient (${similarTools.length} < 15), using full dataset`);
+ if (similarTools.length < this.embeddingsMinTools) {
+ console.log(`[AI PIPELINE] Embeddings found too few tools (${similarTools.length} < ${this.embeddingsMinTools}), using full dataset`);
+ } else {
+ console.log(`[AI PIPELINE] Embeddings didn't filter enough (${(reductionRatio * 100).toFixed(1)}% > ${(this.embeddingsMaxReductionRatio * 100).toFixed(1)}%), using full dataset`);
+ }
candidateTools = toolsData.tools;
candidateConcepts = toolsData.concepts;
selectionMethod = 'full_dataset';
}
- // NEW: Add Audit Entry for Embeddings Search with ordering verification
if (this.auditConfig.enabled) {
this.addAuditEntry(null, 'retrieval', 'embeddings-search',
{ query: userQuery, threshold: this.similarityThreshold, candidates: this.embeddingCandidates },
@@ -323,21 +420,29 @@ class ImprovedMicroTaskAIPipeline {
candidatesFound: similarItems.length,
toolsInOrder: similarTools.slice(0, 3).map((t: any) => t.name),
conceptsInOrder: similarConcepts.slice(0, 3).map((c: any) => c.name),
- orderingPreserved: true
+ reductionRatio: reductionRatio,
+ usingEmbeddings: selectionMethod === 'embeddings_candidates',
+ totalAvailable: totalAvailableTools,
+ filtered: similarTools.length
},
- similarTools.length >= 15 ? 85 : 60,
+ selectionMethod === 'embeddings_candidates' ? 85 : 60,
embeddingsStart,
- { selectionMethod, embeddingsEnabled: true, orderingFixed: true }
+ {
+ selectionMethod,
+ embeddingsEnabled: true,
+ reductionAchieved: selectionMethod === 'embeddings_candidates',
+ tokenSavingsExpected: selectionMethod === 'embeddings_candidates'
+ }
);
}
} else {
- console.log(`[IMPROVED PIPELINE] Embeddings disabled, using full dataset`);
+ console.log(`[AI PIPELINE] Embeddings disabled or not ready, using full dataset`);
candidateTools = toolsData.tools;
candidateConcepts = toolsData.concepts;
selectionMethod = 'full_dataset';
}
- console.log(`[IMPROVED PIPELINE] AI will analyze ${candidateTools.length} candidate tools (ordering preserved: ${selectionMethod === 'embeddings_candidates'})`);
+ console.log(`[AI PIPELINE] AI will analyze ${candidateTools.length} candidate tools (method: ${selectionMethod})`);
const finalSelection = await this.aiSelectionWithFullData(userQuery, candidateTools, candidateConcepts, mode, selectionMethod);
return {
@@ -387,15 +492,44 @@ class ImprovedMicroTaskAIPipeline {
related_software: concept.related_software || []
}));
- // Generate the German prompt with tool data
+ let toolsToSend: any[];
+ let conceptsToSend: any[];
+
+ if (selectionMethod === 'embeddings_candidates') {
+ toolsToSend = toolsWithFullData.slice(0, this.embeddingSelectionLimit);
+ conceptsToSend = conceptsWithFullData.slice(0, this.embeddingConceptsLimit);
+
+ console.log(`[AI PIPELINE] Embeddings enabled: sending top ${toolsToSend.length} similarity-ordered tools`);
+ } else {
+ const maxTools = this.noEmbeddingsToolLimit > 0 ?
+ Math.min(this.noEmbeddingsToolLimit, candidateTools.length) :
+ candidateTools.length;
+
+ const maxConcepts = this.noEmbeddingsConceptLimit > 0 ?
+ Math.min(this.noEmbeddingsConceptLimit, candidateConcepts.length) :
+ candidateConcepts.length;
+
+ toolsToSend = toolsWithFullData.slice(0, maxTools);
+ conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);
+
+ console.log(`[AI PIPELINE] Embeddings disabled: sending ${toolsToSend.length}/${candidateTools.length} tools (limit: ${this.noEmbeddingsToolLimit || 'none'})`);
+ }
+
const basePrompt = getPrompt('toolSelection', mode, userQuery, selectionMethod, this.maxSelectedItems);
const prompt = `${basePrompt}
VERFÜGBARE TOOLS (mit vollständigen Daten):
-${JSON.stringify(toolsWithFullData.slice(0, 30), null, 2)}
+${JSON.stringify(toolsToSend, null, 2)}
VERFÜGBARE KONZEPTE (mit vollständigen Daten):
-${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
+${JSON.stringify(conceptsToSend, null, 2)}`;
+
+ const estimatedTokens = this.estimateTokens(prompt);
+ console.log(`[AI PIPELINE] Method: ${selectionMethod}, Tools: ${toolsToSend.length}, Estimated tokens: ~${estimatedTokens}`);
+
+ if (estimatedTokens > 35000) {
+ console.warn(`[AI PIPELINE] WARNING: Prompt tokens (${estimatedTokens}) may exceed model limits`);
+ }
try {
const response = await this.callAI(prompt, 2500);
@@ -403,16 +537,15 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
const result = this.safeParseJSON(response, null);
if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
- console.error('[IMPROVED PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
+ console.error('[AI PIPELINE] AI selection returned invalid structure:', response.slice(0, 200));
- // NEW: Add Audit Entry for Failed Selection
if (this.auditConfig.enabled) {
this.addAuditEntry(null, 'selection', 'ai-tool-selection-failed',
{ candidateCount: candidateTools.length, mode, prompt: prompt.slice(0, 200) },
{ error: 'Invalid JSON structure', response: response.slice(0, 200) },
- 10, // Very low confidence
+ 10,
selectionStart,
- { aiModel: this.config.model, selectionMethod }
+ { aiModel: this.config.model, selectionMethod, tokensSent: estimatedTokens, toolsSent: toolsToSend.length }
);
}
@@ -421,19 +554,15 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
if (totalSelected === 0) {
- console.error('[IMPROVED PIPELINE] AI selection returned no tools');
+ console.error('[AI PIPELINE] AI selection returned no tools');
throw new Error('AI selection returned empty selection');
}
- console.log(`[IMPROVED PIPELINE] AI selected: ${result.selectedTools.length} tools, ${result.selectedConcepts.length} concepts`);
- console.log(`[IMPROVED PIPELINE] AI reasoning: ${result.reasoning}`);
+ console.log(`[AI PIPELINE] AI selected: ${result.selectedTools.length} tools, ${result.selectedConcepts.length} concepts from ${toolsToSend.length} candidates`);
const selectedTools = candidateTools.filter(tool => result.selectedTools.includes(tool.name));
const selectedConcepts = candidateConcepts.filter(concept => result.selectedConcepts.includes(concept.name));
- console.log(`[IMPROVED PIPELINE] Final selection: ${selectedTools.length} tools with bias prevention applied`);
-
- // NEW: Add Audit Entry for Successful Selection
if (this.auditConfig.enabled) {
const confidence = this.calculateSelectionConfidence(result, candidateTools.length);
@@ -443,11 +572,12 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
selectedToolCount: result.selectedTools.length,
selectedConceptCount: result.selectedConcepts.length,
reasoning: result.reasoning?.slice(0, 200) + '...',
- finalToolNames: selectedTools.map(t => t.name)
+ finalToolNames: selectedTools.map(t => t.name),
+ selectionEfficiency: `${toolsToSend.length} → ${result.selectedTools.length}`
},
confidence,
selectionStart,
- { aiModel: this.config.model, selectionMethod, promptTokens: this.estimateTokens(prompt) }
+ { aiModel: this.config.model, selectionMethod, promptTokens: estimatedTokens, toolsSent: toolsToSend.length }
);
}
@@ -457,74 +587,26 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
};
} catch (error) {
- console.error('[IMPROVED PIPELINE] AI selection failed:', error);
+ console.error('[AI PIPELINE] AI selection failed:', error);
- // NEW: Add Audit Entry for Selection Error
if (this.auditConfig.enabled) {
this.addAuditEntry(null, 'selection', 'ai-tool-selection-error',
{ candidateCount: candidateTools.length, mode },
{ error: error.message },
- 5, // Very low confidence
+ 5,
selectionStart,
- { aiModel: this.config.model, selectionMethod }
+ { aiModel: this.config.model, selectionMethod, tokensSent: estimatedTokens }
);
}
-
- console.log('[IMPROVED PIPELINE] Using emergency keyword-based selection');
- return this.emergencyKeywordSelection(userQuery, candidateTools, candidateConcepts, mode);
+ throw error;
}
}
- private emergencyKeywordSelection(userQuery: string, candidateTools: any[], candidateConcepts: any[], mode: string) {
- const emergencyStart = Date.now();
-
- const queryLower = userQuery.toLowerCase();
- const keywords = queryLower.split(/\s+/).filter(word => word.length > 3);
-
- const scoredTools = candidateTools.map(tool => {
- const toolText = (
- tool.name + ' ' +
- tool.description + ' ' +
- (tool.tags || []).join(' ') + ' ' +
- (tool.platforms || []).join(' ') + ' ' +
- (tool.domains || []).join(' ')
- ).toLowerCase();
-
- const score = keywords.reduce((acc, keyword) => {
- return acc + (toolText.includes(keyword) ? 1 : 0);
- }, 0);
-
- return { tool, score };
- }).filter(item => item.score > 0)
- .sort((a, b) => b.score - a.score);
-
- const maxTools = mode === 'workflow' ? 20 : 8;
- const selectedTools = scoredTools.slice(0, maxTools).map(item => item.tool);
-
- console.log(`[IMPROVED PIPELINE] Emergency selection: ${selectedTools.length} tools, keywords: ${keywords.slice(0, 5).join(', ')}`);
-
- // NEW: Add Audit Entry for Emergency Selection
- if (this.auditConfig.enabled) {
- this.addAuditEntry(null, 'selection', 'emergency-keyword-selection',
- { keywords: keywords.slice(0, 10), candidateCount: candidateTools.length },
- { selectedCount: selectedTools.length, topScores: scoredTools.slice(0, 5).map(s => ({ name: s.tool.name, score: s.score })) },
- 40, // Moderate confidence for emergency selection
- emergencyStart,
- { selectionMethod: 'emergency_keyword' }
- );
- }
-
- return {
- selectedTools,
- selectedConcepts: candidateConcepts.slice(0, 3)
- };
- }
-
- private async delay(ms: number): Promise<void> {
return new Promise(resolve => setTimeout(resolve, ms));
}
- private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens: number = 300): Promise {
+ private async callMicroTaskAI(prompt: string, context: AnalysisContext, maxTokens: number = 500): Promise {
const startTime = Date.now();
let contextPrompt = prompt;
@@ -549,11 +631,10 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
success: true
};
- // NEW: Add Audit Entry for Successful Micro-Task
this.addAuditEntry(context, 'micro-task', 'ai-analysis',
{ promptLength: contextPrompt.length, maxTokens },
{ responseLength: response.length, contentPreview: response.slice(0, 100) },
- response.length > 50 ? 80 : 60, // Confidence based on response quality
+ response.length > 50 ? 80 : 60,
startTime,
{ aiModel: this.config.model, contextUsed: context.contextHistory.length > 0 }
);
@@ -569,11 +650,10 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
error: error.message
};
- // NEW: Add Audit Entry for Failed Micro-Task
this.addAuditEntry(context, 'micro-task', 'ai-analysis-failed',
{ promptLength: contextPrompt.length, maxTokens },
{ error: error.message },
- 5, // Very low confidence
+ 5,
startTime,
{ aiModel: this.config.model, contextUsed: context.contextHistory.length > 0 }
);
@@ -586,7 +666,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
const isWorkflow = context.mode === 'workflow';
const prompt = getPrompt('scenarioAnalysis', isWorkflow, context.userQuery);
- const result = await this.callMicroTaskAI(prompt, context, 220);
+ const result = await this.callMicroTaskAI(prompt, context, 400);
if (result.success) {
if (isWorkflow) {
@@ -605,7 +685,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
const isWorkflow = context.mode === 'workflow';
const prompt = getPrompt('investigationApproach', isWorkflow, context.userQuery);
- const result = await this.callMicroTaskAI(prompt, context, 220);
+ const result = await this.callMicroTaskAI(prompt, context, 400);
if (result.success) {
context.investigationApproach = result.content;
@@ -619,7 +699,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
const isWorkflow = context.mode === 'workflow';
const prompt = getPrompt('criticalConsiderations', isWorkflow, context.userQuery);
- const result = await this.callMicroTaskAI(prompt, context, 180);
+ const result = await this.callMicroTaskAI(prompt, context, 350);
if (result.success) {
context.criticalConsiderations = result.content;
@@ -645,7 +725,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
const prompt = getPrompt('phaseToolSelection', context.userQuery, phase, phaseTools);
- const result = await this.callMicroTaskAI(prompt, context, 450);
+ const result = await this.callMicroTaskAI(prompt, context, 800);
if (result.success) {
const selections = this.safeParseJSON(result.content, []);
@@ -662,7 +742,6 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
}
});
- // NEW: Add audit entry for tool selection
this.addAuditEntry(context, 'micro-task', 'phase-tool-selection',
{ phase: phase.id, availableTools: phaseTools.length },
{ validSelections: validSelections.length, selectedTools: validSelections.map(s => s.toolName) },
@@ -679,7 +758,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
private async evaluateSpecificTool(context: AnalysisContext, tool: any, rank: number): Promise {
const prompt = getPrompt('toolEvaluation', context.userQuery, tool, rank);
- const result = await this.callMicroTaskAI(prompt, context, 650);
+ const result = await this.callMicroTaskAI(prompt, context, 1200);
if (result.success) {
const evaluation = this.safeParseJSON(result.content, {
@@ -699,7 +778,6 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
}
}, 'evaluation', evaluation.suitability_score);
- // NEW: Add audit entry for tool evaluation
this.addAuditEntry(context, 'micro-task', 'tool-evaluation',
{ toolName: tool.name, rank },
{ suitabilityScore: evaluation.suitability_score, hasExplanation: !!evaluation.detailed_explanation },
@@ -727,7 +805,7 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
const selectedToolNames = context.selectedTools?.map(st => st.tool.name) || [];
const prompt = getPrompt('backgroundKnowledgeSelection', context.userQuery, context.mode, selectedToolNames, availableConcepts);
- const result = await this.callMicroTaskAI(prompt, context, 400);
+ const result = await this.callMicroTaskAI(prompt, context, 700);
if (result.success) {
const selections = this.safeParseJSON(result.content, []);
@@ -740,7 +818,6 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
relevance: sel.relevance
}));
- // NEW: Add audit entry for background knowledge selection
this.addAuditEntry(context, 'micro-task', 'background-knowledge-selection',
{ availableConcepts: availableConcepts.length },
{ selectedConcepts: context.backgroundKnowledge?.length || 0 },
@@ -758,21 +835,19 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
const selectedToolNames = context.selectedTools?.map(st => st.tool.name) || [];
const prompt = getPrompt('finalRecommendations', context.mode === 'workflow', context.userQuery, selectedToolNames);
- const result = await this.callMicroTaskAI(prompt, context, 180);
+ const result = await this.callMicroTaskAI(prompt, context, 350);
return result;
}
- private async callAI(prompt: string, maxTokens: number = 1000): Promise<string> {
+ private async callAI(prompt: string, maxTokens: number = 1500): Promise<string> {
const endpoint = this.config.endpoint;
const apiKey = this.config.apiKey;
const model = this.config.model;
- // Simple headers - add auth only if API key exists
let headers: Record<string, string> = {
'Content-Type': 'application/json'
};
- // Add authentication if API key is provided
if (apiKey) {
headers['Authorization'] = `Bearer ${apiKey}`;
console.log('[AI PIPELINE] Using API key authentication');
@@ -780,7 +855,6 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
console.log('[AI PIPELINE] No API key - making request without authentication');
}
- // Simple request body
const requestBody = {
model,
messages: [{ role: 'user', content: prompt }],
@@ -789,7 +863,6 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
};
try {
- // FIXED: Use direct fetch since entire pipeline is already queued at query.ts level
const response = await fetch(`${endpoint}/v1/chat/completions`, {
method: 'POST',
headers,
@@ -823,13 +896,11 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
let completedTasks = 0;
let failedTasks = 0;
- // NEW: Clear any previous temporary audit entries
this.tempAuditEntries = [];
- console.log(`[IMPROVED PIPELINE] Starting ${mode} query processing with context continuity and audit trail`);
+ console.log(`[AI PIPELINE] Starting ${mode} query processing with context continuity and audit trail`);
try {
- // Stage 1: Get intelligent candidates (embeddings + AI selection)
const toolsData = await getCompressedToolsDataForAI();
const filteredData = await this.getIntelligentCandidates(userQuery, toolsData, mode);
@@ -841,20 +912,17 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
maxContextLength: this.maxContextTokens,
currentContextLength: 0,
seenToolNames: new Set(),
- // NEW: Initialize audit trail
auditTrail: []
};
- // NEW: Merge any temporary audit entries from pre-context operations
this.mergeTemporaryAuditEntries(context);
- console.log(`[IMPROVED PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
+ console.log(`[AI PIPELINE] Starting micro-tasks with ${filteredData.tools.length} tools visible`);
- // NEW: Add initial audit entry
this.addAuditEntry(context, 'initialization', 'pipeline-start',
{ userQuery, mode, toolsDataLoaded: !!toolsData },
{ candidateTools: filteredData.tools.length, candidateConcepts: filteredData.concepts.length },
- 90, // High confidence for initialization
+ 90,
startTime,
{ auditEnabled: this.auditConfig.enabled }
);
@@ -893,19 +961,15 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
}
}
- // Task 5: Background Knowledge Selection
const knowledgeResult = await this.selectBackgroundKnowledge(context);
if (knowledgeResult.success) completedTasks++; else failedTasks++;
await this.delay(this.microTaskDelay);
- // Task 6: Final Recommendations
const finalResult = await this.generateFinalRecommendations(context);
if (finalResult.success) completedTasks++; else failedTasks++;
- // Build final recommendation
const recommendation = this.buildRecommendation(context, mode, finalResult.content);
- // NEW: Add final audit entry
this.addAuditEntry(context, 'completion', 'pipeline-end',
{ completedTasks, failedTasks },
{ finalRecommendation: !!recommendation, auditEntriesGenerated: context.auditTrail.length },
@@ -925,23 +989,21 @@ ${JSON.stringify(conceptsWithFullData.slice(0, 10), null, 2)}`;
contextContinuityUsed: true
};
- console.log(`[IMPROVED PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
- console.log(`[IMPROVED PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);
- console.log(`[IMPROVED PIPELINE] Audit trail entries: ${context.auditTrail.length}`);
+ console.log(`[AI PIPELINE] Completed: ${completedTasks} tasks, Failed: ${failedTasks} tasks`);
+ console.log(`[AI PIPELINE] Unique tools selected: ${context.seenToolNames.size}`);
+ console.log(`[AI PIPELINE] Audit trail entries: ${context.auditTrail.length}`);
return {
recommendation: {
...recommendation,
- // NEW: Include audit trail in response
auditTrail: this.auditConfig.enabled ? context.auditTrail : undefined
},
processingStats
};
} catch (error) {
- console.error('[IMPROVED PIPELINE] Processing failed:', error);
+ console.error('[AI PIPELINE] Processing failed:', error);
- // NEW: Ensure temp audit entries are cleared even on error
this.tempAuditEntries = [];
throw error;
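With emergencyKeywordSelection removed, selection failures now propagate to the caller instead of degrading to keyword matching, and the audit trail is only attached when FORENSIC_AUDIT_ENABLED=true. A rough consumer sketch; the public method name (processQuery) and the direct instantiation are assumptions, not something this diff shows:

// Hypothetical caller of the pipeline; `processQuery` and direct instantiation are assumed.
const pipeline = new ImprovedMicroTaskAIPipeline();
try {
  const { recommendation, processingStats } = await pipeline.processQuery(userQuery, 'workflow');
  if (recommendation.auditTrail) {
    console.log(`[QUERY] ${recommendation.auditTrail.length} audit entries captured`);
  }
  console.log('[QUERY] Processing stats:', processingStats);
} catch (error) {
  // No emergency keyword fallback anymore - the error must be handled (or surfaced) here.
  console.error('[QUERY] AI pipeline failed:', error);
}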
diff --git a/src/utils/dataService.ts b/src/utils/dataService.ts
index 7f6b6a0..c0f8b1d 100644
--- a/src/utils/dataService.ts
+++ b/src/utils/dataService.ts
@@ -77,33 +77,8 @@ interface EnhancedCompressedToolsData {
domains: any[];
phases: any[];
'domain-agnostic-software': any[];
- scenarios?: any[]; // Optional for AI processing
+ scenarios?: any[];
skill_levels: any;
- // Enhanced context for micro-tasks
- domain_relationships: DomainRelationship[];
- phase_dependencies: PhaseDependency[];
- tool_compatibility_matrix: CompatibilityMatrix[];
-}
-
-interface DomainRelationship {
- domain_id: string;
- tool_count: number;
- common_tags: string[];
- skill_distribution: Record<string, number>;
-}
-
-interface PhaseDependency {
- phase_id: string;
- order: number;
- depends_on: string | null;
- enables: string | null;
- is_parallel_capable: boolean;
- typical_duration: string;
-}
-
-interface CompatibilityMatrix {
- type: string;
- groups: Record<string, string[]>;
}
let cachedData: ToolsData | null = null;
@@ -146,104 +121,6 @@ function generateDataVersion(data: any): string {
return Math.abs(hash).toString(36);
}
-// Enhanced: Generate domain relationships for better AI understanding
-function generateDomainRelationships(domains: any[], tools: any[]): DomainRelationship[] {
- const relationships: DomainRelationship[] = [];
-
- for (const domain of domains) {
- const domainTools = tools.filter(tool =>
- tool.domains && tool.domains.includes(domain.id)
- );
-
- const commonTags = domainTools
- .flatMap(tool => tool.tags || [])
- .reduce((acc: any, tag: string) => {
- acc[tag] = (acc[tag] || 0) + 1;
- return acc;
- }, {});
-
- const topTags = Object.entries(commonTags)
- .sort(([,a], [,b]) => (b as number) - (a as number))
- .slice(0, 5)
- .map(([tag]) => tag);
-
- relationships.push({
- domain_id: domain.id,
- tool_count: domainTools.length,
- common_tags: topTags,
- skill_distribution: domainTools.reduce((acc: any, tool: any) => {
- acc[tool.skillLevel] = (acc[tool.skillLevel] || 0) + 1;
- return acc;
- }, {})
- });
- }
-
- return relationships;
-}
-
-// Enhanced: Generate phase dependencies
-function generatePhaseDependencies(phases: any[]): PhaseDependency[] {
- const dependencies: PhaseDependency[] = [];
-
- for (let i = 0; i < phases.length; i++) {
- const phase = phases[i];
- const nextPhase = phases[i + 1];
- const prevPhase = phases[i - 1];
-
- dependencies.push({
- phase_id: phase.id,
- order: i + 1,
- depends_on: prevPhase?.id || null,
- enables: nextPhase?.id || null,
- is_parallel_capable: ['examination', 'analysis'].includes(phase.id), // Some phases can run in parallel
- typical_duration: phase.id === 'data-collection' ? 'hours-days' :
- phase.id === 'examination' ? 'hours-weeks' :
- phase.id === 'analysis' ? 'days-weeks' :
- 'hours-days'
- });
- }
-
- return dependencies;
-}
-
-// Enhanced: Generate tool compatibility matrix
-function generateToolCompatibilityMatrix(tools: any[]): CompatibilityMatrix[] {
- const matrix: CompatibilityMatrix[] = [];
-
- // Group tools by common characteristics
- const platformGroups = tools.reduce((acc: any, tool: any) => {
- if (tool.platforms) {
- tool.platforms.forEach((platform: string) => {
- if (!acc[platform]) acc[platform] = [];
- acc[platform].push(tool.name);
- });
- }
- return acc;
- }, {});
-
- const phaseGroups = tools.reduce((acc: any, tool: any) => {
- if (tool.phases) {
- tool.phases.forEach((phase: string) => {
- if (!acc[phase]) acc[phase] = [];
- acc[phase].push(tool.name);
- });
- }
- return acc;
- }, {});
-
- matrix.push({
- type: 'platform_compatibility',
- groups: platformGroups
- });
-
- matrix.push({
- type: 'phase_synergy',
- groups: phaseGroups
- });
-
- return matrix;
-}
-
async function loadRawData(): Promise<ToolsData> {
if (!cachedData) {
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
@@ -253,7 +130,6 @@ async function loadRawData(): Promise<ToolsData> {
try {
cachedData = ToolsDataSchema.parse(rawData);
- // Enhanced: Add default skill level descriptions if not provided
if (!cachedData.skill_levels || Object.keys(cachedData.skill_levels).length === 0) {
cachedData.skill_levels = {
novice: "Minimal technical background required, guided interfaces",
@@ -301,21 +177,18 @@ export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedToolsData> {
.filter(tool => tool.type !== 'concept')
.map(tool => {
const { projectUrl, statusUrl, ...compressedTool } = tool;
return {
...compressedTool,
- // Enhanced: Add computed fields for AI
is_hosted: projectUrl !== undefined && projectUrl !== null && projectUrl !== "" && projectUrl.trim() !== "",
is_open_source: tool.license && tool.license !== 'Proprietary',
complexity_score: tool.skillLevel === 'expert' ? 5 :
tool.skillLevel === 'advanced' ? 4 :
tool.skillLevel === 'intermediate' ? 3 :
tool.skillLevel === 'beginner' ? 2 : 1,
- // Enhanced: Phase-specific suitability hints
phase_suitability: tool.phases?.map(phase => ({
phase,
primary_use: tool.tags?.find(tag => tag.includes(phase)) ? 'primary' : 'secondary'
@@ -329,7 +202,6 @@ export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedToolsData> {
+ private initializationPromise: Promise<void> | null = null;
private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
private readonly batchSize: number;
private readonly batchDelay: number;
@@ -42,7 +43,25 @@ class EmbeddingsService {
this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
}
+ // Idempotent entry point: concurrent callers share a single in-flight initialization
async initialize(): Promise<void> {
+ // If initialization is already in progress, wait for it
+ if (this.initializationPromise) {
+ return this.initializationPromise;
+ }
+
+ // If already initialized, return immediately
+ if (this.isInitialized) {
+ return Promise.resolve();
+ }
+
+ // Start initialization and store the promise
+ this.initializationPromise = this.performInitialization();
+ return this.initializationPromise;
+ }
+
+ // Runs the actual setup; callers go through initialize() or waitForInitialization()
+ private async performInitialization(): Promise<void> {
if (!this.enabled) {
console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
return;
@@ -74,9 +93,29 @@ class EmbeddingsService {
} catch (error) {
console.error('[EMBEDDINGS] Failed to initialize:', error);
this.isInitialized = false;
+ throw error;
+ } finally {
+ this.initializationPromise = null;
}
}
async waitForInitialization(): Promise<void> {
+ if (!this.enabled) {
+ return Promise.resolve();
+ }
+
+ if (this.isInitialized) {
+ return Promise.resolve();
+ }
+
+ if (this.initializationPromise) {
+ await this.initializationPromise;
+ return;
+ }
+
+ return this.initialize();
+ }
+
private hashData(data: any): string {
return Buffer.from(JSON.stringify(data)).toString('base64').slice(0, 32);
}
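The initializationPromise guard added above is the standard single-flight pattern: concurrent callers of initialize() or waitForInitialization() join one in-flight promise instead of triggering duplicate setup. A stripped-down sketch of the same pattern outside the service, with illustrative names only:

```ts
// Minimal single-flight initialization sketch; class and member names are illustrative.
class LazyService {
  private isInitialized = false;
  private initializationPromise: Promise<void> | null = null;

  async initialize(): Promise<void> {
    if (this.isInitialized) return;                                    // already done
    if (this.initializationPromise) return this.initializationPromise; // join in-flight setup
    this.initializationPromise = this.performInitialization();
    return this.initializationPromise;
  }

  private async performInitialization(): Promise<void> {
    try {
      // Expensive one-time setup would go here (e.g. loading cached embeddings).
      this.isInitialized = true;
    } finally {
      // Clearing the handle lets a failed initialization be retried by the next caller.
      this.initializationPromise = null;
    }
  }
}

// Usage: concurrent callers share the same setup run.
// await Promise.all([service.initialize(), service.initialize()]);
```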
@@ -127,7 +166,6 @@ class EmbeddingsService {
'Content-Type': 'application/json'
};
- // API key is optional for Ollama but required for Mistral/OpenAI
if (apiKey) {
headers['Authorization'] = `Bearer ${apiKey}`;
}
@@ -148,12 +186,10 @@ class EmbeddingsService {
const data = await response.json();
- // Detect Ollama format
if (Array.isArray(data.embeddings)) {
return data.embeddings;
}
- // Detect OpenAI/Mistral format
if (Array.isArray(data.data)) {
return data.data.map((item: any) => item.embedding);
}
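The two Array.isArray checks above normalize the two provider payload shapes: an Ollama-style response carries embeddings as number[][], while an OpenAI/Mistral-style response carries data as an array of { embedding } objects. A small standalone sketch of that normalization — the types are assumptions for illustration, not the project's own declarations:

```ts
// Normalizes the two embedding response shapes handled above.
type EmbeddingResponse =
  | { embeddings: number[][] }               // Ollama-style payload
  | { data: { embedding: number[] }[] };     // OpenAI/Mistral-style payload

function extractEmbeddings(data: EmbeddingResponse): number[][] {
  if ('embeddings' in data && Array.isArray(data.embeddings)) {
    return data.embeddings;                       // already a list of vectors
  }
  if ('data' in data && Array.isArray(data.data)) {
    return data.data.map(item => item.embedding); // unwrap per-item objects
  }
  throw new Error('Unrecognized embeddings response format');
}
```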
@@ -170,7 +206,6 @@ class EmbeddingsService {
const contents = allItems.map(item => this.createContentString(item));
this.embeddings = [];
- // Process in batches to respect rate limits
for (let i = 0; i < contents.length; i += this.batchSize) {
const batch = contents.slice(i, i + this.batchSize);
const batchItems = allItems.slice(i, i + this.batchSize);
@@ -198,7 +233,6 @@ class EmbeddingsService {
});
});
- // Rate limiting delay between batches
if (i + this.batchSize < contents.length) {
await new Promise(resolve => setTimeout(resolve, this.batchDelay));
}
@@ -213,7 +247,6 @@ class EmbeddingsService {
}
public async embedText(text: string): Promise<number[]> {
- // Re‑use the private batch helper to avoid auth duplication
const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
return embedding;
}
@@ -239,25 +272,21 @@ class EmbeddingsService {
}
try {
- // Generate embedding for query
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
const queryEmbedding = queryEmbeddings[0];
console.log(`[EMBEDDINGS] Computing similarities for ${this.embeddings.length} items`);
- // Calculate similarities - properly typed
const similarities: SimilarityResult[] = this.embeddings.map(item => ({
...item,
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
}));
- // Filter by threshold and sort by similarity (descending - highest first)
const results = similarities
.filter(item => item.similarity >= threshold)
- .sort((a, b) => b.similarity - a.similarity) // CRITICAL: Ensure descending order
+ .sort((a, b) => b.similarity - a.similarity)
.slice(0, maxResults);
- // ENHANCED: Verify ordering is correct
const orderingValid = results.every((item, index) => {
if (index === 0) return true;
return item.similarity <= results[index - 1].similarity;
@@ -270,15 +299,13 @@ class EmbeddingsService {
});
}
- // ENHANCED: Log top results for debugging
console.log(`[EMBEDDINGS] Found ${results.length} similar items (threshold: ${threshold})`);
if (results.length > 0) {
- console.log('[EMBEDDINGS] Top 5 similarity matches:');
- results.slice(0, 5).forEach((item, idx) => {
+ console.log('[EMBEDDINGS] Top 10 similarity matches:');
+ results.slice(0, 10).forEach((item, idx) => {
console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`);
});
- // Verify first result is indeed the highest
const topSimilarity = results[0].similarity;
const hasHigherSimilarity = results.some(item => item.similarity > topSimilarity);
if (hasHigherSimilarity) {
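The ranking above relies on this.cosineSimilarity, which is not shown in these hunks. A conventional implementation is sketched below, included only to illustrate what the call likely expects (equal-length numeric vectors, similarity in [-1, 1]):

```ts
// Minimal cosine-similarity helper matching the usage above; assumes equal-length vectors.
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  return denom === 0 ? 0 : dot / denom; // guard against zero vectors
}
```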
diff --git a/src/utils/rateLimitedQueue.ts b/src/utils/rateLimitedQueue.ts
index 3d9b96b..603807a 100644
--- a/src/utils/rateLimitedQueue.ts
+++ b/src/utils/rateLimitedQueue.ts
@@ -157,15 +157,6 @@ class RateLimitedQueue {
return status;
}
- setDelay(ms: number): void {
- if (!Number.isFinite(ms) || ms < 0) return;
- this.delayMs = ms;
- }
-
- getDelay(): number {
- return this.delayMs;
- }
-
private async processQueue(): Promise<void> {
if (this.isProcessing) {
return;
diff --git a/src/utils/toolHelpers.ts b/src/utils/toolHelpers.ts
index 54431b3..47f572f 100644
--- a/src/utils/toolHelpers.ts
+++ b/src/utils/toolHelpers.ts
@@ -1,8 +1,3 @@
-/**
- * CONSOLIDATED Tool utility functions for consistent tool operations across the app
- * Works in both server (Node.js) and client (browser) environments
- */
-
export interface Tool {
name: string;
type?: 'software' | 'method' | 'concept';
@@ -18,10 +13,6 @@ export interface Tool {
related_concepts?: string[];
}
-/**
- * Creates a URL-safe slug from a tool name
- * Used for URLs, IDs, and file names consistently across the app
- */
export function createToolSlug(toolName: string): string {
if (!toolName || typeof toolName !== 'string') {
console.warn('[toolHelpers] Invalid toolName provided to createToolSlug:', toolName);
@@ -35,9 +26,6 @@ export function createToolSlug(toolName: string): string {
.replace(/^-|-$/g, ''); // Remove leading/trailing hyphens
}
-/**
- * Finds a tool by name or slug from tools array
- */
export function findToolByIdentifier(tools: Tool[], identifier: string): Tool | undefined {
if (!identifier || !Array.isArray(tools)) return undefined;
@@ -47,23 +35,9 @@ export function findToolByIdentifier(tools: Tool[], identifier: string): Tool |
);
}
-/**
- * Checks if tool has a valid project URL (hosted on CC24 server)
- */
export function isToolHosted(tool: Tool): boolean {
return tool.projectUrl !== undefined &&
tool.projectUrl !== null &&
tool.projectUrl !== "" &&
tool.projectUrl.trim() !== "";
-}
-
-/**
- * Determines tool category for styling/logic
- */
-export function getToolCategory(tool: Tool): 'concept' | 'method' | 'hosted' | 'oss' | 'proprietary' {
- if (tool.type === 'concept') return 'concept';
- if (tool.type === 'method') return 'method';
- if (isToolHosted(tool)) return 'hosted';
- if (tool.license && tool.license !== 'Proprietary') return 'oss';
- return 'proprietary';
}
\ No newline at end of file
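A brief usage sketch of the exported helpers, assuming they are imported from src/utils/toolHelpers; the sample entry and URL are illustrative and not taken from src/data/tools.yaml:

```ts
import { createToolSlug, findToolByIdentifier, isToolHosted, type Tool } from './toolHelpers';

// Illustrative data only; real entries come from src/data/tools.yaml.
const tools: Tool[] = [
  { name: 'Autopsy', type: 'software', projectUrl: 'https://example.invalid/autopsy' },
];

const slug = createToolSlug('Autopsy');          // "autopsy"
const tool = findToolByIdentifier(tools, slug);  // matches by name or by slug
const hosted = tool ? isToolHosted(tool) : false; // true only for a non-empty projectUrl
console.log({ slug, hosted });
```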