Compare commits

...

51 Commits

Author SHA1 Message Date
overcuriousity
bdee77f459 update color palette, dependencies, fix in tools.yaml 2025-09-10 10:37:14 +02:00
8a6d9d3324 src/data/tools.yaml aktualisiert 2025-09-08 10:22:44 +00:00
overcuriousity
dc9f52fb7c cleanup, prompt centralization 2025-08-29 14:50:11 +02:00
overcuriousity
b17458d153 enhance prompts 2025-08-29 12:53:36 +02:00
overcuriousity
b14ca1d243 fix tool mode ai pipiline logic 2025-08-29 12:27:15 +02:00
overcuriousity
4ee1cc4984 replace nwc 2025-08-23 22:23:33 +02:00
overcuriousity
bbe1b12251 lightning 2025-08-23 11:06:57 +02:00
overcuriousity
d569b74a20 revert 2025-08-23 01:23:52 +02:00
overcuriousity
a2d3d3170a package.json 2025-08-23 01:15:22 +02:00
overcuriousity
3823407d49 fix lightning 2025-08-23 01:03:57 +02:00
overcuriousity
496f2a5b43 fix lightning 2025-08-23 00:43:23 +02:00
overcuriousity
20a4c71d02 lighning tips 2025-08-23 00:30:24 +02:00
overcuriousity
dad5e5ea0c embeddings fix 2025-08-18 01:18:40 +02:00
overcuriousity
b689f24502 fix embeddings enabled 2025-08-18 01:07:46 +02:00
overcuriousity
630fc1643e enabled embeddings by default 2025-08-18 01:03:45 +02:00
overcuriousity
1d750307c4 .env.example 2025-08-18 01:00:41 +02:00
05d957324a Merge pull request 'airefactor' (#19) from airefactor into main
Reviewed-on: #19
2025-08-17 22:59:30 +00:00
overcuriousity
6160620e24 cleanup 2025-08-18 00:57:57 +02:00
overcuriousity
1d91dbf478 audit trail collapsed by default 2025-08-18 00:50:16 +02:00
overcuriousity
76694e003c attempt fix layout 2025-08-18 00:34:29 +02:00
overcuriousity
28af56d6ef fix audit trail 2025-08-18 00:08:57 +02:00
overcuriousity
3d5d2506e9 fix false truncation 2025-08-17 23:45:28 +02:00
overcuriousity
6b09eb062f add switching logic 2025-08-17 23:25:23 +02:00
overcuriousity
70fb012d63 fulldata 2025-08-17 23:18:15 +02:00
overcuriousity
2cb25d1dd6 remove some env vars 2025-08-17 18:17:33 +02:00
overcuriousity
bcd92af8a0 cleanup 2025-08-17 17:27:08 +02:00
overcuriousity
5ecbabea90 some cleanup 2025-08-17 17:20:54 +02:00
overcuriousity
07c8f707df audit trail detail, dupes detector 2025-08-17 16:55:02 +02:00
overcuriousity
e63ec367a5 audit trail detail 2025-08-17 16:30:58 +02:00
overcuriousity
5c3c308225 audit trail details 2025-08-17 15:45:40 +02:00
overcuriousity
dd26d45a21 layout fixes 2025-08-17 12:09:40 +02:00
overcuriousity
afbd8d2cd3 restore old after-confidence-scoring 2025-08-17 11:45:53 +02:00
overcuriousity
8bba0eefa9 unify styles 2025-08-17 11:11:26 +02:00
overcuriousity
170638a5fa update audit trail detail level 2025-08-17 10:52:48 +02:00
overcuriousity
c60730b4aa add back download btn 2025-08-17 00:01:30 +02:00
overcuriousity
b9964685f9 bugfix 2025-08-16 23:52:59 +02:00
overcuriousity
5d72549bb7 cleanup 2025-08-16 23:35:14 +02:00
overcuriousity
15d302031e improvements & cleanup 2025-08-16 23:27:55 +02:00
overcuriousity
48209c4639 finalize phase 3 2025-08-16 22:32:23 +02:00
overcuriousity
6d08dbdcd0 phase2 2025-08-16 22:08:02 +02:00
overcuriousity
77f09ed399 phase 2 2025-08-16 22:03:40 +02:00
overcuriousity
0c7c502b03 first iteration - buggy 2025-08-16 18:15:20 +02:00
overcuriousity
1d98dd3257 cleanup 2025-08-16 17:11:03 +02:00
overcuriousity
3ad0d8120a content 2025-08-15 22:59:12 +02:00
overcuriousity
88cf682790 small changes 2025-08-15 22:43:40 +02:00
overcuriousity
182b9d01f9 remove content 2025-08-14 23:01:03 +02:00
overcuriousity
12368ed7c8 content 2025-08-14 22:56:15 +02:00
overcuriousity
c4c52f6064 cleanup 2025-08-13 15:27:14 +02:00
overcuriousity
e93f394263 update readme files 2025-08-13 15:10:20 +02:00
overcuriousity
75410e2b84 Merge branch 'main' of https://git.cc24.dev/mstoeck3/forensic-pathways 2025-08-13 14:04:20 +02:00
overcuriousity
88e79d7780 update video embed 2025-08-13 14:04:08 +02:00
51 changed files with 198645 additions and 193050 deletions

View File

@@ -1,5 +1,5 @@
{
"_variables": {
"lastUpdateCheck": 1754571688630
"lastUpdateCheck": 1755901660216
}
}

View File

@@ -59,8 +59,7 @@ FORENSIC_AUDIT_RETENTION_HOURS=24
FORENSIC_AUDIT_MAX_ENTRIES=50
# === AI SEMANTIC SEARCH ===
# Enable semantic search (highly recommended for better results)
AI_EMBEDDINGS_ENABLED=true
# semantic search
AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
AI_EMBEDDINGS_API_KEY=your-embeddings-api-key-here
AI_EMBEDDINGS_MODEL=mistral-embed
@@ -68,26 +67,6 @@ AI_EMBEDDINGS_MODEL=mistral-embed
# User rate limiting (queries per minute)
AI_RATE_LIMIT_MAX_REQUESTS=4
# ============================================================================
# 🎥 VIDEO EMBEDDING - PRODUCTION CONFIGURATION
# ============================================================================
# Enable local caching of Nextcloud videos (highly recommended)
VIDEO_CACHE_ENABLED=true
# Directory for cached videos (ensure it's writable and has sufficient space)
# This directory will grow over time as videos are cached permanently
VIDEO_CACHE_DIR=./cache/videos
# Emergency cleanup threshold in MB - videos are cached indefinitely
# Only triggers cleanup when approaching this limit to prevent disk full
# Recommended: 2000MB (2GB) for small deployments, 5000MB+ for larger ones
VIDEO_CACHE_MAX_SIZE=2000
# Maximum individual video file size for caching in MB
# Videos larger than this will stream directly without caching
VIDEO_MAX_SIZE=200
# ============================================================================
# CACHING BEHAVIOR
# ============================================================================
@@ -121,17 +100,11 @@ AI_SOFTWARE_SELECTION_RATIO=0.5 # 50% software tools (increase for more tool re
# AI selection limits
AI_MAX_SELECTED_ITEMS=25
AI_MAX_TOOLS_TO_ANALYZE=20
AI_MAX_CONCEPTS_TO_ANALYZE=10
# Efficiency thresholds
AI_EMBEDDINGS_MIN_TOOLS=8
AI_EMBEDDINGS_MAX_REDUCTION_RATIO=0.75
# Fallback limits when embeddings are disabled
AI_NO_EMBEDDINGS_TOOL_LIMIT=25
AI_NO_EMBEDDINGS_CONCEPT_LIMIT=10
# === Rate Limiting & Timing ===
AI_MICRO_TASK_TOTAL_LIMIT=30
AI_MICRO_TASK_DELAY_MS=500
@@ -141,10 +114,6 @@ AI_RATE_LIMIT_DELAY_MS=2000
AI_EMBEDDINGS_BATCH_SIZE=10
AI_EMBEDDINGS_BATCH_DELAY_MS=1000
# === Context Management ===
AI_MAX_CONTEXT_TOKENS=4000
AI_MAX_PROMPT_TOKENS=2500
# === Confidence Scoring ===
CONFIDENCE_SEMANTIC_WEIGHT=0.5
CONFIDENCE_SUITABILITY_WEIGHT=0.5

View File

@@ -250,10 +250,26 @@ Ihr Artikel-Inhalt hier...
2. Installer ausführen
3. Einstellungen konfigurieren
## Video-Demonstration
<video src="/videos/setup-tutorial.mp4" title="Setup-Tutorial" controls></video>
## Häufige Probleme
Lösungen für typische Probleme...
```
### Video-Integration
Knowledgebase-Artikel unterstützen eingebettete Videos für praktische Demonstrationen:
```html
<video src="/videos/demo.mp4" title="Tool-Demonstration" controls></video>
```
**Wichtige Hinweise**:
- Videos müssen manuell in `public/videos/` bereitgestellt werden (nicht im Git-Repository enthalten)
- Firefox-kompatible Formate verwenden (MP4 H.264, WebM VP9)
- Detaillierte Video-Dokumentation: siehe `src/content/knowledgebase/README.md`
### Artikel-Struktur-Richtlinien
**Erforderliche Felder**:

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,83 @@
{
"toolsYamlPath": "./src/data/tools.yaml",
"models": [
{
"name": "granite-embedding:278m",
"type": "ollama",
"endpoint": "http://192.168.178.100:11434/api/embeddings",
"rateLimit": false,
"contextSize": 512
},
{
"name": "paraphrase-multilingual:latest",
"type": "ollama",
"endpoint": "http://192.168.178.100:11434/api/embeddings",
"rateLimit": false,
"contextSize": 128
},
{
"name": "bge-large:latest",
"type": "ollama",
"endpoint": "http://192.168.178.100:11434/api/embeddings",
"rateLimit": false,
"contextSize": 512
},
{
"name": "snowflake-arctic-embed2:latest",
"type": "ollama",
"endpoint": "http://192.168.178.100:11434/api/embeddings",
"rateLimit": false,
"contextSize": 8192
},
{
"name": "snowflake-arctic-embed:latest",
"type": "ollama",
"endpoint": "http://192.168.178.100:11434/api/embeddings",
"rateLimit": false,
"contextSize": 512
},
{
"name": "all-minilm:latest",
"type": "ollama",
"endpoint": "http://192.168.178.100:11434/api/embeddings",
"rateLimit": false,
"contextSize": 256
},
{
"name": "bge-m3:latest",
"type": "ollama",
"endpoint": "http://192.168.178.100:11434/api/embeddings",
"rateLimit": false,
"contextSize": 8192
},
{
"name": "mxbai-embed-large:latest",
"type": "ollama",
"endpoint": "http://192.168.178.100:11434/api/embeddings",
"rateLimit": false,
"contextSize": 512
},
{
"name": "nomic-embed-text:latest",
"type": "ollama",
"endpoint": "http://192.168.178.100:11434/api/embeddings",
"rateLimit": false,
"contextSize": 2048
},
{
"name": "mistral-embed",
"type": "mistral",
"endpoint": "https://api.mistral.ai/v1/embeddings",
"apiKey": "${AI_EMBEDDINGS_API_KEY}",
"rateLimit": true,
"rateLimitDelayMs": 2000,
"contextSize": 8192
}
],
"testSettings": {
"maxToolsPerCategory": 6,
"maxNegativeExamples": 4,
"contextSizeTests": true,
"performanceIterations": 3
}
}

897
embeddings-comparison.js Normal file
View File

@@ -0,0 +1,897 @@
#!/usr/bin/env node
// embeddings-comparison.js
// Proper embedding model evaluation with batch processing and vector search
// Run with: node embeddings-comparison.js --config=config.json
import fs from 'fs/promises';
import yaml from 'js-yaml';
import path from 'path';
import crypto from 'crypto';
class EmbeddingCache {
constructor(cacheDir = './embedding-cache') {
this.cacheDir = cacheDir;
}
async ensureCacheDir() {
try {
await fs.access(this.cacheDir);
} catch {
await fs.mkdir(this.cacheDir, { recursive: true });
}
}
getCacheKey(model, text) {
const content = `${model.name}:${text}`;
return crypto.createHash('md5').update(content).digest('hex');
}
async getCachedEmbedding(model, text) {
await this.ensureCacheDir();
const key = this.getCacheKey(model, text);
const cachePath = path.join(this.cacheDir, `${key}.json`);
try {
const data = await fs.readFile(cachePath, 'utf8');
return JSON.parse(data);
} catch {
return null;
}
}
async setCachedEmbedding(model, text, embedding) {
await this.ensureCacheDir();
const key = this.getCacheKey(model, text);
const cachePath = path.join(this.cacheDir, `${key}.json`);
await fs.writeFile(cachePath, JSON.stringify(embedding));
}
async getCacheStats(model) {
await this.ensureCacheDir();
const files = await fs.readdir(this.cacheDir);
const modelFiles = files.filter(f => f.includes(model.name.replace(/[^a-zA-Z0-9]/g, '_')));
return { cached: modelFiles.length, total: files.length };
}
}
class SearchEvaluator {
constructor() {
this.cache = new EmbeddingCache();
}
async rateLimitedDelay(model) {
if (model.rateLimit && model.rateLimitDelayMs) {
await new Promise(resolve => setTimeout(resolve, model.rateLimitDelayMs));
}
}
async getEmbedding(text, model) {
// Check cache first
const cached = await this.cache.getCachedEmbedding(model, text);
if (cached) return cached;
const headers = { 'Content-Type': 'application/json' };
let body, endpoint;
if (model.type === 'mistral') {
if (model.apiKey) {
headers['Authorization'] = `Bearer ${model.apiKey.replace('${AI_EMBEDDINGS_API_KEY}', process.env.AI_EMBEDDINGS_API_KEY || '')}`;
}
body = { model: model.name, input: [text] };
endpoint = model.endpoint;
} else {
body = { model: model.name, prompt: text };
endpoint = model.endpoint;
}
try {
const response = await fetch(endpoint, {
method: 'POST',
headers,
body: JSON.stringify(body)
});
if (!response.ok) {
if (response.status === 429 && model.rateLimit) {
console.log(` ⚠️ Rate limited, waiting...`);
await new Promise(resolve => setTimeout(resolve, 10000));
return this.getEmbedding(text, model);
}
throw new Error(`API error ${response.status}: ${await response.text()}`);
}
const data = await response.json();
const embedding = model.type === 'mistral' ? data.data[0].embedding : data.embedding;
// Cache the result
await this.cache.setCachedEmbedding(model, text, embedding);
return embedding;
} catch (error) {
console.error(`❌ Failed to get embedding: ${error.message}`);
throw error;
}
}
constructToolText(item, maxLength = null) {
if (typeof item === 'string') {
// Even for string inputs, don't truncate to match real app behavior
return item.toLowerCase();
}
// EXACT match to embeddings.ts createContentString() - NO TRUNCATION
const parts = [
item.name,
item.description || '',
...(item.tags || []),
...(item.domains || []),
...(item.phases || [])
];
const contentString = parts.filter(Boolean).join(' ').toLowerCase();
// CRITICAL: No truncation! Return full content like real app
return contentString;
}
calculateOptimalBatchSize(model) {
// Factors that ACTUALLY matter for batching individual API calls:
// 1. Rate limiting aggressiveness
if (model.rateLimit && model.rateLimitDelayMs > 2000) {
return 5; // Conservative batching for heavily rate-limited APIs
}
// 2. API latency expectations
if (model.type === 'ollama') {
return 15; // Local APIs are fast, can handle larger batches
} else if (model.type === 'mistral') {
return 10; // Remote APIs might be slower, medium batches
}
// 3. Progress reporting frequency preference
// For 185 tools:
// - Batch size 10 = 19 progress updates
// - Batch size 15 = 13 progress updates
// - Batch size 20 = 10 progress updates
return 15; // Good balance for ~13 progress updates
}
async createBatchEmbeddings(items, model) {
const batchSize = this.calculateOptimalBatchSize(model);
const contextSize = model.contextSize || 2000; // Only for display/info
console.log(` 📦 Creating embeddings for ${items.length} items`);
console.log(` 📏 Model context: ${contextSize} chars (for reference - NOT truncating)`);
console.log(` 📋 Batch size: ${batchSize} (for progress reporting)`);
const embeddings = new Map();
let apiCalls = 0;
let cacheHits = 0;
const totalBatches = Math.ceil(items.length / batchSize);
for (let i = 0; i < items.length; i += batchSize) {
const batch = items.slice(i, i + batchSize);
const batchNum = Math.floor(i/batchSize) + 1;
console.log(` 📋 Processing batch ${batchNum}/${totalBatches} (${batch.length} tools)`);
for (const item of batch) {
// Get FULL content (no truncation)
const text = this.constructToolText(item);
// Show actual text length for first few tools (full length!)
if (i < batchSize && batch.indexOf(item) < 3) {
const truncatedDisplay = text.length > 100 ? text.slice(0, 100) + '...' : text;
console.log(` 📝 ${item.name}: ${text.length} chars (full) - "${truncatedDisplay}"`);
}
try {
const embedding = await this.getEmbedding(text, model);
embeddings.set(item.id || item.name || text, {
text,
embedding,
metadata: item
});
const cached = await this.cache.getCachedEmbedding(model, text);
if (cached) cacheHits++; else apiCalls++;
await this.rateLimitedDelay(model);
} catch (error) {
console.warn(` ⚠️ Failed to embed: ${item.name || text.slice(0, 50)}...`);
// Log the error for debugging
if (text.length > 8000) {
console.warn(` 📏 Text was ${text.length} chars - may exceed model limits`);
}
}
}
}
// Show content length statistics
const lengths = Array.from(embeddings.values()).map(e => e.text.length);
const avgLength = lengths.reduce((a, b) => a + b, 0) / lengths.length;
const maxLength = Math.max(...lengths);
const minLength = Math.min(...lengths);
console.log(` 📊 Content stats: avg ${avgLength.toFixed(0)} chars, range ${minLength}-${maxLength} chars`);
console.log(` ✅ Created ${embeddings.size} embeddings (${apiCalls} API calls, ${cacheHits} cache hits)`);
return embeddings;
}
cosineSimilarity(a, b) {
if (!a || !b || a.length === 0 || b.length === 0) return 0;
let dotProduct = 0;
let normA = 0;
let normB = 0;
const minLength = Math.min(a.length, b.length);
for (let i = 0; i < minLength; i++) {
dotProduct += a[i] * b[i];
normA += a[i] * a[i];
normB += b[i] * b[i];
}
if (normA === 0 || normB === 0) return 0;
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
}
searchSimilar(queryEmbedding, toolEmbeddings, topK = 10) {
const similarities = [];
for (const [id, data] of toolEmbeddings) {
const similarity = this.cosineSimilarity(queryEmbedding, data.embedding);
similarities.push({
id,
similarity,
metadata: data.metadata,
text: data.text
});
}
return similarities
.sort((a, b) => b.similarity - a.similarity)
.slice(0, topK);
}
calculateRetrievalMetrics(results, relevantIds, k = 10) {
const topK = results.slice(0, k);
const retrievedIds = new Set(topK.map(r => r.id));
const relevantSet = new Set(relevantIds);
// Precision@K
const relevantRetrieved = topK.filter(r => relevantSet.has(r.id)).length;
const precisionAtK = topK.length > 0 ? relevantRetrieved / topK.length : 0;
// Recall@K
const recallAtK = relevantIds.length > 0 ? relevantRetrieved / relevantIds.length : 0;
// F1@K
const f1AtK = (precisionAtK + recallAtK) > 0 ?
2 * (precisionAtK * recallAtK) / (precisionAtK + recallAtK) : 0;
// Mean Reciprocal Rank (MRR)
let mrr = 0;
for (let i = 0; i < topK.length; i++) {
if (relevantSet.has(topK[i].id)) {
mrr = 1 / (i + 1);
break;
}
}
// NDCG@K (simplified binary relevance)
let dcg = 0;
let idcg = 0;
for (let i = 0; i < k; i++) {
const rank = i + 1;
const discount = Math.log2(rank + 1);
// DCG
if (i < topK.length && relevantSet.has(topK[i].id)) {
dcg += 1 / discount;
}
// IDCG (ideal ranking)
if (i < relevantIds.length) {
idcg += 1 / discount;
}
}
const ndcgAtK = idcg > 0 ? dcg / idcg : 0;
return {
precisionAtK,
recallAtK,
f1AtK,
mrr,
ndcgAtK,
relevantRetrieved,
totalRelevant: relevantIds.length
};
}
}
class EfficientEmbeddingComparison {
constructor(configPath = './embedding-test-config.json') {
this.configPath = configPath;
this.config = null;
this.tools = [];
this.evaluator = new SearchEvaluator();
// Test queries tailored to the actual tools.yaml content
this.testQueries = [
{
query: "memory forensics RAM analysis",
keywords: ["memory", "forensics", "volatility", "ram", "dump", "analysis"],
category: "memory_analysis"
},
{
query: "network packet capture traffic analysis",
keywords: ["network", "packet", "pcap", "wireshark", "traffic", "capture"],
category: "network_analysis"
},
{
query: "malware reverse engineering binary analysis",
keywords: ["malware", "reverse", "engineering", "ghidra", "binary", "disassemble"],
category: "malware_analysis"
},
{
query: "digital forensics disk imaging",
keywords: ["forensics", "disk", "imaging", "autopsy", "investigation", "evidence"],
category: "disk_forensics"
},
{
query: "incident response threat hunting",
keywords: ["incident", "response", "threat", "hunting", "investigation", "compromise"],
category: "incident_response"
},
{
query: "mobile device smartphone forensics",
keywords: ["mobile", "smartphone", "android", "ios", "device", "cellebrite"],
category: "mobile_forensics"
},
{
query: "timeline analysis event correlation",
keywords: ["timeline", "analysis", "correlation", "events", "plaso", "timesketch"],
category: "timeline_analysis"
},
{
query: "registry analysis windows artifacts",
keywords: ["registry", "windows", "artifacts", "regripper", "hives", "keys"],
category: "registry_analysis"
},
{
query: "cloud forensics container analysis",
keywords: ["cloud", "container", "docker", "virtualization", "aws", "azure"],
category: "cloud_forensics"
},
{
query: "blockchain cryptocurrency investigation",
keywords: ["blockchain", "cryptocurrency", "bitcoin", "chainalysis", "transaction"],
category: "blockchain_analysis"
}
];
console.log('[INIT] Efficient embedding comparison initialized');
}
async loadConfig() {
try {
const configData = await fs.readFile(this.configPath, 'utf8');
this.config = JSON.parse(configData);
console.log(`[CONFIG] Loaded ${this.config.models.length} models`);
} catch (error) {
console.error('[CONFIG] Failed to load configuration:', error.message);
throw error;
}
}
async loadTools() {
try {
const yamlContent = await fs.readFile(this.config.toolsYamlPath, 'utf8');
const data = yaml.load(yamlContent);
// Extract tools (flexible - handle different YAML structures)
this.tools = data.tools || data.entries || data.applications || data;
if (!Array.isArray(this.tools)) {
this.tools = Object.values(this.tools);
}
// Filter out concepts and ensure required fields
this.tools = this.tools.filter(tool =>
tool &&
tool.type !== 'concept' &&
(tool.name || tool.title) &&
(tool.description || tool.summary)
);
// Normalize tool structure
this.tools = this.tools.map((tool, index) => ({
id: tool.id || tool.name || tool.title || `tool_${index}`,
name: tool.name || tool.title,
description: tool.description || tool.summary || '',
tags: tool.tags || [],
domains: tool.domains || tool.categories || [],
phases: tool.phases || [],
platforms: tool.platforms || [],
type: tool.type || 'tool',
skillLevel: tool.skillLevel,
license: tool.license
}));
console.log(`[DATA] Loaded ${this.tools.length} tools from ${this.config.toolsYamlPath}`);
// Show some statistics
const domainCounts = {};
const tagCounts = {};
this.tools.forEach(tool => {
(tool.domains || []).forEach(domain => {
domainCounts[domain] = (domainCounts[domain] || 0) + 1;
});
(tool.tags || []).forEach(tag => {
tagCounts[tag] = (tagCounts[tag] || 0) + 1;
});
});
const topDomains = Object.entries(domainCounts)
.sort(([,a], [,b]) => b - a)
.slice(0, 5)
.map(([domain, count]) => `${domain}(${count})`)
.join(', ');
console.log(`[DATA] Top domains: ${topDomains}`);
console.log(`[DATA] Sample tools: ${this.tools.slice(0, 3).map(t => t.name).join(', ')}`);
if (this.tools.length === 0) {
throw new Error('No valid tools found in YAML file');
}
} catch (error) {
console.error('[DATA] Failed to load tools:', error.message);
throw error;
}
}
findRelevantTools(query) {
const queryLower = query.query.toLowerCase();
const keywords = query.keywords.map(k => k.toLowerCase());
const relevantTools = this.tools.filter(tool => {
// Build searchable text from all tool metadata
const searchableFields = [
tool.name || '',
tool.description || '',
(tool.tags || []).join(' '),
(tool.domains || []).join(' '),
(tool.phases || []).join(' '),
(tool.platforms || []).join(' ')
];
const toolText = searchableFields.join(' ').toLowerCase();
// Check for keyword matches
const hasKeywordMatch = keywords.some(keyword => toolText.includes(keyword));
// Check for query word matches (words longer than 3 chars)
const queryWords = queryLower.split(' ').filter(word => word.length > 3);
const hasQueryWordMatch = queryWords.some(word => toolText.includes(word));
// Check for domain-specific matches
const isDomainRelevant = query.category && tool.domains &&
tool.domains.some(domain => domain.includes(query.category.replace('_', '-')));
return hasKeywordMatch || hasQueryWordMatch || isDomainRelevant;
});
console.log(` 🎯 Found ${relevantTools.length} relevant tools for "${query.query}"`);
// Log some examples for debugging
if (relevantTools.length > 0) {
console.log(` 📋 Examples: ${relevantTools.slice(0, 3).map(t => t.name).join(', ')}`);
}
return relevantTools.map(tool => tool.id || tool.name);
}
async testSearchPerformance(model) {
console.log(` 🔍 Testing search performance...`);
// Create embeddings for all tools
const toolEmbeddings = await this.evaluator.createBatchEmbeddings(this.tools, model);
const results = [];
let totalApiCalls = 0;
for (const testQuery of this.testQueries) {
console.log(` 📋 Query: "${testQuery.query}"`);
// Get query embedding
const queryEmbedding = await this.evaluator.getEmbedding(testQuery.query, model);
totalApiCalls++;
await this.evaluator.rateLimitedDelay(model);
// Find relevant tools for this query
const relevantIds = this.findRelevantTools(testQuery);
console.log(` 📊 Found ${relevantIds.length} relevant tools`);
if (relevantIds.length === 0) {
console.log(` ⚠️ No relevant tools found, skipping metrics calculation`);
continue;
}
// Perform search
const searchResults = this.evaluator.searchSimilar(queryEmbedding, toolEmbeddings, 20);
// Calculate metrics for different k values
const metrics = {};
for (const k of [1, 3, 5, 10]) {
metrics[`k${k}`] = this.evaluator.calculateRetrievalMetrics(searchResults, relevantIds, k);
}
results.push({
query: testQuery.query,
category: testQuery.category,
relevantCount: relevantIds.length,
searchResults: searchResults.slice(0, 5), // Top 5 for display
metrics
});
// Display results
console.log(` 🎯 Top results:`);
searchResults.slice(0, 3).forEach((result, i) => {
const isRelevant = relevantIds.includes(result.id) ? '✓' : '✗';
console.log(` ${i+1}. ${isRelevant} ${result.metadata.name} (${(result.similarity*100).toFixed(1)}%)`);
});
console.log(` 📈 P@5: ${(metrics.k5.precisionAtK*100).toFixed(1)}% | R@5: ${(metrics.k5.recallAtK*100).toFixed(1)}% | NDCG@5: ${(metrics.k5.ndcgAtK*100).toFixed(1)}%`);
}
return { results, totalApiCalls };
}
async testSemanticUnderstanding(model) {
console.log(` 🧠 Testing semantic understanding...`);
const semanticTests = [
{
primary: "memory forensics",
synonyms: ["RAM analysis", "volatile memory examination", "memory dump investigation"],
unrelated: ["file compression", "web browser", "text editor"]
},
{
primary: "network analysis",
synonyms: ["packet inspection", "traffic monitoring", "protocol analysis"],
unrelated: ["image editing", "music player", "calculator"]
},
{
primary: "malware detection",
synonyms: ["virus scanning", "threat identification", "malicious code analysis"],
unrelated: ["video converter", "password manager", "calendar app"]
}
];
let totalCorrect = 0;
let totalTests = 0;
let apiCalls = 0;
for (const test of semanticTests) {
console.log(` 🔤 Testing: "${test.primary}"`);
const primaryEmbedding = await this.evaluator.getEmbedding(test.primary, model);
apiCalls++;
await this.evaluator.rateLimitedDelay(model);
// Test synonyms (should be similar)
for (const synonym of test.synonyms) {
const synonymEmbedding = await this.evaluator.getEmbedding(synonym, model);
apiCalls++;
const synonymSim = this.evaluator.cosineSimilarity(primaryEmbedding, synonymEmbedding);
console.log(` ✓ "${synonym}": ${(synonymSim*100).toFixed(1)}%`);
await this.evaluator.rateLimitedDelay(model);
}
// Test unrelated terms (should be dissimilar)
for (const unrelated of test.unrelated) {
const unrelatedEmbedding = await this.evaluator.getEmbedding(unrelated, model);
apiCalls++;
const unrelatedSim = this.evaluator.cosineSimilarity(primaryEmbedding, unrelatedEmbedding);
console.log(` ✗ "${unrelated}": ${(unrelatedSim*100).toFixed(1)}%`);
await this.evaluator.rateLimitedDelay(model);
}
// Calculate semantic coherence
const avgSynonymSim = await this.calculateAvgSimilarity(primaryEmbedding, test.synonyms, model);
const avgUnrelatedSim = await this.calculateAvgSimilarity(primaryEmbedding, test.unrelated, model);
const isCorrect = avgSynonymSim > avgUnrelatedSim;
if (isCorrect) totalCorrect++;
totalTests++;
console.log(` 📊 Synonyms: ${(avgSynonymSim*100).toFixed(1)}% | Unrelated: ${(avgUnrelatedSim*100).toFixed(1)}% ${isCorrect ? '✓' : '✗'}`);
}
return {
accuracy: totalCorrect / totalTests,
correctTests: totalCorrect,
totalTests,
apiCalls
};
}
async calculateAvgSimilarity(baseEmbedding, terms, model) {
let totalSim = 0;
for (const term of terms) {
const embedding = await this.evaluator.getEmbedding(term, model);
const sim = this.evaluator.cosineSimilarity(baseEmbedding, embedding);
totalSim += sim;
await this.evaluator.rateLimitedDelay(model);
}
return totalSim / terms.length;
}
async benchmarkPerformance(model) {
console.log(` ⚡ Benchmarking performance...`);
const testTexts = this.tools.slice(0, 10).map(tool => `${tool.name} ${tool.description}`.slice(0, 500));
const times = [];
let apiCalls = 0;
console.log(` 🏃 Processing ${testTexts.length} texts...`);
for (const text of testTexts) {
const start = Date.now();
await this.evaluator.getEmbedding(text, model);
const time = Date.now() - start;
times.push(time);
apiCalls++;
await this.evaluator.rateLimitedDelay(model);
}
const avgTime = times.reduce((a, b) => a + b, 0) / times.length;
const minTime = Math.min(...times);
const maxTime = Math.max(...times);
console.log(` 📊 Avg: ${avgTime.toFixed(0)}ms | Min: ${minTime}ms | Max: ${maxTime}ms`);
return {
avgLatency: avgTime,
minLatency: minTime,
maxLatency: maxTime,
throughput: 1000 / avgTime, // requests per second
apiCalls
};
}
async testModel(model) {
console.log(`\n🧪 Testing ${model.name} (${model.type})...`);
const startTime = Date.now();
let totalApiCalls = 0;
try {
// 1. Search Performance Testing
const searchResults = await this.testSearchPerformance(model);
totalApiCalls += searchResults.totalApiCalls;
// 2. Semantic Understanding Testing
const semanticResults = await this.testSemanticUnderstanding(model);
totalApiCalls += semanticResults.apiCalls;
// 3. Performance Benchmarking
const perfResults = await this.benchmarkPerformance(model);
totalApiCalls += perfResults.apiCalls;
const totalTime = Date.now() - startTime;
console.log(`${model.name} completed in ${(totalTime/1000).toFixed(1)}s (${totalApiCalls} API calls)`);
return {
searchPerformance: searchResults.results,
semanticUnderstanding: semanticResults,
performance: perfResults,
totalTime,
totalApiCalls
};
} catch (error) {
console.error(`${model.name} failed:`, error.message);
throw error;
}
}
calculateOverallScore(results) {
// Calculate average metrics across all queries
const searchMetrics = results.searchPerformance.filter(r => r.metrics && Object.keys(r.metrics).length > 0);
if (searchMetrics.length === 0) {
console.warn('⚠️ No search metrics available for scoring - may indicate relevance matching issues');
return {
overall: 0,
components: {
precision5: 0,
recall5: 0,
ndcg5: 0,
mrr: 0,
semanticAccuracy: results.semanticUnderstanding?.accuracy || 0,
throughput: results.performance?.throughput || 0
},
warning: 'No search metrics available'
};
}
console.log(`📊 Calculating score from ${searchMetrics.length} valid search results`);
const avgPrecision5 = searchMetrics.reduce((sum, r) => sum + (r.metrics.k5?.precisionAtK || 0), 0) / searchMetrics.length;
const avgRecall5 = searchMetrics.reduce((sum, r) => sum + (r.metrics.k5?.recallAtK || 0), 0) / searchMetrics.length;
const avgNDCG5 = searchMetrics.reduce((sum, r) => sum + (r.metrics.k5?.ndcgAtK || 0), 0) / searchMetrics.length;
const avgMRR = searchMetrics.reduce((sum, r) => sum + (r.metrics.k5?.mrr || 0), 0) / searchMetrics.length;
const semanticAccuracy = results.semanticUnderstanding?.accuracy || 0;
const throughput = results.performance?.throughput || 0;
// Weighted overall score
const weights = {
precision: 0.25,
recall: 0.25,
ndcg: 0.20,
semantic: 0.20,
speed: 0.10
};
const normalizedThroughput = Math.min(throughput / 10, 1); // Normalize to 0-1 (10 req/s = 1.0)
const overall = (
avgPrecision5 * weights.precision +
avgRecall5 * weights.recall +
avgNDCG5 * weights.ndcg +
semanticAccuracy * weights.semantic +
normalizedThroughput * weights.speed
);
return {
overall,
components: {
precision5: avgPrecision5,
recall5: avgRecall5,
ndcg5: avgNDCG5,
mrr: avgMRR,
semanticAccuracy,
throughput
}
};
}
printResults(modelResults) {
console.log(`\n${'='.repeat(80)}`);
console.log("🏆 EFFICIENT EMBEDDING MODEL COMPARISON RESULTS");
console.log(`${'='.repeat(80)}`);
const scores = modelResults.map(mr => ({
model: mr.model,
score: this.calculateOverallScore(mr.results),
results: mr.results
})).sort((a, b) => b.score.overall - a.score.overall);
console.log(`\n🥇 OVERALL RANKINGS:`);
scores.forEach((score, index) => {
console.log(` ${index + 1}. ${score.model.name}: ${(score.score.overall * 100).toFixed(1)}% overall`);
});
console.log(`\n📊 DETAILED METRICS:`);
console.log(`\n 🎯 Search Performance (Precision@5):`);
scores.forEach(score => {
console.log(` ${score.model.name}: ${(score.score.components.precision5 * 100).toFixed(1)}%`);
});
console.log(`\n 🔍 Search Performance (Recall@5):`);
scores.forEach(score => {
console.log(` ${score.model.name}: ${(score.score.components.recall5 * 100).toFixed(1)}%`);
});
console.log(`\n 📈 Search Quality (NDCG@5):`);
scores.forEach(score => {
console.log(` ${score.model.name}: ${(score.score.components.ndcg5 * 100).toFixed(1)}%`);
});
console.log(`\n 🧠 Semantic Understanding:`);
scores.forEach(score => {
console.log(` ${score.model.name}: ${(score.score.components.semanticAccuracy * 100).toFixed(1)}%`);
});
console.log(`\n ⚡ Performance (req/s):`);
scores.forEach(score => {
console.log(` ${score.model.name}: ${score.score.components.throughput.toFixed(1)} req/s`);
});
// Winner analysis
const winner = scores[0];
console.log(`\n🏆 WINNER: ${winner.model.name}`);
console.log(` Overall Score: ${(winner.score.overall * 100).toFixed(1)}%`);
console.log(` Best for: ${this.getBestUseCase(winner.score.components)}`);
// Summary stats
const totalQueries = modelResults[0]?.results.searchPerformance.length || 0;
const totalTools = this.tools.length;
console.log(`\n📋 Test Summary:`);
console.log(` Tools tested: ${totalTools}`);
console.log(` Search queries: ${totalQueries}`);
console.log(` Models compared: ${scores.length}`);
console.log(` Total API calls: ${modelResults.reduce((sum, mr) => sum + mr.results.totalApiCalls, 0)}`);
}
getBestUseCase(components) {
const strengths = [];
if (components.precision5 > 0.7) strengths.push("High precision");
if (components.recall5 > 0.7) strengths.push("High recall");
if (components.semanticAccuracy > 0.8) strengths.push("Semantic understanding");
if (components.throughput > 5) strengths.push("High performance");
return strengths.length > 0 ? strengths.join(", ") : "General purpose";
}
async run() {
try {
console.log("🚀 EFFICIENT EMBEDDING MODEL COMPARISON");
console.log("=====================================");
await this.loadConfig();
await this.loadTools();
console.log(`\n📋 Test Overview:`);
console.log(` Models: ${this.config.models.length}`);
console.log(` Tools: ${this.tools.length}`);
console.log(` Search queries: ${this.testQueries.length}`);
console.log(` Cache: ${this.evaluator.cache.cacheDir}`);
const modelResults = [];
for (const model of this.config.models) {
try {
const results = await this.testModel(model);
modelResults.push({ model, results });
} catch (error) {
console.error(`❌ Skipping ${model.name}: ${error.message}`);
}
}
if (modelResults.length === 0) {
throw new Error('No models completed testing successfully');
}
this.printResults(modelResults);
} catch (error) {
console.error('\n❌ Test failed:', error.message);
console.log('\nDebugging steps:');
console.log('1. Verify tools.yaml exists and contains valid tool data');
console.log('2. Check model endpoints are accessible');
console.log('3. For Ollama: ensure models are pulled and ollama serve is running');
console.log('4. For Mistral: verify AI_EMBEDDINGS_API_KEY environment variable');
}
}
}
// Execute
// Optional `--config=<path>` selects the config file. Everything after the
// first '=' is taken as the path, so paths that contain '=' are kept intact.
const configArg = process.argv.find(arg => arg.startsWith('--config='));
const configPath = configArg ? configArg.slice('--config='.length) : './embedding-test-config.json';
(async () => {
  const comparison = new EfficientEmbeddingComparison(configPath);
  await comparison.run();
})().catch((error) => {
  // Errors thrown outside run()'s own handling (e.g. by the constructor)
  // end up here; report them and mark the process as failed.
  console.error(error);
  process.exitCode = 1;
});

333
find-duplicates.mjs Normal file
View File

@@ -0,0 +1,333 @@
#!/usr/bin/env node
// find-duplicates.mjs
// Usage:
// node find-duplicates.mjs [rootDir] [--mode exact|struct] [--min-lines N] [--json]
// Example:
// node find-duplicates.mjs . --mode struct --min-lines 3
import fs from "fs";
import path from "path";
import * as url from "url";
import ts from "typescript";
const __dirname = path.dirname(url.fileURLToPath(import.meta.url));
/** -------- CLI OPTIONS -------- */
const args = process.argv.slice(2);

/**
 * Parse command-line arguments into scanner options.
 *
 * Recognised arguments:
 *  - first positional argument: root directory (default ".")
 *  - `--mode exact|struct`: comparison mode (default "struct", case-insensitive)
 *  - `--min-lines N`: minimum non-empty body lines (default 3)
 *  - `--json`: emit JSON instead of the text report
 * Unknown arguments and extra positionals are ignored.
 *
 * @param {string[]} argv Arguments without the node binary and script path.
 * @returns {{rootDir: string, mode: string, minLines: number, outputJson: boolean}}
 * @throws {Error} If `--mode` is not "exact"/"struct" or `--min-lines` is not a number.
 */
function parseCliArgs(argv) {
  const opts = { rootDir: ".", mode: "struct", minLines: 3, outputJson: false };
  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (!a.startsWith("--") && opts.rootDir === ".") {
      opts.rootDir = a;
    } else if (a === "--mode") {
      opts.mode = (argv[++i] || "struct").toLowerCase();
      if (!["exact", "struct"].includes(opts.mode)) {
        throw new Error("Invalid --mode. Use 'exact' or 'struct'.");
      }
    } else if (a === "--min-lines") {
      const raw = argv[++i] || "3";
      opts.minLines = parseInt(raw, 10);
      // A NaN threshold would make every `length < minLines` check false and
      // silently disable the filter, so reject it up front.
      if (Number.isNaN(opts.minLines)) {
        throw new Error(`Invalid --min-lines '${raw}'. Use an integer.`);
      }
    } else if (a === "--json") {
      opts.outputJson = true;
    }
  }
  return opts;
}

let rootDir = ".";
let mode = "struct"; // "exact" | "struct"
let minLines = 3;
let outputJson = false;
try {
  ({ rootDir, mode, minLines, outputJson } = parseCliArgs(args));
} catch (err) {
  console.error(err instanceof Error ? err.message : String(err));
  process.exit(1);
}
/** -------- FILE DISCOVERY -------- */
// Directory names that are never descended into.
const DEFAULT_IGNORES = new Set([
  "node_modules",
  ".git",
  ".next",
  ".vercel",
  "dist",
  "build",
  ".astro", // Astro's generated cache dir
]);
// File extensions (lower-case) that are scanned.
const VALID_EXTS = new Set([".ts", ".tsx", ".astro", ".mts", ".cts"]);

/**
 * Recursively collect the paths of all scannable files below `dir`.
 *
 * Directories listed in DEFAULT_IGNORES are skipped. Extensions are compared
 * case-insensitively, matching how the main scan lowercases them. Results
 * keep depth-first `readdir` order.
 *
 * @param {string} dir Directory to start from.
 * @returns {string[]} Paths built by joining `dir` with each entry name.
 */
function walk(dir) {
  /** @type {string[]} */
  const out = [];
  // Shared accumulator: avoids spreading large child arrays into push().
  const visitDir = (current) => {
    const entries = fs.readdirSync(current, { withFileTypes: true });
    for (const e of entries) {
      const p = path.join(current, e.name);
      if (e.isDirectory()) {
        if (DEFAULT_IGNORES.has(e.name)) continue;
        visitDir(p);
      } else if (e.isFile() && VALID_EXTS.has(path.extname(e.name).toLowerCase())) {
        out.push(p);
      }
    }
  };
  visitDir(dir);
  return out;
}
/** -------- ASTRO CODE EXTRACTION --------
 * Extract TS/JS code from:
 * - frontmatter: --- ... ---
 * - <script ...> ... </script>
 *
 * Each returned block carries `offset`: the 1-based line of the .astro source
 * on which the block's first line sits. A position on block line k therefore
 * maps to source line `k + offset - 1`.
 *
 * @param {string} source Full text of the .astro file.
 * @returns {{code:string, offset:number}[]} Frontmatter first (if any), then scripts in document order.
 */
function extractCodeFromAstro(source) {
  /** @type {{code:string, offset:number}[]} */
  const blocks = [];
  // Frontmatter (must be at top in Astro)
  // Match the FIRST pair of --- ... ---
  if (source.startsWith("---")) {
    const end = source.indexOf("\n---", 3);
    if (end !== -1) {
      // The slice begins right after the opening '---', i.e. still on source
      // line 1, so block line numbers already equal source line numbers.
      const front = source.slice(3, end + 1); // include trailing \n
      blocks.push({ code: front, offset: 1 });
    }
  }
  // <script ...> ... </script>
  const scriptRe = /<script\b[^>]*>([\s\S]*?)<\/script>/gi;
  let m;
  while ((m = scriptRe.exec(source))) {
    const code = m[1] || "";
    // The code starts after the first '>' of the match (the regex forbids '>'
    // inside the opening tag). Measuring from there keeps line numbers right
    // when the opening tag spans several lines.
    const codeStart = m.index + m[0].indexOf(">") + 1;
    blocks.push({ code, offset: indexToLine(source, codeStart) });
  }
  return blocks;
}
/** -------- UTIL: index -> 1-based line --------
 * Count the line feeds before `idx` (clamped to the text length) and return
 * the resulting 1-based line number.
 */
function indexToLine(text, idx) {
  let line = 1;
  for (let i = 0; i < idx && i < text.length; i++) {
    if (text.charCodeAt(i) === 10) line++;
  }
  return line;
}
/** -------- AST HELPERS -------- */
/**
 * Parse `code` into a TypeScript SourceFile with parent pointers set.
 * Paths ending in ".tsx" are parsed as TSX, everything else as TS.
 */
function createSourceFile(virtualPath, code) {
  const scriptKind = virtualPath.endsWith(".tsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS;
  const setParentNodes = true;
  return ts.createSourceFile(virtualPath, code, ts.ScriptTarget.Latest, setParentNodes, scriptKind);
}
// Normalize AST to a structural signature string.
// The subtree is walked in pre-order and one token is emitted per node:
//  - identifiers and literals collapse to a fixed placeholder (children skipped),
//  - property/method-like nodes become "Prop" (children still visited),
//  - every other node contributes its SyntaxKind name.
// Tokens are joined with "|".
function structuralSignature(node) {
  const Kind = ts.SyntaxKind;
  // Leaf placeholders: names and literal values are ignored.
  const leafTokens = new Map([
    [Kind.Identifier, "Id"],
    [Kind.PrivateIdentifier, "PrivId"],
    [Kind.StringLiteral, "Str"],
    [Kind.NoSubstitutionTemplateLiteral, "Str"],
    [Kind.TemplateHead, "Str"],
    [Kind.TemplateMiddle, "Str"],
    [Kind.TemplateTail, "Str"],
    [Kind.NumericLiteral, "Num"],
    [Kind.TrueKeyword, "Bool"],
    [Kind.FalseKeyword, "Bool"],
    [Kind.NullKeyword, "Nil"],
    [Kind.UndefinedKeyword, "Nil"],
  ]);
  // Node kinds that are all reported as "Prop".
  const propKinds = new Set([
    Kind.PropertyAssignment,
    Kind.ShorthandPropertyAssignment,
    Kind.MethodDeclaration,
    Kind.MethodSignature,
  ]);
  /** @type {string[]} */
  const tokens = [];
  const collect = (n) => {
    // Skip trivia: comments/whitespace are already not in AST
    const leaf = leafTokens.get(n.kind);
    if (leaf !== undefined) {
      tokens.push(leaf);
      return;
    }
    if (propKinds.has(n.kind)) {
      tokens.push("Prop");
    } else {
      tokens.push(ts.SyntaxKind[n.kind] || `K${n.kind}`);
    }
    n.forEachChild(collect);
  };
  collect(node);
  return tokens.join("|");
}
/**
 * Collect every function-like body in a source file that has at least
 * `minLines` non-empty lines.
 *
 * Handles function declarations, function expressions, arrow functions and
 * method declarations. Names come from the declaration itself, or from the
 * variable / property the function is assigned to; otherwise "(anonymous)".
 *
 * @param sf       Parsed source file (parent pointers must be set).
 * @param filePath Not used by this function.
 * @returns One record per body: name, trimmed text, structural signature,
 *          character range and 1-based start/end lines within `sf`.
 */
function getFunctionInfo(sf, filePath) {
  /** @type {Array<{
    name: string,
    bodyText: string,
    structKey: string,
    start: number,
    end: number,
    startLine: number,
    endLine: number
  }>} */
  const found = [];

  // Record one body unless it is missing or shorter than the threshold.
  const record = (nameNode, bodyNode) => {
    if (!bodyNode) return;
    const bodyText = bodyNode.getText(sf).trim();
    // min-lines filter (blank lines do not count)
    const nonEmptyLines = bodyText.split(/\r?\n/).filter(Boolean);
    if (nonEmptyLines.length < minLines) return;

    const start = bodyNode.getStart(sf);
    const end = bodyNode.getEnd();
    const startPos = sf.getLineAndCharacterOfPosition(start);
    const endPos = sf.getLineAndCharacterOfPosition(end);
    let name = "(anonymous)";
    if (nameNode && ts.isIdentifier(nameNode)) {
      name = nameNode.escapedText.toString();
    }
    found.push({
      name,
      bodyText,
      // structural signature from the body
      structKey: structuralSignature(bodyNode),
      start,
      end,
      // compiler line numbers are 0-based
      startLine: startPos.line + 1,
      endLine: endPos.line + 1,
    });
  };

  // Name node for a function expression / arrow function:
  // `const foo = () => {}`, `{ foo: function () {} }`, or its own name.
  const nameNodeFor = (fn) => {
    const parent = fn.parent;
    if (parent && ts.isVariableDeclaration(parent) && parent.name) {
      return parent.name;
    }
    if (parent && ts.isPropertyAssignment(parent) && ts.isIdentifier(parent.name)) {
      return parent.name;
    }
    return fn.name ? fn.name : null;
  };

  const scan = (node) => {
    if (ts.isFunctionDeclaration(node) && node.body) {
      record(node.name ?? null, node.body);
    } else if (ts.isFunctionExpression(node) || ts.isArrowFunction(node)) {
      const nameNode = nameNodeFor(node);
      if (node.body) record(nameNode, node.body);
    } else if (ts.isMethodDeclaration(node) && node.body) {
      record(node.name, node.body);
    }
    // Always descend so nested functions are found too.
    node.forEachChild(scan);
  };
  scan(sf);
  return found;
}
/** -------- MAIN SCAN -------- */
const files = walk(path.resolve(process.cwd(), rootDir));
/** Maps from hash -> occurrences */
const groups = new Map();
/** Helper for exact hash: SHA-1 of the text with all whitespace runs collapsed */
import crypto from "crypto";
const exactHash = (text) => crypto.createHash("sha1").update(text.replace(/\s+/g, " ").trim()).digest("hex");
for (const file of files) {
  try {
    const ext = path.extname(file).toLowerCase();
    const raw = fs.readFileSync(file, "utf8");
    const isAstro = ext === ".astro";
    // An .astro file yields one unit per extracted block; any other file is one unit.
    /** @type {Array<{virtualPath:string, code:string, lineOffset:number}>} */
    const codeUnits = isAstro
      ? extractCodeFromAstro(raw).map((b, i) => ({
          virtualPath: file + `#astro${i + 1}.ts`,
          code: b.code,
          lineOffset: b.offset || 1,
        }))
      : [{ virtualPath: file, code: raw, lineOffset: 1 }];

    for (const unit of codeUnits) {
      const sf = createSourceFile(unit.virtualPath, unit.code);
      for (const f of getFunctionInfo(sf, file)) {
        // Group key: whitespace-normalised text hash, or hash of the structural signature.
        const key =
          mode === "exact" ? exactHash(f.bodyText) : crypto.createHash("sha1").update(f.structKey).digest("hex");
        // Only .astro blocks need their line numbers shifted into file coordinates.
        const shift = isAstro ? unit.lineOffset - 1 : 0;
        const lineCount = f.endLine - f.startLine + 1;
        const previewHead = f.bodyText.split(/\r?\n/).slice(0, 5).join("\n");
        const item = {
          file,
          where: `${path.relative(process.cwd(), file)}:${f.startLine + shift}-${f.endLine + shift}`,
          name: f.name,
          lines: lineCount,
          preview: previewHead + (lineCount > 5 ? "\n..." : ""),
        };
        const bucket = groups.get(key);
        if (bucket) {
          bucket.push(item);
        } else {
          groups.set(key, [item]);
        }
      }
    }
  } catch (e) {
    // A file that cannot be read or parsed is reported and skipped.
    console.warn(`⚠️ Skipping ${file}: ${e.message}`);
  }
}
/** -------- REPORT -------- */
// Keep only groups with more than one occurrence, largest groups first.
const dupes = [...groups.entries()]
  .map(([key, arr]) => ({ key, items: arr }))
  .filter((g) => g.items.length > 1)
  .sort((a, b) => b.items.length - a.items.length);

// No process.exit() here: writes to a piped stdout can be asynchronous, and
// exiting right after console.log may cut off a large report. The script
// simply runs to its end and exits with status 0.
if (outputJson) {
  console.log(JSON.stringify({ mode, minLines, groups: dupes }, null, 2));
} else if (dupes.length === 0) {
  console.log(`✅ No duplicate functions found (mode=${mode}, min-lines=${minLines}).`);
} else {
  console.log(`\nFound ${dupes.length} duplicate group(s) (mode=${mode}, min-lines=${minLines}):\n`);
  dupes.forEach((g, i) => {
    console.log(`== Group ${i + 1} (${g.items.length} matches) ==`);
    // The first occurrence is shown as the sample for the group.
    const example = g.items[0];
    console.log(` Sample (${example.lines} lines) from ${example.where}${example.name ? ` [${example.name}]` : ""}`);
    console.log(" ---");
    console.log(indent(example.preview, " "));
    console.log(" ---");
    g.items.forEach((it) => {
      console.log(`${it.where}${it.name ? ` [${it.name}]` : ""} (${it.lines} lines)`);
    });
    console.log();
  });
}
/**
 * Prefix every "\n"-separated line of `s` with `pre`.
 * An empty string counts as one (empty) line.
 */
function indent(s, pre) {
  // Prefixing each line equals prefixing the first one and re-joining the
  // rest with "\n" + prefix.
  return pre + s.split("\n").join("\n" + pre);
}

View File

@@ -10,15 +10,14 @@
"astro": "astro"
},
"dependencies": {
"@astrojs/node": "^9.3.0",
"@aws-sdk/client-s3": "^3.864.0",
"@aws-sdk/s3-request-presigner": "^3.864.0",
"astro": "^5.12.3",
"@astrojs/node": "^9.4.3",
"astro": "^5.13.7",
"cookie": "^1.0.2",
"dotenv": "^16.4.5",
"jose": "^5.2.0",
"dotenv": "^16.6.1",
"jose": "^5.10.0",
"js-yaml": "^4.1.0",
"jsonwebtoken": "^9.0.2",
"simple-boost": "^2.0.2",
"zod": "^3.25.76"
},
"devDependencies": {

View File

@@ -1,5 +1,405 @@
# Manuell hinzufügen
# Video-Bereitstellung für ForensicPathways Knowledgebase
Hier müssen Videos, die eingebettet werden sollen, manuell abgespeichert werden.
Da diese anders lizenziert sein können, sind sie nicht Bestandteil des Open-Source-Repositorys.
Bei Bedarf bitte Kontakt aufnehmen mit mstoeck3@hs-mittweida.de.
Videos müssen manuell in diesem Verzeichnis bereitgestellt werden, da sie aufgrund unterschiedlicher Lizenzierung nicht Bestandteil des Open-Source-Git-Repositorys sind.
## 🎥 Video-Quelle und Lizenzierung
**Video-Quelle:** https://cloud.cc24.dev/f/47971 (Interner Nextcloud-Share)
**Kontakt bei Fragen:** mstoeck3@hs-mittweida.de
### Lizenzhinweise
- Videos können proprietäre Lizenzen haben
- Nicht für öffentliche Redistribution geeignet
- Nur für den internen Gebrauch in ForensicPathways
- Urheberrechte beachten bei eigenen Video-Beiträgen
## 📁 Empfohlene Verzeichnisstruktur
```
public/videos/
├── tools/ # Tool-spezifische Tutorials
│ ├── autopsy/
│ │ ├── autopsy-installation.mp4
│ │ ├── autopsy-basics.mp4
│ │ └── autopsy-advanced-analysis.webm
│ ├── volatility/
│ │ ├── volatility-setup.mp4
│ │ ├── volatility-pslist-demo.mp4
│ │ └── volatility-malfind-tutorial.webm
│ └── yara/
│ ├── yara-rules-basics.mp4
│ └── yara-advanced-hunting.mp4
├── methods/ # Methodologie-Videos
│ ├── timeline-analysis/
│ │ ├── timeline-fundamentals.mp4
│ │ └── timeline-correlation.webm
│ ├── disk-imaging/
│ │ ├── imaging-best-practices.mp4
│ │ └── imaging-verification.mp4
│ └── incident-response/
│ ├── ir-methodology.mp4
│ └── ir-documentation.webm
├── concepts/ # Konzeptuelle Erklärungen
│ ├── forensics-fundamentals/
│ │ ├── hash-functions-explained.mp4
│ │ ├── chain-of-custody.mp4
│ │ └── evidence-handling.webm
│ └── technical-concepts/
│ ├── regex-patterns.mp4
│ └── file-systems.webm
└── shared/ # Übergreifende Inhalte
├── nist-methodology.mp4
├── legal-considerations.webm
└── best-practices-overview.mp4
```
## 🦊 Firefox-Kompatibilität (KRITISCH)
### **Wichtiger Hinweis**
Videos **müssen** in Firefox-kompatiblen Formaten bereitgestellt werden, da das System automatische Firefox-Unterstützung implementiert. Nicht-kompatible Formate führen zu Fehlern!
### Unterstützte Formate
#### ✅ Empfohlene Formate (höchste Kompatibilität)
**MP4 (H.264/AVC + AAC):**
```bash
# Konvertierung mit ffmpeg
ffmpeg -i input.mov \
-c:v libx264 \
-c:a aac \
-profile:v baseline \
-level 3.0 \
-movflags +faststart \
output.mp4
```
**WebM (VP8/VP9 + Vorbis/Opus):**
```bash
# VP9 für beste Qualität
ffmpeg -i input.mov \
-c:v libvpx-vp9 \
-c:a libopus \
-b:v 1M \
-b:a 128k \
output.webm
# VP8 für breitere Kompatibilität
ffmpeg -i input.mov \
-c:v libvpx \
-c:a libvorbis \
-b:v 1M \
-b:a 128k \
output.webm
```
#### ⚠️ Fallback-Format
**OGG Theora (für ältere Firefox-Versionen):**
```bash
ffmpeg -i input.mov \
-c:v libtheora \
-c:a libvorbis \
-b:v 1M \
-b:a 128k \
output.ogv
```
### ❌ Nicht unterstützte Formate in Firefox
- **H.265/HEVC** (.mp4, .mov) - Wird nicht dekodiert
- **AV1** (.mp4, .webm) - Eingeschränkte Unterstützung
- **Proprietäre Codecs** (.wmv, .avi mit proprietären Codecs)
- **Apple-spezifische Formate** (.mov mit ProRes, .m4v)
### Multi-Format-Bereitstellung
Für maximale Kompatibilität mehrere Formate bereitstellen:
```html
<video title="Autopsy Installation Tutorial" controls>
<source src="/videos/tools/autopsy/installation.mp4" type="video/mp4">
<source src="/videos/tools/autopsy/installation.webm" type="video/webm">
<source src="/videos/tools/autopsy/installation.ogv" type="video/ogg">
<p>Ihr Browser unterstützt das Video-Element nicht.</p>
</video>
```
## 🔧 Video-Konvertierung und -Optimierung
### Qualitätsrichtlinien
#### Auflösung und Bitrate
**720p (empfohlen für Tutorials):**
```bash
ffmpeg -i input.mov \
-vf scale=1280:720 \
-c:v libx264 \
-b:v 2M \
-c:a aac \
-b:a 128k \
output.mp4
```
**1080p (für detaillierte Demonstrationen):**
```bash
ffmpeg -i input.mov \
-vf scale=1920:1080 \
-c:v libx264 \
-b:v 4M \
-c:a aac \
-b:a 128k \
output.mp4
```
**480p (mobile-optimiert):**
```bash
ffmpeg -i input.mov \
-vf scale=854:480 \
-c:v libx264 \
-b:v 1M \
-c:a aac \
-b:a 96k \
output.mp4
```
### Optimierung für Web-Streaming
#### Fast Start für progressive Download
```bash
# Metadata an Dateianfang verschieben
ffmpeg -i input.mp4 -c copy -movflags +faststart output.mp4
```
#### Keyframe-Intervall optimieren
```bash
# Keyframes alle 2 Sekunden für bessere Suche (-g 60 gilt für 30 fps; allgemein: -g = 2 × Bildrate)
ffmpeg -i input.mov \
-c:v libx264 \
-g 60 \
-keyint_min 60 \
-sc_threshold 0 \
output.mp4
```
### Batch-Konvertierung
**Alle Videos in einem Verzeichnis konvertieren:**
```bash
#!/bin/bash
# convert-all.sh
# Convert every .mov/.avi/.mkv file in the current directory to MP4 and WebM.
for file in *.mov *.avi *.mkv; do
    # A pattern without matches stays literal (e.g. "*.avi"); -f filters it out.
    if [ -f "$file" ]; then
        # Strip only the last extension, so "demo.v2.mov" becomes "demo.v2"
        # (cutting at the first dot would yield "demo").
        name="${file%.*}"
        # Create MP4
        ffmpeg -i "$file" \
            -c:v libx264 \
            -c:a aac \
            -b:v 2M \
            -b:a 128k \
            -movflags +faststart \
            "${name}.mp4"
        # Create WebM
        ffmpeg -i "$file" \
            -c:v libvpx-vp9 \
            -c:a libopus \
            -b:v 1.5M \
            -b:a 128k \
            "${name}.webm"
    fi
done
```
## 📊 Dateigröße und Performance
### Größenrichtlinien
**Streaming-optimiert:**
- 720p: 5-15 MB/Minute
- 1080p: 20-40 MB/Minute
- 480p: 2-8 MB/Minute
**Maximale Dateigröße:**
- Tutorial-Videos: < 100 MB
- Kurze Demos: < 50 MB
- Konzept-Erklärungen: < 30 MB
### Kompressionseinstellungen
**Ausgewogene Qualität/Größe:**
```bash
ffmpeg -i input.mov \
-c:v libx264 \
-preset medium \
-crf 23 \
-c:a aac \
-b:a 128k \
output.mp4
```
**Hohe Kompression (kleinere Dateien):**
```bash
ffmpeg -i input.mov \
-c:v libx264 \
-preset slow \
-crf 28 \
-c:a aac \
-b:a 96k \
output.mp4
```
## 🎬 Video-Thumbnail-Generierung
Automatische Thumbnail-Erstellung:
```bash
# Thumbnail nach 10 Sekunden
ffmpeg -i input.mp4 -ss 00:00:10 -vframes 1 -q:v 2 thumbnail.jpg
# Mehrere Thumbnails für Auswahl
ffmpeg -i input.mp4 -vf fps=1/30 thumb_%03d.jpg
```
Thumbnails speichern in:
```
public/images/video-thumbnails/
├── autopsy-installation-thumb.jpg
├── volatility-basics-thumb.jpg
└── timeline-analysis-thumb.jpg
```
## 🔍 Qualitätskontrolle
### Pre-Upload-Checkliste
**✅ Format-Kompatibilität:**
- [ ] MP4 mit H.264/AVC Video-Codec
- [ ] AAC Audio-Codec
- [ ] Fast Start aktiviert (`movflags +faststart`)
- [ ] Keyframe-Intervall ≤ 2 Sekunden
**✅ Firefox-Test:**
- [ ] Video lädt in Firefox ohne Fehler
- [ ] Audio synchron mit Video
- [ ] Controls funktionieren
- [ ] Seeking funktioniert flüssig
**✅ Technische Qualität:**
- [ ] Auflösung angemessen (720p+ für GUI-Demos)
- [ ] Audio klar und verständlich
- [ ] Keine Kompressionsartefakte
- [ ] Dateigröße < 100 MB
**✅ Inhaltliche Qualität:**
- [ ] Beschreibender Dateiname
- [ ] Angemessene Länge (< 10 Minuten für Tutorials)
- [ ] Klare Demonstration der Funktionalität
- [ ] Sichtbare UI-Elemente
### Automated Testing
```bash
#!/bin/bash
# video-check.sh - basic validation
# '**' only recurses into subdirectories when globstar is enabled; nullglob
# makes the loop run zero times when no .mp4 file exists.
shopt -s globstar nullglob
for video in public/videos/**/*.mp4; do
    echo "Checking: $video"
    # Check the video codec
    format=$(ffprobe -v quiet -select_streams v:0 -show_entries stream=codec_name -of csv=p=0 "$video")
    if [ "$format" != "h264" ]; then
        echo "❌ Wrong codec: $format (should be h264)"
    fi
    # Check the file size
    size=$(stat -c%s "$video")
    if [ "$size" -gt 104857600 ]; then # 100MB
        echo "⚠️ Large file: $((size / 1048576))MB"
    fi
    echo "$video validated"
done
```
## 🚨 Troubleshooting
### Häufige Firefox-Probleme
**Problem: Video lädt nicht**
```
Lösung:
1. Codec überprüfen: ffprobe -v quiet -show_format -show_streams video.mp4
2. Fallback-Format hinzufügen
3. Fast Start aktivieren
```
**Problem: Audio/Video out of sync**
```
Lösung:
ffmpeg -i input.mp4 -c:v copy -c:a aac -avoid_negative_ts make_zero output.mp4
```
**Problem: Seeking funktioniert nicht**
```
Lösung:
ffmpeg -i input.mp4 -c copy -movflags +faststart output.mp4
```
### Performance-Probleme
**Problem: Lange Ladezeiten**
```
Lösungsansätze:
1. Bitrate reduzieren
2. Auflösung verringern
3. Keyframe-Intervall optimieren
4. Progressive Download aktivieren
```
**Problem: Hohe Bandbreiten-Nutzung**
```
Lösungsansätze:
1. Adaptive Streaming implementieren
2. Multiple Qualitätsstufen bereitstellen
3. preload="metadata" verwenden
```
## 📋 Deployment-Checkliste
**Nach Video-Upload:**
1. **✅ Dateistruktur prüfen**
```bash
ls -la public/videos/tools/autopsy/
```
2. **✅ Permissions setzen**
```bash
find public/videos -type f -name '*.mp4' -exec chmod 644 {} +
```
3. **✅ Artikel-Verlinkung testen**
- Video-Tags in Markdown funktionieren
- Responsive Container werden generiert
- Thumbnails laden korrekt
4. **✅ Browser-Kompatibilität**
- Firefox: Codec-Support prüfen
- Chrome: Performance testen
- Safari: Fallback-Formate testen
- Mobile: Touch-Controls funktionieren
5. **✅ Build-System**
```bash
npm run build
# Keine Video-bezogenen Fehler in Console
```
Bei Problemen kontaktieren Sie mstoeck3@hs-mittweida.de mit:
- Browser und Version
- Video-Dateiname und -pfad
- Fehlermeldungen aus Browser-Console
- Screenshot des Problems

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
---
// src/components/ContributionButton.astro - CLEANED: Removed duplicate auth script
// src/components/ContributionButton.astro
export interface Props {
type: 'edit' | 'new' | 'write';
toolName?: string;

View File

@@ -1,5 +1,5 @@
---
import { createToolSlug } from '../utils/toolHelpers.js';
import { createToolSlug } from '../utils/clientUtils.js';
export interface Props {
toolName: string;

View File

@@ -4,7 +4,6 @@ import { getToolsData } from '../utils/dataService.js';
const data = await getToolsData();
const scenarios = data.scenarios || [];
// Configuration
const maxDisplayed = 9;
const displayedScenarios = scenarios.slice(0, maxDisplayed);
---

View File

@@ -1,4 +1,5 @@
---
//src/components/ToolFilters.astro
import { getToolsData } from '../utils/dataService.js';
const data = await getToolsData();
@@ -54,7 +55,7 @@ const sortedTags = Object.entries(tagFrequency)
<!-- Semantic Search Toggle - Inline -->
<div id="semantic-search-container" class="semantic-search-inline hidden">
<label class="semantic-toggle-wrapper" title="Semantische Suche verwendet Embeddings. Dadurch kann mit natürlicher Sprache/Begriffen gesucht werden, die Ergebnisse richten sich nach der euklidischen Distanz.">
<label class="semantic-toggle-wrapper" title="Semantische Suche verwendet Embeddings. Dadurch kann mit natürlicher Sprache/Begriffen gesucht werden, die Ergebnisse richten sich nach der cosinus-Distanz.">
<input type="checkbox" id="semantic-search-enabled" disabled/>
<div class="semantic-checkbox-custom"></div>
<span class="semantic-toggle-label">
@@ -305,8 +306,7 @@ const sortedTags = Object.entries(tagFrequency)
</div>
<script define:vars={{ toolsData: data.tools, tagFrequency, sortedTags }}>
window.toolsData = toolsData;
window.toolsData = toolsData;
document.addEventListener('DOMContentLoaded', () => {
const elements = {
searchInput: document.getElementById('search-input'),
@@ -358,7 +358,7 @@ const sortedTags = Object.entries(tagFrequency)
try {
const res = await fetch('/api/ai/embeddings-status');
const { embeddings } = await res.json();
semanticSearchAvailable = embeddings?.enabled && embeddings?.initialized;
semanticSearchAvailable = embeddings?.initialized;
if (semanticSearchAvailable) {
elements.semanticContainer.classList.remove('hidden');
@@ -392,6 +392,13 @@ const sortedTags = Object.entries(tagFrequency)
return null;
}
}
/**
 * Report whether a tool has a usable `projectUrl`.
 *
 * @param tool Tool entry; only `projectUrl` is read.
 * @returns true when `projectUrl` is a string with non-whitespace content.
 *   Missing, null, empty, whitespace-only and non-string values give false.
 */
function isToolHosted(tool) {
  const url = tool.projectUrl;
  // The typeof check covers undefined/null and guards .trim() against
  // non-string values.
  return typeof url === "string" && url.trim() !== "";
}
function toggleCollapsible(toggleBtn, content, storageKey) {
const isCollapsed = toggleBtn.getAttribute('data-collapsed') === 'true';
@@ -432,13 +439,6 @@ const sortedTags = Object.entries(tagFrequency)
}
}
function isToolHosted(tool) {
return tool.projectUrl !== undefined &&
tool.projectUrl !== null &&
tool.projectUrl !== "" &&
tool.projectUrl.trim() !== "";
}
function initTagCloud() {
const visibleCount = 20;
elements.tagCloudItems.forEach((item, index) => {

View File

@@ -1,6 +1,6 @@
---
//src/components/ToolMatrix.astro
import { getToolsData } from '../utils/dataService.js';
import ShareButton from './ShareButton.astro';
const data = await getToolsData();

View File

@@ -1,46 +0,0 @@
---
// src/components/Video.astro - SIMPLE responsive video component
// Renders a <video> element inside a container whose CSS class encodes the
// aspect ratio; a title block is shown only when a non-default title is given.
export interface Props {
src: string;
title?: string;
controls?: boolean;
autoplay?: boolean;
muted?: boolean;
loop?: boolean;
aspectRatio?: '16:9' | '4:3' | '1:1';
preload?: 'none' | 'metadata' | 'auto';
}
// Every prop except `src` falls back to the default given here.
const {
src,
title = 'Video',
controls = true,
autoplay = false,
muted = false,
loop = false,
aspectRatio = '16:9',
preload = 'metadata'
} = Astro.props;
// Turns the ratio into a class name, e.g. '16:9' -> 'aspect-16-9'.
const aspectClass = `aspect-${aspectRatio.replace(':', '-')}`;
---
<div class={`video-container ${aspectClass}`}>
<video
src={src}
controls={controls}
autoplay={autoplay}
muted={muted}
loop={loop}
preload={preload}
style="width: 100%; height: 100%;"
data-video-title={title}
>
<p>Your browser does not support the video element.</p>
</video>
{title !== 'Video' && (
<div class="video-metadata">
<div class="video-title">{title}</div>
</div>
)}
</div>

View File

@@ -1,203 +1,263 @@
// src/config/prompts.ts - Enhanced with phase completion reasoning
// src/config/prompts.ts
/**
 * Scoring rubric interpolated into the prompts so that relevance values use
 * one shared 0–100 integer scale. The range separators are required: without
 * them the bands read as single numbers ("5565") and carry no meaning.
 */
const RELEVANCE_RUBRIC = `
TASK RELEVANCE (INTEGER 0–100, NO %):
- 55–65 = Basis/ok
- 66–75 = Gut geeignet
- 76–85 = Sehr gut geeignet
- >85 = Nur bei nahezu perfekter Übereinstimmung
`.trim();
/**
 * Output-format rules appended to the selection prompts: pure JSON only,
 * exact item names, no invented items or fields.
 */
const STRICTNESS = [
  "STRICTNESS:",
  "- Output MUST be pure JSON (no prose, no code fences, no trailing commas).",
  "- Use EXACT item names as provided (casing/spelling must match).",
  "- Do NOT invent items or fields. If unsure, select fewer.",
].join("\n");
export const AI_PROMPTS = {
toolSelection: (mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number) => {
const modeInstruction = mode === 'workflow'
? 'Workflow mit 15-25 Items über alle Phasen. PFLICHT: Mindestens 40% Methoden, Rest Tools/Konzepte.'
: 'Spezifische Lösung mit 4-10 Items. PFLICHT: Mindestens 30% Methoden wenn verfügbar.';
enhancementQuestions: (input: string) => {
return `Sie sind DFIR-Experte. Ein Nutzer beschreibt unten ein Szenario/Problem.
return `Du bist ein DFIR-Experte. Wähle die BESTEN Items aus dem vorgefilterten Set.
ZIEL:
- Stellen Sie NUR dann 13 präzise Rückfragen, wenn entscheidende forensische Lücken die weitere Analyse/Toolauswahl PHASENREIHENFOLGE oder EVIDENCE-STRATEGIE wesentlich beeinflussen würden.
- Wenn ausreichend abgedeckt: Geben Sie eine leere Liste [] zurück.
AUSWAHLMETHODE: ${selectionMethod}
${selectionMethod === 'embeddings_candidates' ?
'✓ Semantisch relevante Items bereits vorgefiltert\n✓ Wähle die BESTEN für die konkrete Aufgabe' :
'✓ Vollständige Datenbank verfügbar\n✓ Wähle die relevantesten Items'}
PRIORITÄT DER THEMEN (in dieser Reihenfolge prüfen):
1) Available Evidence & Artefakte (z.B. RAM-Dump, Disk-Image, Logs, PCAP, Registry, Cloud/Audit-Logs)
2) Scope/Systems (konkrete Plattformen/Assets/Identitäten/Netzsegmente)
3) Investigation Objectives (Ziele: IOC-Extraktion, Timeline, Impact, Attribution)
4) Timeline/Timeframe (kritische Zeitfenster, Erhalt flüchtiger Daten)
5) Legal & Compliance (Chain of Custody, Aufbewahrung, DSGVO/Branchenvorgaben)
6) Technical Constraints (Ressourcen, Zugriffsrechte, Tooling/EDR)
FRAGEN-QUALITÄT:
- Forensisch spezifisch und entscheidungsrelevant (keine Allgemeinplätze).
- Eine Frage pro Thema, keine Dopplungen.
- Antwortbar vom Nutzer (keine Spekulation, keine “Beweise senden”-Aufforderungen).
- Maximal 18 Wörter, endet mit "?".
VALIDIERUNG:
- Stellen Sie NUR Fragen zu Themen, die im Nutzertext NICHT hinreichend konkret beantwortet sind (keine Wiederholung bereits gegebener Details).
- Wenn alle priorisierten Themen ausreichend sind → [].
ANTWORTFORMAT (NUR JSON, KEIN ZUSÄTZLICHER TEXT):
[
"präzise Frage 1?",
"präzise Frage 2?",
"präzise Frage 3?"
]
NUTZER-EINGABE:
${input}`.trim();
},
toolSelection: (mode: string, userQuery: string, maxSelectedItems: number) => {
const modeInstruction =
mode === 'workflow'
? 'Workflow mit 1525 Items über alle Phasen. Pflicht: ~40% Methoden, Rest Software/Konzepte (falls verfügbar).'
: 'Spezifische Lösung mit 410 Items. Pflicht: ≥30% Methoden (falls verfügbar).';
return `Du bist DFIR-Experte. Wähle die BESTEN Items aus dem bereits semantisch vorgefilterten Set für die konkrete Aufgabe.
${modeInstruction}
ANFRAGE: "${userQuery}"
VERFÜGBARE ITEM-TYPEN:
- TOOLS (type: "software"/"method") → praktische Anwendungen und Vorgehensweisen
- KONZEPTE (type: "concept") → theoretisches Wissen und Methodiken
ITEM-TYPEN:
- TOOLS (type: "software" | "method")
- KONZEPTE (type: "concept")
AUSWAHLSTRATEGIE:
1. **ERSTE PRIORITÄT: Relevanz zur Anfrage**
- Direkt anwendbar auf das Problem
- Löst die Kernherausforderung
2. **ZWEITE PRIORITÄT: Ausgewogene Mischung**
- Tools/Methoden für praktische Umsetzung → selectedTools
- Konzepte für methodisches Verständnis → selectedConcepts
- WICHTIG: Auch Konzepte auswählen, nicht nur Tools!
3. **QUALITÄT > QUANTITÄT**
- Lieber weniger perfekte Items als viele mittelmäßige
- Jedes Item muss begründbar sein
AUSWAHLPRINZIPIEN:
1) Relevanz zur Anfrage (direkt anwendbar, adressiert Kernproblem)
2) Ausgewogene Mischung (Praxis: selectedTools; Methodik: selectedConcepts)
3) Qualität > Quantität (lieber weniger, dafür passgenau)
4) Keine Erfindungen. Wenn etwas nicht passt, wähle weniger.
AUSWAHLREGELN:
- Wähle ${mode === 'workflow' ? '15-25' : '4-10'} Items total, max ${maxSelectedItems}
- BEIDE Arrays füllen: selectedTools UND selectedConcepts
- Mindestens 1-2 Konzepte auswählen für methodische Fundierung
- Tools: 40% Methoden (type="method"), Rest Software (type="software")
- Wähle ${mode === 'workflow' ? '1525' : '410'} Items total (max ${maxSelectedItems})
- Fülle BEIDE Arrays: selectedTools UND selectedConcepts
- Mindestens 12 Konzepte (falls verfügbar)
- Bevorzugt ~40% Methoden (Workflow) bzw. ≥30% Methoden (Tool-Modus), sofern vorhanden
- Sortiere selectedTools grob nach Eignung (bestes zuerst)
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
Skalenhinweis (für spätere Schritte einheitlich):
${RELEVANCE_RUBRIC}
${STRICTNESS}
ANTWORT (NUR JSON):
{
"selectedTools": ["ToolName1", "MethodName1", ...],
"selectedConcepts": ["ConceptName1", "ConceptName2", ...],
"reasoning": "Kurze Begründung mit Erwähnung der Tool/Konzept-Balance"
"selectedTools": ["ToolName1", "MethodName1", "..."],
"selectedConcepts": ["ConceptName1", "ConceptName2", "..."],
"reasoning": "Sehr kurz: Balance/Abdeckung begründen"
}`;
},
toolSelectionWithData: (basePrompt: string, toolsToSend: any[], conceptsToSend: any[]) => {
return `${basePrompt}
VERFÜGBARE TOOLS (${toolsToSend.length} Items - Methoden und Software):
VERFÜGBARE TOOLS (${toolsToSend.length}):
${JSON.stringify(toolsToSend, null, 2)}
VERFÜGBARE KONZEPTE (${conceptsToSend.length} Items - theoretisches Wissen):
VERFÜGBARE KONZEPTE (${conceptsToSend.length}):
${JSON.stringify(conceptsToSend, null, 2)}
WICHTIGER HINWEIS: Wähle sowohl aus TOOLS als auch aus KONZEPTEN aus! Konzepte sind essentiell für methodische Fundierung.`;
WICHTIG:
- Wähle nur aus obigen Listen. Keine neuen Namen.
- Nutze exakte Namen. Keine Synonyme/Varianten.
Hinweis zur einheitlichen Relevanz-Skala:
${RELEVANCE_RUBRIC}
${STRICTNESS}`;
},
scenarioAnalysis: (isWorkflow: boolean, userQuery: string) => {
const analysisType = isWorkflow ? 'Szenario' : 'Problem';
const focus = isWorkflow ?
'Angriffsvektoren, betroffene Systeme, Zeitkritikalität' :
'Kernherausforderung, verfügbare Daten, methodische Anforderungen';
const focus = isWorkflow
? 'Angriffsvektoren, betroffene Systeme, Zeitkritikalität'
: 'Kernherausforderung, verfügbare Daten, methodische Anforderungen';
return `DFIR-Experte: Analysiere das ${analysisType}.
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
Fokus: ${focus}
Antwort: Fließtext ohne Listen, max 100 Wörter.`;
Antwort: Fließtext, max 100 Wörter. Keine Liste, keine Einleitung.`;
},
investigationApproach: (isWorkflow: boolean, userQuery: string) => {
const approachType = isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz';
const focus = isWorkflow ?
'Triage-Prioritäten, Phasenabfolge, Kontaminationsvermeidung' :
'Methodenauswahl, Validierung, Integration';
const focus = isWorkflow
? 'Triage-Prioritäten, Phasenabfolge, Kontaminationsvermeidung'
: 'Methodenauswahl, Validierung, Integration';
return `Entwickle einen ${approachType}.
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
Fokus: ${focus}
Antwort: Fließtext ohne Listen, max 100 Wörter.`;
Antwort: Fließtext, max 100 Wörter.`;
},
criticalConsiderations: (isWorkflow: boolean, userQuery: string) => {
const focus = isWorkflow ?
'Beweissicherung vs. Gründlichkeit, Chain of Custody' :
'Tool-Validierung, False Positives/Negatives, Qualifikationen';
const focus = isWorkflow
? 'Beweissicherung vs. Gründlichkeit, Chain of Custody'
: 'Tool-Validierung, False Positives/Negatives, Qualifikationen';
return `Identifiziere kritische Überlegungen.
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
Fokus: ${focus}
Antwort: Fließtext ohne Listen, max 100 Wörter.`;
Antwort: Fließtext, max 100 Wörter.`;
},
phaseToolSelection: (userQuery: string, phase: any, phaseTools: any[]) => {
const methods = phaseTools.filter(t => t.type === 'method');
const tools = phaseTools.filter(t => t.type === 'software');
if (phaseTools.length === 0) {
return `Keine Methoden/Tools für Phase "${phase.name}" verfügbar. Antworte mit leerem Array: []`;
}
return `Du bist ein DFIR-Experte. Wähle die 2-3 BESTEN Items für Phase "${phase.name}".
return `Wähle die 2–3 BESTEN Items für Phase "${phase.name}".
SZENARIO: "${userQuery}"
PHASE: ${phase.name} - ${phase.description || ''}
PHASE: ${phase.name} ${phase.description || ''}
VERFÜGBARE ITEMS (bereits von KI vorausgewählt):
VERFÜGBARE ITEMS:
${methods.length > 0 ? `
METHODEN (${methods.length}):
${methods.map((method: any) =>
`- ${method.name}
Typ: ${method.type}
Beschreibung: ${method.description}
Domains: ${method.domains?.join(', ') || 'N/A'}
Skill Level: ${method.skillLevel}`
${methods.map((m: any) =>
`- ${m.name}
Typ: ${m.type}
Beschreibung: ${m.description}
Domains: ${m.domains?.join(', ') || 'N/A'}
Skill Level: ${m.skillLevel}`
).join('\n\n')}
` : 'Keine Methoden verfügbar'}
${tools.length > 0 ? `
SOFTWARE TOOLS (${tools.length}):
${tools.map((tool: any) =>
`- ${tool.name}
Typ: ${tool.type}
Beschreibung: ${tool.description}
Plattformen: ${tool.platforms?.join(', ') || 'N/A'}
Skill Level: ${tool.skillLevel}`
SOFTWARE (${tools.length}):
${tools.map((t: any) =>
`- ${t.name}
Typ: ${t.type}
Beschreibung: ${t.description}
Plattformen: ${t.platforms?.join(', ') || 'N/A'}
Skill Level: ${t.skillLevel}`
).join('\n\n')}
` : 'Keine Software-Tools verfügbar'}
AUSWAHLREGELN FÜR PHASE "${phase.name}":
1. Wähle die 2-3 BESTEN Items für diese spezifische Phase
2. Priorisiere Items, die DIREKT für "${phase.name}" relevant sind
3. Mindestens 1 Methode wenn verfügbar, Rest Software-Tools
4. Begründe WARUM jedes Item für diese Phase optimal ist
REGELN:
1) 2–3 Items, direkt phasenrelevant; mind. 1 Methode, falls verfügbar
2) Begründung pro Item (präzise, anwendungsbezogen)
3) Verwende EXAKTE Namen aus den Listen. Keine Erfindungen.
WICHTIG: Verwende EXAKT die Namen wie oben aufgelistet (ohne Präfixe wie M1./T2.)!
${RELEVANCE_RUBRIC}
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB:
${STRICTNESS}
ANTWORT (NUR JSON):
[
{
"toolName": "Exakter Name aus der Liste oben",
"taskRelevance": 85,
"justification": "Detaillierte Begründung (60-80 Wörter) warum optimal für ${phase.name} - erkläre Anwendung, Vorteile und spezifische Relevanz",
"limitations": ["Mögliche Einschränkung für diese Phase"]
"toolName": "Exakter Name",
"taskRelevance": 0,
"justification": "60–80 Wörter zur phasenspezifischen Eignung",
"limitations": ["Optionale spezifische Einschränkung"]
}
]`;
},
toolEvaluation: (userQuery: string, tool: any, rank: number, taskRelevance: number) => {
toolEvaluation: (userQuery: string, tool: any, rank: number) => {
const itemType = tool.type === 'method' ? 'Methode' : 'Tool';
return `Erkläre die Anwendung dieser/dieses ${itemType}.
return `Bewerte diese/diesen ${itemType} ausschließlich bzgl. des PROBLEMS.
PROBLEM: "${userQuery}"
${itemType.toUpperCase()}: ${tool.name} (${taskRelevance}% Eignung)
${itemType.toUpperCase()}: ${tool.name}
TYP: ${tool.type}
Bereits als Rang ${rank} bewertet.
ANWEISUNGEN:
- Nur vorhandene Metadaten nutzen (keine Annahmen, keine Websuche).
- "taskRelevance" als GANZZAHL 0–100 nach einheitlicher Skala vergeben.
- Realistische Scores i.d.R. 60–80, >85 nur bei nahezu perfektem Fit.
- Keine Texte außerhalb des JSON.
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB DER JSON-STRUKTUR:
${RELEVANCE_RUBRIC}
${STRICTNESS}
ANTWORT (NUR JSON):
{
"detailed_explanation": "Warum und wie einsetzen",
"implementation_approach": "Konkrete Schritte",
"pros": ["Vorteil 1", "Vorteil 2"],
"limitations": ["Einschränkung 1"],
"alternatives": "Alternative Ansätze"
"alternatives": "Kurz zu sinnvollen Alternativen",
"taskRelevance": 0
}`;
},
backgroundKnowledgeSelection: (userQuery: string, mode: string, selectedToolNames: string[], availableConcepts: any[]) => {
return `Wähle 2-4 relevante Konzepte.
return `Wähle 2–4 Konzepte, die das Verständnis/den Einsatz der ausgewählten Tools verbessern.
${mode === 'workflow' ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
AUSGEWÄHLTE TOOLS: ${selectedToolNames.join(', ')}
VERFÜGBARE KONZEPTE (${availableConcepts.length} KI-kuratiert):
${availableConcepts.map((c: any) =>
`- ${c.name}: ${c.description}...`
).join('\n')}
VERFÜGBARE KONZEPTE (${availableConcepts.length}):
${availableConcepts.map((c: any) => `- ${c.name}: ${c.description}...`).join('\n')}
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB DER JSON-STRUKTUR:
REGELN:
- Nur Konzepte aus obiger Liste wählen.
- Relevanz kurz und konkret begründen.
${STRICTNESS}
ANTWORT (NUR JSON):
[
{
"conceptName": "Name",
"relevance": "Warum kritisch für Methodik"
"conceptName": "Exakter Name",
"relevance": "Warum dieses Konzept hier methodisch wichtig ist"
}
]`;
},
@@ -209,27 +269,14 @@ ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB DER JSON-S
tool: any,
completionContext: string
) => {
return `Du bist ein DFIR-Experte. Erkläre warum dieses Tool nachträglich zur Vervollständigung hinzugefügt wurde.
KONTEXT DER NACHTRÄGLICHEN ERGÄNZUNG:
- Ursprüngliche KI-Auswahl war zu spezifisch/eng gefasst
- Phase "${phase.name}" war unterrepräsentiert in der initialen Auswahl
- Semantische Suche fand zusätzlich relevante Tools für diese Phase
- Tool wird nachträglich hinzugefügt um Vollständigkeit zu gewährleisten
return `Begründe knapp die Nachergänzung für Phase "${phase.name}".
URSPRÜNGLICHE ANFRAGE: "${originalQuery}"
PHASE ZU VERVOLLSTÄNDIGEN: ${phase.name} - ${phase.description || ''}
PHASE: ${phase.name} ${phase.description || ''}
HINZUGEFÜGTES TOOL: ${selectedToolName} (${tool.type})
TOOL-BESCHREIBUNG: ${tool.description}
KONTEXT: ${completionContext}
BEGRÜNDUNGSKONTEXT: ${completionContext}
Erstelle eine präzise Begründung (max. 40 Wörter), die erklärt:
1. WARUM dieses Tool nachträglich hinzugefügt wurde
2. WIE es die ${phase.name}-Phase ergänzt
3. DASS es die ursprünglich zu spezifische Auswahl erweitert
Antwort: Prägnanter Fließtext, knappe Begründung für Nachergänzung. Vermeide Begriffe wie "Das Tool" und gib keinen einleitenden Text wie "Begründung (40 Wörter):" an.`;
Antwort: Prägnanter Fließtext, max 40 Wörter, keine Einleitung, keine Liste.`;
},
generatePhaseCompletionPrompt(
@@ -238,47 +285,48 @@ Antwort: Prägnanter Fließtext, knappe Begründung für Nachergänzung. Vermeid
candidateTools: any[],
candidateConcepts: any[]
): string {
return `Du bist ein DFIR-Experte. Die initiale KI-Auswahl war zu spezifisch - die Phase "${phase.name}" ist unterrepräsentiert.
return `Unterrepräsentierte Phase: "${phase.name}". Ergänze 1–2 passende Items aus der semantischen Nachsuche.
KONTEXT: Die Hauptauswahl hat zu wenige Tools für "${phase.name}" identifiziert. Wähle jetzt ergänzende Tools aus semantischer Nachsuche.
ORIGINALANFRAGE: "${originalQuery}"
PHASE: ${phase.name} – ${phase.description || ''}
ORIGINAL ANFRAGE: "${originalQuery}"
UNTERREPRÄSENTIERTE PHASE: ${phase.name} - ${phase.description || ''}
SEMANTISCH GEFUNDENE KANDIDATEN für Nachergänzung:
VERFÜGBARE TOOLS (${candidateTools.length}):
${candidateTools.map((tool: any) => `
- ${tool.name} (${tool.type})
Beschreibung: ${tool.description}
Skill Level: ${tool.skillLevel}
KANDIDATEN — TOOLS (${candidateTools.length}):
${candidateTools.map((t: any) => `
- ${t.name} (${t.type})
Beschreibung: ${t.description}
Skill Level: ${t.skillLevel}
`).join('')}
${candidateConcepts.length > 0 ? `
VERFÜGBARE KONZEPTE (${candidateConcepts.length}):
${candidateConcepts.map((concept: any) => `
- ${concept.name}
Beschreibung: ${concept.description}
KANDIDATEN — KONZEPTE (${candidateConcepts.length}):
${candidateConcepts.map((c: any) => `
- ${c.name}
Beschreibung: ${c.description}
`).join('')}
` : ''}
AUSWAHLREGELN FÜR NACHERGÄNZUNG:
1. Wähle 1-2 BESTE Methoden/Tools die die ${phase.name}-Phase optimal ergänzen
2. Methoden/Tools müssen für die ursprüngliche Anfrage relevant sein
3. Ergänzen, nicht ersetzen - erweitere die zu spezifische Erstauswahl
REGELN:
- Wähle 1–2 Tools/Methoden, die ${phase.name} sinnvoll ergänzen (keine Ersetzung).
- Nur aus obigen Kandidaten wählen; exakte Namen verwenden.
- Kurze Begründung, warum diese Ergänzung nötig ist.
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
Skalenhinweis (einheitlich):
${RELEVANCE_RUBRIC}
${STRICTNESS}
ANTWORT (NUR JSON):
{
"selectedTools": ["ToolName1", "ToolName2"],
"selectedConcepts": ["ConceptName1"],
"completionReasoning": "Kurze Erklärung warum diese Nachergänzung für ${phase.name} notwendig war"
"completionReasoning": "Kurze Erklärung zur Ergänzung der ${phase.name}-Phase"
}`;
},
finalRecommendations: (isWorkflow: boolean, userQuery: string, selectedToolNames: string[]) => {
const focus = isWorkflow ?
'Workflow-Schritte, Best Practices, Objektivität' :
'Methodische Überlegungen, Validierung, Qualitätssicherung';
const focus = isWorkflow
? 'Knappe Workflow-Schritte & Best Practices; neutral formulieren'
: 'Methodische Überlegungen, Validierung, Qualitätssicherung';
return `Erstelle ${isWorkflow ? 'Workflow-Empfehlung' : 'methodische Überlegungen'}.
@@ -286,33 +334,31 @@ ${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
AUSGEWÄHLT: ${selectedToolNames.join(', ')}${selectedToolNames.length > 5 ? '...' : ''}
Fokus: ${focus}
Antwort: Fließtext ohne Listen, max ${isWorkflow ? '100' : '80'} Wörter.`;
Antwort: Fließtext, max ${isWorkflow ? '100' : '80'} Wörter. Keine Liste.`;
}
} as const;
export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number): string;
export function getPrompt(key: 'enhancementQuestions', input: string): string;
export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, maxSelectedItems: number): string;
export function getPrompt(key: 'toolSelectionWithData', basePrompt: string, toolsToSend: any[], conceptsToSend: any[]): string;
export function getPrompt(key: 'scenarioAnalysis', isWorkflow: boolean, userQuery: string): string;
export function getPrompt(key: 'investigationApproach', isWorkflow: boolean, userQuery: string): string;
export function getPrompt(key: 'criticalConsiderations', isWorkflow: boolean, userQuery: string): string;
export function getPrompt(key: 'phaseToolSelection', userQuery: string, phase: any, phaseTools: any[]): string;
export function getPrompt(key: 'toolEvaluation', userQuery: string, tool: any, rank: number, taskRelevance: number): string;
export function getPrompt(key: 'toolEvaluation', userQuery: string, tool: any, rank: number): string;
export function getPrompt(key: 'backgroundKnowledgeSelection', userQuery: string, mode: string, selectedToolNames: string[], availableConcepts: any[]): string;
export function getPrompt(key: 'phaseCompletionReasoning', originalQuery: string, phase: any, selectedToolName: string, tool: any, completionContext: string): string;
export function getPrompt(key: 'finalRecommendations', isWorkflow: boolean, userQuery: string, selectedToolNames: string[]): string;
export function getPrompt(key: 'generatePhaseCompletionPrompt', originalQuery: string, phase: any, candidateTools: any[], candidateConcepts: any[]): string;
export function getPrompt(promptKey: keyof typeof AI_PROMPTS, ...args: any[]): string {
try {
const promptFunction = AI_PROMPTS[promptKey];
if (typeof promptFunction === 'function') {
return (promptFunction as (...args: any[]) => string)(...args);
} else {
console.error(`[PROMPTS] Invalid prompt key: ${promptKey}`);
return 'Error: Invalid prompt configuration';
}
} catch (error) {
console.error(`[PROMPTS] Error generating prompt ${promptKey}:`, error);
const f = AI_PROMPTS[promptKey];
if (typeof f === 'function') return (f as (...a: any[]) => string)(...args);
console.error(`[PROMPTS] Invalid prompt key: ${promptKey}`);
return 'Error: Invalid prompt configuration';
} catch (err) {
console.error(`[PROMPTS] Error generating prompt ${promptKey}:`, err);
return 'Error: Failed to generate prompt';
}
}
}

View File

@@ -121,9 +121,9 @@ vol -f memory.dmp windows.info
vol -f memory.dmp windows.pslist
```
## Video-Einbindung
## Video-Demonstration
<video src="/path/to/video.mp4" title="Volatility Demo" controls></video>
<video src="/videos/volatility-basics.mp4" title="Volatility Grundlagen Tutorial" controls preload="metadata"></video>
## Weiterführende Links
@@ -143,20 +143,181 @@ vol -f memory.dmp windows.pslist
### Video-Einbindung
Videos können direkt in Markdown eingebettet werden:
Videos können direkt in Markdown eingebettet werden und werden automatisch mit responsiven Containern erweitert:
#### Basis-Video-Einbindung
```html
<video src="/pfad/zum/video.mp4" title="Beschreibung" controls></video>
<video src="/videos/demo.mp4" title="Tool-Demonstration" controls></video>
```
Unterstützte Attribute:
- `src`: Pfad zur Videodatei
- `title`: Titel für Metadaten
- `controls`: Zeigt Player-Steuerung
- `autoplay`: Automatisches Abspielen
- `muted`: Stummgeschaltet
#### Vollständige Video-Konfiguration
```html
<video
src="/videos/advanced-tutorial.mp4"
title="Erweiterte Analysefunktionen"
controls
preload="metadata"
width="720"
height="405"
muted
poster="/images/video-thumbnail.jpg"
>
<p>Ihr Browser unterstützt das Video-Element nicht.</p>
</video>
```
#### Unterstützte Video-Attribute
**Basis-Attribute:**
- `src`: **Erforderlich** - Pfad zur Videodatei (relativ zu `/public/`)
- `title`: **Empfohlen** - Beschreibung für Metadaten und Accessibility
- `controls`: Zeigt Player-Steuerung (Standard-Empfehlung)
**Erweiterte Attribute:**
- `autoplay`: Automatisches Abspielen (nicht empfohlen für UX)
- `muted`: Stummgeschaltet (erforderlich für Autoplay in den meisten Browsern)
- `loop`: Endlosschleife
- `preload`: "none", "metadata", "auto"
- `preload`: `"none"` | `"metadata"` | `"auto"` (Standard: `"metadata"`)
- `poster`: Vorschaubild-URL
- `width`/`height`: Feste Dimensionen (optional; der responsive Container passt sich automatisch an)
**Accessibility-Attribute:**
- `aria-label`: Alternative Beschreibung
- `aria-describedby`: ID eines Elements mit detaillierter Beschreibung
#### iframe-Einbindung (YouTube, Vimeo, etc.)
```html
<iframe
src="https://www.youtube.com/embed/VIDEO_ID"
title="YouTube-Tutorial: Forensic Analysis mit Tool XYZ"
width="560"
height="315"
frameborder="0"
allowfullscreen
></iframe>
```
**iframe-Attribute:**
- `src`: **Erforderlich** - Embed-URL des Video-Dienstes
- `title`: **Erforderlich** - Beschreibung für Accessibility
- `width`/`height`: Empfohlene Dimensionen (werden responsive angepasst)
- `frameborder`: Auf `"0"` setzen für modernen Look
- `allowfullscreen`: Vollbild-Modus erlauben
- `loading`: `"lazy"` für Performance-Optimierung
### Automatische Video-Verarbeitung
Das System erweitert Video-Tags automatisch:
**Input:**
```html
<video src="/videos/demo.mp4" title="Demo" controls></video>
```
**Output (automatisch generiert):**
```html
<div class="video-container">
<video
src="/videos/demo.mp4"
title="Demo"
controls
preload="metadata"
data-video-title="Demo"
>
<p>Your browser does not support the video element.</p>
</video>
<div class="video-metadata">
<div class="video-title">Demo</div>
</div>
</div>
```
### Firefox-Kompatibilität
**Wichtiger Hinweis:** Videos müssen in Firefox-kompatiblen Formaten bereitgestellt werden:
#### Empfohlene Formate
**Primäre Formate (höchste Kompatibilität):**
- **MP4 (H.264/AVC)**: `.mp4` - Beste browserübergreifende Kompatibilität
- **WebM (VP8/VP9)**: `.webm` - Moderne Browser, gute Kompression
**Sekundäre Formate:**
- **OGG Theora**: `.ogv` - Fallback für ältere Firefox-Versionen
#### Format-Konvertierung
```bash
# Mit ffmpeg zu Firefox-kompatiblem MP4 konvertieren
ffmpeg -i input.mov -c:v libx264 -c:a aac -movflags +faststart output.mp4
# Mit ffmpeg zu WebM konvertieren
ffmpeg -i input.mov -c:v libvpx-vp9 -c:a libvorbis output.webm
```
**Multi-Format-Bereitstellung:**
```html
<video title="Demo" controls>
<source src="/videos/demo.mp4" type="video/mp4">
<source src="/videos/demo.webm" type="video/webm">
<p>Ihr Browser unterstützt das Video-Element nicht.</p>
</video>
```
#### Firefox-spezifische Probleme
Das System erkennt automatisch Firefox und implementiert Error-Recovery:
- **Automatische Fehlererkennung** für nicht unterstützte Formate
- **Fallback-Mechanismen** bei Codec-Problemen
- **Erweitertes Logging** für Debugging
**Bekannte Firefox-Probleme:**
- H.265/HEVC nicht unterstützt
- Proprietäre Codecs teilweise eingeschränkt
- MIME-Type-Sensitivität höher als bei Chrome
### Video-Datei-Management
#### Dateistruktur
```
public/
├── videos/
│ ├── tools/
│ │ ├── autopsy-basics.mp4
│ │ ├── volatility-tutorial.webm
│ │ └── yara-rules-demo.mp4
│ ├── methods/
│ │ ├── timeline-analysis.mp4
│ │ └── disk-imaging.mp4
│ └── concepts/
│ ├── hash-functions.mp4
│ └── chain-custody.mp4
└── images/
└── video-thumbnails/
├── autopsy-thumb.jpg
└── volatility-thumb.jpg
```
#### Dateigröße-Empfehlungen
- **Streaming-Qualität**: 5-15 MB/Minute (720p)
- **High-Quality Tutorials**: 20-40 MB/Minute (1080p)
- **Mobile-Optimiert**: 2-8 MB/Minute (480p)
#### Konventionen
**Dateinamen:**
- Lowercase mit Bindestrichen: `tool-autopsy-installation.mp4`
- Präfix nach Kategorie: `tool-`, `method-`, `concept-`
- Beschreibender Suffix: `-basics`, `-advanced`, `-troubleshooting`
**Video-Titel:**
- Beschreibend und suchfreundlich
- Tool/Methode im Titel erwähnen
- Skill-Level angeben: "Grundlagen", "Erweitert", "Expertenlevel"
### Code-Blöcke
@@ -173,10 +334,10 @@ import volatility.registry as registry
### Tabellen
| Plugin | Beschreibung | Beispiel |
|--------|--------------|----------|
| pslist | Prozesse auflisten | `vol -f dump.raw windows.pslist` |
| malfind | Malware finden | `vol -f dump.raw windows.malfind` |
| Plugin | Beschreibung | Video-Tutorial |
|--------|--------------|----------------|
| pslist | Prozesse auflisten | [Tutorial ansehen](/videos/pslist-demo.mp4) |
| malfind | Malware finden | [Demo](/videos/malfind-basics.mp4) |
## Artikel-Typen
@@ -274,13 +435,20 @@ Das System validiert automatisch:
- Broken Links werden geloggt (development)
- Dateinamen-Präfixe helfen bei der Organisation und Verknüpfung
### Video-Validierung
- Dateipfade auf Existenz geprüft (development)
- Bei inkompatiblen Formaten wird eine Warnung ausgegeben
- Firefox-spezifische Warnungen bei problematischen Formaten
## Deployment
1. Artikel von Nextcloud-Share herunterladen: https://cloud.cc24.dev/f/47971
2. Artikel in `src/content/knowledgebase/` ablegen (flache Struktur mit Präfixen)
3. Frontmatter nach Schema überprüfen/anpassen
4. Build-Prozess validiert automatisch
5. Artikel erscheint in Knowledgebase-Übersicht
2. Videos manuell in `public/videos/` bereitstellen (siehe `public/videos/README.md`)
3. Artikel in `src/content/knowledgebase/` ablegen (flache Struktur mit Präfixen)
4. Frontmatter nach Schema überprüfen/anpassen
5. Build-Prozess validiert automatisch
6. Artikel erscheint in Knowledgebase-Übersicht
### Troubleshooting
@@ -295,9 +463,16 @@ Das System validiert automatisch:
- Groß-/Kleinschreibung beachten
**Video lädt nicht:**
- Pfad korrekt?
- Datei im `public/` Ordner?
- Pfad korrekt? (beginnt mit `/videos/`)
- Datei im `public/videos/` Ordner?
- Unterstütztes Format? (mp4, webm, ogg)
- Firefox-kompatibel? (H.264/AVC für MP4)
**Firefox-Video-Probleme:**
- H.265/HEVC-Codecs vermeiden
- Multiple `<source>`-Tags für Fallbacks nutzen
- Browser-Console auf Codec-Fehler prüfen
- MIME-Types korrekt gesetzt?
## Beispiel-Ordnerstruktur
@@ -311,4 +486,16 @@ src/content/knowledgebase/
├── concept-hash-functions-digital-signatures.md
├── concept-regex-pattern-matching.md
└── concept-chain-of-custody.md
public/videos/
├── tools/
│ ├── autopsy-timeline-tutorial.mp4
│ ├── volatility-basics-demo.mp4
│ └── yara-rules-advanced.webm
├── methods/
│ ├── timeline-analysis-walkthrough.mp4
│ └── disk-imaging-best-practices.mp4
└── concepts/
├── hash-functions-explained.mp4
└── chain-custody-procedures.mp4
```

View File

@@ -16,7 +16,7 @@ const knowledgebaseCollection = defineCollection({
tags: z.array(z.string()).default([]),
published: z.boolean().default(true),
gated_content: z.boolean().default(false), // NEW: Gated content flag
gated_content: z.boolean().default(false),
})
});

View File

@@ -57,6 +57,44 @@ tools:
accessType: download
license: Apache-2.0
knowledgebase: false
- name: Thorium
icon: ⚛️
type: software
description: >-
CISAs portable Hybrid-Analyse-Tool für die schnelle Untersuchung von Windows-
Systemen auf bösartige Aktivitäten. Scannt mit kuratierten YARA- und
Sigma-Regeln Arbeitsspeicher, Prozesse, Dateisystem, Netzwerkverbindungen und
Systemprotokolle. Ideal für schnelle Triage im Incident Response, sowohl live als auch
auf gemounteten Images. Die Ausgabe erfolgt in strukturierten JSON-Reports.
domains:
- incident-response
- malware-analysis
phases:
- examination
- analysis
platforms:
- Linux
related_software:
- Loki
- YARA
- Velociraptor
skillLevel: intermediate
accessType: download
url: https://github.com/cisagov/thorium
license: MIT
knowledgebase: false
tags:
- cli
- triage
- fast-scan
- ioc-matching
- yara-scan
- sigma-rules
- memory-analysis
- process-analysis
- filesystem-scanning
- log-analysis
- portable
- name: Volatility 3
type: software
description: >-
@@ -119,9 +157,8 @@ tools:
Kill-Chain-Phasen. Föderierte Architektur ermöglicht selektives
Intelligence-Sharing zwischen vertrauenswürdigen Partnern durch
Tagging-System. Correlation-Engine findet automatisch Zusammenhänge
zwischen scheinbar unabhängigen Incidents. ZeroMQ-Feed pusht IOCs in
Echtzeit an Firewalls, SIEMs und Detection-Systeme für automatisierte
Response.
zwischen scheinbar unabhängigen Incidents. Integriert mit Firewalls und
SIEMs, die mit MISP-Anreicherungen gefüttert werden können.
url: https://misp-project.org/
skillLevel: intermediate
domains:
@@ -157,6 +194,7 @@ tools:
- OpenCTI
icon: 🌐
projectUrl: https://misp.cc24.dev
statusUrl: https://status.mikoshi.de/api/badge/34/status
license: AGPL-3.0
accessType: server-based
knowledgebase: true
@@ -221,18 +259,16 @@ tools:
- name: Timesketch
type: software
description: >-
Google's Collaborative Timeline-Analyse-Platform meistert Millionen von
korrelierten Events durch hochperformante
Elasticsearch-Backend-Architektur für Enterprise-Scale-Investigations.
Plaso-Integration parst automatisch über 300 verschiedene Log-Formate in
einheitliche Super-Timeline mit standardisierten Attributen. Interactive
Timeline-Explorer mit dynamischen Heatmaps, Activity-Graphen und
Statistical-Analysis für Advanced-Pattern-Recognition. Sigma-Rules werden
direkt auf Timelines angewendet für Automated-Threat-Detection,
Machine-Learning-Analyzers erkennen Login-Brute-Force, Lateral-Movement
und Data-Exfiltration-Patterns. Collaborative-Features: Shared-Sketches,
Analyst-Comments, Saved-Searches und narrative Stories für
Management-Reporting.
Googles Timeline-Analyse-Platform meistert Millionen von korrelierten
Events durch skalierende Elasticsearch-Backend-Architektur für
umfangreiche Zeitlinienanalysen. Plaso-Integration parst automatisch über
300 verschiedene Log-Formate in einheitliche Timeline mit standardisierten
Attributen. Statistische Analysen und Plugins zur Datenanreicherung wie
MaxMind GeoIP und MISP sind verfügbar. Sigma-Rules werden direkt auf
Timelines angewendet für automatisierte Detektion von Anomalien,
Login-Brute-Force, Lateral-Movement und Data-Exfiltration-Patterns.
Kollaborative Funktionen: Gemeinsames Bearbeiten, Analystenkommentare,
"Stories" für Management-Berichterstattung.
url: https://timesketch.org/
skillLevel: intermediate
domains:
@@ -269,6 +305,7 @@ tools:
- Kibana
icon: ⏱️
projectUrl: https://timesketch.cc24.dev
statusUrl: https://status.mikoshi.de/api/badge/37/status
license: Apache-2.0
accessType: server-based
- name: Wireshark
@@ -922,18 +959,20 @@ tools:
- name: Neo4j
type: software
description: >-
Native Graph-Datenbank transformiert komplexe Relationship-Data in
intuitive Visualisierungen durch Cypher-Query-Language für forensische
Pattern-Detection. Graph-Algorithmen finden kürzeste Pfade zwischen
Entities, Community-Detection identifiziert Fraud-Rings und
Criminal-Networks automatisch. Visual-Graph-Explorer macht verborgene
Multi-Hop-Connections sichtbar für Money-Laundering, Social-Engineering
und Organized-Crime-Investigations. APOC-Bibliothek bietet 450+
spezialisierte Procedures für Advanced-Analytics: Centrality-Measures,
PageRank, Clustering-Coefficients. Bloom-Visualization-Tool für
nicht-technische Stakeholder mit Point-and-Click-Exploration. Import aus
CSV, JSON und relationalen Datenbanken, Elasticsearch-Integration für
Hybrid-Search-Scenarios.
Graph-Datenbank transformiert komplexe relationale Daten in intuitive
Visualisierungen. Die SQL-ähnliche Cypher-Query-Language ist nach einer
gewissen Lernkurve intuitiv und bietet viele Möglichkeiten.
Cypher-Algorithmen finden kürzeste Pfade zwischen Entitäten, viele weitere
Automatisierungen sind möglich. Die Anwendbarkeiten sind wegen der
abstrakten Struktur von Neo4J daher unbegrenzt und in allen Domänen
(hauptsächlich Netzwerkforensik, Finanztransaktionsanalysen,
Kriminalermittlungen gegen organisiertes Verbrechen) zur Visualisierung
und ggf. auch zur Analyse einsetzbar. Die APOC-Bibliothek bietet darüber
hinaus noch zahlreiche weitere Plugins. Import aus CSV, JSON und
relationalen Datenbanken.
Leider versteckt Neo4J einige seiner Funktionen mittlerweile hinter einem
Premium-Modell und entfernt sich so vom Open-Source-Konzept.
url: https://neo4j.com/
skillLevel: intermediate
domains:
@@ -971,6 +1010,7 @@ tools:
- Linkurious
icon: 🕸️
projectUrl: https://graph.cc24.dev
statusUrl: https://status.mikoshi.de/api/badge/32/status
license: GPL-3.0 / Commercial
accessType: server-based
- name: QGIS
@@ -2141,23 +2181,25 @@ tools:
related_concepts:
- Digital Evidence Chain of Custody
- name: Aftermath
icon: 🎯
type: software
description: >-
Jamfs Open-Source-Juwel für macOS-Forensik sammelt systematisch Artefakte
ohne Full-System-Image. Optimiert für Incident-Response mit minimalem
System-Impact. Extrahiert kritische Daten: laufende Prozesse, Netzwerk-
verbindungen, installierte Software, Persistence-Mechanismen. Besonders
wertvoll: Unified-Log-Parser für System-Events, Browser-Artefakte aller
Major-Browser, Quick-Look-Thumbnails, FSEvents für Dateiaktivitäten. Die
modulare Architektur erlaubt selektive Sammlung. Output in strukturierten
JSON/CSV für einfache Analyse. Zeitstempel-Normalisierung für
Timeline-Erstellung. Unterstützt moderne macOS-Security-Features:
TCC-Permissions, Code-Signing-Status, XProtect-Matches. Die Remote-
Collection via MDM/SSH skaliert auf Unternehmensflotten. Besonders clever:
Sammlung von Cloud-Synchronisations-Artefakten (iCloud, Dropbox).
Regelmäßige Updates für neue macOS-Versionen. Die Alternative zu teuren
kommerziellen Mac-Forensik-Suiten.
Jamfs Open-Source-Software für macOS-Forensik sammelt systematisch
Artefakte, ohne zuvor ein Full-System-Image zu ziehen. Optimiert für
Incident-Response mit minimalem Systemeingriff. Extrahiert kritische
Daten: laufende Prozesse, Netzwerkverbindungen, installierte Software,
Persistenzmechanismen. Besonders wertvoll: Unified-Log-Parser für
System-Events, Browser-Artefakte aller größeren Browser,
Quick-Look-Thumbnails, FSEvents für Dateiaktivitäten. Die modulare
Architektur erlaubt selektive Sammlung. Output in strukturierten JSON/CSV
für einfache Analyse. Zeitstempel-Normalisierung für Timeline-Erstellung.
Unterstützt moderne macOS-Sicherheitsfeatures: TCC-Permissions,
Code-Signing-Status, XProtect-Matches. Die Remote-Collection via MDM/SSH
skaliert auf Unternehmensflotten. Besonders clever: Sammlung von
Cloud-Synchronisations-Artefakten (iCloud, Dropbox). Regelmäßige Updates
für neue macOS-Versionen. Die Alternative zu teuren kommerziellen
Mac-Forensik-Suiten.
url: https://github.com/jamf/aftermath/
skillLevel: intermediate
domains:
- incident-response
- static-investigations
@@ -2167,14 +2209,6 @@ tools:
- examination
platforms:
- macOS
related_software:
- osquery
- KAPE
skillLevel: intermediate
accessType: download
url: https://github.com/jamf/aftermath/
license: Apache-2.0
knowledgebase: false
tags:
- cli
- triage
@@ -2190,6 +2224,12 @@ tools:
- json-export
related_concepts:
- Digital Evidence Chain of Custody
related_software:
- osquery
- KAPE
icon: 🎯
license: Apache-2.0
accessType: download
- name: RegRipper
type: software
description: >-
@@ -2280,17 +2320,15 @@ tools:
- name: PhotoRec
type: software
description: >-
Signature-Based File-Carving-Tool rekonstruiert gelöschte Files durch
Signatur-basiertes File-Carving-Tool rekonstruiert gelöschte Daten durch
Header/Footer-Pattern-Matching unabhängig vom Dateisystem-Zustand oder
Partition-Table-Corruption. Unterstützt über 300 File-Formats: Images
(JPEG, PNG, TIFF), Documents (PDF, DOC, XLS), Archives (ZIP, RAR), Videos
(AVI, MP4) und Custom-Signatures. Read-Only-Operation gewährleistet
forensische Evidence-Integrity, funktioniert bei beschädigten,
formatierten oder korrupten Dateisystemen. Paranoid-Mode scannt jeden
einzelnen Sektor für Maximum-Recovery-Rate bei fragmentierten Files.
Konfigurierbare File-Extensions und Custom-Signature-Development für
proprietäre Formats. Companion-Software TestDisk repariert
Partition-Tables und Boot-Sectors für Filesystem-Recovery-Scenarios.
Korruption des Dateisystems. Unterstützt über 300 Datei-Formate: Bilder
(JPEG, PNG, TIFF), Dokumente (PDF, DOC, XLS), Archive (ZIP, RAR), Videos
(AVI, MP4) und selbstdefinierte Dateisignaturen. Read-Only gewährleistet
forensische Integrität, funktioniert bei beschädigten, formatierten oder
korrupten Dateisystemen. Paranoid-Mode scannt jeden einzelnen Sektor für
maximale Anzahl wiederhergestellter Daten. Integrierbar mit Software wie
TestDisk.
url: https://www.cgsecurity.org/wiki/PhotoRec
skillLevel: beginner
domains:
@@ -2299,6 +2337,7 @@ tools:
- fraud-investigation
phases:
- examination
- data-collection
platforms:
- Windows
- Linux
@@ -2689,42 +2728,6 @@ tools:
icon: 🔍
license: Proprietary
accessType: commercial
- name: FRED
type: software
description: >-
Hardware-Forensik-Workstation ermöglicht simultanes Imaging von 8
Evidenzen durch Hot-Swap-UltraBay
und integrierte Write-Blocker für SATA/IDE/USB/FireWire. Hardware-Hash-Acceleration beschleunigt
MD5/SHA-Verifizierung, Touchscreen-Konsole steuert Parallel-Processing ohne Host-System-Belastung.
Field-Kit-Version mit 4-Bay-Kapazität für Vor-Ort-Akquisition, modulares Design erlaubt
RAID-Controller-Upgrades für NAS-Forensik.
url: https://www.digitalintelligence.com/products/fred/
skillLevel: intermediate
domains:
- static-investigations
- incident-response
phases:
- data-collection
platforms:
- Hardware
tags:
- gui
- commercial
- write-blocker
- physical-copy
- scenario:disk_imaging
- multithreaded
- hardware-solution
- hot-swap
- raid-recovery
- parallel-imaging
- touch-control
- lab-equipment
related_concepts:
- Digital Evidence Chain of Custody
icon: 🖥️
license: Proprietary
accessType: commercial
- name: GraphSense
icon: 📊
type: software
@@ -3388,26 +3391,25 @@ tools:
description: >-
Die kommerzielle Blockchain-Analytics-Plattform konkurriert mit
Chainalysis durch erweiterte Compliance-Features und RegTech-Integration.
Clustering- Algorithmen identifizieren Services durch
Transaction-Pattern-Analysis: Exchanges, Darknet-Markets, Mixers,
Ransomware-Wallets. Die Compliance- Suite bietet Real-Time-Screening gegen
OFAC/EU-Sanctions-Listen. Besonders stark: DeFi-Protocol-Analysis
dekodiert Smart-Contract- Interactions, Cross-Chain-Tracking folgt Funds
über Bridges, Investigation-Tools für Complex-Money-Laundering-Schemes.
API-Integration ermöglicht Automated-AML-Workflows. Die Typology-Library
kategorisiert Verdachtsmuster nach FATF-Standards. Court-Ready-Reports
mit Blockchain- Evidence-Chain. Training-Programme zertifizieren
Investigators. Unterstützt Bitcoin, Ethereum, und 15+ andere Blockchains.
Enterprise- Deployment für Banken, Exchanges und Strafverfolgung. Der
Clustering-Algorithmen identifizieren Dienstleister durch
Transaktionsmusteranalyse: Exchanges, Darknet-Markets, Mixer,
Ransomware-Wallets. Die Compliance-Suite bietet Echtzeitüberwachung von
OFAC/EU-Sanktionslisten. Besonders stark: DeFi-Protokollanalyse dekodiert
Smart-Contract-Interaktionen, Cross-Chain-Tracking folgt Geldern über
verschiedene Blockchains hinweg, Ermittlungswerkzeuge für komplexe
Geldwäsche-Schemata. API-Integration ermöglicht programmatische
Integration. Unterstützt Bitcoin, Ethereum und 15+ andere Blockchains.
Enterprise-Deployment für Banken, Exchanges und Strafverfolgung. Der
europäische Fokus macht es zur Alternative für EU-basierte
Organisationen.
skillLevel: intermediate
url: https://www.elliptic.co
icon:
skillLevel: intermediate
domains:
- fraud-investigation
phases:
- analysis
platforms:
- Web
tags:
- blockchain-analysis
- compliance-screening
@@ -3416,11 +3418,8 @@ tools:
- cross-chain-tracking
- aml-workflows
- court-reporting
platforms:
- Web
accessType: cloud
license: Subscription
knowledgebase: false
icon:
license: Proprietary
- name: FACT
type: software
description: >-
@@ -4329,13 +4328,13 @@ tools:
- name: ADB
type: software
description: >-
Kommuniziert mit Android-Geräten für forensische Datenextraktion über USB
oder Netzwerk ohne Root-Zugriff. Erstellt logische Backups von App-Daten,
installiert forensische Analysewerkzeuge, erfasst Live-Logcats für
Incident-Response. Port-Weiterleitung ermöglicht sichere Remote-Analyse.
File-Transfer-Funktionen extrahieren Beweise direkt vom Gerät.
Shell-Access für erweiterte Forensik-Kommandos. Unverzichtbar für
Mobile-Incident-Response und App-Entwicklungs-Forensik.
Die "Android Debug Bridge" ist grundsätzlich ein Werkzeug für
Android-Entwickler, wird aber auch gern in der Mobile-Forensik genutzt.
Sie ermöglicht bei Android-Geräten forensische Datenextraktion über USB
oder Netzwerk teilweise ohne Root-Zugriff, besonders einfach bei älteren
Geräten. Erstellt logische Backups von App-Daten, installiert forensische
Analysewerkzeuge.
url: https://developer.android.com/tools/adb
skillLevel: intermediate
domains:
@@ -4523,8 +4522,8 @@ tools:
Deauth-Frames für Handshake-Erfassung. WEP-Schlüssel-Rekonstruktion in
Minuten, WPA2-PSK-Recovery mit Dictionary-Angriffen.
Rogue-Access-Point-Erkennung und Client-Probing-Analyse für
Bewegungsprofile. GPU-Beschleunigung via hashcat für moderne
Verschlüsselungsstandards.
Bewegungsprofile. Ein sehr etabliertes Tool, das immer noch seine Relevanz
vor allem auch im Pentesting besitzt.
url: https://www.aircrack-ng.org/
skillLevel: advanced
domains:
@@ -7160,6 +7159,97 @@ tools:
- kernel-analysis
related_concepts:
- Memory Forensics & Process Analysis
- name: ChipWhisperer
type: software
description: >-
Hardware-Sicherheitsanalyse-Plattform für Firmware-Extraktion aus
eingebetteten Systemen durch Stromverbrauchsanalysen. Automatisierte
Differential-Power-Analysis (DPA) und Correlation-Power-Analysis (CPA)
brechen AES-Implementierungen und extrahieren Verschlüsselungsschlüssel
aus Mikrocontrollern. Fehlereinschleusung umgeht Bootloader-Überprüfung
und Secure-Boot-Mechanismen. Besonders wertvoll für IoT-Geräte-Forensik:
Umgehung von Hardware-Security-Modulen, Clock-Glitching für
Code-Ausführungs-Übernahme, Spannungsfehler für
Authentifizierungs-Umgehung. Python-API automatisiert Angriffsszenarien,
CW-Lite/Pro-Hardware skaliert von Hobby bis professionelle
Penetrationstests. Standardplattform für Hardware-Hacking und eingebettete
Systemforensik.
url: https://www.newae.com/chipwhisperer
skillLevel: expert
domains:
- ics-forensics
- static-investigations
phases:
- data-collection
- analysis
platforms:
- Windows
- Linux
- macOS
tags:
- hardware-analysis
- side-channel-attack
- power-analysis
- fault-injection
- embedded-security
- firmware-extraction
- iot-forensics
- hardware-hacking
- encryption-bypass
- python-api
related_concepts:
- Hash Functions & Digital Signatures
related_software:
- Binwalk
- Ghidra
- ICSpector
icon: 🫓
license: GPL-3.0
accessType: download
- name: JTAG-Analyse
type: method
description: >-
Direkter Hardware-Schnittstellenzugriff auf eingebettete Systeme über
Joint Test Action Group Debug-Schnittstelle für Firmware-Extraktion und
Systemanalyse. Boundary-Scan-Verfahren identifiziert verfügbare JTAG-Pins
auch bei undokumentierten Geräten durch systematische Pin-Tests.
Flash-Speicher-Abzüge umgehen Software-Schutzmaßnahmen und extrahieren
komplette Firmware-Abbilder inklusive verschlüsselter Bereiche.
Debug-Port-Ausnutzung ermöglicht Live-Speicherzugriff,
Register-Manipulation und Code-Injection in laufende Systeme. Besonders
kritisch für IoT-Forensik: Router-Hintertüren, intelligente
Geräte-Manipulationen, Industriesteuerungsanlagen-Kompromittierungen.
Kombiniert mit Chip-Off-Techniken für maximale Datenwiederherstellung bei
sicherheitsgehärteten Geräten. Standard-Methodik für Hardware-Forensik.
url: https://www.jtag.com/what-is-jtag-testing-of-electronics-tutorial/#
skillLevel: expert
domains:
- ics-forensics
- mobile-forensics
- static-investigations
phases:
- data-collection
- examination
tags:
- hardware-interface
- firmware-extraction
- debug-access
- boundary-scan
- embedded-analysis
- iot-forensics
- flash-memory
- system-exploitation
- hardware-forensics
- pin-identification
related_concepts:
- Digital Evidence Chain of Custody
related_software:
- ChipWhisperer
- Binwalk
- OpenOCD
icon: 💳
knowledgebase: true
domains:
- id: incident-response
name: Incident Response & Breach-Untersuchung
@@ -7228,3 +7318,4 @@ scenarios:
- id: scenario:windows-registry
icon: 📜
friendly_name: Windows Registry analysieren
skill_levels: {}

View File

@@ -184,7 +184,7 @@ import BaseLayout from '../layouts/BaseLayout.astro';
<div style="display: grid; gap: 1.25rem;">
<div style="background-color: var(--color-bg-secondary); padding: 1.25rem; border-radius: 0.5rem;">
<h4 style="margin: 0 0 0.5rem 0; color: var(--color-accent);">🔍 Vorschläge</h4>
<h4 style="margin: 0 0 0.5rem 0; color: var(--color-accent);">📝 Vorschläge</h4>
<p style="margin: 0;">
Du hast eine Idee, wie wir den Hub erweitern können? Reiche deinen Vorschlag unkompliziert
über unsere <a href="/contribute#vorschlaege">/contribute</a>-Seite ein.
@@ -210,15 +210,54 @@ import BaseLayout from '../layouts/BaseLayout.astro';
<svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
<path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z" />
</svg>
GitRepository besuchen
Git-Repository besuchen
</a>
</div>
</div>
<!-- Lightning Support Section with simple-boost integration -->
<div style="background-color: var(--color-bg-secondary); padding: 1.25rem; border-radius: 0.5rem;">
<h4 style="margin: 0 0 0.5rem 0; color: var(--color-accent);">⚡ Unterstützung</h4>
<p style="margin: 0;">
Kleine Spenden zur Infrastruktur-Finanzierung nehme ich auch gerne an, wenn es sein muss.
Fragt einfach nach der Lightning-Adresse oder BTC-Adresse!
<h4 style="margin: 0 0 0.75rem 0; color: var(--color-accent); display: flex; align-items: center; gap: 0.5rem;">
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
<polygon points="13,2 3,14 12,14 11,22 21,10 12,10 13,2"/>
</svg>
⚡ Unterstützung
</h4>
<p style="margin: 0 0 1rem 0; font-size: 0.875rem; line-height: 1.5;">
Kleine Spenden zur Server-Finanzierung sind willkommen.
</p>
<div style="margin-bottom: 1rem;">
<!-- Simple EUR Payment -->
<div style="display: flex; gap: 0.75rem; align-items: center; justify-content: center; max-width: 300px; margin: 0 auto;">
<input
type="number"
id="eur-amount"
min="0.01"
step="0.01"
placeholder="0,50"
value="0.5"
style="width: 80px; padding: 0.5rem; border: 1px solid var(--color-border); border-radius: 0.375rem; font-size: 0.875rem; text-align: center;">
<span style="font-size: 0.875rem; color: var(--color-text-secondary);">€</span>
<simple-boost
id="eur-boost"
class="bootstrap"
nwc="nostr+walletconnect://4fe05896e1faf09d1902ea24ef589f65a9606d1710420a9574ce331e3c7f486b?relay=wss://nostr.mikoshi.de&secret=bdfc861fe71e8d9e375b7a2484052e92def7caf4b317d8f6537b784d3cd6eb3b"
amount="0.5"
currency="eur"
memo="ForensicPathways Unterstützung - Vielen Dank!"
style="background-color: var(--color-accent); color: white; border: none; border-radius: 0.375rem; padding: 0.5rem 1rem; font-size: 0.875rem; cursor: pointer;">
⚡ Senden
</simple-boost>
</div>
</div>
<div style="margin-top: 1rem; padding: 0.75rem; background-color: var(--color-bg); border-radius: 0.375rem; border-left: 3px solid var(--color-accent);">
<p style="margin: 0; font-size: 0.75rem; color: var(--color-text-secondary); line-height: 1.4; text-align: center;">
<strong>⚡ Lightning-Unterstützung:</strong> Betrag eingeben und senden.
Benötigt eine Lightning-Wallet wie <a href="https://getalby.com" target="_blank" rel="noopener" style="color: var(--color-accent);">Alby</a> oder
<a href="https://phoenix.acinq.co" target="_blank" rel="noopener" style="color: var(--color-accent);">Phoenix</a>.
</p>
</div>
</div>
</div>
</div>
@@ -231,4 +270,70 @@ import BaseLayout from '../layouts/BaseLayout.astro';
</p>
</div>
</section>
</BaseLayout>
</BaseLayout>
<script>
// TODO: cleanup
// Load the simple-boost web component from the bundled dependency. If that
// fails (including an error thrown while wiring the amount input), fall back
// to injecting a module <script> tag that points at the package's dist file.
void (async () => {
  try {
    await import('simple-boost');
    console.log('Simple-boost loaded successfully from local dependencies');
    setupDynamicAmounts();
  } catch (error) {
    console.error('Failed to load simple-boost:', error);

    const fallbackScript = document.createElement('script');
    fallbackScript.type = 'module';
    fallbackScript.src = '/node_modules/simple-boost/dist/simple-boost.js';
    fallbackScript.onload = () => {
      console.log('Simple-boost fallback loaded');
      setupDynamicAmounts();
    };
    fallbackScript.onerror = () => console.error('Simple-boost fallback failed');
    document.head.appendChild(fallbackScript);
  }
})();
/**
 * Keeps the `amount` attribute of the `#eur-boost` element in sync with the
 * value typed into the `#eur-amount` input.
 *
 * The attribute is refreshed on every `input` event and once more right before
 * a click on the boost element is handled. Values that are not a finite,
 * strictly positive number (empty, non-numeric, zero, negative, infinite) fall
 * back to the default of 0.5 so that an invalid amount is never handed to the
 * payment component. Does nothing when either element is missing.
 */
function setupDynamicAmounts() {
  const DEFAULT_AMOUNT_EUR = 0.5;

  const eurBoost = document.getElementById('eur-boost');
  const eurInput = document.getElementById('eur-amount') as HTMLInputElement | null;
  if (!eurBoost || !eurInput) return;

  // Single source of truth for parsing + validation; returns the amount applied.
  const syncAmount = (): number => {
    const parsed = parseFloat(eurInput.value);
    const amount = Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_AMOUNT_EUR;
    eurBoost.setAttribute('amount', amount.toString());
    return amount;
  };

  eurBoost.addEventListener('click', () => {
    const amount = syncAmount();
    console.log('EUR amount set to:', amount);
  });

  eurInput.addEventListener('input', () => {
    syncAmount();
  });
}
</script>
<style>
simple-boost {
--simple-boost-primary: var(--color-warning);
--simple-boost-primary-hover: var(--color-accent);
--simple-boost-text: white;
transition: all 0.2s ease;
}
simple-boost:hover {
transform: translateY(-1px);
box-shadow: 0 4px 8px rgba(0,0,0,0.15) !important;
}
simple-boost .simple-boost-button {
display: flex;
align-items: center;
gap: 0.5rem;
font-family: inherit;
font-size: 0.875rem;
}
/* Loading state styling */
simple-boost[loading] {
opacity: 0.7;
cursor: not-allowed;
}
</style>

View File

@@ -1,16 +1,18 @@
// src/pages/api/ai/embeddings-status.ts
import type { APIRoute } from 'astro';
import { embeddingsService } from '../../../utils/embeddings.js';
export const prerender = false;
export const GET: APIRoute = async () => {
try {
const { embeddingsService } = await import('../../../utils/embeddings.js');
await embeddingsService.waitForInitialization();
const stats = embeddingsService.getStats();
const status = stats.enabled && stats.initialized ? 'ready' :
stats.enabled && !stats.initialized ? 'initializing' : 'disabled';
const status = stats.initialized ? 'ready' :
!stats.initialized ? 'initializing' : 'disabled';
console.log(`[EMBEDDINGS-STATUS-API] Service status: ${status}, stats:`, stats);
return new Response(JSON.stringify({
success: true,
@@ -23,6 +25,8 @@ export const GET: APIRoute = async () => {
});
} catch (error) {
console.error('[EMBEDDINGS-STATUS-API] Error checking embeddings status:', error);
return new Response(JSON.stringify({
success: false,
embeddings: { enabled: false, initialized: false, count: 0 },

View File

@@ -1,28 +1,57 @@
// src/pages/api/ai/enhance-input.ts - Enhanced AI service compatibility
// src/pages/api/ai/enhance-input.ts
import type { APIRoute } from 'astro';
import { withAPIAuth } from '../../../utils/auth.js';
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';
import { aiService } from '../../../utils/aiService.js';
import { JSONParser } from '../../../utils/jsonUtils.js';
import { getPrompt } from '../../../config/prompts.js';
export const prerender = false;
/**
 * Reads a required environment variable.
 *
 * @param key Name of the variable in `process.env`.
 * @returns The variable's value.
 * @throws Error when the variable is unset or an empty string.
 */
function getEnv(key: string): string {
  const resolved = process.env[key];
  if (resolved) {
    return resolved;
  }
  throw new Error(`Missing environment variable: ${key}`);
}
// Length of one rate-limit window in milliseconds. Taken from the
// RATE_LIMIT_WINDOW_MS environment variable when it parses as an integer,
// otherwise 60 seconds.
const RATE_LIMIT_WINDOW_MS = (() => {
  const fromEnv = parseInt(process.env.RATE_LIMIT_WINDOW_MS ?? '', 10);
  return Number.isFinite(fromEnv) ? fromEnv : 60_000;
})();
const AI_ENDPOINT = getEnv('AI_ANALYZER_ENDPOINT');
const AI_ANALYZER_API_KEY = getEnv('AI_ANALYZER_API_KEY');
const AI_ANALYZER_MODEL = getEnv('AI_ANALYZER_MODEL');
// Maximum number of requests allowed per user within one window. Taken from
// the AI_RATE_LIMIT_MAX_REQUESTS environment variable when it parses as an
// integer, otherwise 5.
const RATE_LIMIT_MAX = (() => {
  const fromEnv = parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS ?? '', 10);
  return Number.isFinite(fromEnv) ? fromEnv : 5;
})();
const INPUT_MIN_CHARS = 40;
const INPUT_MAX_CHARS = 1000;
const Q_MIN_LEN = 15;
const Q_MAX_LEN = 160;
const Q_MAX_COUNT = 3;
const AI_TEMPERATURE = 0.3;
const CLEANER_TEMPERATURE = 0.0;
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
const RATE_LIMIT_WINDOW = 60 * 1000;
const RATE_LIMIT_MAX = 5;
/**
 * Fixed-window rate limiter keyed by user id.
 *
 * Starts a new window (count = 1) when the user has no entry or the stored
 * window has expired; otherwise counts the request against the current window.
 *
 * @param userId Identifier used as the key in `rateLimitStore`.
 * @returns `true` when the request is allowed, `false` once the window's
 *          request count has reached `RATE_LIMIT_MAX`.
 */
function checkRateLimit(userId: string): boolean {
  const timestamp = Date.now();
  const bucket = rateLimitStore.get(userId);

  if (!bucket || timestamp > bucket.resetTime) {
    rateLimitStore.set(userId, { count: 1, resetTime: timestamp + RATE_LIMIT_WINDOW_MS });
    return true;
  }

  const exhausted = bucket.count >= RATE_LIMIT_MAX;
  if (!exhausted) {
    bucket.count += 1;
  }
  return !exhausted;
}
/**
 * Drops every entry from `rateLimitStore` whose window has already ended,
 * so the map does not keep entries for users who stopped sending requests.
 */
function cleanupExpiredRateLimits(): void {
  const cutoff = Date.now();
  rateLimitStore.forEach((bucket, userId) => {
    if (cutoff > bucket.resetTime) {
      rateLimitStore.delete(userId);
    }
  });
}
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
/**
* Helpers
*/
function sanitizeInput(input: string): string {
return input
.replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
@@ -30,112 +59,24 @@ function sanitizeInput(input: string): string {
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
.trim()
.slice(0, 1000);
.slice(0, INPUT_MAX_CHARS);
}
/**
 * Fixed-window rate limiter keyed by user id.
 *
 * @param userId Identifier used as the key in `rateLimitStore`.
 * @returns `true` when the request is allowed (and has been counted),
 *          `false` when the current window already holds `RATE_LIMIT_MAX` requests.
 */
function checkRateLimit(userId: string): boolean {
  const currentTime = Date.now();
  const record = rateLimitStore.get(userId);

  // An existing, still-running window: count against it.
  if (record && !(currentTime > record.resetTime)) {
    if (record.count >= RATE_LIMIT_MAX) return false;
    record.count += 1;
    return true;
  }

  // No record yet, or the previous window expired: open a fresh one.
  rateLimitStore.set(userId, { count: 1, resetTime: currentTime + RATE_LIMIT_WINDOW });
  return true;
}
/**
 * Removes all entries from `rateLimitStore` whose reset time lies in the past.
 */
function cleanupExpiredRateLimits() {
  const currentTime = Date.now();
  const expiredUsers = [...rateLimitStore.entries()]
    .filter(([, record]) => currentTime > record.resetTime)
    .map(([userId]) => userId);

  for (const userId of expiredUsers) {
    rateLimitStore.delete(userId);
  }
}
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
/**
 * Builds the German-language prompt sent to the model for input enhancement.
 *
 * The prompt asks the model to check the user's scenario description against
 * seven forensic categories and to answer with JSON only: an empty list when
 * nothing critical is missing, otherwise 2-3 short follow-up questions.
 * The user's text is appended verbatim at the end of the prompt, and the
 * final string is trimmed.
 *
 * @param input User-supplied scenario description to embed in the prompt.
 * @returns The complete prompt text.
 */
function createEnhancementPrompt(input: string): string {
return `Sie sind ein DFIR-Experte mit Spezialisierung auf forensische Methodik. Ein Nutzer beschreibt ein forensisches Szenario oder Problem. Analysieren Sie die Eingabe auf Vollständigkeit für eine wissenschaftlich fundierte forensische Untersuchung.
ANALYSIEREN SIE DIESE FORENSISCHEN KATEGORIEN:
1. **Incident Context**: Was ist passiert? Welche Angriffsvektoren oder technischen Probleme liegen vor?
2. **Affected Systems**: Welche spezifischen Technologien/Plattformen sind betroffen? (Windows/Linux/ICS/SCADA/Mobile/Cloud/Network Infrastructure)
3. **Available Evidence**: Welche forensischen Datenquellen stehen zur Verfügung? (RAM-Dumps, Disk-Images, Log-Files, Network-Captures, Registry-Hives)
4. **Investigation Objectives**: Was soll erreicht werden? (IOC-Extraktion, Timeline-Rekonstruktion, Attribution, Impact-Assessment)
5. **Timeline Constraints**: Wie zeitkritisch ist die Untersuchung?
6. **Legal & Compliance**: Rechtliche Anforderungen, Chain of Custody, Compliance-Rahmen (DSGVO, sector-specific regulations)
7. **Technical Constraints**: Verfügbare Ressourcen, Skills, Infrastrukturbeschränkungen
WENN die Beschreibung alle kritischen forensischen Aspekte abdeckt: Geben Sie eine leere Liste [] zurück.
WENN wichtige forensische Details fehlen: Formulieren Sie 2-3 präzise Fragen, die die kritischsten Lücken für eine wissenschaftlich fundierte forensische Analyse schließen.
QUALITÄTSKRITERIEN FÜR FRAGEN:
- Forensisch spezifisch, nicht allgemein (NICHT: "Mehr Details?")
- Methodisch relevant (NICHT: "Wann passierte das?")
- Priorisiert nach Auswirkung auf die forensische Untersuchungsqualität
- Die Frage soll maximal 20 Wörter umfassen
ANTWORTFORMAT (NUR JSON, KEIN ZUSÄTZLICHER TEXT):
[
"spezifische Frage 1?",
"spezifische Frage 2?",
"spezifische Frage 3?"
]
NUTZER-EINGABE:
${input}
`.trim();
}
async function callAIService(prompt: string): Promise<Response> {
const endpoint = AI_ENDPOINT;
const apiKey = AI_ANALYZER_API_KEY;
const model = AI_ANALYZER_MODEL;
let headers: Record<string, string> = {
'Content-Type': 'application/json'
};
if (apiKey) {
headers['Authorization'] = `Bearer ${apiKey}`;
console.log('[ENHANCE API] Using API key authentication');
} else {
console.log('[ENHANCE API] No API key - making request without authentication');
}
const requestBody = {
model,
messages: [{ role: 'user', content: prompt }],
max_tokens: 300,
temperature: 0.7,
top_p: 0.9,
frequency_penalty: 0.2,
presence_penalty: 0.1
};
return fetch(`${endpoint}/v1/chat/completions`, {
method: 'POST',
headers,
body: JSON.stringify(requestBody)
});
/**
 * Strips a surrounding Markdown code fence (```json ... ``` or ``` ... ```)
 * from a model response so the remainder can be parsed as JSON.
 *
 * The input is trimmed before the fence patterns are applied: the opening
 * fence is matched with a start-of-string anchor, so a response that begins
 * with a newline or spaces would otherwise keep its opening fence.
 *
 * @param s Raw text returned by the model.
 * @returns The text without leading/trailing fences and surrounding whitespace;
 *          unfenced input is returned trimmed.
 */
function stripJsonFences(s: string): string {
  return s
    .trim()
    .replace(/^```json\s*/i, '')
    .replace(/^```\s*/i, '')
    .replace(/\s*```\s*$/, '')
    .trim();
}
/**
* Handler
*/
export const POST: APIRoute = async ({ request }) => {
try {
const authResult = await withAPIAuth(request, 'ai');
if (!authResult.authenticated) {
return createAuthErrorResponse();
}
const userId = authResult.userId;
const auth = await withAPIAuth(request, 'ai');
if (!auth.authenticated) return createAuthErrorResponse();
const userId = auth.userId;
if (!checkRateLimit(userId)) {
return apiError.rateLimit('Enhancement rate limit exceeded');
@@ -144,79 +85,53 @@ export const POST: APIRoute = async ({ request }) => {
const body = await request.json();
const { input } = body;
if (!input || typeof input !== 'string' || input.length < 40) {
return apiError.badRequest('Input too short for enhancement (minimum 40 characters)');
if (!input || typeof input !== 'string' || input.length < INPUT_MIN_CHARS) {
return apiError.badRequest(`Input too short for enhancement (minimum ${INPUT_MIN_CHARS} characters)`);
}
const sanitizedInput = sanitizeInput(input);
if (sanitizedInput.length < 40) {
if (sanitizedInput.length < INPUT_MIN_CHARS) {
return apiError.badRequest('Input too short after sanitization');
}
const systemPrompt = createEnhancementPrompt(sanitizedInput);
const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
const aiResponse = await enqueueApiCall(() => callAIService(systemPrompt), taskId);
const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`;
const questionsPrompt = getPrompt('enhancementQuestions', sanitizedInput);
if (!aiResponse.ok) {
const errorText = await aiResponse.text();
console.error('[ENHANCE API] AI enhancement error:', errorText, 'Status:', aiResponse.status);
return apiServerError.unavailable('Enhancement service unavailable');
}
console.log(`[ENHANCE-API] Processing enhancement request for user: ${userId}`);
const aiData = await aiResponse.json();
const aiContent = aiData.choices?.[0]?.message?.content;
const aiResponse = await enqueueApiCall(
() => aiService.callAI(questionsPrompt, { temperature: AI_TEMPERATURE }),
taskId
);
if (!aiContent) {
if (!aiResponse?.content) {
return apiServerError.unavailable('No enhancement response');
}
let questions;
try {
const cleanedContent = aiContent
.replace(/^```json\s*/i, '')
.replace(/\s*```\s*$/, '')
.trim();
questions = JSON.parse(cleanedContent);
if (!Array.isArray(questions)) {
throw new Error('Response is not an array');
}
questions = questions
.filter(q => typeof q === 'string' && q.length > 20 && q.length < 200)
.filter(q => q.includes('?'))
.filter(q => {
const forensicsTerms = ['forensisch', 'log', 'dump', 'image', 'artefakt', 'evidence', 'incident', 'system', 'netzwerk', 'zeitraum', 'verfügbar'];
const lowerQ = q.toLowerCase();
return forensicsTerms.some(term => lowerQ.includes(term));
})
.map(q => q.trim())
.slice(0, 3);
if (questions.length === 0) {
questions = [];
}
let parsed: unknown = JSONParser.safeParseJSON(stripJsonFences(aiResponse.content), null);
} catch (error) {
console.error('Failed to parse enhancement response:', aiContent);
questions = [];
}
let questions: string[] = Array.isArray(parsed) ? parsed : [];
questions = questions
.filter(q => typeof q === 'string')
.map(q => q.trim())
.filter(q => q.endsWith('?'))
.filter(q => q.length >= Q_MIN_LEN && q.length <= Q_MAX_LEN)
.slice(0, Q_MAX_COUNT);
console.log(`[ENHANCE API] User: ${userId}, Forensics Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
console.log(`[ENHANCE-API] User: ${userId}, Questions generated: ${questions.length}, Input length: ${sanitizedInput.length}`);
return new Response(JSON.stringify({
success: true,
questions,
taskId,
inputComplete: questions.length === 0
inputComplete: questions.length === 0
}), {
status: 200,
headers: { 'Content-Type': 'application/json' }
});
} catch (error) {
console.error('Enhancement error:', error);
} catch (err) {
console.error('[ENHANCE-API] Enhancement error:', err);
return apiServerError.internal('Enhancement processing failed');
}
};
};

View File

@@ -1,5 +1,4 @@
// src/pages/api/ai/query.ts
import type { APIRoute } from 'astro';
import { withAPIAuth } from '../../../utils/auth.js';
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
@@ -21,15 +20,14 @@ const MAIN_RATE_LIMIT_MAX = parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS || '
const MICRO_TASK_TOTAL_LIMIT = parseInt(process.env.AI_MICRO_TASK_TOTAL_LIMIT || '50', 10);
function sanitizeInput(input: string): string {
let sanitized = input
return input
.replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
.replace(/\<\/?[^>]+(>|$)/g, '')
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
.trim();
sanitized = sanitized.slice(0, 2000).replace(/\s+/g, ' ');
return sanitized;
.trim()
.slice(0, 2000)
.replace(/\s+/g, ' ');
}
function checkRateLimit(userId: string): { allowed: boolean; reason?: string; microTasksRemaining?: number } {
@@ -78,7 +76,7 @@ function incrementMicroTaskCount(userId: string, aiCallsMade: number): void {
}
}
function cleanupExpiredRateLimits() {
function cleanupExpiredRateLimits(): void {
const now = Date.now();
const maxStoreSize = 1000;
@@ -118,51 +116,52 @@ export const POST: APIRoute = async ({ request }) => {
const body = await request.json();
const { query, mode = 'workflow', taskId: clientTaskId } = body;
console.log(`[MICRO-TASK API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
console.log(`[MICRO-TASK API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);
console.log(`[AI-API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
console.log(`[AI-API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);
if (!query || typeof query !== 'string') {
console.log(`[MICRO-TASK API] Invalid query for task ${clientTaskId}`);
console.log(`[AI-API] Invalid query for task ${clientTaskId}`);
return apiError.badRequest('Query required');
}
if (!['workflow', 'tool'].includes(mode)) {
console.log(`[MICRO-TASK API] Invalid mode for task ${clientTaskId}: ${mode}`);
console.log(`[AI-API] Invalid mode for task ${clientTaskId}: ${mode}`);
return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
}
const sanitizedQuery = sanitizeInput(query);
if (sanitizedQuery.includes('[FILTERED]')) {
console.log(`[MICRO-TASK API] Filtered input detected for task ${clientTaskId}`);
console.log(`[AI-API] Filtered input detected for task ${clientTaskId}`);
return apiError.badRequest('Invalid input detected');
}
const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
console.log(`[MICRO-TASK API] About to enqueue micro-task pipeline ${taskId}`);
console.log(`[AI-API] Enqueueing pipeline task ${taskId}`);
const result = await enqueueApiCall(() =>
aiPipeline.processQuery(sanitizedQuery, mode)
, taskId);
if (!result || !result.recommendation) {
return apiServerError.unavailable('No response from micro-task AI pipeline');
return apiServerError.unavailable('No response from AI pipeline');
}
const stats = result.processingStats;
const estimatedAICallsMade = stats.microTasksCompleted + stats.microTasksFailed;
incrementMicroTaskCount(userId, estimatedAICallsMade);
console.log(`[MICRO-TASK API] Pipeline completed for ${taskId}:`);
console.log(` - Mode: ${mode}`);
console.log(` - User: ${userId}`);
console.log(` - Query length: ${sanitizedQuery.length}`);
console.log(` - Processing time: ${stats.processingTimeMs}ms`);
console.log(` - Micro-tasks completed: ${stats.microTasksCompleted}`);
console.log(` - Micro-tasks failed: ${stats.microTasksFailed}`);
console.log(` - Estimated AI calls: ${estimatedAICallsMade}`);
console.log(` - Embeddings used: ${stats.embeddingsUsed}`);
console.log(` - Final items: ${stats.finalSelectedItems}`);
console.log(`[AI-API] Pipeline completed for ${taskId}:`, {
mode,
user: userId,
queryLength: sanitizedQuery.length,
processingTime: stats.processingTimeMs,
microTasksCompleted: stats.microTasksCompleted,
microTasksFailed: stats.microTasksFailed,
estimatedAICalls: estimatedAICallsMade,
embeddingsUsed: stats.embeddingsUsed,
finalItems: stats.finalSelectedItems
});
const currentLimit = rateLimitStore.get(userId);
const remainingMicroTasks = currentLimit ?
@@ -176,7 +175,7 @@ export const POST: APIRoute = async ({ request }) => {
query: sanitizedQuery,
processingStats: {
...result.processingStats,
pipelineType: 'micro-task',
pipelineType: 'refactored',
microTasksSuccessRate: stats.microTasksCompleted / (stats.microTasksCompleted + stats.microTasksFailed),
averageTaskTime: stats.processingTimeMs / (stats.microTasksCompleted + stats.microTasksFailed),
estimatedAICallsMade
@@ -192,18 +191,16 @@ export const POST: APIRoute = async ({ request }) => {
});
} catch (error) {
console.error('[MICRO-TASK API] Pipeline error:', error);
console.error('[AI-API] Pipeline error:', error);
if (error.message.includes('embeddings')) {
return apiServerError.unavailable('Embeddings service error - using AI fallback');
} else if (error.message.includes('micro-task')) {
return apiServerError.unavailable('Micro-task pipeline error - some analysis steps failed');
} else if (error.message.includes('selector')) {
return apiServerError.unavailable('AI selector service error');
return apiServerError.unavailable('Embeddings service error');
} else if (error.message.includes('AI')) {
return apiServerError.unavailable('AI service error');
} else if (error.message.includes('rate limit')) {
return apiError.rateLimit('AI service rate limits exceeded during micro-task processing');
return apiError.rateLimit('AI service rate limits exceeded');
} else {
return apiServerError.internal('Micro-task AI pipeline error');
return apiServerError.internal('AI pipeline error');
}
}
};

View File

@@ -180,7 +180,6 @@ export const POST: APIRoute = async ({ request }) => {
return apiSpecial.invalidJSON();
}
// Preprocess form data to handle autocomplete inputs
body = preprocessFormData(body);
const sanitizedBody = sanitizeInput(body);

View File

@@ -37,13 +37,6 @@ export const POST: APIRoute = async ({ request }) => {
const { embeddingsService } = await import('../../../utils/embeddings.js');
if (!embeddingsService.isEnabled()) {
return new Response(
JSON.stringify({ success: false, error: 'Semantic search not available' }),
{ status: 400, headers: { 'Content-Type': 'application/json' } }
);
}
await embeddingsService.waitForInitialization();
const similarItems = await embeddingsService.findSimilar(

View File

@@ -1,5 +1,5 @@
---
// src/pages/contribute/index.astro - Consolidated Auth
// src/pages/contribute/index.astro
import BaseLayout from '../../layouts/BaseLayout.astro';
import { withAuth } from '../../utils/auth.js';

View File

@@ -1,4 +1,5 @@
---
//src/pages/index.astro
import BaseLayout from '../layouts/BaseLayout.astro';
import ToolCard from '../components/ToolCard.astro';
import ToolFilters from '../components/ToolFilters.astro';
@@ -509,9 +510,7 @@ if (aiAuthRequired) {
}, 500);
};
function handleSharedURL() {
console.log('[SHARE] Handling shared URL:', window.location.search);
function handleSharedURL() {
const urlParams = new URLSearchParams(window.location.search);
const toolParam = urlParams.get('tool');
const viewParam = urlParams.get('view');

View File

@@ -1,4 +1,5 @@
---
//src/pages/knowledgebase.astro
import BaseLayout from '../layouts/BaseLayout.astro';
import { getCollection } from 'astro:content';
import { getToolsData } from '../utils/dataService.js';

View File

@@ -588,10 +588,8 @@ const currentUrl = Astro.url.href;
});
}
// Make generateTOCContent available globally for the auth check script
window.generateTOCContent = generateTOCContent;
// Initialize everything on page load
document.addEventListener('DOMContentLoaded', () => {
calculateReadingTime();
generateSidebarTOC();

View File

@@ -1,4 +1,5 @@
---
//src/pages/status.astro
import BaseLayout from '../layouts/BaseLayout.astro';
import { getToolsData } from '../utils/dataService.js';

View File

@@ -675,6 +675,7 @@ input[type="checkbox"] {
border-radius: 0.25rem;
font-size: 0.75rem;
margin: 0.125rem;
max-height: 1.5rem;
}
/* ===================================================================
@@ -1806,11 +1807,44 @@ input[type="checkbox"] {
.ai-textarea-section {
flex: 1;
min-width: 0;
display: flex;
flex-direction: column;
}
.ai-textarea-section textarea {
width: 100%;
height: 180px;
min-height: 180px;
max-height: 300px;
resize: vertical;
font-size: 0.9375rem;
line-height: 1.5;
padding: 0.75rem;
border: 1px solid var(--color-border);
border-radius: 0.375rem;
background-color: var(--color-bg);
color: var(--color-text);
transition: var(--transition-fast);
flex: 1;
}
.confidence-tooltip {
background: var(--color-bg) !important;
border: 2px solid var(--color-border) !important;
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15) !important;
z-index: 2000 !important;
}
.ai-textarea-section textarea:focus {
outline: none;
border-color: var(--color-primary);
box-shadow: 0 0 0 3px rgb(37 99 235 / 10%);
}
.ai-suggestions-section {
flex: 0 0 320px;
min-height: 120px;
min-height: 180px;
height: auto;
}
.ai-input-container textarea {
@@ -1887,7 +1921,6 @@ input[type="checkbox"] {
box-shadow: 0 2px 4px 0 rgb(255 255 255 / 10%);
}
/* Enhanced contextual analysis cards */
.contextual-analysis-card {
margin-bottom: 2rem;
border-left: 4px solid;
@@ -1984,7 +2017,6 @@ input[type="checkbox"] {
opacity: 1;
}
/* Enhanced queue status for micro-tasks */
.queue-status-card.micro-task-mode {
border-left: 4px solid var(--color-primary);
}
@@ -1997,7 +2029,6 @@ input[type="checkbox"] {
border-radius: 0.5rem 0.5rem 0 0;
}
/* Mobile responsive adjustments */
@media (max-width: 768px) {
.micro-task-steps {
grid-template-columns: repeat(2, 1fr);
@@ -2189,12 +2220,20 @@ input[type="checkbox"] {
border-radius: 1rem;
font-weight: 500;
text-transform: uppercase;
position: relative;
z-index: 1;
}
.tool-rec-priority.high { background-color: var(--color-error); color: white; }
.tool-rec-priority.medium { background-color: var(--color-warning); color: white; }
.tool-rec-priority.low { background-color: var(--color-accent); color: white; }
[data-theme="dark"] .confidence-tooltip {
background: var(--color-bg-secondary) !important;
border-color: var(--color-border) !important;
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.4) !important;
}
.tool-rec-justification {
font-size: 0.875rem;
line-height: 1.5;
@@ -2613,7 +2652,8 @@ footer {
================================================================= */
.smart-prompting-container {
height: 100%;
height: auto;
min-height: 180px;
animation: smartPromptSlideIn 0.4s cubic-bezier(0.4, 0, 0.2, 1);
}
@@ -2622,8 +2662,10 @@ footer {
border: 1px solid var(--color-border);
border-radius: 0.5rem;
padding: 1rem;
height: 100%;
min-height: 120px;
height: auto;
min-height: 180px;
max-height: 400px;
overflow-y: auto;
display: flex;
flex-direction: column;
opacity: 0.85;
@@ -2663,8 +2705,8 @@ footer {
/* Smart Prompting Hint */
.smart-prompting-hint {
height: 100%;
min-height: 120px;
height: 180px;
min-height: 180px;
display: flex;
align-items: center;
animation: hintFadeIn 0.3s ease-in-out;
@@ -3378,8 +3420,8 @@ footer {
.ai-suggestions-section {
flex: 0 0 auto;
width: 100%;
max-width: none;
height: auto;
min-height: 120px;
}
.ai-textarea-section {
@@ -3389,6 +3431,11 @@ footer {
min-height: 100px;
}
.ai-textarea-section textarea {
height: 150px;
min-height: 150px;
}
.ai-spotlight-content {
flex-direction: column;
gap: 0.75rem;

View File

@@ -691,12 +691,11 @@
/* ==========================================================================
VIDEO EMBEDDING - Add to knowledgebase.css
VIDEO EMBEDDING - ULTRA SIMPLE: Just full width, natural aspect ratios
========================================================================== */
/* Video Container and Responsive Wrapper */
/* Video Container - just a styled wrapper */
:where(.markdown-content) .video-container {
position: relative;
width: 100%;
margin: 2rem 0;
border-radius: var(--radius-lg, 0.75rem);
@@ -705,84 +704,34 @@
box-shadow: var(--shadow-lg, 0 12px 30px rgba(0,0,0,0.16));
}
/* Responsive 16:9 aspect ratio by default */
:where(.markdown-content) .video-container.aspect-16-9 {
aspect-ratio: 16 / 9;
}
:where(.markdown-content) .video-container.aspect-4-3 {
aspect-ratio: 4 / 3;
}
:where(.markdown-content) .video-container.aspect-1-1 {
aspect-ratio: 1 / 1;
}
/* Video Element Styling */
/* Video Element - full width, natural aspect ratio */
:where(.markdown-content) .video-container video {
width: 100%;
height: 100%;
object-fit: contain;
height: auto;
display: block;
background-color: #000;
border: none;
outline: none;
}
/* Custom Video Controls Enhancement */
:where(.markdown-content) video::-webkit-media-controls-panel {
background-color: rgba(0, 0, 0, 0.8);
/* YouTube iframe - full width, preserve embedded dimensions ratio */
:where(.markdown-content) .video-container iframe {
width: 100%;
height: auto;
aspect-ratio: 16 / 9; /* Only for iframes since they don't have intrinsic ratio */
display: block;
border: none;
outline: none;
}
:where(.markdown-content) video::-webkit-media-controls-current-time-display,
:where(.markdown-content) video::-webkit-media-controls-time-remaining-display {
color: white;
text-shadow: none;
/* Focus states for accessibility */
:where(.markdown-content) .video-container video:focus,
:where(.markdown-content) .video-container iframe:focus {
outline: 3px solid var(--color-primary);
outline-offset: 3px;
}
/* Video Loading State */
:where(.markdown-content) .video-container .video-loading {
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
color: var(--color-text-secondary);
display: flex;
flex-direction: column;
align-items: center;
gap: 1rem;
}
:where(.markdown-content) .video-container .video-loading .spinner {
width: 2rem;
height: 2rem;
border: 3px solid var(--color-border);
border-top: 3px solid var(--color-primary);
border-radius: 50%;
animation: spin 1s linear infinite;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
/* Video Error State */
:where(.markdown-content) .video-container .video-error {
position: absolute;
top: 50%;
left: 50%;
transform: translate(-50%, -50%);
text-align: center;
color: var(--color-error, #dc3545);
padding: 2rem;
}
:where(.markdown-content) .video-container .video-error .error-icon {
font-size: 3rem;
margin-bottom: 1rem;
}
/* Video Metadata Overlay */
/* Video Metadata */
:where(.markdown-content) .video-metadata {
background-color: var(--color-bg-secondary);
border: 1px solid var(--color-border);
@@ -796,69 +745,13 @@
:where(.markdown-content) .video-metadata .video-title {
font-weight: 600;
color: var(--color-text);
margin-bottom: 0.5rem;
}
:where(.markdown-content) .video-metadata .video-info {
display: flex;
gap: 1rem;
flex-wrap: wrap;
align-items: center;
}
:where(.markdown-content) .video-metadata .video-duration,
:where(.markdown-content) .video-metadata .video-size,
:where(.markdown-content) .video-metadata .video-format {
display: flex;
align-items: center;
gap: 0.25rem;
}
/* Fullscreen Support */
:where(.markdown-content) .video-container video:fullscreen {
background-color: #000;
}
:where(.markdown-content) .video-container video:-webkit-full-screen {
background-color: #000;
}
:where(.markdown-content) .video-container video:-moz-full-screen {
background-color: #000;
}
/* Video Thumbnail/Poster Styling */
:where(.markdown-content) .video-container video[poster] {
object-fit: cover;
}
/* Protected Video Overlay */
:where(.markdown-content) .video-container .video-protected {
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
background-color: rgba(0, 0, 0, 0.8);
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
color: white;
text-align: center;
padding: 2rem;
}
:where(.markdown-content) .video-container .video-protected .lock-icon {
font-size: 3rem;
margin-bottom: 1rem;
opacity: 0.8;
margin: 0;
}
/* Responsive Design */
@media (max-width: 768px) {
:where(.markdown-content) .video-container {
margin: 1.5rem -0.5rem; /* Extend to edges on mobile */
margin: 1.5rem -0.5rem;
border-radius: 0;
}
@@ -867,15 +760,9 @@
font-size: 0.8125rem;
border-radius: 0;
}
:where(.markdown-content) .video-metadata .video-info {
flex-direction: column;
gap: 0.5rem;
align-items: flex-start;
}
}
/* Dark Theme Adjustments */
/* Dark Theme */
[data-theme="dark"] :where(.markdown-content) .video-container {
box-shadow: 0 12px 30px rgba(0,0,0,0.4);
}
@@ -885,48 +772,23 @@
border-color: color-mix(in srgb, var(--color-border) 60%, transparent);
}
/* Video Caption/Description Support */
:where(.markdown-content) .video-caption {
margin-top: 1rem;
font-size: 0.9375rem;
color: var(--color-text-secondary);
text-align: center;
font-style: italic;
line-height: 1.5;
}
/* Video Gallery Support (multiple videos) */
:where(.markdown-content) .video-gallery {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: 2rem;
margin: 2rem 0;
}
:where(.markdown-content) .video-gallery .video-container {
margin: 0;
}
/* Accessibility Improvements */
:where(.markdown-content) .video-container video:focus {
outline: 3px solid var(--color-primary);
outline-offset: 3px;
}
/* Print Media - Hide Videos */
/* Print Media */
@media print {
:where(.markdown-content) .video-container {
border: 2px solid #ddd;
background-color: #f5f5f5;
padding: 2rem;
text-align: center;
}
:where(.markdown-content) .video-container video,
:where(.markdown-content) .video-container iframe {
display: none !important;
}
:where(.markdown-content) .video-container::after {
content: "[Video: " attr(data-video-title, "Embedded Video") "]";
:where(.markdown-content) .video-container::before {
content: "📹 Video: " attr(data-video-title, "Embedded Video");
display: block;
padding: 1rem;
background-color: #f5f5f5;
border: 1px solid #ddd;
text-align: center;
font-style: italic;
color: #666;
font-weight: 600;
}
}

View File

@@ -1,62 +1,72 @@
/* PALETTE OPTION 1: BLUEPRINT & AMBER */
:root {
/* Light Theme Colors */
--color-bg: #fff;
--color-bg-secondary: #f8fafc;
--color-bg-tertiary: #e2e8f0;
--color-text: #1e293b;
--color-text-secondary: #64748b;
--color-border: #cbd5e1;
--color-primary: #2563eb;
--color-primary-hover: #1d4ed8;
--color-accent: #059669;
--color-accent-hover: #047857;
/* Light Theme */
--color-bg: #ffffff;
--color-bg-secondary: #f1f5f9; /* Slate 100 */
--color-bg-tertiary: #e2e8f0; /* Slate 200 */
--color-text: #0f172a; /* Slate 900 */
--color-text-secondary: #475569; /* Slate 600 */
--color-border: #cbd5e1; /* Slate 300 */
--color-primary: #334155; /* Slate 700 - A strong, serious primary */
--color-primary-hover: #1e293b; /* Slate 800 */
--color-accent: #b45309; /* A sharp, focused amber for highlights */
--color-accent-hover: #92400e;
--color-warning: #d97706;
--color-error: #dc2626;
/* Enhanced card type colors */
--color-hosted: #7c3aed;
--color-hosted-bg: #f3f0ff;
--color-oss: #059669;
--color-oss-bg: #ecfdf5;
--color-method: #0891b2;
--color-method-bg: #f0f9ff;
--color-concept: #ea580c;
--color-error: #be123c; /* A deeper, more serious red */
/* Card/Tag Category Colors */
--color-hosted: #4f46e5; /* Indigo */
--color-hosted-bg: #eef2ff;
--color-oss: #0d9488; /* Teal */
--color-oss-bg: #f0fdfa;
--color-method: #0891b2; /* Cyan */
--color-method-bg: #ecfeff;
--color-concept: #c2410c; /* Orange */
--color-concept-bg: #fff7ed;
/* Shadows */
--shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 5%);
--shadow-md: 0 4px 6px -1px rgb(0 0 0 / 10%);
--shadow-lg: 0 10px 15px -3px rgb(0 0 0 / 10%);
/* Shadows (Crisper) */
--shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 6%);
--shadow-md: 0 3px 5px -1px rgb(0 0 0 / 8%);
--shadow-lg: 0 8px 12px -3px rgb(0 0 0 / 10%);
/* Transitions */
--transition-fast: all 0.2s ease;
--transition-medium: all 0.3s ease;
}
[data-theme="dark"] {
--color-bg: #0f172a;
--color-bg-secondary: #1e293b;
--color-bg-tertiary: #334155;
--color-text: #f1f5f9;
--color-text-secondary: #94a3b8;
--color-border: #475569;
--color-primary: #3b82f6;
--color-primary-hover: #60a5fa;
--color-accent: #10b981;
--color-accent-hover: #34d399;
/* Dark Theme */
--color-bg: #0f172a; /* Slate 900 */
--color-bg-secondary: #1e293b; /* Slate 800 */
--color-bg-tertiary: #334155; /* Slate 700 */
--color-text: #f1f5f9; /* Slate 100 */
--color-text-secondary: #94a3b8; /* Slate 400 */
--color-border: #475569; /* Slate 600 */
--color-primary: #64748b; /* Slate 500 */
--color-primary-hover: #94a3b8; /* Slate 400 */
--color-accent: #f59e0b; /* A brighter amber for dark mode contrast */
--color-accent-hover: #fbbf24;
--color-warning: #f59e0b;
--color-error: #f87171;
--color-hosted: #a855f7;
--color-hosted-bg: #2e1065;
--color-oss: #10b981;
--color-oss-bg: #064e3b;
--color-method: #0891b2;
--color-error: #f43f5e;
/* Card/Tag Category Colors */
--color-hosted: #818cf8; /* Indigo */
--color-hosted-bg: #3730a3;
--color-oss: #2dd4bf; /* Teal */
--color-oss-bg: #115e59;
--color-method: #22d3ee; /* Cyan */
--color-method-bg: #164e63;
--color-concept: #f97316;
--color-concept: #fb923c; /* Orange */
--color-concept-bg: #7c2d12;
--shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 30%);
--shadow-md: 0 4px 6px -1px rgb(0 0 0 / 40%);
--shadow-lg: 0 10px 15px -3px rgb(0 0 0 / 50%);
/* Shadows (Subtler for dark mode) */
--shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 20%);
--shadow-md: 0 4px 6px -1px rgb(0 0 0 / 30%);
--shadow-lg: 0 10px 15px -3px rgb(0 0 0 / 40%);
}

File diff suppressed because it is too large Load Diff

137
src/utils/aiService.ts Normal file
View File

@@ -0,0 +1,137 @@
// src/utils/aiService.ts
import 'dotenv/config';
/** Connection settings for the AI backend; AIService fills these from environment variables. */
export interface AIServiceConfig {
// Base URL of the API; `/v1/chat/completions` is appended when a request is made.
endpoint: string;
// Sent as a Bearer token in the Authorization header.
apiKey: string;
// Model identifier placed in the `model` field of the request body.
model: string;
}
/** Per-call overrides for an AI request. */
export interface AICallOptions {
// Forwarded as `temperature` in the request body.
temperature?: number;
// Milliseconds before the request is aborted (passed to setTimeout).
timeout?: number;
}
/** Result of a successful AI call. */
export interface AIResponse {
// Trimmed message content of the first choice in the API response.
content: string;
// NOTE(review): AIService returns the API's raw `usage` object unchanged. If the
// backend reports snake_case keys (e.g. `prompt_tokens`), these camelCase
// fields will be undefined at runtime — confirm against the backend.
usage?: {
promptTokens: number;
completionTokens: number;
totalTokens: number;
};
}
/**
 * Client for a chat-completions style HTTP endpoint.
 *
 * Configuration is read once from the environment in the constructor
 * (`AI_ANALYZER_ENDPOINT`, `AI_ANALYZER_API_KEY`, `AI_ANALYZER_MODEL`);
 * construction throws if any of them is missing or empty.
 */
class AIService {
  private readonly config: AIServiceConfig;
  private readonly defaultOptions: AICallOptions;

  constructor() {
    this.config = {
      endpoint: this.getRequiredEnv('AI_ANALYZER_ENDPOINT'),
      apiKey: this.getRequiredEnv('AI_ANALYZER_API_KEY'),
      model: this.getRequiredEnv('AI_ANALYZER_MODEL')
    };
    this.defaultOptions = {
      temperature: 0.3,
      timeout: 60000
    };
    console.log('[AI-SERVICE] Initialized with model:', this.config.model);
  }

  /**
   * Reads an environment variable that must be set.
   * @throws Error when the variable is unset or empty.
   */
  private getRequiredEnv(key: string): string {
    const value = process.env[key];
    if (!value) {
      throw new Error(`Missing required environment variable: ${key}`);
    }
    return value;
  }

  /**
   * Sends a single-message chat completion request.
   *
   * @param prompt  Text sent as the only `user` message.
   * @param options Optional temperature / timeout (ms) overrides.
   * @returns The trimmed content of the first choice plus the API's `usage` object.
   * @throws Error `AI request timeout` when the request is aborted by the timer,
   *         `AI API error: <status> - <body>` on a non-2xx response,
   *         `No response from AI model` when the response carries no text content;
   *         any other fetch/parse error is rethrown unchanged.
   */
  async callAI(prompt: string, options: AICallOptions = {}): Promise<AIResponse> {
    // Resolve per field with `??` so an explicit `undefined` override cannot
    // clobber a default (a spread merge would turn the timeout into 0 ms).
    const temperature = options.temperature ?? this.defaultOptions.temperature;
    const timeout = options.timeout ?? this.defaultOptions.timeout;

    console.log('[AI-SERVICE] Making API call:', {
      promptLength: prompt.length,
      temperature
    });

    const headers: Record<string, string> = {
      'Content-Type': 'application/json'
    };
    if (this.config.apiKey) {
      headers['Authorization'] = `Bearer ${this.config.apiKey}`;
    }

    const requestBody = {
      model: this.config.model,
      messages: [{ role: 'user', content: prompt }],
      temperature
    };

    // Tolerate a configured endpoint with trailing slashes.
    const baseUrl = this.config.endpoint.replace(/\/+$/, '');

    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeout);

    try {
      const response = await fetch(`${baseUrl}/v1/chat/completions`, {
        method: 'POST',
        headers,
        body: JSON.stringify(requestBody),
        signal: controller.signal
      });

      if (!response.ok) {
        const errorText = await response.text();
        console.error('[AI-SERVICE] API Error:', response.status, errorText);
        throw new Error(`AI API error: ${response.status} - ${errorText}`);
      }

      const data = await response.json();
      const content: unknown = data.choices?.[0]?.message?.content;

      // Non-string content cannot be trimmed; treat it like a missing answer.
      if (typeof content !== 'string' || content.length === 0) {
        console.error('[AI-SERVICE] No response content from AI model');
        throw new Error('No response from AI model');
      }

      console.log('[AI-SERVICE] API call successful:', {
        responseLength: content.length,
        usage: data.usage
      });

      return {
        content: content.trim(),
        usage: data.usage
      };
    } catch (error: unknown) {
      if (error instanceof Error && error.name === 'AbortError') {
        console.error('[AI-SERVICE] Request timeout');
        throw new Error('AI request timeout');
      }
      const message = error instanceof Error ? error.message : String(error);
      console.error('[AI-SERVICE] API call failed:', message);
      throw error;
    } finally {
      // Always release the timer: on the failure path a pending timer would
      // otherwise stay alive (and keep the event loop busy) until it fires.
      clearTimeout(timeoutId);
    }
  }

  /** Convenience wrapper around callAI with temperature 0.3 and a 30 s timeout. */
  async callMicroTaskAI(prompt: string): Promise<AIResponse> {
    return this.callAI(prompt, {
      temperature: 0.3,
      timeout: 30000
    });
  }

  /** Rough token estimate: one token per four characters, rounded up. */
  estimateTokens(text: string): number {
    return Math.ceil(text.length / 4);
  }

  /** Returns a shallow copy of the configuration so callers cannot mutate it. */
  getConfig(): AIServiceConfig {
    return { ...this.config };
  }
}
// Shared instance, constructed when this module is first imported; the
// constructor throws if a required AI_ANALYZER_* environment variable is missing.
export const aiService = new AIService();

View File

@@ -83,26 +83,21 @@ export const apiServerError = {
};
export const apiSpecial = {
// JSON parsing error
invalidJSON: (): Response =>
apiError.badRequest('Invalid JSON in request body'),
// Missing required fields
missingRequired: (fields: string[]): Response =>
apiError.badRequest(`Missing required fields: ${fields.join(', ')}`),
// Empty request body
emptyBody: (): Response =>
apiError.badRequest('Request body cannot be empty'),
// File upload responses
uploadSuccess: (data: { url: string; filename: string; size: number; storage: string }): Response =>
apiResponse.created(data),
uploadFailed: (error: string): Response =>
apiServerError.internal(`Upload failed: ${error}`),
// Contribution responses
contributionSuccess: (data: { prUrl?: string; branchName?: string; message: string }): Response =>
apiResponse.created({ success: true, ...data }),
@@ -114,7 +109,6 @@ export const apiWithHeaders = {
successWithHeaders: (data: any, headers: Record<string, string>): Response =>
createAPIResponse(data, 200, headers),
// Redirect response
redirect: (location: string, temporary: boolean = true): Response =>
new Response(null, {
status: temporary ? 302 : 301,

File diff suppressed because it is too large Load Diff

View File

@@ -52,22 +52,17 @@ function getEnv(key: string): string {
/**
 * Extracts the raw session token from a request's `cookie` header.
 *
 * Emits no logging: this runs on every authenticated request, and cookie
 * presence and cookie names do not belong in server logs.
 *
 * @param request Incoming request whose headers are inspected.
 * @returns The value of the `session` cookie, or null when the header or the
 *          cookie is absent or empty.
 */
export function getSessionFromRequest(request: Request): string | null {
  const cookieHeader = request.headers.get('cookie');
  if (!cookieHeader) return null;

  const cookies = parseCookie(cookieHeader);
  return cookies.session || null;
}
export async function verifySession(sessionToken: string): Promise<SessionData | null> {
try {
console.log('[DEBUG] Verifying session token, length:', sessionToken.length);
const { payload } = await jwtVerify(sessionToken, SECRET_KEY);
console.log('[DEBUG] JWT verification successful, payload keys:', Object.keys(payload));
if (
typeof payload.userId === 'string' &&
@@ -75,7 +70,6 @@ export async function verifySession(sessionToken: string): Promise<SessionData |
typeof payload.authenticated === 'boolean' &&
typeof payload.exp === 'number'
) {
console.log('[DEBUG] Session validation successful for user:', payload.userId);
return {
userId: payload.userId,
email: payload.email,
@@ -84,17 +78,14 @@ export async function verifySession(sessionToken: string): Promise<SessionData |
};
}
console.log('[DEBUG] Session payload validation failed, payload:', payload);
return null;
} catch (error) {
console.log('[DEBUG] Session verification failed:', error.message);
return null;
}
}
export async function createSession(userId: string, email: string): Promise<string> {
const exp = Math.floor(Date.now() / 1000) + SESSION_DURATION;
console.log('[DEBUG] Creating session for user:', userId, 'exp:', exp);
const token = await new SignJWT({
userId,
@@ -106,7 +97,6 @@ export async function createSession(userId: string, email: string): Promise<stri
.setExpirationTime(exp)
.sign(SECRET_KEY);
console.log('[DEBUG] Session token created, length:', token.length);
return token;
}
@@ -123,7 +113,6 @@ export function createSessionCookie(sessionToken: string): string {
path: '/'
});
console.log('[DEBUG] Created session cookie:', cookie.substring(0, 100) + '...');
return cookie;
}
@@ -292,8 +281,6 @@ export async function createSessionWithCookie(userInfo: UserInfo): Promise<{
export async function withAuth(Astro: AstroGlobal, context: AuthContextType = 'general'): Promise<AuthContext | Response> {
const authRequired = getAuthRequirement(context);
console.log(`[DEBUG PAGE] Auth required for ${context}:`, authRequired);
console.log('[DEBUG PAGE] Request URL:', Astro.url.toString());
if (!authRequired) {
return {
@@ -305,10 +292,8 @@ export async function withAuth(Astro: AstroGlobal, context: AuthContextType = 'g
}
const sessionToken = getSessionFromRequest(Astro.request);
console.log('[DEBUG PAGE] Session token found:', !!sessionToken);
if (!sessionToken) {
console.log('[DEBUG PAGE] No session token, redirecting to login');
const loginUrl = `/api/auth/login?returnTo=${encodeURIComponent(Astro.url.toString())}`;
return new Response(null, {
status: 302,
@@ -317,10 +302,8 @@ export async function withAuth(Astro: AstroGlobal, context: AuthContextType = 'g
}
const session = await verifySession(sessionToken);
console.log('[DEBUG PAGE] Session verification result:', !!session);
if (!session) {
console.log('[DEBUG PAGE] Session verification failed, redirecting to login');
const loginUrl = `/api/auth/login?returnTo=${encodeURIComponent(Astro.url.toString())}`;
return new Response(null, {
status: 302,
@@ -328,7 +311,6 @@ export async function withAuth(Astro: AstroGlobal, context: AuthContextType = 'g
});
}
console.log(`[DEBUG PAGE] Page authentication successful for ${context}:`, session.userId);
return {
authenticated: true,
session,
@@ -354,10 +336,8 @@ export async function withAPIAuth(request: Request, context: AuthContextType = '
}
const sessionToken = getSessionFromRequest(request);
console.log(`[DEBUG API] Session token found for ${context}:`, !!sessionToken);
if (!sessionToken) {
console.log(`[DEBUG API] No session token found for ${context}`);
return {
authenticated: false,
userId: '',
@@ -366,10 +346,8 @@ export async function withAPIAuth(request: Request, context: AuthContextType = '
}
const session = await verifySession(sessionToken);
console.log(`[DEBUG API] Session verification result for ${context}:`, !!session);
if (!session) {
console.log(`[DEBUG API] Session verification failed for ${context}`);
return {
authenticated: false,
userId: '',
@@ -377,7 +355,6 @@ export async function withAPIAuth(request: Request, context: AuthContextType = '
};
}
console.log(`[DEBUG API] Authentication successful for ${context}:`, session.userId);
return {
authenticated: true,
userId: session.userId,

View File

@@ -1,9 +1,8 @@
// src/utils/clientUtils.ts
export function createToolSlug(toolName: string): string {
if (!toolName || typeof toolName !== 'string') {
console.warn('[toolHelpers] Invalid toolName provided to createToolSlug:', toolName);
console.warn('[CLIENT-UTILS] Invalid toolName provided to createToolSlug:', toolName);
return '';
}
@@ -30,6 +29,81 @@ export function isToolHosted(tool: any): boolean {
tool.projectUrl.trim() !== "";
}
/**
 * Strips common Markdown and HTML markup from a string.
 *
 * The rules run in order: headings, bullet markers, numbered-list markers,
 * bold, italic, links (label kept), fenced code blocks (replaced by a
 * placeholder), inline code, HTML tags, and finally runs of blank lines.
 *
 * @returns The cleaned, trimmed text; an empty string for non-string input.
 */
export function sanitizeText(text: string): string {
  if (typeof text !== 'string') return '';

  const rules: Array<[RegExp, string]> = [
    [/^#{1,6}\s+/gm, ''],
    [/^\s*[-*+]\s+/gm, ''],
    [/^\s*\d+\.\s+/gm, ''],
    [/\*\*(.+?)\*\*/g, '$1'],
    [/\*(.+?)\*/g, '$1'],
    [/\[([^\]]+)\]\([^)]+\)/g, '$1'],
    [/```[\s\S]*?```/g, '[CODE BLOCK]'],
    [/`([^`]+)`/g, '$1'],
    [/<[^>]+>/g, ''],
    [/\n\s*\n\s*\n/g, '\n\n']
  ];

  let cleaned = text;
  for (const [pattern, replacement] of rules) {
    cleaned = cleaned.replace(pattern, replacement);
  }
  return cleaned.trim();
}
/**
 * Escapes a value for safe interpolation into HTML text or a quoted attribute.
 *
 * `&`, `<`, `>`, `"` and `'` are replaced by entities. Quotes are included
 * because callers in this file interpolate the result into attribute values
 * such as `data-item="…"`. Non-string input is converted with `String()` and
 * then escaped as well. The implementation does not touch the DOM, so it also
 * works where `document` is unavailable.
 *
 * @param text Value to escape.
 * @returns The escaped string.
 */
export function escapeHtml(text: string): string {
  const value = typeof text === 'string' ? text : String(text);
  return value
    .replace(/&/g, '&amp;') // must run first so later entities are not double-escaped
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
/**
 * Shortens text to at most `maxLength` characters, appending `...` when cut.
 * Falsy input and text that already fits are returned unchanged.
 */
export function truncateText(text: string, maxLength: number): string {
  const fits = !text || text.length <= maxLength;
  return fits ? text : `${text.slice(0, maxLength)}...`;
}
/**
 * Produces a short, human-readable preview of an arbitrary value.
 *
 * - null / undefined -> `null`
 * - strings are cut to 100 characters plus `...`
 * - numbers and booleans use their string form
 * - arrays and objects show at most three entries / keys followed by a count
 *   of the remainder
 */
export function summarizeData(data: any): string {
  if (data == null) return 'null';

  switch (typeof data) {
    case 'string':
      return data.length > 100 ? `${data.slice(0, 100)}...` : data;
    case 'number':
    case 'boolean':
      return data.toString();
  }

  if (Array.isArray(data)) {
    const total = data.length;
    if (total === 0) return '[]';
    if (total <= 3) return JSON.stringify(data);
    const head = data
      .slice(0, 3)
      .map(entry => (typeof entry === 'string' ? entry : JSON.stringify(entry)));
    return `[${head.join(', ')}, ...+${total - 3}]`;
  }

  if (typeof data === 'object') {
    const names = Object.keys(data);
    if (names.length === 0) return '{}';
    if (names.length > 3) {
      return `{${names.slice(0, 3).join(', ')}, ...+${names.length - 3} keys}`;
    }
    const parts = names.map(name => {
      const value = data[name];
      if (typeof value !== 'string') return `${name}: ${JSON.stringify(value)}`;
      const ellipsis = value.length > 20 ? '...' : '';
      return `${name}: ${value.slice(0, 20)}${ellipsis}`;
    });
    return '{' + parts.join(', ') + '}';
  }

  return String(data);
}
/**
 * Formats a millisecond duration as `< 1s`, `Ns`, `Mm` or `Mm Ns`.
 *
 * The duration is rounded up to whole seconds once, before splitting into
 * minutes and seconds, so rounding can never produce a 60-second component
 * (e.g. 59500 ms is `1m`, not `60s`; 119500 ms is `2m`, not `1m 60s`).
 *
 * @param ms Duration in milliseconds.
 */
export function formatDuration(ms: number): string {
  if (ms < 1000) return '< 1s';

  const totalSeconds = Math.ceil(ms / 1000);
  if (totalSeconds < 60) return `${totalSeconds}s`;

  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return seconds > 0 ? `${minutes}m ${seconds}s` : `${minutes}m`;
}
/**
 * Makes an element visible: sets `display: block` and removes the `hidden`
 * class. A null element is ignored.
 */
export function showElement(element: HTMLElement | null): void {
  if (!element) return;

  element.style.display = 'block';
  element.classList.remove('hidden');
}
/**
 * Hides an element: sets `display: none` and adds the `hidden` class.
 * A null element is ignored.
 */
export function hideElement(element: HTMLElement | null): void {
  if (!element) return;

  element.style.display = 'none';
  element.classList.add('hidden');
}
interface AutocompleteOptions {
minLength?: number;
maxResults?: number;
@@ -202,7 +276,7 @@ export class AutocompleteManager {
defaultRender(item: any): string {
const text = typeof item === 'string' ? item : item.name || item.label || item.toString();
return `<div class="autocomplete-item">${this.escapeHtml(text)}</div>`;
return `<div class="autocomplete-item">${escapeHtml(text)}</div>`;
}
renderDropdown(): void {
@@ -284,8 +358,8 @@ export class AutocompleteManager {
align-items: center;
gap: 0.25rem;
">
${this.escapeHtml(item)}
<button type="button" class="autocomplete-remove" data-item="${this.escapeHtml(item)}" style="
${escapeHtml(item)}
<button type="button" class="autocomplete-remove" data-item="${escapeHtml(item)}" style="
background: none;
border: none;
color: white;
@@ -327,12 +401,6 @@ export class AutocompleteManager {
this.selectedIndex = -1;
}
escapeHtml(text: string): string {
const div = document.createElement('div');
div.textContent = text;
return div.innerHTML;
}
setDataSource(newDataSource: any[]): void {
this.dataSource = newDataSource;
}

View File

@@ -0,0 +1,225 @@
// src/utils/confidenceScoring.ts
import { isToolHosted } from './clientUtils.js';
import 'dotenv/config';
/** Confidence breakdown returned for a single tool recommendation. */
export interface ConfidenceMetrics {
// Rounded weighted sum of semantic relevance and task suitability.
overall: number;
// Rounded embedding similarity times 100; 50 when no similarity is known for the tool.
semanticRelevance: number;
// Rounded task relevance, including the workflow phase bonus when one applies.
taskSuitability: number;
// At most four user-facing reasons for lower confidence.
uncertaintyFactors: string[];
// At most four user-facing reasons for higher confidence.
strengthIndicators: string[];
}
/** Tunable weights and thresholds for confidence scoring. */
export interface ConfidenceConfig {
// Multiplier applied to the semantic relevance score.
semanticWeight: number;
// Multiplier applied to the task suitability score.
suitabilityWeight: number;
// NOTE(review): loaded from the environment, but no use of this value is
// visible in the scoring class — confirm where it is consumed.
minimumThreshold: number;
// Scores at or above this value are rated at least "moderate".
mediumThreshold: number;
// Scores at or above this value are rated "strong".
highThreshold: number;
}
/** Inputs describing the analysis a recommendation is being scored against. */
export interface AnalysisContext {
// Raw user query; matched against keyword patterns when deriving factors.
userQuery: string;
// Analysis mode; the value 'workflow' enables the phase-alignment bonus.
mode: string;
// Similarity per tool name. Values are multiplied by 100 when scoring,
// so they are presumably in the 0..1 range — TODO confirm.
embeddingsSimilarities: Map<string, number>;
// Tools already selected; looked up by tool name to find the phase a tool was chosen for.
selectedTools?: Array<{
tool: any;
phase: string;
priority: string;
justification?: string;
taskRelevance?: number;
limitations?: string[];
}>;
}
class ConfidenceScoring {
private config: ConfidenceConfig;
constructor() {
this.config = {
semanticWeight: this.getEnvFloat('CONFIDENCE_SEMANTIC_WEIGHT', 0.3),
suitabilityWeight: this.getEnvFloat('CONFIDENCE_SUITABILITY_WEIGHT', 0.7),
minimumThreshold: this.getEnvInt('CONFIDENCE_MINIMUM_THRESHOLD', 40),
mediumThreshold: this.getEnvInt('CONFIDENCE_MEDIUM_THRESHOLD', 60),
highThreshold: this.getEnvInt('CONFIDENCE_HIGH_THRESHOLD', 80)
};
console.log('[CONFIDENCE-SCORING] Initialized with restored config:', this.config);
}
private getEnvFloat(key: string, defaultValue: number): number {
const value = process.env[key];
return value ? parseFloat(value) : defaultValue;
}
private getEnvInt(key: string, defaultValue: number): number {
const value = process.env[key];
return value ? parseInt(value, 10) : defaultValue;
}
calculateRecommendationConfidence(
tool: any,
context: AnalysisContext,
taskRelevance: number = 70,
limitations: string[] = []
): ConfidenceMetrics {
console.log('[CONFIDENCE-SCORING] Calculating confidence for tool:', tool.name);
const rawSemanticRelevance = context.embeddingsSimilarities.has(tool.name) ?
context.embeddingsSimilarities.get(tool.name)! * 100 : 50;
let enhancedTaskSuitability = taskRelevance;
if (context.mode === 'workflow') {
const toolSelection = context.selectedTools?.find((st: any) => st.tool && st.tool.name === tool.name);
if (toolSelection && tool.phases && Array.isArray(tool.phases) && tool.phases.includes(toolSelection.phase)) {
const phaseBonus = Math.min(15, 100 - taskRelevance);
enhancedTaskSuitability = Math.min(100, taskRelevance + phaseBonus);
console.log('[CONFIDENCE-SCORING] Phase alignment bonus applied:', phaseBonus);
}
}
const overall = (
rawSemanticRelevance * this.config.semanticWeight +
enhancedTaskSuitability * this.config.suitabilityWeight
);
const uncertaintyFactors = this.identifyUncertaintyFactors(tool, context, limitations, overall);
const strengthIndicators = this.identifyStrengthIndicators(tool, context, overall);
const result = {
overall: Math.round(overall),
semanticRelevance: Math.round(rawSemanticRelevance),
taskSuitability: Math.round(enhancedTaskSuitability),
uncertaintyFactors,
strengthIndicators
};
console.log('[CONFIDENCE-SCORING] Confidence calculated:', {
tool: tool.name,
overall: result.overall,
semantic: result.semanticRelevance,
task: result.taskSuitability,
uncertaintyCount: uncertaintyFactors.length,
strengthCount: strengthIndicators.length
});
return result;
}
private identifyUncertaintyFactors(
tool: any,
context: AnalysisContext,
limitations: string[],
confidence: number
): string[] {
const factors: string[] = [];
if (limitations?.length > 0) {
factors.push(...limitations.slice(0, 2));
}
const similarity = context.embeddingsSimilarities.get(tool.name) || 0.5;
if (similarity < 0.7) {
factors.push('Geringe semantische Ähnlichkeit zur Anfrage');
}
if (tool.skillLevel === 'expert' && /schnell|rapid|triage|urgent|sofort/i.test(context.userQuery)) {
factors.push('Experten-Tool für zeitkritisches Szenario');
}
if (tool.skillLevel === 'novice' && /komplex|erweitert|tiefgehend|advanced|forensisch/i.test(context.userQuery)) {
factors.push('Einsteiger-Tool für komplexe Analyse');
}
if (tool.type === 'software' && !isToolHosted(tool) && tool.accessType === 'download') {
factors.push('Installation und Setup erforderlich');
}
if (tool.license === 'Proprietary') {
factors.push('Kommerzielle Software - Lizenzkosten zu beachten');
}
if (confidence < 60) {
factors.push('Moderate Gesamtbewertung - alternative Ansätze empfohlen');
}
return factors.slice(0, 4);
}
private identifyStrengthIndicators(tool: any, context: AnalysisContext, confidence: number): string[] {
const indicators: string[] = [];
const similarity = context.embeddingsSimilarities.get(tool.name) || 0.5;
if (similarity >= 0.7) {
indicators.push('Sehr gute semantische Übereinstimmung mit Ihrer Anfrage');
}
if (tool.knowledgebase === true) {
indicators.push('Umfassende Dokumentation und Wissensbasis verfügbar');
}
if (isToolHosted(tool)) {
indicators.push('Sofort verfügbar über gehostete Lösung');
}
if (tool.skillLevel === 'intermediate' || tool.skillLevel === 'advanced') {
indicators.push('Ausgewogenes Verhältnis zwischen Funktionalität und Benutzerfreundlichkeit');
}
if (tool.type === 'method' && /methodik|vorgehen|prozess|ansatz/i.test(context.userQuery)) {
indicators.push('Methodischer Ansatz passt zu Ihrer prozeduralen Anfrage');
}
return indicators.slice(0, 4);
}
/**
 * Scores how trustworthy a tool selection is, on a 25–95 scale.
 *
 * Starts from 60 and adjusts by the share of candidates that were selected,
 * by whether a reasoning text longer than 50 characters is present, and by
 * whether any concepts were selected.
 *
 * @param result - Selection result; expected to carry `selectedTools` and
 *   optionally `reasoning` and `selectedConcepts`.
 * @param candidateCount - Number of candidates the selection was made from.
 * @returns 30 when `selectedTools` is missing or not an array, otherwise the
 *   adjusted score clamped to the range 25–95.
 */
calculateSelectionConfidence(result: any, candidateCount: number): number {
  if (!Array.isArray(result?.selectedTools)) {
    console.log('[CONFIDENCE-SCORING] No selected tools for confidence calculation');
    return 30;
  }

  const selectedCount = result.selectedTools.length;
  // Dividing by a zero or invalid candidate count would produce NaN/Infinity;
  // treat that case explicitly instead of relying on NaN comparison rules.
  const hasCandidates = Number.isFinite(candidateCount) && candidateCount > 0;
  const selectionRatio = hasCandidates ? selectedCount / candidateCount : 0;
  const hasReasoning = Boolean(result.reasoning && result.reasoning.length > 50);

  let confidence = 60;
  if (!hasCandidates) confidence -= 15; // same penalty the NaN/Infinity ratio used to end up with
  else if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
  else if (selectionRatio <= 0.05) confidence -= 10;
  else confidence -= 15;

  if (hasReasoning) confidence += 15;
  if (result.selectedConcepts?.length > 0) confidence += 5;

  const finalConfidence = Math.min(95, Math.max(25, confidence));
  console.log('[CONFIDENCE-SCORING] Selection confidence calculated:', {
    candidateCount,
    selectedCount,
    selectionRatio: selectionRatio.toFixed(3),
    hasReasoning,
    confidence: finalConfidence
  });
  return finalConfidence;
}
/**
 * Maps a numeric confidence onto a coarse label using the configured
 * high and medium thresholds.
 *
 * @param confidence - Confidence score to classify.
 * @returns 'strong' at or above the high threshold, 'moderate' at or above
 *   the medium threshold, otherwise 'weak'.
 */
getConfidenceLevel(confidence: number): 'weak' | 'moderate' | 'strong' {
  const { highThreshold, mediumThreshold } = this.config;
  return confidence >= highThreshold
    ? 'strong'
    : confidence >= mediumThreshold
      ? 'moderate'
      : 'weak';
}
/**
 * Picks the CSS custom-property reference used to display a confidence score.
 *
 * @param confidence - Confidence score to colorize.
 * @returns The accent color at or above the high threshold, the warning color
 *   at or above the medium threshold, otherwise the error color.
 */
getConfidenceColor(confidence: number): string {
  const { highThreshold, mediumThreshold } = this.config;
  return confidence >= highThreshold
    ? 'var(--color-accent)'
    : confidence >= mediumThreshold
      ? 'var(--color-warning)'
      : 'var(--color-error)';
}
/**
 * Returns a shallow copy of the scoring configuration so callers cannot
 * reassign the instance's own top-level settings.
 */
getConfig(): ConfidenceConfig {
  const snapshot = Object.assign({}, this.config);
  return snapshot;
}
}
export const confidenceScoring = new ConfidenceScoring();

View File

@@ -1,4 +1,4 @@
// src/utils/dataService.ts - Enhanced for micro-task AI pipeline
// src/utils/dataService.ts
import { promises as fs } from 'fs';
import { load } from 'js-yaml';
import path from 'path';
@@ -85,7 +85,7 @@ let cachedData: ToolsData | null = null;
let cachedRandomizedData: ToolsData | null = null;
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
let lastRandomizationDate: string | null = null;
let dataVersion: string | null = null;
let cachedToolsHash: string | null = null;
function seededRandom(seed: number): () => number {
let x = Math.sin(seed) * 10000;
@@ -110,17 +110,6 @@ function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
return shuffled;
}
function generateDataVersion(data: any): string {
const str = JSON.stringify(data, Object.keys(data).sort());
let hash = 0;
for (let i = 0; i < str.length; i++) {
const char = str.charCodeAt(i);
hash = ((hash << 5) - hash) + char;
hash = hash & hash;
}
return Math.abs(hash).toString(36);
}
async function loadRawData(): Promise<ToolsData> {
if (!cachedData) {
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
@@ -142,8 +131,9 @@ async function loadRawData(): Promise<ToolsData> {
};
}
dataVersion = generateDataVersion(cachedData);
console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);
const { getToolsFileHash } = await import('./hashUtils.js');
cachedToolsHash = await getToolsFileHash();
console.log(`[DATA SERVICE] Loaded data with hash: ${cachedToolsHash.slice(0, 12)}...`);
} catch (error) {
if (error instanceof z.ZodError) {
@@ -234,7 +224,7 @@ export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedT
}
export function getDataVersion(): string | null {
return dataVersion;
return cachedToolsHash;
}
export function clearCache(): void {
@@ -242,7 +232,7 @@ export function clearCache(): void {
cachedRandomizedData = null;
cachedCompressedData = null;
lastRandomizationDate = null;
dataVersion = null;
cachedToolsHash = null;
console.log('[DATA SERVICE] Enhanced cache cleared');
}

View File

@@ -1,11 +1,11 @@
// src/utils/embeddings.ts
// src/utils/embeddings.ts - Refactored
import { promises as fs } from 'fs';
import path from 'path';
import { getCompressedToolsDataForAI } from './dataService.js';
import 'dotenv/config';
import crypto from 'crypto';
interface EmbeddingData {
export interface EmbeddingData {
id: string;
type: 'tool' | 'concept';
name: string;
@@ -20,14 +20,22 @@ interface EmbeddingData {
};
}
export interface SimilarityResult extends EmbeddingData {
similarity: number;
}
interface EmbeddingsDatabase {
version: string;
lastUpdated: number;
embeddings: EmbeddingData[];
}
interface SimilarityResult extends EmbeddingData {
similarity: number;
interface EmbeddingsConfig {
endpoint?: string;
apiKey?: string;
model?: string;
batchSize: number;
batchDelay: number;
}
class EmbeddingsService {
@@ -35,48 +43,30 @@ class EmbeddingsService {
private isInitialized = false;
private initializationPromise: Promise<void> | null = null;
private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
private readonly batchSize: number;
private readonly batchDelay: number;
private enabled: boolean = false;
private config: EmbeddingsConfig;
constructor() {
this.batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20', 10);
this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
this.enabled = true;
this.config = this.loadConfig();
console.log('[EMBEDDINGS-SERVICE] Initialized:', {
hasEndpoint: !!this.config.endpoint,
hasModel: !!this.config.model
});
}
private async checkEnabledStatus(): Promise<void> {
try {
const envEnabled = process.env.AI_EMBEDDINGS_ENABLED;
if (envEnabled === 'true') {
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
const model = process.env.AI_EMBEDDINGS_MODEL;
if (!endpoint || !model) {
console.warn('[EMBEDDINGS] Embeddings enabled but API configuration missing - disabling');
this.enabled = false;
return;
}
console.log('[EMBEDDINGS] All requirements met - enabling embeddings');
this.enabled = true;
return;
}
try {
await fs.stat(this.embeddingsPath);
console.log('[EMBEDDINGS] Existing embeddings file found - enabling');
this.enabled = true;
} catch {
console.log('[EMBEDDINGS] Embeddings not explicitly enabled - disabling');
this.enabled = false;
}
} catch (error) {
console.error('[EMBEDDINGS] Error checking enabled status:', error);
this.enabled = false;
}
private loadConfig(): EmbeddingsConfig {
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
const model = process.env.AI_EMBEDDINGS_MODEL;
const batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20', 10);
const batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
return {
endpoint,
apiKey,
model,
batchSize,
batchDelay
};
}
async initialize(): Promise<void> {
@@ -93,63 +83,55 @@ class EmbeddingsService {
}
private async performInitialization(): Promise<void> {
await this.checkEnabledStatus();
if (!this.enabled) {
console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
return;
}
const initStart = Date.now();
try {
console.log('[EMBEDDINGS] Initializing embeddings system…');
console.log('[EMBEDDINGS-SERVICE] Starting initialization');
/*if (!this.config.enabled) {
console.log('[EMBEDDINGS-SERVICE] Service disabled via configuration');
return;
}*/
await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true });
const toolsData = await getCompressedToolsDataForAI();
const currentDataHash = await this.hashToolsFile();
const toolsData = await getCompressedToolsDataForAI();
const { getToolsFileHash } = await import('./hashUtils.js');
const currentDataHash = await getToolsFileHash();
const existing = await this.loadEmbeddings();
console.log('[EMBEDDINGS] Current hash:', currentDataHash);
console.log('[EMBEDDINGS] Existing file version:', existing?.version);
console.log('[EMBEDDINGS] Existing embeddings length:', existing?.embeddings?.length);
const cacheIsUsable =
existing &&
const existing = await this.loadEmbeddings();
const cacheIsUsable = existing &&
existing.version === currentDataHash &&
Array.isArray(existing.embeddings) &&
existing.embeddings.length > 0;
if (cacheIsUsable) {
console.log('[EMBEDDINGS] Using cached embeddings');
this.embeddings = existing.embeddings;
console.log('[EMBEDDINGS-SERVICE] Using cached embeddings');
this.embeddings = existing.embeddings;
} else {
console.log('[EMBEDDINGS] Generating new embeddings');
console.log('[EMBEDDINGS-SERVICE] Generating new embeddings');
await this.generateEmbeddings(toolsData, currentDataHash);
}
this.isInitialized = true;
console.log(`[EMBEDDINGS] Initialized with ${this.embeddings.length} embeddings in ${Date.now() - initStart} ms`);
} catch (err) {
console.error('[EMBEDDINGS] Failed to initialize:', err);
console.log(`[EMBEDDINGS-SERVICE] Initialized successfully with ${this.embeddings.length} embeddings in ${Date.now() - initStart}ms`);
} catch (error) {
console.error('[EMBEDDINGS-SERVICE] Initialization failed:', error);
this.isInitialized = false;
throw err;
throw error;
} finally {
this.initializationPromise = null;
}
}
private async hashToolsFile(): Promise<string> {
const file = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
const raw = await fs.readFile(file, 'utf8');
return crypto.createHash('sha256').update(raw).digest('hex');
}
private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> {
try {
const data = await fs.readFile(this.embeddingsPath, 'utf8');
return JSON.parse(data);
} catch (error) {
console.log('[EMBEDDINGS] No existing embeddings found');
console.log('[EMBEDDINGS-SERVICE] No existing embeddings file found');
return null;
}
}
@@ -162,7 +144,7 @@ class EmbeddingsService {
};
await fs.writeFile(this.embeddingsPath, JSON.stringify(database, null, 2));
console.log(`[EMBEDDINGS] Saved ${this.embeddings.length} embeddings to disk`);
console.log(`[EMBEDDINGS-SERVICE] Saved ${this.embeddings.length} embeddings to disk`);
}
private createContentString(item: any): string {
@@ -178,30 +160,23 @@ class EmbeddingsService {
}
private async generateEmbeddingsBatch(contents: string[]): Promise<number[][]> {
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
const model = process.env.AI_EMBEDDINGS_MODEL;
if (!endpoint || !model) {
const missing: string[] = [];
if (!endpoint) missing.push('AI_EMBEDDINGS_ENDPOINT');
if (!model) missing.push('AI_EMBEDDINGS_MODEL');
throw new Error(`Missing embeddings API configuration: ${missing.join(', ')}`);
if (!this.config.endpoint || !this.config.model) {
throw new Error('Missing embeddings API configuration');
}
const headers: Record<string, string> = {
'Content-Type': 'application/json'
};
if (apiKey) {
headers['Authorization'] = `Bearer ${apiKey}`;
if (this.config.apiKey) {
headers['Authorization'] = `Bearer ${this.config.apiKey}`;
}
const response = await fetch(endpoint, {
const response = await fetch(this.config.endpoint, {
method: 'POST',
headers,
body: JSON.stringify({
model,
model: this.config.model,
input: contents
})
});
@@ -233,11 +208,16 @@ class EmbeddingsService {
const contents = allItems.map(item => this.createContentString(item));
this.embeddings = [];
for (let i = 0; i < contents.length; i += this.batchSize) {
const batch = contents.slice(i, i + this.batchSize);
const batchItems = allItems.slice(i, i + this.batchSize);
console.log(`[EMBEDDINGS-SERVICE] Generating embeddings for ${contents.length} items`);
for (let i = 0; i < contents.length; i += this.config.batchSize) {
const batch = contents.slice(i, i + this.config.batchSize);
const batchItems = allItems.slice(i, i + this.config.batchSize);
console.log(`[EMBEDDINGS] Processing batch ${Math.ceil((i + 1) / this.batchSize)} of ${Math.ceil(contents.length / this.batchSize)}`);
const batchNumber = Math.ceil((i + 1) / this.config.batchSize);
const totalBatches = Math.ceil(contents.length / this.config.batchSize);
console.log(`[EMBEDDINGS-SERVICE] Processing batch ${batchNumber}/${totalBatches}`);
try {
const embeddings = await this.generateEmbeddingsBatch(batch);
@@ -260,12 +240,12 @@ class EmbeddingsService {
});
});
if (i + this.batchSize < contents.length) {
await new Promise(resolve => setTimeout(resolve, this.batchDelay));
if (i + this.config.batchSize < contents.length) {
await new Promise(resolve => setTimeout(resolve, this.config.batchDelay));
}
} catch (error) {
console.error(`[EMBEDDINGS] Failed to process batch ${Math.ceil((i + 1) / this.batchSize)}:`, error);
console.error(`[EMBEDDINGS-SERVICE] Batch ${batchNumber} failed:`, error);
throw error;
}
}
@@ -273,18 +253,17 @@ class EmbeddingsService {
await this.saveEmbeddings(version);
}
public async embedText(text: string): Promise<number[]> {
if (!this.enabled || !this.isInitialized) {
async embedText(text: string): Promise<number[]> {
if (!this.isInitialized) {
throw new Error('Embeddings service not available');
}
const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
return embedding;
}
async waitForInitialization(): Promise<void> {
await this.checkEnabledStatus();
if (!this.enabled || this.isInitialized) {
if (this.isInitialized) {
return Promise.resolve();
}
@@ -296,13 +275,6 @@ class EmbeddingsService {
return this.initialize();
}
async forceRecheckEnvironment(): Promise<void> {
this.enabled = false;
this.isInitialized = false;
await this.checkEnabledStatus();
console.log('[EMBEDDINGS] Environment status re-checked, enabled:', this.enabled);
}
private cosineSimilarity(a: number[], b: number[]): number {
let dotProduct = 0;
let normA = 0;
@@ -318,145 +290,62 @@ class EmbeddingsService {
}
async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<SimilarityResult[]> {
if (!this.enabled) {
console.log('[EMBEDDINGS] Service disabled for similarity search');
/*if (!this.config.enabled) {
console.log('[EMBEDDINGS-SERVICE] Service disabled, returning empty results');
return [];
}*/
if (!this.isInitialized || this.embeddings.length === 0) {
console.log('[EMBEDDINGS-SERVICE] Not initialized or no embeddings available');
return [];
}
try {
if (this.isInitialized && this.embeddings.length > 0) {
console.log(`[EMBEDDINGS] Using embeddings data for similarity search: ${query}`);
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
const queryEmbedding = queryEmbeddings[0];
console.log(`[EMBEDDINGS-SERVICE] Finding similar items for query: "${query}"`);
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
const queryEmbedding = queryEmbeddings[0];
console.log(`[EMBEDDINGS] Computing similarities for ${this.embeddings.length} items`);
const similarities: SimilarityResult[] = this.embeddings.map(item => ({
...item,
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
}));
const similarities: SimilarityResult[] = this.embeddings.map(item => ({
...item,
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
}));
const topScore = Math.max(...similarities.map(s => s.similarity));
const dynamicThreshold = Math.max(threshold, topScore * 0.85);
const topScore = Math.max(...similarities.map(s => s.similarity));
const dynamicCutOff = Math.max(threshold, topScore * 0.85);
const results = similarities
.filter(item => item.similarity >= dynamicThreshold)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, maxResults);
const results = similarities
.filter(item => item.similarity >= dynamicCutOff)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, maxResults);
const orderingValid = results.every((item, index) => {
if (index === 0) return true;
return item.similarity <= results[index - 1].similarity;
console.log(`[EMBEDDINGS-SERVICE] Found ${results.length} similar items (threshold: ${dynamicThreshold.toFixed(3)})`);
if (results.length > 0) {
console.log('[EMBEDDINGS-SERVICE] Top 5 matches:');
results.slice(0, 5).forEach((item, idx) => {
console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`);
});
if (!orderingValid) {
console.error('[EMBEDDINGS] CRITICAL: Similarity ordering is broken!');
results.forEach((item, idx) => {
console.error(` ${idx}: ${item.name} = ${item.similarity.toFixed(4)}`);
});
}
console.log(`[EMBEDDINGS] Found ${results.length} similar items (threshold: ${threshold})`);
if (results.length > 0) {
console.log('[EMBEDDINGS] Top 10 similarity matches:');
results.slice(0, 10).forEach((item, idx) => {
console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`);
});
const topSimilarity = results[0].similarity;
const hasHigherSimilarity = results.some(item => item.similarity > topSimilarity);
if (hasHigherSimilarity) {
console.error('[EMBEDDINGS] CRITICAL: Top result is not actually the highest similarity!');
}
}
return results;
} else {
console.log(`[EMBEDDINGS] No embeddings data, using fallback text matching: ${query}`);
const { getToolsData } = await import('./dataService.js');
const toolsData = await getToolsData();
const queryLower = query.toLowerCase();
const queryWords = queryLower.split(/\s+/).filter(w => w.length > 2);
const similarities: SimilarityResult[] = toolsData.tools
.map((tool: any) => {
let similarity = 0;
if (tool.name.toLowerCase().includes(queryLower)) {
similarity += 0.8;
}
if (tool.description && tool.description.toLowerCase().includes(queryLower)) {
similarity += 0.6;
}
if (tool.tags && Array.isArray(tool.tags)) {
const matchingTags = tool.tags.filter((tag: string) =>
tag.toLowerCase().includes(queryLower) || queryLower.includes(tag.toLowerCase())
);
if (tool.tags.length > 0) {
similarity += (matchingTags.length / tool.tags.length) * 0.4;
}
}
const toolText = `${tool.name} ${tool.description || ''} ${(tool.tags || []).join(' ')}`.toLowerCase();
const matchingWords = queryWords.filter(word => toolText.includes(word));
if (queryWords.length > 0) {
similarity += (matchingWords.length / queryWords.length) * 0.3;
}
return {
id: `tool_${tool.name.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase()}`,
type: 'tool' as const,
name: tool.name,
content: toolText,
embedding: [],
metadata: {
domains: tool.domains || [],
phases: tool.phases || [],
tags: tool.tags || [],
skillLevel: tool.skillLevel,
type: tool.type
},
similarity: Math.min(similarity, 1.0)
};
})
.filter(item => item.similarity >= threshold)
.sort((a, b) => b.similarity - a.similarity)
.slice(0, maxResults);
console.log(`[EMBEDDINGS] Fallback found ${similarities.length} similar items`);
return similarities;
}
return results;
} catch (error) {
console.error('[EMBEDDINGS] Failed to find similar items:', error);
console.error('[EMBEDDINGS-SERVICE] Similarity search failed:', error);
return [];
}
}
isEnabled(): boolean {
if (!this.enabled && !this.isInitialized) {
this.checkEnabledStatus().catch(console.error);
}
return this.enabled;
}
getStats(): { enabled: boolean; initialized: boolean; count: number } {
getStats(): {initialized: boolean; count: number } {
return {
enabled: this.enabled,
initialized: this.isInitialized,
count: this.embeddings.length
};
}
getConfig(): EmbeddingsConfig {
return { ...this.config };
}
}
const embeddingsService = new EmbeddingsService();
export { embeddingsService, type EmbeddingData, type SimilarityResult };
export const embeddingsService = new EmbeddingsService();

20
src/utils/hashUtils.ts Normal file
View File

@@ -0,0 +1,20 @@
// src/utils/hashUtils.ts
import { promises as fs } from 'fs';
import { readFileSync } from 'fs';
import path from 'path';
import crypto from 'crypto';
/**
 * Computes the SHA-256 digest of `src/data/tools.yaml`, resolved against the
 * current working directory.
 *
 * @returns The digest as a lowercase hex string.
 * @throws Rejects with the underlying file-system error when the file cannot
 *   be read.
 */
export async function getToolsFileHash(): Promise<string> {
  const yamlLocation = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
  const contents = await fs.readFile(yamlLocation, 'utf8');
  const hasher = crypto.createHash('sha256');
  hasher.update(contents);
  return hasher.digest('hex');
}
/**
 * Synchronous variant of the tools-file hash: SHA-256 of `src/data/tools.yaml`
 * resolved against the current working directory.
 *
 * Uses the statically imported `readFileSync` rather than `require('fs')`:
 * `require` is not defined inside ES modules, so the previous call threw a
 * ReferenceError that the catch block swallowed, making the function return
 * null even when the file existed.
 *
 * @returns The digest as a lowercase hex string, or null when the file cannot
 *   be read.
 */
export function getToolsFileHashSync(): string | null {
  try {
    const file = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
    const raw = readFileSync(file, 'utf8');
    return crypto.createHash('sha256').update(raw).digest('hex');
  } catch {
    // Best effort by design: an unreadable file is reported as "no hash".
    return null;
  }
}

356
src/utils/jsonUtils.ts Normal file
View File

@@ -0,0 +1,356 @@
// src/utils/jsonUtils.ts
/**
 * Static helpers for parsing AI-generated JSON leniently, parsing untrusted
 * JSON strictly, and sanitizing/validating audit-trail exports.
 */
export class JSONParser {
  /** Property names that are never copied onto a sanitized plain object. */
  private static readonly UNSAFE_KEYS: string[] = ['__proto__', 'constructor', 'prototype'];

  /**
   * Leniently parses JSON that may be wrapped in a Markdown code fence or be
   * cut off at the end.
   *
   * Object results are normalized so that `selectedTools` and
   * `selectedConcepts` are always arrays.
   *
   * @param jsonString - Raw text expected to contain JSON.
   * @param fallback - Value returned when nothing parseable is found.
   * @returns The parsed value, or `fallback` on any failure.
   */
  static safeParseJSON(jsonString: string, fallback: any = null): any {
    try {
      let cleaned = jsonString.trim();

      // Tried in order: ```json fence, generic fence, outermost {...} span.
      const jsonBlockPatterns = [
        /```json\s*([\s\S]*?)\s*```/i,
        /```\s*([\s\S]*?)\s*```/i,
        /\{[\s\S]*\}/,
      ];

      for (const pattern of jsonBlockPatterns) {
        const match = cleaned.match(pattern);
        if (match) {
          // The third pattern has no capture group, so fall back to the full match.
          cleaned = match[1] || match[0];
          break;
        }
      }

      if (!cleaned.endsWith('}') && !cleaned.endsWith(']')) {
        console.warn('[JSON-PARSER] JSON appears truncated, attempting recovery');
        cleaned = this.repairTruncatedJSON(cleaned);
      }

      const parsed = JSON.parse(cleaned);

      if (parsed && typeof parsed === 'object') {
        if (!Array.isArray(parsed.selectedTools)) parsed.selectedTools = [];
        if (!Array.isArray(parsed.selectedConcepts)) parsed.selectedConcepts = [];
      }

      return parsed;
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      console.warn('[JSON-PARSER] JSON parsing failed:', reason);
      return fallback;
    }
  }

  /**
   * Attempts to turn cut-off JSON text into something `JSON.parse` accepts.
   *
   * If the text contains a prefix that closes every structure it opened, that
   * prefix is returned. Otherwise an unterminated string is closed, a dangling
   * trailing comma is dropped, and every still-open object/array is closed in
   * reverse nesting order.
   *
   * @param cleaned - Truncated JSON text.
   * @returns The repaired text (not guaranteed to be valid JSON).
   */
  private static repairTruncatedJSON(cleaned: string): string {
    // Closing delimiters still owed, innermost last.
    const pendingClosers: string[] = [];
    let inString = false;
    let escaped = false;
    let lastCompleteStructure = '';

    for (let i = 0; i < cleaned.length; i++) {
      const char = cleaned[i];

      if (escaped) {
        escaped = false;
        continue;
      }
      if (char === '\\') {
        escaped = true;
        continue;
      }
      if (char === '"') {
        inString = !inString;
        continue;
      }
      if (inString) {
        continue;
      }

      if (char === '{') {
        pendingClosers.push('}');
      } else if (char === '[') {
        pendingClosers.push(']');
      } else if (char === '}' || char === ']') {
        const expected = pendingClosers.pop();
        if (expected === char && pendingClosers.length === 0) {
          lastCompleteStructure = cleaned.substring(0, i + 1);
        }
      }
    }

    if (lastCompleteStructure) {
      return lastCompleteStructure;
    }

    let repaired = cleaned;
    if (escaped) {
      // A lone trailing backslash would escape the quote appended below.
      repaired = repaired.slice(0, -1);
    }
    if (inString) {
      repaired += '"';
    } else {
      repaired = repaired.replace(/,\s*$/, '');
    }
    return repaired + pendingClosers.reverse().join('');
  }

  /**
   * Returns the quoted items found inside the first array matched by
   * `listPattern` (capture group 1 must be the array body).
   */
  private static extractQuotedItems(source: string, listPattern: RegExp): string[] {
    const listMatch = source.match(listPattern);
    if (!listMatch) {
      return [];
    }
    const quoted = listMatch[1].match(/"([^"]+)"/g);
    return quoted ? quoted.map(item => item.replace(/"/g, '')) : [];
  }

  /**
   * Regex-based last resort for text that cannot be parsed as JSON at all.
   *
   * Reads the `selectedTools` and `selectedConcepts` arrays if they can be
   * located; when neither yields anything, up to 15 quoted strings that look
   * like names are returned as tools.
   *
   * @param jsonString - Malformed JSON text.
   * @returns The names recovered for tools and concepts.
   */
  static extractToolsFromMalformedJSON(jsonString: string): { selectedTools: string[]; selectedConcepts: string[] } {
    const selectedTools = this.extractQuotedItems(
      jsonString,
      /"selectedTools"\s*:\s*\[([\s\S]*?)\]/i
    );
    const selectedConcepts = this.extractQuotedItems(
      jsonString,
      /"selectedConcepts"\s*:\s*\[([\s\S]*?)\]/i
    );

    if (selectedTools.length === 0 && selectedConcepts.length === 0) {
      const allMatches = jsonString.match(/"([^"]+)"/g);
      if (allMatches) {
        const possibleNames = allMatches
          .map(match => match.replace(/"/g, ''))
          .filter(name =>
            name.length > 2 &&
            !['selectedTools', 'selectedConcepts', 'reasoning'].includes(name) &&
            !name.includes(':') &&
            !name.match(/^\d+$/)
          )
          .slice(0, 15);
        selectedTools.push(...possibleNames);
      }
    }

    return { selectedTools, selectedConcepts };
  }

  /**
   * Strictly parses untrusted JSON text.
   *
   * @param jsonString - Text to parse.
   * @param maxSize - Maximum accepted `jsonString.length` (default 10 * 1024 * 1024).
   * @returns The parsed object or array.
   * @throws Error when the input is not a string, exceeds `maxSize`, matches
   *   one of the suspicious patterns, is not valid JSON, or does not parse to
   *   a non-null object.
   */
  static secureParseJSON(jsonString: string, maxSize: number = 10 * 1024 * 1024): any {
    if (typeof jsonString !== 'string') {
      throw new Error('Input must be a string');
    }

    if (jsonString.length > maxSize) {
      throw new Error(`JSON string too large (${jsonString.length} bytes, max ${maxSize})`);
    }

    // NOTE: these are plain substring checks, so e.g. the word "constructor"
    // anywhere in the text (including inside a value) rejects the input.
    const suspiciousPatterns = [
      /<script/i,
      /javascript:/i,
      /eval\(/i,
      /function\s*\(/i,
      /__proto__/i,
      /constructor/i
    ];

    for (const pattern of suspiciousPatterns) {
      if (pattern.test(jsonString)) {
        throw new Error('Potentially malicious content detected in JSON');
      }
    }

    try {
      const parsed = JSON.parse(jsonString);

      if (typeof parsed !== 'object' || parsed === null) {
        throw new Error('JSON must be an object');
      }

      return parsed;
    } catch (error) {
      if (error instanceof SyntaxError) {
        throw new Error(`Invalid JSON syntax: ${error.message}`);
      }
      throw error;
    }
  }

  /**
   * Produces a size-bounded, script-free copy of a value for audit output.
   *
   * Strings have `<script>…</script>` blocks replaced and are then cut to 500
   * characters; arrays are cut to 20 items and objects to 50 properties (each
   * with a marker describing what was dropped); nesting stops at `maxDepth`.
   * The keys `__proto__`, `constructor` and `prototype` are never copied.
   *
   * @param obj - Value to sanitize.
   * @param maxDepth - Nesting depth at which values are replaced by a marker.
   * @param currentDepth - Current recursion depth; callers normally omit it.
   * @returns The sanitized copy.
   */
  static sanitizeForAudit(obj: any, maxDepth: number = 5, currentDepth: number = 0): any {
    if (currentDepth >= maxDepth) {
      return '[Max depth reached]';
    }

    if (obj === null || obj === undefined) {
      return obj;
    }

    if (typeof obj === 'string') {
      // Strip scripts before truncating so long strings are sanitized too.
      const withoutScripts = obj.replace(/<script[\s\S]*?<\/script>/gi, '[script removed]');
      if (withoutScripts.length > 500) {
        return withoutScripts.slice(0, 500) + '...[truncated]';
      }
      return withoutScripts;
    }

    if (typeof obj === 'number' || typeof obj === 'boolean') {
      return obj;
    }

    if (Array.isArray(obj)) {
      const keptItems = obj
        .slice(0, 20)
        .map(item => this.sanitizeForAudit(item, maxDepth, currentDepth + 1));
      if (obj.length > 20) {
        keptItems.push(`...[${obj.length - 20} more items]`);
      }
      return keptItems;
    }

    if (typeof obj === 'object') {
      // Filter first: assigning an own "__proto__" key onto a plain object
      // would replace that object's prototype.
      const safeKeys = Object.keys(obj).filter(key => !this.UNSAFE_KEYS.includes(key));
      const sanitized: any = {};

      for (const key of safeKeys.slice(0, 50)) {
        sanitized[key] = this.sanitizeForAudit(obj[key], maxDepth, currentDepth + 1);
      }
      if (safeKeys.length > 50) {
        sanitized['[truncated]'] = `${safeKeys.length - 50} more properties`;
      }
      return sanitized;
    }

    return String(obj);
  }

  /**
   * Checks that an audit export has the expected top-level shape.
   *
   * @param data - Candidate export object.
   * @returns `isValid` plus one message per structural problem found.
   */
  static validateAuditExportStructure(data: any): { isValid: boolean; errors: string[] } {
    const errors: string[] = [];

    if (!data || typeof data !== 'object') {
      errors.push('Export data must be an object');
      return { isValid: false, errors };
    }

    const requiredProps = ['metadata', 'recommendation', 'auditTrail'];
    for (const prop of requiredProps) {
      if (!(prop in data)) {
        errors.push(`Missing required property: ${prop}`);
      }
    }

    if (data.metadata && typeof data.metadata === 'object') {
      const requiredMetadataProps = ['timestamp', 'version', 'userQuery', 'mode'];
      for (const prop of requiredMetadataProps) {
        if (!(prop in data.metadata)) {
          errors.push(`Missing required metadata property: ${prop}`);
        }
      }
    } else {
      errors.push('Invalid metadata structure');
    }

    if (!Array.isArray(data.auditTrail)) {
      errors.push('auditTrail must be an array');
    } else {
      data.auditTrail.forEach((entry: any, index: number) => {
        if (!entry || typeof entry !== 'object') {
          errors.push(`Audit entry ${index} is not a valid object`);
          return;
        }

        const requiredEntryProps = ['timestamp', 'phase', 'action', 'confidence', 'processingTimeMs'];
        for (const prop of requiredEntryProps) {
          if (!(prop in entry)) {
            errors.push(`Audit entry ${index} missing required property: ${prop}`);
          }
        }
      });
    }

    return {
      isValid: errors.length === 0,
      errors
    };
  }

  /**
   * Assembles the export document for an analysis run.
   *
   * @param recommendation - Recommendation payload; sanitized to depth 6.
   * @param userQuery - Original query; stored cut to 1000 characters.
   * @param mode - Analysis mode label.
   * @param auditTrail - Audit entries; each sanitized to depth 4.
   * @param additionalMetadata - Optional extras (hash, model, parameters,
   *   stats, raw context); missing fields get neutral defaults.
   * @returns The export object with `metadata`, `recommendation`,
   *   `auditTrail` and `rawContext`.
   */
  static prepareAuditExport(
    recommendation: any,
    userQuery: string,
    mode: string,
    auditTrail: any[] = [],
    additionalMetadata: any = {}
  ): any {
    // Default parameters only cover `undefined`; also tolerate an explicit null.
    const extras = additionalMetadata || {};

    return {
      metadata: {
        timestamp: new Date().toISOString(),
        version: "1.0",
        userQuery: userQuery.slice(0, 1000),
        mode,
        exportedBy: 'ForensicPathways',
        toolsDataHash: extras.toolsDataHash || 'unknown',
        aiModel: extras.aiModel || 'unknown',
        aiParameters: extras.aiParameters || {},
        processingStats: extras.processingStats || {}
      },
      recommendation: this.sanitizeForAudit(recommendation, 6),
      auditTrail: auditTrail.map(entry => this.sanitizeForAudit(entry, 4)),
      rawContext: {
        selectedTools: extras.selectedTools || [],
        backgroundKnowledge: extras.backgroundKnowledge || [],
        contextHistory: extras.contextHistory || [],
        embeddingsSimilarities: extras.embeddingsSimilarities || {}
      }
    };
  }

  /**
   * Validates an uploaded analysis export.
   *
   * Structural problems are reported as `issues` and make the upload invalid;
   * content observations (age over 30 days, unknown mode, missing decisions,
   * sparse confidence values) are reported as `warnings` only.
   *
   * @param data - Parsed upload.
   * @returns Validity flag with the collected issues and warnings.
   */
  static validateUploadedAnalysis(data: any): { isValid: boolean; issues: string[]; warnings: string[] } {
    const issues: string[] = [];
    const warnings: string[] = [];

    const structureValidation = this.validateAuditExportStructure(data);
    if (!structureValidation.isValid) {
      issues.push(...structureValidation.errors);
      return { isValid: false, issues, warnings };
    }

    if (data.metadata) {
      const timestamp = new Date(data.metadata.timestamp);
      if (isNaN(timestamp.getTime())) {
        warnings.push('Invalid timestamp in metadata');
      } else {
        const age = Date.now() - timestamp.getTime();
        const maxAge = 30 * 24 * 60 * 60 * 1000; // 30 days
        if (age > maxAge) {
          warnings.push(`Analysis is ${Math.floor(age / (24 * 60 * 60 * 1000))} days old`);
        }
      }

      if (!['workflow', 'tool'].includes(data.metadata.mode)) {
        warnings.push(`Unknown analysis mode: ${data.metadata.mode}`);
      }
    }

    if (Array.isArray(data.auditTrail)) {
      const trail: any[] = data.auditTrail;
      const aiDecisions = trail.filter((e: any) => e.action === 'ai-decision').length;
      const toolSelections = trail.filter((e: any) => e.action === 'selection-decision').length;

      if (aiDecisions === 0) {
        warnings.push('No AI decisions found in audit trail');
      }

      if (toolSelections === 0) {
        warnings.push('No tool selections found in audit trail');
      }

      // Skip the ratio for an empty trail to avoid dividing by zero.
      if (trail.length > 0) {
        const entriesWithConfidence = trail.filter((e: any) => typeof e.confidence === 'number').length;
        const confidenceRatio = entriesWithConfidence / trail.length;

        if (confidenceRatio < 0.8) {
          warnings.push(`Only ${Math.round(confidenceRatio * 100)}% of audit entries have confidence scores`);
        }
      }
    }

    return {
      isValid: issues.length === 0,
      issues,
      warnings
    };
  }
}

View File

@@ -1,5 +1,4 @@
// src/utils/nextcloud.ts
import { promises as fs } from 'fs';
import path from 'path';
import crypto from 'crypto';

View File

@@ -1,5 +1,4 @@
// src/utils/rateLimitedQueue.ts
import dotenv from "dotenv";
dotenv.config();

View File

@@ -3,37 +3,44 @@ import { visit } from 'unist-util-visit';
import type { Plugin } from 'unified';
import type { Root } from 'hast';
/**
 * Escapes the five HTML-significant characters (& < > " ') so the text can be
 * placed inside element content or a quoted attribute value.
 *
 * @param unsafe - Text to escape.
 * @returns The escaped text, or an empty string when the argument is not a string.
 */
function escapeHtml(unsafe: string): string {
  if (typeof unsafe !== 'string') {
    return '';
  }

  const entityFor: Record<string, string> = {
    '&': "&amp;",
    '<': "&lt;",
    '>': "&gt;",
    '"': "&quot;",
    "'": "&#039;"
  };

  // Single pass: each special character is replaced exactly once.
  return unsafe.replace(/[&<>"']/g, (symbol) => entityFor[symbol]);
}
export const remarkVideoPlugin: Plugin<[], Root> = () => {
return (tree: Root) => {
visit(tree, 'html', (node: any, index: number | undefined, parent: any) => {
if (node.value && node.value.includes('<video') && typeof index === 'number') {
const srcMatch = node.value.match(/src=["']([^"']+)["']/);
const titleMatch = node.value.match(/title=["']([^"']+)["']/);
if (srcMatch) {
const originalSrc = srcMatch[1];
const title = titleMatch?.[1] || 'Video';
const hasControls = node.value.includes('controls');
const hasAutoplay = node.value.includes('autoplay');
const hasMuted = node.value.includes('muted');
const hasLoop = node.value.includes('loop');
const hasPreload = node.value.match(/preload=["']([^"']+)["']/);
const preloadMatch = node.value.match(/preload=["']([^"']+)["']/);
const enhancedHTML = `
<div class="video-container aspect-16-9">
<div class="video-container">
<video
src="${escapeHtml(originalSrc)}"
${hasControls ? 'controls' : ''}
${hasAutoplay ? 'autoplay' : ''}
${hasMuted ? 'muted' : ''}
${hasLoop ? 'loop' : ''}
${hasPreload ? `preload="${hasPreload[1]}"` : 'preload="metadata"'}
style="width: 100%; height: 100%;"
${preloadMatch ? `preload="${preloadMatch[1]}"` : 'preload="metadata"'}
data-video-title="${escapeHtml(title)}"
data-original-src="${escapeHtml(originalSrc)}"
>
<p>Your browser does not support the video element.</p>
</video>
@@ -46,23 +53,31 @@ export const remarkVideoPlugin: Plugin<[], Root> = () => {
`.trim();
parent.children[index] = { type: 'html', value: enhancedHTML };
console.log(`[VIDEO] Processed: ${title}`);
console.log(`[VIDEO] Final URL: ${originalSrc}`);
console.log(`[VIDEO] Enhanced: ${title} (${originalSrc})`);
}
}
if (node.value && node.value.includes('<iframe') && typeof index === 'number' && parent) {
if (node.value.includes('video-container')) {
return;
}
const titleMatch = node.value.match(/title=["']([^"']+)["']/);
const title = titleMatch?.[1] || 'Embedded Video';
const enhancedHTML = `
<div class="video-container">
${node.value}
</div>
<div class="video-metadata">
<div class="video-title">${escapeHtml(title)}</div>
</div>
`.trim();
parent.children[index] = { type: 'html', value: enhancedHTML };
console.log(`[VIDEO] Enhanced iframe: ${title}`);
}
});
};
};
function escapeHtml(unsafe: string): string {
if (typeof unsafe !== 'string') return '';
return unsafe
.replace(/&/g, "&amp;")
.replace(/</g, "&lt;")
.replace(/>/g, "&gt;")
.replace(/"/g, "&quot;")
.replace(/'/g, "&#039;");
}
};

View File

@@ -1,22 +0,0 @@
// src/utils/toolHelpers.ts
/**
 * Shape of a single tool entry. Only `name` is required; every other field
 * is optional.
 */
export interface Tool {
name: string;
// Entry category; one of the three literal kinds.
type?: 'software' | 'method' | 'concept';
// May be explicitly null as well as absent.
projectUrl?: string | null;
license?: string;
knowledgebase?: boolean;
domains?: string[];
phases?: string[];
platforms?: string[];
skillLevel?: string;
description?: string;
tags?: string[];
// NOTE(review): presumably names of related concept entries — confirm against the data file.
related_concepts?: string[];
}
export {
createToolSlug,
findToolByIdentifier,
isToolHosted
} from './clientUtils.js';

372
src/utils/toolSelector.ts Normal file
View File

@@ -0,0 +1,372 @@
// src/utils/toolSelector.ts
import { aiService } from './aiService.js';
import { embeddingsService, type SimilarityResult } from './embeddings.js';
import { confidenceScoring } from './confidenceScoring.js';
import { JSONParser } from './jsonUtils.js';
import { getPrompt } from '../config/prompts.js';
import 'dotenv/config';
/**
 * Limits and thresholds that steer `ToolSelector`. Each value is read from an
 * environment variable in the `ToolSelector` constructor, with the default
 * noted on the field.
 */
export interface ToolSelectionConfig {
  /** `AI_MAX_SELECTED_ITEMS` (default 25): passed into the `toolSelection` prompt. */
  maxSelectedItems: number;

  /** `AI_EMBEDDING_CANDIDATES` (default 50): candidate count passed to the embeddings search. */
  embeddingCandidates: number;
  /** `AI_SIMILARITY_THRESHOLD` (default 0.3): threshold passed to the embeddings search. */
  similarityThreshold: number;

  /** `AI_EMBEDDING_SELECTION_LIMIT` (default 30): upper bound on tools sent to the AI. */
  embeddingSelectionLimit: number;
  /** `AI_EMBEDDING_CONCEPTS_LIMIT` (default 15): upper bound on concepts sent to the AI. */
  embeddingConceptsLimit: number;

  /** `AI_EMBEDDINGS_MIN_TOOLS` (default 8): fewer embedding hits than this means the full dataset is used. */
  embeddingsMinTools: number;
  /** `AI_EMBEDDINGS_MAX_REDUCTION_RATIO` (default 0.75): maximum hits/total ratio for the embeddings filter to be used. */
  embeddingsMaxReductionRatio: number;

  /** `AI_METHOD_SELECTION_RATIO` (default 0.4): share of the tool budget reserved for methods. */
  methodSelectionRatio: number;
  /** `AI_SOFTWARE_SELECTION_RATIO` (default 0.5): share of the tool budget reserved for software. */
  softwareSelectionRatio: number;
}
/**
 * Mutable per-request state handed to the `ToolSelector` methods.
 */
export interface SelectionContext {
  /** The user's query text. */
  userQuery: string;
  /** Mode label of the current request. */
  mode: string;

  /**
   * Item name → similarity score. `getIntelligentCandidates` clears this map
   * and refills it from the embeddings search results.
   */
  embeddingsSimilarities: Map<string, number>;
  /** Tool names already seen (not touched by `ToolSelector` itself). */
  seenToolNames: Set<string>;

  /** Tools chosen so far, with the phase and priority they were chosen for. */
  selectedTools?: {
    tool: any;
    phase: string;
    priority: string;
    justification?: string;
    taskRelevance?: number;
    limitations?: string[];
  }[];
}
/**
 * Outcome of the AI selection step in `ToolSelector`.
 */
export interface ToolSelectionResult {
  /** Candidate tool objects whose names the AI returned. */
  selectedTools: any[];
  /** Candidate concept objects whose names the AI returned. */
  selectedConcepts: any[];
  /** Score produced by `confidenceScoring.calculateSelectionConfidence`. */
  confidence: number;
}
/**
 * Narrows the tool/concept catalogue down to the entries relevant for a user
 * query: first with an embeddings similarity search, then with an AI call that
 * picks from the remaining candidates. Also offers a per-phase selection
 * helper.
 *
 * All limits and thresholds are read once, in the constructor, from `AI_*`
 * environment variables (see {@link ToolSelectionConfig}).
 */
class ToolSelector {
  private config: ToolSelectionConfig;

  constructor() {
    this.config = {
      maxSelectedItems: this.getEnvInt('AI_MAX_SELECTED_ITEMS', 25),
      embeddingCandidates: this.getEnvInt('AI_EMBEDDING_CANDIDATES', 50),
      similarityThreshold: this.getEnvFloat('AI_SIMILARITY_THRESHOLD', 0.3),
      embeddingSelectionLimit: this.getEnvInt('AI_EMBEDDING_SELECTION_LIMIT', 30),
      embeddingConceptsLimit: this.getEnvInt('AI_EMBEDDING_CONCEPTS_LIMIT', 15),
      embeddingsMinTools: this.getEnvInt('AI_EMBEDDINGS_MIN_TOOLS', 8),
      embeddingsMaxReductionRatio: this.getEnvFloat('AI_EMBEDDINGS_MAX_REDUCTION_RATIO', 0.75),
      methodSelectionRatio: this.getEnvFloat('AI_METHOD_SELECTION_RATIO', 0.4),
      softwareSelectionRatio: this.getEnvFloat('AI_SOFTWARE_SELECTION_RATIO', 0.5),
    };
    console.log('[TOOL-SELECTOR] Initialized with config:', this.config);
  }

  /**
   * Reads an integer from the environment.
   *
   * @param key - Environment variable name.
   * @param defaultValue - Returned when the variable is unset, empty, or not
   *   parseable as a base-10 integer.
   */
  private getEnvInt(key: string, defaultValue: number): number {
    const value = process.env[key];
    if (!value) {
      return defaultValue;
    }
    const parsed = parseInt(value, 10);
    // A malformed value must not leak NaN into limits used for slicing/comparison.
    return Number.isNaN(parsed) ? defaultValue : parsed;
  }

  /**
   * Reads a floating-point number from the environment.
   *
   * @param key - Environment variable name.
   * @param defaultValue - Returned when the variable is unset, empty, or not
   *   parseable as a number.
   */
  private getEnvFloat(key: string, defaultValue: number): number {
    const value = process.env[key];
    if (!value) {
      return defaultValue;
    }
    const parsed = parseFloat(value);
    // A malformed value must not leak NaN into thresholds and ratios.
    return Number.isNaN(parsed) ? defaultValue : parsed;
  }

  /**
   * Builds the candidate set for a query and lets the AI select from it.
   *
   * The embeddings search result is used as the candidate set only when it
   * contains at least `embeddingsMinTools` tools and its share of all tools
   * does not exceed `embeddingsMaxReductionRatio`; otherwise the full dataset
   * is used.
   *
   * @param userQuery - The user's query text.
   * @param toolsData - Dataset object; `tools`, `concepts`, `domains`, `phases`
   *   and `'domain-agnostic-software'` are read from it.
   * @param mode - Mode label, forwarded to the audit entry and the AI prompt.
   * @param context - Its `embeddingsSimilarities` map is cleared and refilled.
   * @returns The AI-selected tools and concepts plus the dataset's domains,
   *   phases and domain-agnostic software, passed through unchanged.
   * @throws Whatever the embeddings search or the AI selection throws.
   */
  async getIntelligentCandidates(
    userQuery: string,
    toolsData: any,
    mode: string,
    context: SelectionContext
  ): Promise<{
    tools: any[];
    concepts: any[];
    domains: any[];
    phases: any[];
    'domain-agnostic-software': any[];
  }> {
    console.log('[TOOL-SELECTOR] Getting intelligent candidates for query');
    let candidateTools: any[] = [];
    let candidateConcepts: any[] = [];
    context.embeddingsSimilarities.clear();

    // An initialization failure is only logged; the search below is still
    // attempted and any error it raises propagates to the caller.
    try {
      await embeddingsService.waitForInitialization();
    } catch (error) {
      console.error('[TOOL-SELECTOR] Embeddings initialization failed:', error);
    }

    console.log('[TOOL-SELECTOR] Using embeddings for candidate selection');
    const embeddingsSearchStart = Date.now();
    const similarItems = await embeddingsService.findSimilar(
      userQuery,
      this.config.embeddingCandidates,
      this.config.similarityThreshold
    ) as SimilarityResult[];
    console.log('[TOOL-SELECTOR] Embeddings found', similarItems.length, 'similar items');

    // Record the search in the audit trail.
    const { auditService } = await import('./auditService.js');
    const { getDataVersion } = await import('./dataService.js');
    const toolsDataHash = getDataVersion() || 'unknown';
    auditService.addEmbeddingsSearch(
      userQuery,
      similarItems,
      this.config.similarityThreshold,
      embeddingsSearchStart,
      {
        toolsDataHash: toolsDataHash,
        selectionPhase: 'initial-candidate-selection',
        candidateLimit: this.config.embeddingCandidates,
        mode: mode,
        reasoning: `Initiale semantische Suche für ${mode}-Modus - Reduzierung der ${toolsData.tools.length} verfügbaren Tools auf ${similarItems.length} relevante Kandidaten`
      }
    );

    similarItems.forEach(item => {
      context.embeddingsSimilarities.set(item.name, item.similarity);
    });

    // Resolve similarity hits back to the full dataset objects by name;
    // hits without a matching dataset entry are dropped.
    const toolsMap = new Map(toolsData.tools.map((tool: any) => [tool.name, tool]));
    const conceptsMap = new Map(toolsData.concepts.map((concept: any) => [concept.name, concept]));
    const similarTools = similarItems
      .filter((item: any) => item.type === 'tool')
      .map((item: any) => toolsMap.get(item.name))
      .filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null);
    const similarConcepts = similarItems
      .filter((item: any) => item.type === 'concept')
      .map((item: any) => conceptsMap.get(item.name))
      .filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null);

    const totalAvailableTools = toolsData.tools.length;
    const reductionRatio = similarTools.length / totalAvailableTools;
    if (similarTools.length >= this.config.embeddingsMinTools && reductionRatio <= this.config.embeddingsMaxReductionRatio) {
      candidateTools = similarTools;
      candidateConcepts = similarConcepts;
      console.log('[TOOL-SELECTOR] Using embeddings filtering:', totalAvailableTools, '→', similarTools.length, 'tools');
    } else {
      console.log('[TOOL-SELECTOR] Embeddings filtering insufficient, using full dataset');
      candidateTools = toolsData.tools;
      candidateConcepts = toolsData.concepts;
    }

    const selection = await this.performAISelection(
      userQuery,
      candidateTools,
      candidateConcepts,
      mode,
      context
    );
    return {
      tools: selection.selectedTools,
      concepts: selection.selectedConcepts,
      domains: toolsData.domains,
      phases: toolsData.phases,
      'domain-agnostic-software': toolsData['domain-agnostic-software']
    };
  }

  /**
   * Sends a bounded slice of the candidates to the AI and maps the returned
   * names back to candidate objects.
   *
   * @param userQuery - The user's query text.
   * @param candidateTools - Tools to choose from (methods and software).
   * @param candidateConcepts - Concepts to choose from.
   * @param mode - Mode label, forwarded to the prompt.
   * @param context - Currently unused by this method.
   * @throws Error when the AI response lacks `selectedTools`/`selectedConcepts`
   *   arrays or both are empty; errors from the AI call are logged and rethrown.
   */
  private async performAISelection(
    userQuery: string,
    candidateTools: any[],
    candidateConcepts: any[],
    mode: string,
    context: SelectionContext
  ): Promise<ToolSelectionResult> {
    console.log('[TOOL-SELECTOR] Performing AI selection');
    const candidateMethods = candidateTools.filter((t: any) => t && t.type === 'method');
    const candidateSoftware = candidateTools.filter((t: any) => t && t.type === 'software');
    console.log('[TOOL-SELECTOR] Candidates:',
      candidateMethods.length, 'methods,',
      candidateSoftware.length, 'software,',
      candidateConcepts.length, 'concepts'
    );

    const methodsWithFullData = candidateMethods.map(this.createToolData);
    const softwareWithFullData = candidateSoftware.map(this.createToolData);
    const conceptsWithFullData = candidateConcepts.map(this.createConceptData);

    const maxTools = Math.min(this.config.embeddingSelectionLimit, candidateTools.length);
    const maxConcepts = Math.min(this.config.embeddingConceptsLimit, candidateConcepts.length);

    // Split the tool budget between methods and software by the configured
    // ratios (each clamped to [0, 1]); scale both down if they overshoot.
    const methodRatio = Math.max(0, Math.min(1, this.config.methodSelectionRatio));
    const softwareRatio = Math.max(0, Math.min(1, this.config.softwareSelectionRatio));
    let methodLimit = Math.round(maxTools * methodRatio);
    let softwareLimit = Math.round(maxTools * softwareRatio);
    if (methodLimit + softwareLimit > maxTools) {
      const scale = maxTools / (methodLimit + softwareLimit);
      methodLimit = Math.floor(methodLimit * scale);
      softwareLimit = Math.floor(softwareLimit * scale);
    }

    const methodsPrimary = methodsWithFullData.slice(0, methodLimit);
    const softwarePrimary = softwareWithFullData.slice(0, softwareLimit);
    const toolsToSend: any[] = [...methodsPrimary, ...softwarePrimary];

    // Fill any remaining budget from the leftovers, taking from whichever
    // type has more entries left (software wins ties).
    let mIdx = methodsPrimary.length;
    let sIdx = softwarePrimary.length;
    while (toolsToSend.length < maxTools && (mIdx < methodsWithFullData.length || sIdx < softwareWithFullData.length)) {
      const remM = methodsWithFullData.length - mIdx;
      const remS = softwareWithFullData.length - sIdx;
      if (remS >= remM && sIdx < softwareWithFullData.length) {
        toolsToSend.push(softwareWithFullData[sIdx++]);
      } else if (mIdx < methodsWithFullData.length) {
        toolsToSend.push(methodsWithFullData[mIdx++]);
      } else if (sIdx < softwareWithFullData.length) {
        toolsToSend.push(softwareWithFullData[sIdx++]);
      } else {
        break;
      }
    }

    const conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);
    console.log('[TOOL-SELECTOR-DEBUG] maxTools:', maxTools, 'maxConcepts:', maxConcepts);
    console.log('[TOOL-SELECTOR] Sending to AI:',
      toolsToSend.filter((t: any) => t.type === 'method').length, 'methods,',
      toolsToSend.filter((t: any) => t.type === 'software').length, 'software,',
      conceptsToSend.length, 'concepts'
    );

    const basePrompt = getPrompt('toolSelection', mode, userQuery, this.config.maxSelectedItems);
    const prompt = getPrompt('toolSelectionWithData', basePrompt, toolsToSend, conceptsToSend);

    try {
      const response = await aiService.callAI(prompt);
      const result = JSONParser.safeParseJSON(response.content, null);
      if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
        console.error('[TOOL-SELECTOR] AI selection returned invalid structure');
        throw new Error('AI selection failed to return valid tool and concept selection');
      }
      const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
      if (totalSelected === 0) {
        throw new Error('AI selection returned empty selection');
      }

      // Map returned names back to candidate objects; unknown names are dropped.
      const toolsMap = new Map(candidateTools.map((tool: any) => [tool.name, tool]));
      const conceptsMap = new Map(candidateConcepts.map((concept: any) => [concept.name, concept]));
      const selectedTools = result.selectedTools
        .map((name: string) => toolsMap.get(name))
        .filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null);
      const selectedConcepts = result.selectedConcepts
        .map((name: string) => conceptsMap.get(name))
        .filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null);

      const selectedMethods = selectedTools.filter((t: any) => t && t.type === 'method');
      const selectedSoftware = selectedTools.filter((t: any) => t && t.type === 'software');
      console.log('[TOOL-SELECTOR] AI selected:',
        selectedMethods.length, 'methods,',
        selectedSoftware.length, 'software,',
        selectedConcepts.length, 'concepts'
      );

      const confidence = confidenceScoring.calculateSelectionConfidence(
        result,
        candidateTools.length + candidateConcepts.length
      );
      return { selectedTools, selectedConcepts, confidence };
    } catch (error) {
      console.error('[TOOL-SELECTOR] AI selection failed:', error);
      throw error;
    }
  }

  /**
   * Asks the AI which of `availableTools` fit the given phase.
   *
   * Entries of the AI response that are null/undefined or name a tool not in
   * `availableTools` are dropped with a warning; the remaining entries are
   * returned as-is. Any failure of the AI call is logged and yields `[]`.
   *
   * @param userQuery - The user's query text.
   * @param phase - Phase object; `phase.id` is used for logging and the whole
   *   object is forwarded to the prompt.
   * @param availableTools - Tools the AI may choose from.
   * @param context - Currently unused by this method.
   */
  async selectToolsForPhase(
    userQuery: string,
    phase: any,
    availableTools: any[],
    context: SelectionContext
  ): Promise<Array<{
    toolName: string;
    taskRelevance: number;
    justification: string;
    limitations: string[];
  }>> {
    console.log('[TOOL-SELECTOR] Selecting tools for phase:', phase.id);
    if (availableTools.length === 0) {
      console.log('[TOOL-SELECTOR] No tools available for phase:', phase.id);
      return [];
    }

    const prompt = getPrompt('phaseToolSelection', userQuery, phase, availableTools);
    try {
      const response = await aiService.callMicroTaskAI(prompt);
      const selections = JSONParser.safeParseJSON(response.content, []);
      if (!Array.isArray(selections)) {
        return [];
      }

      // Name lookup built once instead of scanning availableTools per selection.
      const knownNames = new Set(
        availableTools.filter((tool: any) => tool).map((tool: any) => tool.name)
      );
      const validSelections = selections.filter((sel: any) => {
        // Guard against null entries: one malformed element must not throw
        // and thereby discard every valid selection via the catch below.
        const isKnown = sel != null && knownNames.has(sel.toolName);
        if (!isKnown) {
          console.warn('[TOOL-SELECTOR] Invalid tool selection for phase:', phase.id, sel?.toolName);
        }
        return isKnown;
      });
      console.log('[TOOL-SELECTOR] Valid selections for phase:', phase.id, validSelections.length);
      return validSelections;
    } catch (error) {
      console.error('[TOOL-SELECTOR] Phase tool selection failed:', error);
      return [];
    }
  }

  /** Projects a tool onto the fields that are sent to the AI. */
  private createToolData = (tool: any) => ({
    name: tool.name,
    type: tool.type,
    description: tool.description,
    domains: tool.domains,
    phases: tool.phases,
    platforms: tool.platforms || [],
    tags: tool.tags || [],
    skillLevel: tool.skillLevel,
    license: tool.license,
    accessType: tool.accessType,
    projectUrl: tool.projectUrl,
    knowledgebase: tool.knowledgebase,
    related_concepts: tool.related_concepts || [],
    related_software: tool.related_software || []
  });

  /** Projects a concept onto the fields that are sent to the AI; `type` is always `'concept'`. */
  private createConceptData = (concept: any) => ({
    name: concept.name,
    type: 'concept',
    description: concept.description,
    domains: concept.domains,
    phases: concept.phases,
    tags: concept.tags || [],
    skillLevel: concept.skillLevel,
    related_concepts: concept.related_concepts || [],
    related_software: concept.related_software || []
  });

  /** Returns a shallow copy of the active configuration. */
  getConfig(): ToolSelectionConfig {
    return { ...this.config };
  }
}
// Module-level ToolSelector instance. It is constructed when this module is
// first loaded, which reads the AI_* environment variables and logs the
// resulting configuration.
export const toolSelector = new ToolSelector();

View File

@@ -1,115 +0,0 @@
// src/utils/videoUtils.ts - SIMPLIFIED - Basic utilities only
import 'dotenv/config';
/**
 * Minimal descriptive metadata for a video; both fields are optional.
 */
export interface SimpleVideoMetadata {
  /** Title of the video. */
  title?: string;
  /** Description of the video. */
  description?: string;
}
/**
 * File extension → MIME type. A `Map` is used instead of an object literal so
 * that extensions such as `constructor` or `__proto__` cannot resolve to
 * members inherited from `Object.prototype`.
 */
const VIDEO_MIME_TYPES: ReadonlyMap<string, string> = new Map([
  ['mp4', 'video/mp4'],
  ['webm', 'video/webm'],
  ['ogg', 'video/ogg'],
  ['mov', 'video/quicktime'],
  ['avi', 'video/x-msvideo'],
  ['m4v', 'video/m4v'],
  ['mkv', 'video/x-matroska'],
  ['flv', 'video/x-flv']
]);

/**
 * Derives a video MIME type from the file extension of a URL.
 *
 * @param url - Absolute URL, or a relative path/URL that `new URL()` rejects.
 * @returns The MIME type for a known extension (case-insensitive), otherwise
 *   `'video/mp4'`.
 */
export function getVideoMimeType(url: string): string {
  let path: string;
  try {
    path = new URL(url).pathname;
  } catch {
    // Not parseable as an absolute URL: strip query string and fragment by hand.
    path = url.split(/[?#]/)[0] ?? '';
  }
  const extension = path.split('.').pop()?.toLowerCase();
  return (extension && VIDEO_MIME_TYPES.get(extension)) || 'video/mp4';
}
/**
 * Formats a duration as `m:ss`, or `h:mm:ss` once it reaches one hour.
 * Fractional seconds are truncated.
 *
 * @param seconds - Duration in seconds. Negative, `NaN` and infinite values
 *   are treated as `0` so the result is always a well-formed timestamp.
 * @returns The formatted duration, e.g. `'1:05'` or `'1:01:01'`.
 */
export function formatDuration(seconds: number): string {
  const totalSeconds = Number.isFinite(seconds) && seconds > 0 ? Math.floor(seconds) : 0;
  const hours = Math.floor(totalSeconds / 3600);
  const minutes = Math.floor((totalSeconds % 3600) / 60);
  const secondsPart = (totalSeconds % 60).toString().padStart(2, '0');

  if (hours > 0) {
    return `${hours}:${minutes.toString().padStart(2, '0')}:${secondsPart}`;
  }
  return `${minutes}:${secondsPart}`;
}
/**
 * Renders a byte count with a binary (1024-based) unit.
 *
 * Values below 1024 are printed verbatim with ` B`; larger values are divided
 * down to KB, MB or GB and shown with one decimal place. GB is the largest
 * unit used.
 *
 * @param bytes - Size in bytes.
 * @returns The formatted size, e.g. `'1.5 KB'`.
 */
export function formatFileSize(bytes: number): string {
  const step = 1024;
  if (bytes < step) {
    return `${bytes} B`;
  }

  const scaledUnits: Array<[number, string]> = [
    [step, 'KB'],
    [step * step, 'MB']
  ];
  for (const [divisor, unit] of scaledUnits) {
    // A unit applies while the value is still below the next unit's size.
    if (bytes < divisor * step) {
      return `${(bytes / divisor).toFixed(1)} ${unit}`;
    }
  }
  return `${(bytes / (step * step * step)).toFixed(1)} GB`;
}
/**
 * Escapes `&`, `<`, `>`, `"` and `'` as HTML entities so the text can be
 * placed in element content or quoted attribute values.
 *
 * @param unsafe - Raw text to escape.
 * @returns The escaped text, or an empty string when `unsafe` is not a string.
 */
export function escapeHtml(unsafe: string): string {
  if (typeof unsafe !== 'string') {
    return '';
  }

  let escaped = '';
  for (const ch of unsafe) {
    switch (ch) {
      case '&':
        escaped += "&amp;";
        break;
      case '<':
        escaped += "&lt;";
        break;
      case '>':
        escaped += "&gt;";
        break;
      case '"':
        escaped += "&quot;";
        break;
      case "'":
        escaped += "&#039;";
        break;
      default:
        escaped += ch;
    }
  }
  return escaped;
}
/**
 * Builds the HTML markup for a `<video>` element wrapped in a
 * `video-container` div.
 *
 * @param src - Video URL; HTML-escaped before being placed in the `src` attribute.
 * @param options - Optional rendering settings:
 *   - `title` (default `'Video'`): escaped and written to `data-video-title`,
 *     and shown in the metadata block.
 *   - `controls` (default `true`), `autoplay`, `muted`, `loop` (default
 *     `false`): each adds the boolean attribute of the same name when true.
 *   - `preload` (default `'metadata'`): written as the `preload` attribute.
 *   - `aspectRatio` (default `'16:9'`): becomes the CSS class
 *     `aspect-<w>-<h>` on the container.
 *   - `showMetadata` (default `true`): enables the metadata block.
 * @returns The trimmed HTML string.
 */
export function generateVideoHTML(
src: string,
options: {
title?: string;
controls?: boolean;
autoplay?: boolean;
muted?: boolean;
loop?: boolean;
preload?: 'none' | 'metadata' | 'auto';
aspectRatio?: '16:9' | '4:3' | '1:1';
showMetadata?: boolean;
} = {}
): string {
// Apply defaults for every option the caller left out.
const {
title = 'Video',
controls = true,
autoplay = false,
muted = false,
loop = false,
preload = 'metadata',
aspectRatio = '16:9',
showMetadata = true
} = options;
// e.g. '16:9' -> 'aspect-16-9'
const aspectClass = `aspect-${aspectRatio.replace(':', '-')}`;
// Disabled flags yield '' and are removed by filter(Boolean) before joining.
const videoAttributes = [
controls ? 'controls' : '',
autoplay ? 'autoplay' : '',
muted ? 'muted' : '',
loop ? 'loop' : '',
`preload="${preload}"`
].filter(Boolean).join(' ');
// The metadata block is omitted when the title is still the default 'Video'.
const metadataHTML = showMetadata && title !== 'Video' ? `
<div class="video-metadata">
<div class="video-title">${escapeHtml(title)}</div>
</div>
` : '';
// The metadata block, when present, is emitted inside the container div.
return `
<div class="video-container ${aspectClass}">
<video
src="${escapeHtml(src)}"
${videoAttributes}
style="width: 100%; height: 100%;"
data-video-title="${escapeHtml(title)}"
>
<p>Your browser does not support the video element.</p>
</video>
${metadataHTML}
</div>
`.trim();
}