Compare commits
51 Commits
8283b71b8c
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bdee77f459 | ||
| 8a6d9d3324 | |||
|
|
dc9f52fb7c | ||
|
|
b17458d153 | ||
|
|
b14ca1d243 | ||
|
|
4ee1cc4984 | ||
|
|
bbe1b12251 | ||
|
|
d569b74a20 | ||
|
|
a2d3d3170a | ||
|
|
3823407d49 | ||
|
|
496f2a5b43 | ||
|
|
20a4c71d02 | ||
|
|
dad5e5ea0c | ||
|
|
b689f24502 | ||
|
|
630fc1643e | ||
|
|
1d750307c4 | ||
| 05d957324a | |||
|
|
6160620e24 | ||
|
|
1d91dbf478 | ||
|
|
76694e003c | ||
|
|
28af56d6ef | ||
|
|
3d5d2506e9 | ||
|
|
6b09eb062f | ||
|
|
70fb012d63 | ||
|
|
2cb25d1dd6 | ||
|
|
bcd92af8a0 | ||
|
|
5ecbabea90 | ||
|
|
07c8f707df | ||
|
|
e63ec367a5 | ||
|
|
5c3c308225 | ||
|
|
dd26d45a21 | ||
|
|
afbd8d2cd3 | ||
|
|
8bba0eefa9 | ||
|
|
170638a5fa | ||
|
|
c60730b4aa | ||
|
|
b9964685f9 | ||
|
|
5d72549bb7 | ||
|
|
15d302031e | ||
|
|
48209c4639 | ||
|
|
6d08dbdcd0 | ||
|
|
77f09ed399 | ||
|
|
0c7c502b03 | ||
|
|
1d98dd3257 | ||
|
|
3ad0d8120a | ||
|
|
88cf682790 | ||
|
|
182b9d01f9 | ||
|
|
12368ed7c8 | ||
|
|
c4c52f6064 | ||
|
|
e93f394263 | ||
|
|
75410e2b84 | ||
|
|
88e79d7780 |
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"_variables": {
|
||||
"lastUpdateCheck": 1754571688630
|
||||
"lastUpdateCheck": 1755901660216
|
||||
}
|
||||
}
|
||||
33
.env.example
33
.env.example
@@ -59,8 +59,7 @@ FORENSIC_AUDIT_RETENTION_HOURS=24
|
||||
FORENSIC_AUDIT_MAX_ENTRIES=50
|
||||
|
||||
# === AI SEMANTIC SEARCH ===
|
||||
# Enable semantic search (highly recommended for better results)
|
||||
AI_EMBEDDINGS_ENABLED=true
|
||||
# semantic search
|
||||
AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
|
||||
AI_EMBEDDINGS_API_KEY=your-embeddings-api-key-here
|
||||
AI_EMBEDDINGS_MODEL=mistral-embed
|
||||
@@ -68,26 +67,6 @@ AI_EMBEDDINGS_MODEL=mistral-embed
|
||||
# User rate limiting (queries per minute)
|
||||
AI_RATE_LIMIT_MAX_REQUESTS=4
|
||||
|
||||
# ============================================================================
|
||||
# 🎥 VIDEO EMBEDDING - PRODUCTION CONFIGURATION
|
||||
# ============================================================================
|
||||
|
||||
# Enable local caching of Nextcloud videos (highly recommended)
|
||||
VIDEO_CACHE_ENABLED=true
|
||||
|
||||
# Directory for cached videos (ensure it's writable and has sufficient space)
|
||||
# This directory will grow over time as videos are cached permanently
|
||||
VIDEO_CACHE_DIR=./cache/videos
|
||||
|
||||
# Emergency cleanup threshold in MB - videos are cached indefinitely
|
||||
# Only triggers cleanup when approaching this limit to prevent disk full
|
||||
# Recommended: 2000MB (2GB) for small deployments, 5000MB+ for larger ones
|
||||
VIDEO_CACHE_MAX_SIZE=2000
|
||||
|
||||
# Maximum individual video file size for caching in MB
|
||||
# Videos larger than this will stream directly without caching
|
||||
VIDEO_MAX_SIZE=200
|
||||
|
||||
# ============================================================================
|
||||
# CACHING BEHAVIOR
|
||||
# ============================================================================
|
||||
@@ -121,17 +100,11 @@ AI_SOFTWARE_SELECTION_RATIO=0.5 # 50% software tools (increase for more tool re
|
||||
|
||||
# AI selection limits
|
||||
AI_MAX_SELECTED_ITEMS=25
|
||||
AI_MAX_TOOLS_TO_ANALYZE=20
|
||||
AI_MAX_CONCEPTS_TO_ANALYZE=10
|
||||
|
||||
# Efficiency thresholds
|
||||
AI_EMBEDDINGS_MIN_TOOLS=8
|
||||
AI_EMBEDDINGS_MAX_REDUCTION_RATIO=0.75
|
||||
|
||||
# Fallback limits when embeddings are disabled
|
||||
AI_NO_EMBEDDINGS_TOOL_LIMIT=25
|
||||
AI_NO_EMBEDDINGS_CONCEPT_LIMIT=10
|
||||
|
||||
# === Rate Limiting & Timing ===
|
||||
AI_MICRO_TASK_TOTAL_LIMIT=30
|
||||
AI_MICRO_TASK_DELAY_MS=500
|
||||
@@ -141,10 +114,6 @@ AI_RATE_LIMIT_DELAY_MS=2000
|
||||
AI_EMBEDDINGS_BATCH_SIZE=10
|
||||
AI_EMBEDDINGS_BATCH_DELAY_MS=1000
|
||||
|
||||
# === Context Management ===
|
||||
AI_MAX_CONTEXT_TOKENS=4000
|
||||
AI_MAX_PROMPT_TOKENS=2500
|
||||
|
||||
# === Confidence Scoring ===
|
||||
CONFIDENCE_SEMANTIC_WEIGHT=0.5
|
||||
CONFIDENCE_SUITABILITY_WEIGHT=0.5
|
||||
|
||||
16
README.md
16
README.md
@@ -250,10 +250,26 @@ Ihr Artikel-Inhalt hier...
|
||||
2. Installer ausführen
|
||||
3. Einstellungen konfigurieren
|
||||
|
||||
## Video-Demonstration
|
||||
<video src="/videos/setup-tutorial.mp4" title="Setup-Tutorial" controls></video>
|
||||
|
||||
## Häufige Probleme
|
||||
Lösungen für typische Probleme...
|
||||
```
|
||||
|
||||
### Video-Integration
|
||||
|
||||
Knowledgebase-Artikel unterstützen eingebettete Videos für praktische Demonstrationen:
|
||||
|
||||
```html
|
||||
<video src="/videos/demo.mp4" title="Tool-Demonstration" controls></video>
|
||||
```
|
||||
|
||||
**Wichtige Hinweise**:
|
||||
- Videos müssen manuell in `public/videos/` bereitgestellt werden (nicht im Git-Repository enthalten)
|
||||
- Firefox-kompatible Formate verwenden (MP4 H.264, WebM VP9)
|
||||
- Detaillierte Video-Dokumentation: siehe `src/content/knowledgebase/README.md`
|
||||
|
||||
### Artikel-Struktur-Richtlinien
|
||||
|
||||
**Erforderliche Felder**:
|
||||
|
||||
381146
data/embeddings.json
381146
data/embeddings.json
File diff suppressed because it is too large
Load Diff
83
embedding-test-config.json
Normal file
83
embedding-test-config.json
Normal file
@@ -0,0 +1,83 @@
|
||||
{
|
||||
"toolsYamlPath": "./src/data/tools.yaml",
|
||||
"models": [
|
||||
{
|
||||
"name": "granite-embedding:278m",
|
||||
"type": "ollama",
|
||||
"endpoint": "http://192.168.178.100:11434/api/embeddings",
|
||||
"rateLimit": false,
|
||||
"contextSize": 512
|
||||
},
|
||||
{
|
||||
"name": "paraphrase-multilingual:latest",
|
||||
"type": "ollama",
|
||||
"endpoint": "http://192.168.178.100:11434/api/embeddings",
|
||||
"rateLimit": false,
|
||||
"contextSize": 128
|
||||
},
|
||||
{
|
||||
"name": "bge-large:latest",
|
||||
"type": "ollama",
|
||||
"endpoint": "http://192.168.178.100:11434/api/embeddings",
|
||||
"rateLimit": false,
|
||||
"contextSize": 512
|
||||
},
|
||||
{
|
||||
"name": "snowflake-arctic-embed2:latest",
|
||||
"type": "ollama",
|
||||
"endpoint": "http://192.168.178.100:11434/api/embeddings",
|
||||
"rateLimit": false,
|
||||
"contextSize": 8192
|
||||
},
|
||||
{
|
||||
"name": "snowflake-arctic-embed:latest",
|
||||
"type": "ollama",
|
||||
"endpoint": "http://192.168.178.100:11434/api/embeddings",
|
||||
"rateLimit": false,
|
||||
"contextSize": 512
|
||||
},
|
||||
{
|
||||
"name": "all-minilm:latest",
|
||||
"type": "ollama",
|
||||
"endpoint": "http://192.168.178.100:11434/api/embeddings",
|
||||
"rateLimit": false,
|
||||
"contextSize": 256
|
||||
},
|
||||
{
|
||||
"name": "bge-m3:latest",
|
||||
"type": "ollama",
|
||||
"endpoint": "http://192.168.178.100:11434/api/embeddings",
|
||||
"rateLimit": false,
|
||||
"contextSize": 8192
|
||||
},
|
||||
{
|
||||
"name": "mxbai-embed-large:latest",
|
||||
"type": "ollama",
|
||||
"endpoint": "http://192.168.178.100:11434/api/embeddings",
|
||||
"rateLimit": false,
|
||||
"contextSize": 512
|
||||
},
|
||||
{
|
||||
"name": "nomic-embed-text:latest",
|
||||
"type": "ollama",
|
||||
"endpoint": "http://192.168.178.100:11434/api/embeddings",
|
||||
"rateLimit": false,
|
||||
"contextSize": 2048
|
||||
},
|
||||
{
|
||||
"name": "mistral-embed",
|
||||
"type": "mistral",
|
||||
"endpoint": "https://api.mistral.ai/v1/embeddings",
|
||||
"apiKey": "${AI_EMBEDDINGS_API_KEY}",
|
||||
"rateLimit": true,
|
||||
"rateLimitDelayMs": 2000,
|
||||
"contextSize": 8192
|
||||
}
|
||||
],
|
||||
"testSettings": {
|
||||
"maxToolsPerCategory": 6,
|
||||
"maxNegativeExamples": 4,
|
||||
"contextSizeTests": true,
|
||||
"performanceIterations": 3
|
||||
}
|
||||
}
|
||||
897
embeddings-comparison.js
Normal file
897
embeddings-comparison.js
Normal file
@@ -0,0 +1,897 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// efficient-embedding-comparison.js
|
||||
// Proper embedding model evaluation with batch processing and vector search
|
||||
// Run with: node efficient-embedding-comparison.js --config=config.json
|
||||
|
||||
import fs from 'fs/promises';
|
||||
import yaml from 'js-yaml';
|
||||
import path from 'path';
|
||||
import crypto from 'crypto';
|
||||
|
||||
class EmbeddingCache {
|
||||
constructor(cacheDir = './embedding-cache') {
|
||||
this.cacheDir = cacheDir;
|
||||
}
|
||||
|
||||
async ensureCacheDir() {
|
||||
try {
|
||||
await fs.access(this.cacheDir);
|
||||
} catch {
|
||||
await fs.mkdir(this.cacheDir, { recursive: true });
|
||||
}
|
||||
}
|
||||
|
||||
getCacheKey(model, text) {
|
||||
const content = `${model.name}:${text}`;
|
||||
return crypto.createHash('md5').update(content).digest('hex');
|
||||
}
|
||||
|
||||
async getCachedEmbedding(model, text) {
|
||||
await this.ensureCacheDir();
|
||||
const key = this.getCacheKey(model, text);
|
||||
const cachePath = path.join(this.cacheDir, `${key}.json`);
|
||||
|
||||
try {
|
||||
const data = await fs.readFile(cachePath, 'utf8');
|
||||
return JSON.parse(data);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
async setCachedEmbedding(model, text, embedding) {
|
||||
await this.ensureCacheDir();
|
||||
const key = this.getCacheKey(model, text);
|
||||
const cachePath = path.join(this.cacheDir, `${key}.json`);
|
||||
|
||||
await fs.writeFile(cachePath, JSON.stringify(embedding));
|
||||
}
|
||||
|
||||
async getCacheStats(model) {
|
||||
await this.ensureCacheDir();
|
||||
const files = await fs.readdir(this.cacheDir);
|
||||
const modelFiles = files.filter(f => f.includes(model.name.replace(/[^a-zA-Z0-9]/g, '_')));
|
||||
return { cached: modelFiles.length, total: files.length };
|
||||
}
|
||||
}
|
||||
|
||||
class SearchEvaluator {
|
||||
constructor() {
|
||||
this.cache = new EmbeddingCache();
|
||||
}
|
||||
|
||||
async rateLimitedDelay(model) {
|
||||
if (model.rateLimit && model.rateLimitDelayMs) {
|
||||
await new Promise(resolve => setTimeout(resolve, model.rateLimitDelayMs));
|
||||
}
|
||||
}
|
||||
|
||||
async getEmbedding(text, model) {
|
||||
// Check cache first
|
||||
const cached = await this.cache.getCachedEmbedding(model, text);
|
||||
if (cached) return cached;
|
||||
|
||||
const headers = { 'Content-Type': 'application/json' };
|
||||
let body, endpoint;
|
||||
|
||||
if (model.type === 'mistral') {
|
||||
if (model.apiKey) {
|
||||
headers['Authorization'] = `Bearer ${model.apiKey.replace('${AI_EMBEDDINGS_API_KEY}', process.env.AI_EMBEDDINGS_API_KEY || '')}`;
|
||||
}
|
||||
body = { model: model.name, input: [text] };
|
||||
endpoint = model.endpoint;
|
||||
} else {
|
||||
body = { model: model.name, prompt: text };
|
||||
endpoint = model.endpoint;
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await fetch(endpoint, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify(body)
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
if (response.status === 429 && model.rateLimit) {
|
||||
console.log(` ⚠️ Rate limited, waiting...`);
|
||||
await new Promise(resolve => setTimeout(resolve, 10000));
|
||||
return this.getEmbedding(text, model);
|
||||
}
|
||||
throw new Error(`API error ${response.status}: ${await response.text()}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
const embedding = model.type === 'mistral' ? data.data[0].embedding : data.embedding;
|
||||
|
||||
// Cache the result
|
||||
await this.cache.setCachedEmbedding(model, text, embedding);
|
||||
return embedding;
|
||||
|
||||
} catch (error) {
|
||||
console.error(`❌ Failed to get embedding: ${error.message}`);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
constructToolText(item, maxLength = null) {
|
||||
if (typeof item === 'string') {
|
||||
// Even for string inputs, don't truncate to match real app behavior
|
||||
return item.toLowerCase();
|
||||
}
|
||||
|
||||
// EXACT match to embeddings.ts createContentString() - NO TRUNCATION
|
||||
const parts = [
|
||||
item.name,
|
||||
item.description || '',
|
||||
...(item.tags || []),
|
||||
...(item.domains || []),
|
||||
...(item.phases || [])
|
||||
];
|
||||
|
||||
const contentString = parts.filter(Boolean).join(' ').toLowerCase();
|
||||
|
||||
// CRITICAL: No truncation! Return full content like real app
|
||||
return contentString;
|
||||
}
|
||||
|
||||
calculateOptimalBatchSize(model) {
|
||||
// Factors that ACTUALLY matter for batching individual API calls:
|
||||
|
||||
// 1. Rate limiting aggressiveness
|
||||
if (model.rateLimit && model.rateLimitDelayMs > 2000) {
|
||||
return 5; // Conservative batching for heavily rate-limited APIs
|
||||
}
|
||||
|
||||
// 2. API latency expectations
|
||||
if (model.type === 'ollama') {
|
||||
return 15; // Local APIs are fast, can handle larger batches
|
||||
} else if (model.type === 'mistral') {
|
||||
return 10; // Remote APIs might be slower, medium batches
|
||||
}
|
||||
|
||||
// 3. Progress reporting frequency preference
|
||||
// For 185 tools:
|
||||
// - Batch size 10 = 19 progress updates
|
||||
// - Batch size 15 = 13 progress updates
|
||||
// - Batch size 20 = 10 progress updates
|
||||
|
||||
return 15; // Good balance for ~13 progress updates
|
||||
}
|
||||
|
||||
async createBatchEmbeddings(items, model) {
|
||||
const batchSize = this.calculateOptimalBatchSize(model);
|
||||
const contextSize = model.contextSize || 2000; // Only for display/info
|
||||
|
||||
console.log(` 📦 Creating embeddings for ${items.length} items`);
|
||||
console.log(` 📏 Model context: ${contextSize} chars (for reference - NOT truncating)`);
|
||||
console.log(` 📋 Batch size: ${batchSize} (for progress reporting)`);
|
||||
|
||||
const embeddings = new Map();
|
||||
let apiCalls = 0;
|
||||
let cacheHits = 0;
|
||||
const totalBatches = Math.ceil(items.length / batchSize);
|
||||
|
||||
for (let i = 0; i < items.length; i += batchSize) {
|
||||
const batch = items.slice(i, i + batchSize);
|
||||
const batchNum = Math.floor(i/batchSize) + 1;
|
||||
|
||||
console.log(` 📋 Processing batch ${batchNum}/${totalBatches} (${batch.length} tools)`);
|
||||
|
||||
for (const item of batch) {
|
||||
// Get FULL content (no truncation)
|
||||
const text = this.constructToolText(item);
|
||||
|
||||
// Show actual text length for first few tools (full length!)
|
||||
if (i < batchSize && batch.indexOf(item) < 3) {
|
||||
const truncatedDisplay = text.length > 100 ? text.slice(0, 100) + '...' : text;
|
||||
console.log(` 📝 ${item.name}: ${text.length} chars (full) - "${truncatedDisplay}"`);
|
||||
}
|
||||
|
||||
try {
|
||||
const embedding = await this.getEmbedding(text, model);
|
||||
embeddings.set(item.id || item.name || text, {
|
||||
text,
|
||||
embedding,
|
||||
metadata: item
|
||||
});
|
||||
|
||||
const cached = await this.cache.getCachedEmbedding(model, text);
|
||||
if (cached) cacheHits++; else apiCalls++;
|
||||
|
||||
await this.rateLimitedDelay(model);
|
||||
} catch (error) {
|
||||
console.warn(` ⚠️ Failed to embed: ${item.name || text.slice(0, 50)}...`);
|
||||
// Log the error for debugging
|
||||
if (text.length > 8000) {
|
||||
console.warn(` 📏 Text was ${text.length} chars - may exceed model limits`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Show content length statistics
|
||||
const lengths = Array.from(embeddings.values()).map(e => e.text.length);
|
||||
const avgLength = lengths.reduce((a, b) => a + b, 0) / lengths.length;
|
||||
const maxLength = Math.max(...lengths);
|
||||
const minLength = Math.min(...lengths);
|
||||
|
||||
console.log(` 📊 Content stats: avg ${avgLength.toFixed(0)} chars, range ${minLength}-${maxLength} chars`);
|
||||
console.log(` ✅ Created ${embeddings.size} embeddings (${apiCalls} API calls, ${cacheHits} cache hits)`);
|
||||
|
||||
return embeddings;
|
||||
}
|
||||
|
||||
cosineSimilarity(a, b) {
|
||||
if (!a || !b || a.length === 0 || b.length === 0) return 0;
|
||||
|
||||
let dotProduct = 0;
|
||||
let normA = 0;
|
||||
let normB = 0;
|
||||
const minLength = Math.min(a.length, b.length);
|
||||
|
||||
for (let i = 0; i < minLength; i++) {
|
||||
dotProduct += a[i] * b[i];
|
||||
normA += a[i] * a[i];
|
||||
normB += b[i] * b[i];
|
||||
}
|
||||
|
||||
if (normA === 0 || normB === 0) return 0;
|
||||
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
|
||||
}
|
||||
|
||||
searchSimilar(queryEmbedding, toolEmbeddings, topK = 10) {
|
||||
const similarities = [];
|
||||
|
||||
for (const [id, data] of toolEmbeddings) {
|
||||
const similarity = this.cosineSimilarity(queryEmbedding, data.embedding);
|
||||
similarities.push({
|
||||
id,
|
||||
similarity,
|
||||
metadata: data.metadata,
|
||||
text: data.text
|
||||
});
|
||||
}
|
||||
|
||||
return similarities
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, topK);
|
||||
}
|
||||
|
||||
calculateRetrievalMetrics(results, relevantIds, k = 10) {
|
||||
const topK = results.slice(0, k);
|
||||
const retrievedIds = new Set(topK.map(r => r.id));
|
||||
const relevantSet = new Set(relevantIds);
|
||||
|
||||
// Precision@K
|
||||
const relevantRetrieved = topK.filter(r => relevantSet.has(r.id)).length;
|
||||
const precisionAtK = topK.length > 0 ? relevantRetrieved / topK.length : 0;
|
||||
|
||||
// Recall@K
|
||||
const recallAtK = relevantIds.length > 0 ? relevantRetrieved / relevantIds.length : 0;
|
||||
|
||||
// F1@K
|
||||
const f1AtK = (precisionAtK + recallAtK) > 0 ?
|
||||
2 * (precisionAtK * recallAtK) / (precisionAtK + recallAtK) : 0;
|
||||
|
||||
// Mean Reciprocal Rank (MRR)
|
||||
let mrr = 0;
|
||||
for (let i = 0; i < topK.length; i++) {
|
||||
if (relevantSet.has(topK[i].id)) {
|
||||
mrr = 1 / (i + 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// NDCG@K (simplified binary relevance)
|
||||
let dcg = 0;
|
||||
let idcg = 0;
|
||||
|
||||
for (let i = 0; i < k; i++) {
|
||||
const rank = i + 1;
|
||||
const discount = Math.log2(rank + 1);
|
||||
|
||||
// DCG
|
||||
if (i < topK.length && relevantSet.has(topK[i].id)) {
|
||||
dcg += 1 / discount;
|
||||
}
|
||||
|
||||
// IDCG (ideal ranking)
|
||||
if (i < relevantIds.length) {
|
||||
idcg += 1 / discount;
|
||||
}
|
||||
}
|
||||
|
||||
const ndcgAtK = idcg > 0 ? dcg / idcg : 0;
|
||||
|
||||
return {
|
||||
precisionAtK,
|
||||
recallAtK,
|
||||
f1AtK,
|
||||
mrr,
|
||||
ndcgAtK,
|
||||
relevantRetrieved,
|
||||
totalRelevant: relevantIds.length
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
class EfficientEmbeddingComparison {
|
||||
constructor(configPath = './embedding-test-config.json') {
|
||||
this.configPath = configPath;
|
||||
this.config = null;
|
||||
this.tools = [];
|
||||
this.evaluator = new SearchEvaluator();
|
||||
|
||||
// Test queries tailored to the actual tools.yaml content
|
||||
this.testQueries = [
|
||||
{
|
||||
query: "memory forensics RAM analysis",
|
||||
keywords: ["memory", "forensics", "volatility", "ram", "dump", "analysis"],
|
||||
category: "memory_analysis"
|
||||
},
|
||||
{
|
||||
query: "network packet capture traffic analysis",
|
||||
keywords: ["network", "packet", "pcap", "wireshark", "traffic", "capture"],
|
||||
category: "network_analysis"
|
||||
},
|
||||
{
|
||||
query: "malware reverse engineering binary analysis",
|
||||
keywords: ["malware", "reverse", "engineering", "ghidra", "binary", "disassemble"],
|
||||
category: "malware_analysis"
|
||||
},
|
||||
{
|
||||
query: "digital forensics disk imaging",
|
||||
keywords: ["forensics", "disk", "imaging", "autopsy", "investigation", "evidence"],
|
||||
category: "disk_forensics"
|
||||
},
|
||||
{
|
||||
query: "incident response threat hunting",
|
||||
keywords: ["incident", "response", "threat", "hunting", "investigation", "compromise"],
|
||||
category: "incident_response"
|
||||
},
|
||||
{
|
||||
query: "mobile device smartphone forensics",
|
||||
keywords: ["mobile", "smartphone", "android", "ios", "device", "cellebrite"],
|
||||
category: "mobile_forensics"
|
||||
},
|
||||
{
|
||||
query: "timeline analysis event correlation",
|
||||
keywords: ["timeline", "analysis", "correlation", "events", "plaso", "timesketch"],
|
||||
category: "timeline_analysis"
|
||||
},
|
||||
{
|
||||
query: "registry analysis windows artifacts",
|
||||
keywords: ["registry", "windows", "artifacts", "regripper", "hives", "keys"],
|
||||
category: "registry_analysis"
|
||||
},
|
||||
{
|
||||
query: "cloud forensics container analysis",
|
||||
keywords: ["cloud", "container", "docker", "virtualization", "aws", "azure"],
|
||||
category: "cloud_forensics"
|
||||
},
|
||||
{
|
||||
query: "blockchain cryptocurrency investigation",
|
||||
keywords: ["blockchain", "cryptocurrency", "bitcoin", "chainalysis", "transaction"],
|
||||
category: "blockchain_analysis"
|
||||
}
|
||||
];
|
||||
|
||||
console.log('[INIT] Efficient embedding comparison initialized');
|
||||
}
|
||||
|
||||
async loadConfig() {
|
||||
try {
|
||||
const configData = await fs.readFile(this.configPath, 'utf8');
|
||||
this.config = JSON.parse(configData);
|
||||
console.log(`[CONFIG] Loaded ${this.config.models.length} models`);
|
||||
} catch (error) {
|
||||
console.error('[CONFIG] Failed to load configuration:', error.message);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
async loadTools() {
|
||||
try {
|
||||
const yamlContent = await fs.readFile(this.config.toolsYamlPath, 'utf8');
|
||||
const data = yaml.load(yamlContent);
|
||||
|
||||
// Extract tools (flexible - handle different YAML structures)
|
||||
this.tools = data.tools || data.entries || data.applications || data;
|
||||
if (!Array.isArray(this.tools)) {
|
||||
this.tools = Object.values(this.tools);
|
||||
}
|
||||
|
||||
// Filter out concepts and ensure required fields
|
||||
this.tools = this.tools.filter(tool =>
|
||||
tool &&
|
||||
tool.type !== 'concept' &&
|
||||
(tool.name || tool.title) &&
|
||||
(tool.description || tool.summary)
|
||||
);
|
||||
|
||||
// Normalize tool structure
|
||||
this.tools = this.tools.map((tool, index) => ({
|
||||
id: tool.id || tool.name || tool.title || `tool_${index}`,
|
||||
name: tool.name || tool.title,
|
||||
description: tool.description || tool.summary || '',
|
||||
tags: tool.tags || [],
|
||||
domains: tool.domains || tool.categories || [],
|
||||
phases: tool.phases || [],
|
||||
platforms: tool.platforms || [],
|
||||
type: tool.type || 'tool',
|
||||
skillLevel: tool.skillLevel,
|
||||
license: tool.license
|
||||
}));
|
||||
|
||||
console.log(`[DATA] Loaded ${this.tools.length} tools from ${this.config.toolsYamlPath}`);
|
||||
|
||||
// Show some statistics
|
||||
const domainCounts = {};
|
||||
const tagCounts = {};
|
||||
|
||||
this.tools.forEach(tool => {
|
||||
(tool.domains || []).forEach(domain => {
|
||||
domainCounts[domain] = (domainCounts[domain] || 0) + 1;
|
||||
});
|
||||
(tool.tags || []).forEach(tag => {
|
||||
tagCounts[tag] = (tagCounts[tag] || 0) + 1;
|
||||
});
|
||||
});
|
||||
|
||||
const topDomains = Object.entries(domainCounts)
|
||||
.sort(([,a], [,b]) => b - a)
|
||||
.slice(0, 5)
|
||||
.map(([domain, count]) => `${domain}(${count})`)
|
||||
.join(', ');
|
||||
|
||||
console.log(`[DATA] Top domains: ${topDomains}`);
|
||||
console.log(`[DATA] Sample tools: ${this.tools.slice(0, 3).map(t => t.name).join(', ')}`);
|
||||
|
||||
if (this.tools.length === 0) {
|
||||
throw new Error('No valid tools found in YAML file');
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.error('[DATA] Failed to load tools:', error.message);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
findRelevantTools(query) {
|
||||
const queryLower = query.query.toLowerCase();
|
||||
const keywords = query.keywords.map(k => k.toLowerCase());
|
||||
|
||||
const relevantTools = this.tools.filter(tool => {
|
||||
// Build searchable text from all tool metadata
|
||||
const searchableFields = [
|
||||
tool.name || '',
|
||||
tool.description || '',
|
||||
(tool.tags || []).join(' '),
|
||||
(tool.domains || []).join(' '),
|
||||
(tool.phases || []).join(' '),
|
||||
(tool.platforms || []).join(' ')
|
||||
];
|
||||
|
||||
const toolText = searchableFields.join(' ').toLowerCase();
|
||||
|
||||
// Check for keyword matches
|
||||
const hasKeywordMatch = keywords.some(keyword => toolText.includes(keyword));
|
||||
|
||||
// Check for query word matches (words longer than 3 chars)
|
||||
const queryWords = queryLower.split(' ').filter(word => word.length > 3);
|
||||
const hasQueryWordMatch = queryWords.some(word => toolText.includes(word));
|
||||
|
||||
// Check for domain-specific matches
|
||||
const isDomainRelevant = query.category && tool.domains &&
|
||||
tool.domains.some(domain => domain.includes(query.category.replace('_', '-')));
|
||||
|
||||
return hasKeywordMatch || hasQueryWordMatch || isDomainRelevant;
|
||||
});
|
||||
|
||||
console.log(` 🎯 Found ${relevantTools.length} relevant tools for "${query.query}"`);
|
||||
|
||||
// Log some examples for debugging
|
||||
if (relevantTools.length > 0) {
|
||||
console.log(` 📋 Examples: ${relevantTools.slice(0, 3).map(t => t.name).join(', ')}`);
|
||||
}
|
||||
|
||||
return relevantTools.map(tool => tool.id || tool.name);
|
||||
}
|
||||
|
||||
async testSearchPerformance(model) {
|
||||
console.log(` 🔍 Testing search performance...`);
|
||||
|
||||
// Create embeddings for all tools
|
||||
const toolEmbeddings = await this.evaluator.createBatchEmbeddings(this.tools, model);
|
||||
|
||||
const results = [];
|
||||
let totalApiCalls = 0;
|
||||
|
||||
for (const testQuery of this.testQueries) {
|
||||
console.log(` 📋 Query: "${testQuery.query}"`);
|
||||
|
||||
// Get query embedding
|
||||
const queryEmbedding = await this.evaluator.getEmbedding(testQuery.query, model);
|
||||
totalApiCalls++;
|
||||
await this.evaluator.rateLimitedDelay(model);
|
||||
|
||||
// Find relevant tools for this query
|
||||
const relevantIds = this.findRelevantTools(testQuery);
|
||||
console.log(` 📊 Found ${relevantIds.length} relevant tools`);
|
||||
|
||||
if (relevantIds.length === 0) {
|
||||
console.log(` ⚠️ No relevant tools found, skipping metrics calculation`);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Perform search
|
||||
const searchResults = this.evaluator.searchSimilar(queryEmbedding, toolEmbeddings, 20);
|
||||
|
||||
// Calculate metrics for different k values
|
||||
const metrics = {};
|
||||
for (const k of [1, 3, 5, 10]) {
|
||||
metrics[`k${k}`] = this.evaluator.calculateRetrievalMetrics(searchResults, relevantIds, k);
|
||||
}
|
||||
|
||||
results.push({
|
||||
query: testQuery.query,
|
||||
category: testQuery.category,
|
||||
relevantCount: relevantIds.length,
|
||||
searchResults: searchResults.slice(0, 5), // Top 5 for display
|
||||
metrics
|
||||
});
|
||||
|
||||
// Display results
|
||||
console.log(` 🎯 Top results:`);
|
||||
searchResults.slice(0, 3).forEach((result, i) => {
|
||||
const isRelevant = relevantIds.includes(result.id) ? '✓' : '✗';
|
||||
console.log(` ${i+1}. ${isRelevant} ${result.metadata.name} (${(result.similarity*100).toFixed(1)}%)`);
|
||||
});
|
||||
|
||||
console.log(` 📈 P@5: ${(metrics.k5.precisionAtK*100).toFixed(1)}% | R@5: ${(metrics.k5.recallAtK*100).toFixed(1)}% | NDCG@5: ${(metrics.k5.ndcgAtK*100).toFixed(1)}%`);
|
||||
}
|
||||
|
||||
return { results, totalApiCalls };
|
||||
}
|
||||
|
||||
async testSemanticUnderstanding(model) {
|
||||
console.log(` 🧠 Testing semantic understanding...`);
|
||||
|
||||
const semanticTests = [
|
||||
{
|
||||
primary: "memory forensics",
|
||||
synonyms: ["RAM analysis", "volatile memory examination", "memory dump investigation"],
|
||||
unrelated: ["file compression", "web browser", "text editor"]
|
||||
},
|
||||
{
|
||||
primary: "network analysis",
|
||||
synonyms: ["packet inspection", "traffic monitoring", "protocol analysis"],
|
||||
unrelated: ["image editing", "music player", "calculator"]
|
||||
},
|
||||
{
|
||||
primary: "malware detection",
|
||||
synonyms: ["virus scanning", "threat identification", "malicious code analysis"],
|
||||
unrelated: ["video converter", "password manager", "calendar app"]
|
||||
}
|
||||
];
|
||||
|
||||
let totalCorrect = 0;
|
||||
let totalTests = 0;
|
||||
let apiCalls = 0;
|
||||
|
||||
for (const test of semanticTests) {
|
||||
console.log(` 🔤 Testing: "${test.primary}"`);
|
||||
|
||||
const primaryEmbedding = await this.evaluator.getEmbedding(test.primary, model);
|
||||
apiCalls++;
|
||||
await this.evaluator.rateLimitedDelay(model);
|
||||
|
||||
// Test synonyms (should be similar)
|
||||
for (const synonym of test.synonyms) {
|
||||
const synonymEmbedding = await this.evaluator.getEmbedding(synonym, model);
|
||||
apiCalls++;
|
||||
|
||||
const synonymSim = this.evaluator.cosineSimilarity(primaryEmbedding, synonymEmbedding);
|
||||
console.log(` ✓ "${synonym}": ${(synonymSim*100).toFixed(1)}%`);
|
||||
|
||||
await this.evaluator.rateLimitedDelay(model);
|
||||
}
|
||||
|
||||
// Test unrelated terms (should be dissimilar)
|
||||
for (const unrelated of test.unrelated) {
|
||||
const unrelatedEmbedding = await this.evaluator.getEmbedding(unrelated, model);
|
||||
apiCalls++;
|
||||
|
||||
const unrelatedSim = this.evaluator.cosineSimilarity(primaryEmbedding, unrelatedEmbedding);
|
||||
console.log(` ✗ "${unrelated}": ${(unrelatedSim*100).toFixed(1)}%`);
|
||||
|
||||
await this.evaluator.rateLimitedDelay(model);
|
||||
}
|
||||
|
||||
// Calculate semantic coherence
|
||||
const avgSynonymSim = await this.calculateAvgSimilarity(primaryEmbedding, test.synonyms, model);
|
||||
const avgUnrelatedSim = await this.calculateAvgSimilarity(primaryEmbedding, test.unrelated, model);
|
||||
|
||||
const isCorrect = avgSynonymSim > avgUnrelatedSim;
|
||||
if (isCorrect) totalCorrect++;
|
||||
totalTests++;
|
||||
|
||||
console.log(` 📊 Synonyms: ${(avgSynonymSim*100).toFixed(1)}% | Unrelated: ${(avgUnrelatedSim*100).toFixed(1)}% ${isCorrect ? '✓' : '✗'}`);
|
||||
}
|
||||
|
||||
return {
|
||||
accuracy: totalCorrect / totalTests,
|
||||
correctTests: totalCorrect,
|
||||
totalTests,
|
||||
apiCalls
|
||||
};
|
||||
}
|
||||
|
||||
async calculateAvgSimilarity(baseEmbedding, terms, model) {
|
||||
let totalSim = 0;
|
||||
|
||||
for (const term of terms) {
|
||||
const embedding = await this.evaluator.getEmbedding(term, model);
|
||||
const sim = this.evaluator.cosineSimilarity(baseEmbedding, embedding);
|
||||
totalSim += sim;
|
||||
await this.evaluator.rateLimitedDelay(model);
|
||||
}
|
||||
|
||||
return totalSim / terms.length;
|
||||
}
|
||||
|
||||
async benchmarkPerformance(model) {
|
||||
console.log(` ⚡ Benchmarking performance...`);
|
||||
|
||||
const testTexts = this.tools.slice(0, 10).map(tool => `${tool.name} ${tool.description}`.slice(0, 500));
|
||||
const times = [];
|
||||
let apiCalls = 0;
|
||||
|
||||
console.log(` 🏃 Processing ${testTexts.length} texts...`);
|
||||
|
||||
for (const text of testTexts) {
|
||||
const start = Date.now();
|
||||
await this.evaluator.getEmbedding(text, model);
|
||||
const time = Date.now() - start;
|
||||
times.push(time);
|
||||
apiCalls++;
|
||||
|
||||
await this.evaluator.rateLimitedDelay(model);
|
||||
}
|
||||
|
||||
const avgTime = times.reduce((a, b) => a + b, 0) / times.length;
|
||||
const minTime = Math.min(...times);
|
||||
const maxTime = Math.max(...times);
|
||||
|
||||
console.log(` 📊 Avg: ${avgTime.toFixed(0)}ms | Min: ${minTime}ms | Max: ${maxTime}ms`);
|
||||
|
||||
return {
|
||||
avgLatency: avgTime,
|
||||
minLatency: minTime,
|
||||
maxLatency: maxTime,
|
||||
throughput: 1000 / avgTime, // requests per second
|
||||
apiCalls
|
||||
};
|
||||
}
|
||||
|
||||
  /**
   * Run the complete evaluation suite for a single model.
   *
   * Executes three phases in order — search performance, semantic
   * understanding, and the latency benchmark — accumulating API-call counts
   * across all of them, and logs total wall-clock time.
   *
   * @param {object} model - model descriptor (expects .name and .type)
   * @returns {Promise<object>} {searchPerformance, semanticUnderstanding,
   *          performance, totalTime, totalApiCalls}
   * @throws rethrows any phase failure so the caller can skip this model
   */
  async testModel(model) {
    console.log(`\n🧪 Testing ${model.name} (${model.type})...`);

    const startTime = Date.now();
    let totalApiCalls = 0;

    try {
      // 1. Search Performance Testing
      const searchResults = await this.testSearchPerformance(model);
      totalApiCalls += searchResults.totalApiCalls;

      // 2. Semantic Understanding Testing
      const semanticResults = await this.testSemanticUnderstanding(model);
      totalApiCalls += semanticResults.apiCalls;

      // 3. Performance Benchmarking
      const perfResults = await this.benchmarkPerformance(model);
      totalApiCalls += perfResults.apiCalls;

      const totalTime = Date.now() - startTime;

      console.log(`   ✅ ${model.name} completed in ${(totalTime/1000).toFixed(1)}s (${totalApiCalls} API calls)`);

      return {
        searchPerformance: searchResults.results,
        semanticUnderstanding: semanticResults,
        performance: perfResults,
        totalTime,
        totalApiCalls
      };

    } catch (error) {
      console.error(`   ❌ ${model.name} failed:`, error.message);
      throw error;
    }
  }
|
||||
|
||||
  /**
   * Collapse one model's raw results into a single weighted score.
   *
   * Averages the k=5 search metrics over all queries that produced metrics,
   * then combines them with semantic accuracy and normalized throughput using
   * fixed weights. Returns a zeroed score object (with a warning field) when
   * no query produced usable metrics.
   *
   * @param {object} results - output of testModel()
   * @returns {{overall:number, components:object, warning?:string}}
   */
  calculateOverallScore(results) {
    // Only queries that produced a non-empty metrics object participate.
    const searchMetrics = results.searchPerformance.filter(r => r.metrics && Object.keys(r.metrics).length > 0);

    if (searchMetrics.length === 0) {
      console.warn('⚠️ No search metrics available for scoring - may indicate relevance matching issues');
      return {
        overall: 0,
        components: {
          precision5: 0,
          recall5: 0,
          ndcg5: 0,
          mrr: 0,
          semanticAccuracy: results.semanticUnderstanding?.accuracy || 0,
          throughput: results.performance?.throughput || 0
        },
        warning: 'No search metrics available'
      };
    }

    console.log(`📊 Calculating score from ${searchMetrics.length} valid search results`);

    // Per-query metrics at cutoff k=5, averaged across all scored queries.
    const avgPrecision5 = searchMetrics.reduce((sum, r) => sum + (r.metrics.k5?.precisionAtK || 0), 0) / searchMetrics.length;
    const avgRecall5 = searchMetrics.reduce((sum, r) => sum + (r.metrics.k5?.recallAtK || 0), 0) / searchMetrics.length;
    const avgNDCG5 = searchMetrics.reduce((sum, r) => sum + (r.metrics.k5?.ndcgAtK || 0), 0) / searchMetrics.length;
    const avgMRR = searchMetrics.reduce((sum, r) => sum + (r.metrics.k5?.mrr || 0), 0) / searchMetrics.length;

    const semanticAccuracy = results.semanticUnderstanding?.accuracy || 0;
    const throughput = results.performance?.throughput || 0;

    // Weighted overall score (weights sum to 1.0; MRR is reported in the
    // components but intentionally carries no weight here).
    const weights = {
      precision: 0.25,
      recall: 0.25,
      ndcg: 0.20,
      semantic: 0.20,
      speed: 0.10
    };

    const normalizedThroughput = Math.min(throughput / 10, 1); // Normalize to 0-1 (10 req/s = 1.0)

    const overall = (
      avgPrecision5 * weights.precision +
      avgRecall5 * weights.recall +
      avgNDCG5 * weights.ndcg +
      semanticAccuracy * weights.semantic +
      normalizedThroughput * weights.speed
    );

    return {
      overall,
      components: {
        precision5: avgPrecision5,
        recall5: avgRecall5,
        ndcg5: avgNDCG5,
        mrr: avgMRR,
        semanticAccuracy,
        throughput
      }
    };
  }
|
||||
|
||||
  /**
   * Print a ranked comparison report for all tested models to the console.
   *
   * Scores each model via calculateOverallScore, sorts descending, then
   * prints overall rankings, per-metric breakdowns, the winner with a
   * suggested use case, and summary statistics.
   *
   * @param {Array<{model:object, results:object}>} modelResults
   */
  printResults(modelResults) {
    console.log(`\n${'='.repeat(80)}`);
    console.log("🏆 EFFICIENT EMBEDDING MODEL COMPARISON RESULTS");
    console.log(`${'='.repeat(80)}`);

    // Score and rank models, best first.
    const scores = modelResults.map(mr => ({
      model: mr.model,
      score: this.calculateOverallScore(mr.results),
      results: mr.results
    })).sort((a, b) => b.score.overall - a.score.overall);

    console.log(`\n🥇 OVERALL RANKINGS:`);
    scores.forEach((score, index) => {
      console.log(`   ${index + 1}. ${score.model.name}: ${(score.score.overall * 100).toFixed(1)}% overall`);
    });

    console.log(`\n📊 DETAILED METRICS:`);

    console.log(`\n   🎯 Search Performance (Precision@5):`);
    scores.forEach(score => {
      console.log(`      ${score.model.name}: ${(score.score.components.precision5 * 100).toFixed(1)}%`);
    });

    console.log(`\n   🔍 Search Performance (Recall@5):`);
    scores.forEach(score => {
      console.log(`      ${score.model.name}: ${(score.score.components.recall5 * 100).toFixed(1)}%`);
    });

    console.log(`\n   📈 Search Quality (NDCG@5):`);
    scores.forEach(score => {
      console.log(`      ${score.model.name}: ${(score.score.components.ndcg5 * 100).toFixed(1)}%`);
    });

    console.log(`\n   🧠 Semantic Understanding:`);
    scores.forEach(score => {
      console.log(`      ${score.model.name}: ${(score.score.components.semanticAccuracy * 100).toFixed(1)}%`);
    });

    console.log(`\n   ⚡ Performance (req/s):`);
    scores.forEach(score => {
      console.log(`      ${score.model.name}: ${score.score.components.throughput.toFixed(1)} req/s`);
    });

    // Winner analysis
    const winner = scores[0];
    console.log(`\n🏆 WINNER: ${winner.model.name}`);
    console.log(`   Overall Score: ${(winner.score.overall * 100).toFixed(1)}%`);
    console.log(`   Best for: ${this.getBestUseCase(winner.score.components)}`);

    // Summary stats
    const totalQueries = modelResults[0]?.results.searchPerformance.length || 0;
    const totalTools = this.tools.length;

    console.log(`\n📋 Test Summary:`);
    console.log(`   Tools tested: ${totalTools}`);
    console.log(`   Search queries: ${totalQueries}`);
    console.log(`   Models compared: ${scores.length}`);
    console.log(`   Total API calls: ${modelResults.reduce((sum, mr) => sum + mr.results.totalApiCalls, 0)}`);
  }
|
||||
|
||||
getBestUseCase(components) {
|
||||
const strengths = [];
|
||||
if (components.precision5 > 0.7) strengths.push("High precision");
|
||||
if (components.recall5 > 0.7) strengths.push("High recall");
|
||||
if (components.semanticAccuracy > 0.8) strengths.push("Semantic understanding");
|
||||
if (components.throughput > 5) strengths.push("High performance");
|
||||
|
||||
return strengths.length > 0 ? strengths.join(", ") : "General purpose";
|
||||
}
|
||||
|
||||
  /**
   * Entry point: load configuration and tool data, evaluate every configured
   * model, and print the comparison report.
   *
   * Per-model failures are logged and skipped; the run only aborts (with
   * debugging hints) when no model completes successfully or setup fails.
   * Errors are reported, not rethrown.
   */
  async run() {
    try {
      console.log("🚀 EFFICIENT EMBEDDING MODEL COMPARISON");
      console.log("=====================================");

      await this.loadConfig();
      await this.loadTools();

      console.log(`\n📋 Test Overview:`);
      console.log(`   Models: ${this.config.models.length}`);
      console.log(`   Tools: ${this.tools.length}`);
      console.log(`   Search queries: ${this.testQueries.length}`);
      console.log(`   Cache: ${this.evaluator.cache.cacheDir}`);

      const modelResults = [];

      // Models are tested sequentially; one failure does not stop the run.
      for (const model of this.config.models) {
        try {
          const results = await this.testModel(model);
          modelResults.push({ model, results });
        } catch (error) {
          console.error(`❌ Skipping ${model.name}: ${error.message}`);
        }
      }

      if (modelResults.length === 0) {
        throw new Error('No models completed testing successfully');
      }

      this.printResults(modelResults);

    } catch (error) {
      console.error('\n❌ Test failed:', error.message);
      console.log('\nDebugging steps:');
      console.log('1. Verify tools.yaml exists and contains valid tool data');
      console.log('2. Check model endpoints are accessible');
      console.log('3. For Ollama: ensure models are pulled and ollama serve is running');
      console.log('4. For Mistral: verify AI_EMBEDDINGS_API_KEY environment variable');
    }
  }
|
||||
}
|
||||
|
||||
// Execute: resolve the config path from an optional --config=<path> argument.
const configArg = process.argv.find(arg => arg.startsWith('--config='));
// Use slice() rather than split('=')[1] so paths that themselves contain '='
// (e.g. "./configs/run=v2.json") are not truncated at the second '='.
const configPath = configArg ? configArg.slice('--config='.length) : './embedding-test-config.json';

(async () => {
  const comparison = new EfficientEmbeddingComparison(configPath);
  await comparison.run();
})().catch(console.error);
|
||||
333
find-duplicates.mjs
Normal file
333
find-duplicates.mjs
Normal file
@@ -0,0 +1,333 @@
|
||||
#!/usr/bin/env node
|
||||
// find-duplicate-functions.mjs
|
||||
// Usage:
|
||||
// node find-duplicate-functions.mjs [rootDir] [--mode exact|struct] [--min-lines N] [--json]
|
||||
// Example:
|
||||
// node find-duplicate-functions.mjs . --mode struct --min-lines 3
|
||||
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import * as url from "url";
|
||||
import ts from "typescript";
|
||||
|
||||
const __dirname = path.dirname(url.fileURLToPath(import.meta.url));
|
||||
|
||||
/** -------- CLI OPTIONS --------
 * Defaults: scan the current directory, structural matching, and ignore
 * function bodies shorter than 3 lines.
 */
const args = process.argv.slice(2);
let rootDir = ".";
let mode = "struct"; // "exact" | "struct"
let minLines = 3;
let outputJson = false;

for (let i = 0; i < args.length; i++) {
  const a = args[i];
  if (!a.startsWith("--") && rootDir === ".") {
    // First bare (non-flag) argument is taken as the root directory.
    rootDir = a;
  } else if (a === "--mode") {
    // Consumes the next argument as the mode value.
    mode = (args[++i] || "struct").toLowerCase();
    if (!["exact", "struct"].includes(mode)) {
      console.error("Invalid --mode. Use 'exact' or 'struct'.");
      process.exit(1);
    }
  } else if (a === "--min-lines") {
    // NOTE(review): parseInt may yield NaN for a non-numeric value — the
    // filter in getFunctionInfo would then skip everything; confirm intent.
    minLines = parseInt(args[++i] || "3", 10);
  } else if (a === "--json") {
    outputJson = true;
  }
}
|
||||
|
||||
/** -------- FILE DISCOVERY -------- */
// Directory names never descended into (dependency trees and build caches).
const DEFAULT_IGNORES = new Set([
  "node_modules",
  ".git",
  ".next",
  ".vercel",
  "dist",
  "build",
  ".astro", // Astro's generated cache dir
]);

// File extensions whose contents are parsed for function definitions.
const VALID_EXTS = new Set([".ts", ".tsx", ".astro", ".mts", ".cts"]);
|
||||
|
||||
/**
 * Recursively collect every file under `dir` whose extension is in
 * VALID_EXTS, skipping directories listed in DEFAULT_IGNORES.
 * Order follows readdirSync's listing, depth-first.
 *
 * @param {string} dir - directory to scan
 * @returns {string[]} absolute/joined file paths
 */
function walk(dir) {
  /** @type {string[]} */
  const collected = [];
  for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
    const fullPath = path.join(dir, entry.name);
    if (entry.isDirectory()) {
      if (!DEFAULT_IGNORES.has(entry.name)) {
        collected.push(...walk(fullPath));
      }
    } else if (entry.isFile() && VALID_EXTS.has(path.extname(entry.name))) {
      collected.push(fullPath);
    }
  }
  return collected;
}
|
||||
|
||||
/** -------- ASTRO CODE EXTRACTION --------
 * Extract TS/JS code from an .astro file:
 * - frontmatter fence at the very top: --- ... ---
 * - every <script ...> ... </script> element
 * Returns an array of {code, offset} where offset is a 1-based line hint
 * used later for reporting locations.
 */
function extractCodeFromAstro(source) {
  /** @type {{code:string, offset:number}[]} */
  const blocks = [];

  // Frontmatter (must be at top in Astro)
  // Match the FIRST pair of --- ... ---
  const fm = source.startsWith("---")
    ? (() => {
        const end = source.indexOf("\n---", 3);
        if (end !== -1) {
          const front = source.slice(3, end + 1); // include trailing \n
          return { start: 0, end: end + 4, code: front };
        }
        return null;
      })()
    : null;
  if (fm) {
    // offset for line numbers is after the first '---\n'
    // NOTE(review): offset 4 is a constant approximation, not a computed
    // line; precise positions come from the TS SourceFile later.
    blocks.push({ code: fm.code, offset: 4 }); // rough; we’ll fix line numbers via positions later
  }

  // <script ...> ... </script>  — non-greedy so multiple blocks each match.
  const scriptRe = /<script\b[^>]*>([\s\S]*?)<\/script>/gi;
  let m;
  while ((m = scriptRe.exec(source))) {
    const code = m[1] || "";
    // offset = line of the opening <script> tag in the original file.
    blocks.push({ code, offset: indexToLine(source, m.index) });
  }

  return blocks;
}
|
||||
|
||||
/** -------- UTIL: index -> 1-based line --------
 * Count how many newline characters precede `idx` in `text`; the answer plus
 * one is the 1-based line number. An index past the end clamps to the last
 * line; a negative index is treated as 0.
 */
function indexToLine(text, idx) {
  return text.slice(0, Math.max(0, idx)).split("\n").length;
}
|
||||
|
||||
/** -------- AST HELPERS -------- */
/**
 * Parse `code` into a TypeScript SourceFile with parent pointers set.
 * Paths ending in .tsx are parsed as TSX; everything else as TS.
 */
function createSourceFile(virtualPath, code) {
  const scriptKind = virtualPath.endsWith(".tsx")
    ? ts.ScriptKind.TSX
    : ts.ScriptKind.TS;
  return ts.createSourceFile(
    virtualPath,
    code,
    ts.ScriptTarget.Latest,
    true, // setParentNodes — needed so getFunctionInfo can inspect node.parent
    scriptKind
  );
}
|
||||
|
||||
// Normalize AST to a structural signature string.
// Identifiers and literal values are collapsed to placeholder tokens so two
// functions that differ only in names/strings/numbers produce the same key.
function structuralSignature(node) {
  /** @type {string[]} */
  const parts = [];
  const visit = (n) => {
    // Skip trivia: comments/whitespace are already not in AST
    const kindName = ts.SyntaxKind[n.kind] || `K${n.kind}`;
    switch (n.kind) {
      // Leaf-like nodes: push a placeholder and do NOT visit children.
      case ts.SyntaxKind.Identifier:
        parts.push("Id");
        return;
      case ts.SyntaxKind.PrivateIdentifier:
        parts.push("PrivId");
        return;
      case ts.SyntaxKind.StringLiteral:
      case ts.SyntaxKind.NoSubstitutionTemplateLiteral:
      case ts.SyntaxKind.TemplateHead:
      case ts.SyntaxKind.TemplateMiddle:
      case ts.SyntaxKind.TemplateTail:
        parts.push("Str");
        return;
      case ts.SyntaxKind.NumericLiteral:
        parts.push("Num");
        return;
      case ts.SyntaxKind.TrueKeyword:
      case ts.SyntaxKind.FalseKeyword:
        parts.push("Bool");
        return;
      case ts.SyntaxKind.NullKeyword:
      case ts.SyntaxKind.UndefinedKeyword:
        parts.push("Nil");
        return;
      // Property-like nodes share one token, but (unlike the leaf cases
      // above) their children ARE visited via the fall-through to break.
      case ts.SyntaxKind.PropertyAssignment:
      case ts.SyntaxKind.ShorthandPropertyAssignment:
      case ts.SyntaxKind.MethodDeclaration:
      case ts.SyntaxKind.MethodSignature:
        parts.push("Prop");
        break;
      default:
        // All other nodes keep their concrete kind name.
        parts.push(kindName);
    }
    n.forEachChild(visit);
  };
  visit(node);
  return parts.join("|");
}
|
||||
|
||||
/**
 * Collect candidate function bodies from a parsed SourceFile.
 *
 * Covers function declarations, function expressions, arrow functions
 * (named via their enclosing variable or property when available), and class
 * methods. Bodies with fewer than the module-level `minLines` non-empty
 * lines are skipped. `filePath` is currently unused (reporting uses the
 * SourceFile's own positions).
 */
function getFunctionInfo(sf, filePath) {
  /** @type {Array<{
    name: string,
    bodyText: string,
    structKey: string,
    start: number,
    end: number,
    startLine: number,
    endLine: number
  }>} */
  const out = [];

  // Shared recorder used by every function-like node kind below.
  const addFunc = (nameNode, bodyNode) => {
    if (!bodyNode) return;
    const bodyText = bodyNode.getText(sf).trim();
    const start = bodyNode.getStart(sf);
    const end = bodyNode.getEnd();
    const { line: startLine } = sf.getLineAndCharacterOfPosition(start);
    const { line: endLine } = sf.getLineAndCharacterOfPosition(end);
    const name =
      nameNode && ts.isIdentifier(nameNode) ? nameNode.escapedText.toString() : "(anonymous)";

    // min-lines filter (blank lines do not count)
    const lines = bodyText.split(/\r?\n/).filter(Boolean);
    if (lines.length < minLines) return;

    // structural signature from the body
    const structKey = structuralSignature(bodyNode);

    out.push({
      name,
      bodyText,
      structKey,
      start,
      end,
      // TS line positions are 0-based; report 1-based.
      startLine: startLine + 1,
      endLine: endLine + 1,
    });
  };

  const visit = (node) => {
    if (ts.isFunctionDeclaration(node) && node.body) {
      addFunc(node.name ?? null, node.body);
    } else if (
      ts.isFunctionExpression(node) ||
      ts.isArrowFunction(node)
    ) {
      // find name if it’s assigned: const foo = () => {} or { foo: () => {} }
      let name = null;
      if (node.parent && ts.isVariableDeclaration(node.parent) && node.parent.name) {
        name = node.parent.name;
      } else if (
        node.parent &&
        ts.isPropertyAssignment(node.parent) &&
        ts.isIdentifier(node.parent.name)
      ) {
        name = node.parent.name;
      } else if (node.name) {
        name = node.name;
      }
      if (node.body) addFunc(name, node.body);
    } else if (ts.isMethodDeclaration(node) && node.body) {
      addFunc(node.name, node.body);
    }
    node.forEachChild(visit);
  };

  visit(sf);
  return out;
}
|
||||
|
||||
/** -------- MAIN SCAN -------- */
const files = walk(path.resolve(process.cwd(), rootDir));

/** Maps from hash -> occurrences */
const groups = new Map();
/** Helper for exact hash */
// NOTE(review): mid-file import — valid in ESM (imports are hoisted), but
// conventionally this belongs with the other imports at the top of the file.
import crypto from "crypto";
// Exact-mode key: SHA-1 of the body with all whitespace runs collapsed to a
// single space, so formatting-only differences still hash identically.
const exactHash = (text) => crypto.createHash("sha1").update(text.replace(/\s+/g, " ").trim()).digest("hex");
|
||||
|
||||
// Parse every discovered file and group its function bodies by hash key.
for (const file of files) {
  try {
    const ext = path.extname(file).toLowerCase();
    const raw = fs.readFileSync(file, "utf8");

    /** @type {Array<{virtualPath:string, code:string, lineOffset:number}>} */
    const codeUnits = [];

    if (ext === ".astro") {
      // Astro files can contain several code blocks (frontmatter + scripts),
      // each parsed as its own virtual .ts file.
      const blocks = extractCodeFromAstro(raw);
      blocks.forEach((b, i) => {
        codeUnits.push({
          virtualPath: file + `#astro${i + 1}.ts`,
          code: b.code,
          lineOffset: b.offset || 1,
        });
      });
    } else {
      codeUnits.push({ virtualPath: file, code: raw, lineOffset: 1 });
    }

    for (const { virtualPath, code, lineOffset } of codeUnits) {
      const sf = createSourceFile(virtualPath, code);
      const funcs = getFunctionInfo(sf, file);
      for (const f of funcs) {
        // Group key: whitespace-insensitive text hash (exact mode) or
        // AST-shape hash (struct mode).
        const key =
          mode === "exact" ? exactHash(f.bodyText) : crypto.createHash("sha1").update(f.structKey).digest("hex");
        const item = {
          file,
          // For Astro blocks, shift the in-block line numbers by the block's
          // offset so reported ranges point into the original file.
          where:
            ext === ".astro"
              ? `${path.relative(process.cwd(), file)}:${f.startLine + lineOffset - 1}-${f.endLine + lineOffset - 1}`
              : `${path.relative(process.cwd(), file)}:${f.startLine}-${f.endLine}`,
          name: f.name,
          lines: f.endLine - f.startLine + 1,
          // First five lines of the body, with an ellipsis for longer bodies.
          preview: f.bodyText.split(/\r?\n/).slice(0, 5).join("\n") + (f.endLine - f.startLine + 1 > 5 ? "\n..." : ""),
        };
        if (!groups.has(key)) groups.set(key, []);
        groups.get(key).push(item);
      }
    }
  } catch (e) {
    // Best-effort scan: unreadable or unparsable files are reported, not fatal.
    console.warn(`⚠️ Skipping ${file}: ${e.message}`);
  }
}
|
||||
|
||||
/** -------- REPORT -------- */
// Keep only hash groups with more than one occurrence, largest groups first.
const dupes = [...groups.entries()]
  .map(([key, arr]) => ({ key, items: arr }))
  .filter((g) => g.items.length > 1)
  .sort((a, b) => b.items.length - a.items.length);

// Machine-readable mode: dump JSON and stop.
if (outputJson) {
  console.log(JSON.stringify({ mode, minLines, groups: dupes }, null, 2));
  process.exit(0);
}

if (dupes.length === 0) {
  console.log(`✅ No duplicate functions found (mode=${mode}, min-lines=${minLines}).`);
  process.exit(0);
}

// Human-readable report: one section per duplicate group, with a preview of
// the first occurrence followed by the location of every match.
console.log(`\nFound ${dupes.length} duplicate group(s) (mode=${mode}, min-lines=${minLines}):\n`);
dupes.forEach((g, i) => {
  console.log(`== Group ${i + 1} (${g.items.length} matches) ==`);
  const example = g.items[0];
  console.log(`  Sample (${example.lines} lines) from ${example.where}${example.name ? ` [${example.name}]` : ""}`);
  console.log("  ---");
  console.log(indent(example.preview, "    "));
  console.log("  ---");
  g.items.forEach((it) => {
    console.log(`  • ${it.where}${it.name ? ` [${it.name}]` : ""} (${it.lines} lines)`);
  });
  console.log();
});
|
||||
|
||||
/** Prefix every line of `s` (including empty ones) with `pre`. */
function indent(s, pre) {
  // /^/gm matches at the start of each line, so replacing the empty match
  // with `pre` prefixes every line without splitting and rejoining.
  return s.replace(/^/gm, pre);
}
|
||||
11
package.json
11
package.json
@@ -10,15 +10,14 @@
|
||||
"astro": "astro"
|
||||
},
|
||||
"dependencies": {
|
||||
"@astrojs/node": "^9.3.0",
|
||||
"@aws-sdk/client-s3": "^3.864.0",
|
||||
"@aws-sdk/s3-request-presigner": "^3.864.0",
|
||||
"astro": "^5.12.3",
|
||||
"@astrojs/node": "^9.4.3",
|
||||
"astro": "^5.13.7",
|
||||
"cookie": "^1.0.2",
|
||||
"dotenv": "^16.4.5",
|
||||
"jose": "^5.2.0",
|
||||
"dotenv": "^16.6.1",
|
||||
"jose": "^5.10.0",
|
||||
"js-yaml": "^4.1.0",
|
||||
"jsonwebtoken": "^9.0.2",
|
||||
"simple-boost": "^2.0.2",
|
||||
"zod": "^3.25.76"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
@@ -1,5 +1,405 @@
|
||||
# Manuell hinzufügen
|
||||
# Video-Bereitstellung für ForensicPathways Knowledgebase
|
||||
|
||||
Hier müssen Videos, die eingebettet werden sollen, manuell abgespeichert werden.
|
||||
Da diese anders lizenziert sein können, sind sie nicht Bestandteil des Open-Source-Repositorys.
|
||||
Bei Bedarf bitte Kontakt aufnehmen mit mstoeck3@hs-mittweida.de.
|
||||
Videos müssen manuell in diesem Verzeichnis bereitgestellt werden, da sie aufgrund unterschiedlicher Lizenzierung nicht Bestandteil des Open-Source-Git-Repositorys sind.
|
||||
|
||||
## 🎥 Video-Quelle und Lizenzierung
|
||||
|
||||
**Video-Quelle:** https://cloud.cc24.dev/f/47971 (Interner Nextcloud-Share)
|
||||
**Kontakt bei Fragen:** mstoeck3@hs-mittweida.de
|
||||
|
||||
### Lizenzhinweise
|
||||
|
||||
- Videos können proprietäre Lizenzen haben
|
||||
- Nicht für öffentliche Redistribution geeignet
|
||||
- Nur für den internen Gebrauch in ForensicPathways
|
||||
- Urheberrechte beachten bei eigenen Video-Beiträgen
|
||||
|
||||
## 📁 Empfohlene Verzeichnisstruktur
|
||||
|
||||
```
|
||||
public/videos/
|
||||
├── tools/ # Tool-spezifische Tutorials
|
||||
│ ├── autopsy/
|
||||
│ │ ├── autopsy-installation.mp4
|
||||
│ │ ├── autopsy-basics.mp4
|
||||
│ │ └── autopsy-advanced-analysis.webm
|
||||
│ ├── volatility/
|
||||
│ │ ├── volatility-setup.mp4
|
||||
│ │ ├── volatility-pslist-demo.mp4
|
||||
│ │ └── volatility-malfind-tutorial.webm
|
||||
│ └── yara/
|
||||
│ ├── yara-rules-basics.mp4
|
||||
│ └── yara-advanced-hunting.mp4
|
||||
├── methods/ # Methodologie-Videos
|
||||
│ ├── timeline-analysis/
|
||||
│ │ ├── timeline-fundamentals.mp4
|
||||
│ │ └── timeline-correlation.webm
|
||||
│ ├── disk-imaging/
|
||||
│ │ ├── imaging-best-practices.mp4
|
||||
│ │ └── imaging-verification.mp4
|
||||
│ └── incident-response/
|
||||
│ ├── ir-methodology.mp4
|
||||
│ └── ir-documentation.webm
|
||||
├── concepts/ # Konzeptuelle Erklärungen
|
||||
│ ├── forensics-fundamentals/
|
||||
│ │ ├── hash-functions-explained.mp4
|
||||
│ │ ├── chain-of-custody.mp4
|
||||
│ │ └── evidence-handling.webm
|
||||
│ └── technical-concepts/
|
||||
│ ├── regex-patterns.mp4
|
||||
│ └── file-systems.webm
|
||||
└── shared/ # Übergreifende Inhalte
|
||||
├── nist-methodology.mp4
|
||||
├── legal-considerations.webm
|
||||
└── best-practices-overview.mp4
|
||||
```
|
||||
|
||||
## 🦊 Firefox-Kompatibilität (KRITISCH)
|
||||
|
||||
### **Wichtiger Hinweis**
|
||||
Videos **müssen** in Firefox-kompatiblen Formaten bereitgestellt werden, da das System automatische Firefox-Unterstützung implementiert. Nicht-kompatible Formate führen zu Fehlern!
|
||||
|
||||
### Unterstützte Formate
|
||||
|
||||
#### ✅ Empfohlene Formate (höchste Kompatibilität)
|
||||
|
||||
**MP4 (H.264/AVC + AAC):**
|
||||
```bash
|
||||
# Konvertierung mit ffmpeg
|
||||
ffmpeg -i input.mov \
|
||||
-c:v libx264 \
|
||||
-c:a aac \
|
||||
-profile:v baseline \
|
||||
-level 3.0 \
|
||||
-movflags +faststart \
|
||||
output.mp4
|
||||
```
|
||||
|
||||
**WebM (VP8/VP9 + Vorbis/Opus):**
|
||||
```bash
|
||||
# VP9 für beste Qualität
|
||||
ffmpeg -i input.mov \
|
||||
-c:v libvpx-vp9 \
|
||||
-c:a libopus \
|
||||
-b:v 1M \
|
||||
-b:a 128k \
|
||||
output.webm
|
||||
|
||||
# VP8 für breitere Kompatibilität
|
||||
ffmpeg -i input.mov \
|
||||
-c:v libvpx \
|
||||
-c:a libvorbis \
|
||||
-b:v 1M \
|
||||
-b:a 128k \
|
||||
output.webm
|
||||
```
|
||||
|
||||
#### ⚠️ Fallback-Format
|
||||
|
||||
**OGG Theora (für ältere Firefox-Versionen):**
|
||||
```bash
|
||||
ffmpeg -i input.mov \
|
||||
-c:v libtheora \
|
||||
-c:a libvorbis \
|
||||
-b:v 1M \
|
||||
-b:a 128k \
|
||||
output.ogv
|
||||
```
|
||||
|
||||
### ❌ Nicht unterstützte Formate in Firefox
|
||||
|
||||
- **H.265/HEVC** (.mp4, .mov) - Wird nicht dekodiert
|
||||
- **AV1** (.mp4, .webm) - Eingeschränkte Unterstützung
|
||||
- **Proprietäre Codecs** (.wmv, .avi mit proprietären Codecs)
|
||||
- **Apple-spezifische Formate** (.mov mit ProRes, .m4v)
|
||||
|
||||
### Multi-Format-Bereitstellung
|
||||
|
||||
Für maximale Kompatibilität mehrere Formate bereitstellen:
|
||||
|
||||
```html
|
||||
<video title="Autopsy Installation Tutorial" controls>
|
||||
<source src="/videos/tools/autopsy/installation.mp4" type="video/mp4">
|
||||
<source src="/videos/tools/autopsy/installation.webm" type="video/webm">
|
||||
<source src="/videos/tools/autopsy/installation.ogv" type="video/ogg">
|
||||
<p>Ihr Browser unterstützt das Video-Element nicht.</p>
|
||||
</video>
|
||||
```
|
||||
|
||||
## 🔧 Video-Konvertierung und -Optimierung
|
||||
|
||||
### Qualitätsrichtlinien
|
||||
|
||||
#### Auflösung und Bitrate
|
||||
|
||||
**720p (empfohlen für Tutorials):**
|
||||
```bash
|
||||
ffmpeg -i input.mov \
|
||||
-vf scale=1280:720 \
|
||||
-c:v libx264 \
|
||||
-b:v 2M \
|
||||
-c:a aac \
|
||||
-b:a 128k \
|
||||
output.mp4
|
||||
```
|
||||
|
||||
**1080p (für detaillierte Demonstrationen):**
|
||||
```bash
|
||||
ffmpeg -i input.mov \
|
||||
-vf scale=1920:1080 \
|
||||
-c:v libx264 \
|
||||
-b:v 4M \
|
||||
-c:a aac \
|
||||
-b:a 128k \
|
||||
output.mp4
|
||||
```
|
||||
|
||||
**480p (mobile-optimiert):**
|
||||
```bash
|
||||
ffmpeg -i input.mov \
|
||||
-vf scale=854:480 \
|
||||
-c:v libx264 \
|
||||
-b:v 1M \
|
||||
-c:a aac \
|
||||
-b:a 96k \
|
||||
output.mp4
|
||||
```
|
||||
|
||||
### Optimierung für Web-Streaming
|
||||
|
||||
#### Fast Start für progressive Download
|
||||
```bash
|
||||
# Metadata an Dateianfang verschieben
|
||||
ffmpeg -i input.mp4 -c copy -movflags +faststart output.mp4
|
||||
```
|
||||
|
||||
#### Keyframe-Intervall optimieren
|
||||
```bash
|
||||
# Keyframes alle 2 Sekunden für bessere Suche
|
||||
ffmpeg -i input.mov \
|
||||
-c:v libx264 \
|
||||
-g 60 \
|
||||
-keyint_min 60 \
|
||||
-sc_threshold 0 \
|
||||
output.mp4
|
||||
```
|
||||
|
||||
### Batch-Konvertierung
|
||||
|
||||
**Alle Videos in einem Verzeichnis konvertieren:**
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# convert-all.sh
|
||||
for file in *.mov *.avi *.mkv; do
|
||||
if [ -f "$file" ]; then
|
||||
name=$(basename "$file" | cut -d. -f1)
|
||||
|
||||
# MP4 erstellen
|
||||
ffmpeg -i "$file" \
|
||||
-c:v libx264 \
|
||||
-c:a aac \
|
||||
-b:v 2M \
|
||||
-b:a 128k \
|
||||
-movflags +faststart \
|
||||
"${name}.mp4"
|
||||
|
||||
# WebM erstellen
|
||||
ffmpeg -i "$file" \
|
||||
-c:v libvpx-vp9 \
|
||||
-c:a libopus \
|
||||
-b:v 1.5M \
|
||||
-b:a 128k \
|
||||
"${name}.webm"
|
||||
fi
|
||||
done
|
||||
```
|
||||
|
||||
## 📊 Dateigröße und Performance
|
||||
|
||||
### Größenrichtlinien
|
||||
|
||||
**Streaming-optimiert:**
|
||||
- 720p: 5-15 MB/Minute
|
||||
- 1080p: 20-40 MB/Minute
|
||||
- 480p: 2-8 MB/Minute
|
||||
|
||||
**Maximale Dateigröße:**
|
||||
- Tutorial-Videos: < 100 MB
|
||||
- Kurze Demos: < 50 MB
|
||||
- Konzept-Erklärungen: < 30 MB
|
||||
|
||||
### Kompressionseinstellungen
|
||||
|
||||
**Ausgewogene Qualität/Größe:**
|
||||
```bash
|
||||
ffmpeg -i input.mov \
|
||||
-c:v libx264 \
|
||||
-preset medium \
|
||||
-crf 23 \
|
||||
-c:a aac \
|
||||
-b:a 128k \
|
||||
output.mp4
|
||||
```
|
||||
|
||||
**Hohe Kompression (kleinere Dateien):**
|
||||
```bash
|
||||
ffmpeg -i input.mov \
|
||||
-c:v libx264 \
|
||||
-preset slow \
|
||||
-crf 28 \
|
||||
-c:a aac \
|
||||
-b:a 96k \
|
||||
output.mp4
|
||||
```
|
||||
|
||||
## 🎬 Video-Thumbnail-Generierung
|
||||
|
||||
Automatische Thumbnail-Erstellung:
|
||||
|
||||
```bash
|
||||
# Thumbnail nach 10 Sekunden
|
||||
ffmpeg -i input.mp4 -ss 00:00:10 -vframes 1 -q:v 2 thumbnail.jpg
|
||||
|
||||
# Mehrere Thumbnails für Auswahl
|
||||
ffmpeg -i input.mp4 -vf fps=1/30 thumb_%03d.jpg
|
||||
```
|
||||
|
||||
Thumbnails speichern in:
|
||||
```
|
||||
public/images/video-thumbnails/
|
||||
├── autopsy-installation-thumb.jpg
|
||||
├── volatility-basics-thumb.jpg
|
||||
└── timeline-analysis-thumb.jpg
|
||||
```
|
||||
|
||||
## 🔍 Qualitätskontrolle
|
||||
|
||||
### Pre-Upload-Checkliste
|
||||
|
||||
**✅ Format-Kompatibilität:**
|
||||
- [ ] MP4 mit H.264/AVC Video-Codec
|
||||
- [ ] AAC Audio-Codec
|
||||
- [ ] Fast Start aktiviert (`movflags +faststart`)
|
||||
- [ ] Keyframe-Intervall ≤ 2 Sekunden
|
||||
|
||||
**✅ Firefox-Test:**
|
||||
- [ ] Video lädt in Firefox ohne Fehler
|
||||
- [ ] Audio synchron mit Video
|
||||
- [ ] Controls funktionieren
|
||||
- [ ] Seeking funktioniert flüssig
|
||||
|
||||
**✅ Technische Qualität:**
|
||||
- [ ] Auflösung angemessen (720p+ für GUI-Demos)
|
||||
- [ ] Audio klar und verständlich
|
||||
- [ ] Keine Kompressionsartefakte
|
||||
- [ ] Dateigröße < 100 MB
|
||||
|
||||
**✅ Inhaltliche Qualität:**
|
||||
- [ ] Beschreibender Dateiname
|
||||
- [ ] Angemessene Länge (< 10 Minuten für Tutorials)
|
||||
- [ ] Klare Demonstration der Funktionalität
|
||||
- [ ] Sichtbare UI-Elemente
|
||||
|
||||
### Automated Testing
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# video-check.sh - Basis-Validierung
|
||||
for video in public/videos/**/*.mp4; do
|
||||
echo "Checking: $video"
|
||||
|
||||
# Format prüfen
|
||||
format=$(ffprobe -v quiet -select_streams v:0 -show_entries stream=codec_name -of csv=p=0 "$video")
|
||||
if [ "$format" != "h264" ]; then
|
||||
echo "❌ Wrong codec: $format (should be h264)"
|
||||
fi
|
||||
|
||||
# Dateigröße prüfen
|
||||
size=$(stat -c%s "$video")
|
||||
if [ $size -gt 104857600 ]; then # 100MB
|
||||
echo "⚠️ Large file: $(($size / 1048576))MB"
|
||||
fi
|
||||
|
||||
echo "✅ $video validated"
|
||||
done
|
||||
```
|
||||
|
||||
## 🚨 Troubleshooting
|
||||
|
||||
### Häufige Firefox-Probleme
|
||||
|
||||
**Problem: Video lädt nicht**
|
||||
```
|
||||
Lösung:
|
||||
1. Codec überprüfen: ffprobe -v quiet -show_format -show_streams video.mp4
|
||||
2. Fallback-Format hinzufügen
|
||||
3. Fast Start aktivieren
|
||||
```
|
||||
|
||||
**Problem: Audio/Video out of sync**
|
||||
```
|
||||
Lösung:
|
||||
ffmpeg -i input.mp4 -c:v copy -c:a aac -avoid_negative_ts make_zero output.mp4
|
||||
```
|
||||
|
||||
**Problem: Seeking funktioniert nicht**
|
||||
```
|
||||
Lösung:
|
||||
ffmpeg -i input.mp4 -c copy -movflags +faststart output.mp4
|
||||
```
|
||||
|
||||
### Performance-Probleme
|
||||
|
||||
**Problem: Lange Ladezeiten**
|
||||
```
|
||||
Lösungsansätze:
|
||||
1. Bitrate reduzieren
|
||||
2. Auflösung verringern
|
||||
3. Keyframe-Intervall optimieren
|
||||
4. Progressive Download aktivieren
|
||||
```
|
||||
|
||||
**Problem: Hohe Bandbreiten-Nutzung**
|
||||
```
|
||||
Lösungsansätze:
|
||||
1. Adaptive Streaming implementieren
|
||||
2. Multiple Qualitätsstufen bereitstellen
|
||||
3. Preload="metadata" verwenden
|
||||
```
|
||||
|
||||
## 📋 Deployment-Checkliste
|
||||
|
||||
**Nach Video-Upload:**
|
||||
|
||||
1. **✅ Dateistruktur prüfen**
|
||||
```bash
|
||||
ls -la public/videos/tools/autopsy/
|
||||
```
|
||||
|
||||
2. **✅ Permissions setzen**
|
||||
```bash
|
||||
chmod 644 public/videos/**/*.mp4
|
||||
```
|
||||
|
||||
3. **✅ Artikel-Verlinkung testen**
|
||||
- Video-Tags in Markdown funktionieren
|
||||
- Responsive Container werden generiert
|
||||
- Thumbnails laden korrekt
|
||||
|
||||
4. **✅ Browser-Kompatibilität**
|
||||
- Firefox: Codec-Support prüfen
|
||||
- Chrome: Performance testen
|
||||
- Safari: Fallback-Formate testen
|
||||
- Mobile: Touch-Controls funktionieren
|
||||
|
||||
5. **✅ Build-System**
|
||||
```bash
|
||||
npm run build
|
||||
# Keine Video-bezogenen Fehler in Console
|
||||
```
|
||||
|
||||
Bei Problemen kontaktieren Sie mstoeck3@hs-mittweida.de mit:
|
||||
- Browser und Version
|
||||
- Video-Dateiname und -pfad
|
||||
- Fehlermeldungen aus Browser-Console
|
||||
- Screenshot des Problems
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
---
|
||||
// src/components/ContributionButton.astro - CLEANED: Removed duplicate auth script
|
||||
// src/components/ContributionButton.astro
|
||||
export interface Props {
|
||||
type: 'edit' | 'new' | 'write';
|
||||
toolName?: string;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
import { createToolSlug } from '../utils/toolHelpers.js';
|
||||
import { createToolSlug } from '../utils/clientUtils.js';
|
||||
|
||||
export interface Props {
|
||||
toolName: string;
|
||||
|
||||
@@ -4,7 +4,6 @@ import { getToolsData } from '../utils/dataService.js';
|
||||
const data = await getToolsData();
|
||||
const scenarios = data.scenarios || [];
|
||||
|
||||
// Configuration
|
||||
const maxDisplayed = 9;
|
||||
const displayedScenarios = scenarios.slice(0, maxDisplayed);
|
||||
---
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
---
|
||||
//src/components/ToolFilters.astro
|
||||
import { getToolsData } from '../utils/dataService.js';
|
||||
|
||||
const data = await getToolsData();
|
||||
@@ -54,7 +55,7 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
|
||||
<!-- Semantic Search Toggle - Inline -->
|
||||
<div id="semantic-search-container" class="semantic-search-inline hidden">
|
||||
<label class="semantic-toggle-wrapper" title="Semantische Suche verwendet Embeddings. Dadurch kann mit natürlicher Sprache/Begriffen gesucht werden, die Ergebnisse richten sich nach der euklidischen Distanz.">
|
||||
<label class="semantic-toggle-wrapper" title="Semantische Suche verwendet Embeddings. Dadurch kann mit natürlicher Sprache/Begriffen gesucht werden, die Ergebnisse richten sich nach der cosinus-Distanz.">
|
||||
<input type="checkbox" id="semantic-search-enabled" disabled/>
|
||||
<div class="semantic-checkbox-custom"></div>
|
||||
<span class="semantic-toggle-label">
|
||||
@@ -305,8 +306,7 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
</div>
|
||||
|
||||
<script define:vars={{ toolsData: data.tools, tagFrequency, sortedTags }}>
|
||||
window.toolsData = toolsData;
|
||||
|
||||
window.toolsData = toolsData;
|
||||
document.addEventListener('DOMContentLoaded', () => {
|
||||
const elements = {
|
||||
searchInput: document.getElementById('search-input'),
|
||||
@@ -358,7 +358,7 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
try {
|
||||
const res = await fetch('/api/ai/embeddings-status');
|
||||
const { embeddings } = await res.json();
|
||||
semanticSearchAvailable = embeddings?.enabled && embeddings?.initialized;
|
||||
semanticSearchAvailable = embeddings?.initialized;
|
||||
|
||||
if (semanticSearchAvailable) {
|
||||
elements.semanticContainer.classList.remove('hidden');
|
||||
@@ -392,6 +392,13 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function isToolHosted(tool) {
|
||||
return tool.projectUrl !== undefined &&
|
||||
tool.projectUrl !== null &&
|
||||
tool.projectUrl !== "" &&
|
||||
tool.projectUrl.trim() !== "";
|
||||
}
|
||||
|
||||
function toggleCollapsible(toggleBtn, content, storageKey) {
|
||||
const isCollapsed = toggleBtn.getAttribute('data-collapsed') === 'true';
|
||||
@@ -432,13 +439,6 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
}
|
||||
}
|
||||
|
||||
function isToolHosted(tool) {
|
||||
return tool.projectUrl !== undefined &&
|
||||
tool.projectUrl !== null &&
|
||||
tool.projectUrl !== "" &&
|
||||
tool.projectUrl.trim() !== "";
|
||||
}
|
||||
|
||||
function initTagCloud() {
|
||||
const visibleCount = 20;
|
||||
elements.tagCloudItems.forEach((item, index) => {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
//src/components/ToolMatrix.astro
|
||||
import { getToolsData } from '../utils/dataService.js';
|
||||
import ShareButton from './ShareButton.astro';
|
||||
|
||||
const data = await getToolsData();
|
||||
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
---
|
||||
// src/components/Video.astro - SIMPLE responsive video component
|
||||
export interface Props {
|
||||
src: string;
|
||||
title?: string;
|
||||
controls?: boolean;
|
||||
autoplay?: boolean;
|
||||
muted?: boolean;
|
||||
loop?: boolean;
|
||||
aspectRatio?: '16:9' | '4:3' | '1:1';
|
||||
preload?: 'none' | 'metadata' | 'auto';
|
||||
}
|
||||
|
||||
const {
|
||||
src,
|
||||
title = 'Video',
|
||||
controls = true,
|
||||
autoplay = false,
|
||||
muted = false,
|
||||
loop = false,
|
||||
aspectRatio = '16:9',
|
||||
preload = 'metadata'
|
||||
} = Astro.props;
|
||||
|
||||
const aspectClass = `aspect-${aspectRatio.replace(':', '-')}`;
|
||||
---
|
||||
|
||||
<div class={`video-container ${aspectClass}`}>
|
||||
<video
|
||||
src={src}
|
||||
controls={controls}
|
||||
autoplay={autoplay}
|
||||
muted={muted}
|
||||
loop={loop}
|
||||
preload={preload}
|
||||
style="width: 100%; height: 100%;"
|
||||
data-video-title={title}
|
||||
>
|
||||
<p>Your browser does not support the video element.</p>
|
||||
</video>
|
||||
{title !== 'Video' && (
|
||||
<div class="video-metadata">
|
||||
<div class="video-title">{title}</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
@@ -1,203 +1,263 @@
|
||||
// src/config/prompts.ts - Enhanced with phase completion reasoning
|
||||
// src/config/prompts.ts
|
||||
|
||||
const RELEVANCE_RUBRIC = `
|
||||
TASK RELEVANCE (INTEGER 0–100, NO %):
|
||||
- 55–65 = Basis/ok
|
||||
- 66–75 = Gut geeignet
|
||||
- 76–85 = Sehr gut geeignet
|
||||
- >85 = Nur bei nahezu perfekter Übereinstimmung
|
||||
`.trim();
|
||||
|
||||
const STRICTNESS = `
|
||||
STRICTNESS:
|
||||
- Output MUST be pure JSON (no prose, no code fences, no trailing commas).
|
||||
- Use EXACT item names as provided (casing/spelling must match).
|
||||
- Do NOT invent items or fields. If unsure, select fewer.
|
||||
`.trim();
|
||||
|
||||
export const AI_PROMPTS = {
|
||||
|
||||
toolSelection: (mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number) => {
|
||||
const modeInstruction = mode === 'workflow'
|
||||
? 'Workflow mit 15-25 Items über alle Phasen. PFLICHT: Mindestens 40% Methoden, Rest Tools/Konzepte.'
|
||||
: 'Spezifische Lösung mit 4-10 Items. PFLICHT: Mindestens 30% Methoden wenn verfügbar.';
|
||||
enhancementQuestions: (input: string) => {
|
||||
return `Sie sind DFIR-Experte. Ein Nutzer beschreibt unten ein Szenario/Problem.
|
||||
|
||||
return `Du bist ein DFIR-Experte. Wähle die BESTEN Items aus dem vorgefilterten Set.
|
||||
ZIEL:
|
||||
- Stellen Sie NUR dann 1–3 präzise Rückfragen, wenn entscheidende forensische Lücken die weitere Analyse/Toolauswahl PHASENREIHENFOLGE oder EVIDENCE-STRATEGIE wesentlich beeinflussen würden.
|
||||
- Wenn ausreichend abgedeckt: Geben Sie eine leere Liste [] zurück.
|
||||
|
||||
AUSWAHLMETHODE: ${selectionMethod}
|
||||
${selectionMethod === 'embeddings_candidates' ?
|
||||
'✓ Semantisch relevante Items bereits vorgefiltert\n✓ Wähle die BESTEN für die konkrete Aufgabe' :
|
||||
'✓ Vollständige Datenbank verfügbar\n✓ Wähle die relevantesten Items'}
|
||||
PRIORITÄT DER THEMEN (in dieser Reihenfolge prüfen):
|
||||
1) Available Evidence & Artefakte (z.B. RAM-Dump, Disk-Image, Logs, PCAP, Registry, Cloud/Audit-Logs)
|
||||
2) Scope/Systems (konkrete Plattformen/Assets/Identitäten/Netzsegmente)
|
||||
3) Investigation Objectives (Ziele: IOC-Extraktion, Timeline, Impact, Attribution)
|
||||
4) Timeline/Timeframe (kritische Zeitfenster, Erhalt flüchtiger Daten)
|
||||
5) Legal & Compliance (Chain of Custody, Aufbewahrung, DSGVO/Branchenvorgaben)
|
||||
6) Technical Constraints (Ressourcen, Zugriffsrechte, Tooling/EDR)
|
||||
|
||||
FRAGEN-QUALITÄT:
|
||||
- Forensisch spezifisch und entscheidungsrelevant (keine Allgemeinplätze).
|
||||
- Eine Frage pro Thema, keine Dopplungen.
|
||||
- Antwortbar vom Nutzer (keine Spekulation, keine “Beweise senden”-Aufforderungen).
|
||||
- Maximal 18 Wörter, endet mit "?".
|
||||
|
||||
VALIDIERUNG:
|
||||
- Stellen Sie NUR Fragen zu Themen, die im Nutzertext NICHT hinreichend konkret beantwortet sind (keine Wiederholung bereits gegebener Details).
|
||||
- Wenn alle priorisierten Themen ausreichend sind → [].
|
||||
|
||||
ANTWORTFORMAT (NUR JSON, KEIN ZUSÄTZLICHER TEXT):
|
||||
[
|
||||
"präzise Frage 1?",
|
||||
"präzise Frage 2?",
|
||||
"präzise Frage 3?"
|
||||
]
|
||||
|
||||
NUTZER-EINGABE:
|
||||
${input}`.trim();
|
||||
},
|
||||
toolSelection: (mode: string, userQuery: string, maxSelectedItems: number) => {
|
||||
const modeInstruction =
|
||||
mode === 'workflow'
|
||||
? 'Workflow mit 15–25 Items über alle Phasen. Pflicht: ~40% Methoden, Rest Software/Konzepte (falls verfügbar).'
|
||||
: 'Spezifische Lösung mit 4–10 Items. Pflicht: ≥30% Methoden (falls verfügbar).';
|
||||
|
||||
return `Du bist DFIR-Experte. Wähle die BESTEN Items aus dem bereits semantisch vorgefilterten Set für die konkrete Aufgabe.
|
||||
|
||||
${modeInstruction}
|
||||
|
||||
ANFRAGE: "${userQuery}"
|
||||
|
||||
VERFÜGBARE ITEM-TYPEN:
|
||||
- TOOLS (type: "software"/"method") → praktische Anwendungen und Vorgehensweisen
|
||||
- KONZEPTE (type: "concept") → theoretisches Wissen und Methodiken
|
||||
ITEM-TYPEN:
|
||||
- TOOLS (type: "software" | "method")
|
||||
- KONZEPTE (type: "concept")
|
||||
|
||||
AUSWAHLSTRATEGIE:
|
||||
1. **ERSTE PRIORITÄT: Relevanz zur Anfrage**
|
||||
- Direkt anwendbar auf das Problem
|
||||
- Löst die Kernherausforderung
|
||||
|
||||
2. **ZWEITE PRIORITÄT: Ausgewogene Mischung**
|
||||
- Tools/Methoden für praktische Umsetzung → selectedTools
|
||||
- Konzepte für methodisches Verständnis → selectedConcepts
|
||||
- WICHTIG: Auch Konzepte auswählen, nicht nur Tools!
|
||||
|
||||
3. **QUALITÄT > QUANTITÄT**
|
||||
- Lieber weniger perfekte Items als viele mittelmäßige
|
||||
- Jedes Item muss begründbar sein
|
||||
AUSWAHLPRINZIPIEN:
|
||||
1) Relevanz zur Anfrage (direkt anwendbar, adressiert Kernproblem)
|
||||
2) Ausgewogene Mischung (Praxis: selectedTools; Methodik: selectedConcepts)
|
||||
3) Qualität > Quantität (lieber weniger, dafür passgenau)
|
||||
4) Keine Erfindungen. Wenn etwas nicht passt, wähle weniger.
|
||||
|
||||
AUSWAHLREGELN:
|
||||
- Wähle ${mode === 'workflow' ? '15-25' : '4-10'} Items total, max ${maxSelectedItems}
|
||||
- BEIDE Arrays füllen: selectedTools UND selectedConcepts
|
||||
- Mindestens 1-2 Konzepte auswählen für methodische Fundierung
|
||||
- Tools: 40% Methoden (type="method"), Rest Software (type="software")
|
||||
- Wähle ${mode === 'workflow' ? '15–25' : '4–10'} Items total (max ${maxSelectedItems})
|
||||
- Fülle BEIDE Arrays: selectedTools UND selectedConcepts
|
||||
- Mindestens 1–2 Konzepte (falls verfügbar)
|
||||
- Bevorzugt ~40% Methoden (Workflow) bzw. ≥30% Methoden (Tool-Modus), sofern vorhanden
|
||||
- Sortiere selectedTools grob nach Eignung (bestes zuerst)
|
||||
|
||||
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
|
||||
Skalenhinweis (für spätere Schritte – einheitlich):
|
||||
${RELEVANCE_RUBRIC}
|
||||
|
||||
${STRICTNESS}
|
||||
|
||||
ANTWORT (NUR JSON):
|
||||
{
|
||||
"selectedTools": ["ToolName1", "MethodName1", ...],
|
||||
"selectedConcepts": ["ConceptName1", "ConceptName2", ...],
|
||||
"reasoning": "Kurze Begründung mit Erwähnung der Tool/Konzept-Balance"
|
||||
"selectedTools": ["ToolName1", "MethodName1", "..."],
|
||||
"selectedConcepts": ["ConceptName1", "ConceptName2", "..."],
|
||||
"reasoning": "Sehr kurz: Balance/Abdeckung begründen"
|
||||
}`;
|
||||
},
|
||||
|
||||
toolSelectionWithData: (basePrompt: string, toolsToSend: any[], conceptsToSend: any[]) => {
|
||||
return `${basePrompt}
|
||||
|
||||
VERFÜGBARE TOOLS (${toolsToSend.length} Items - Methoden und Software):
|
||||
VERFÜGBARE TOOLS (${toolsToSend.length}):
|
||||
${JSON.stringify(toolsToSend, null, 2)}
|
||||
|
||||
VERFÜGBARE KONZEPTE (${conceptsToSend.length} Items - theoretisches Wissen):
|
||||
VERFÜGBARE KONZEPTE (${conceptsToSend.length}):
|
||||
${JSON.stringify(conceptsToSend, null, 2)}
|
||||
|
||||
WICHTIGER HINWEIS: Wähle sowohl aus TOOLS als auch aus KONZEPTEN aus! Konzepte sind essentiell für methodische Fundierung.`;
|
||||
WICHTIG:
|
||||
- Wähle nur aus obigen Listen. Keine neuen Namen.
|
||||
- Nutze exakte Namen. Keine Synonyme/Varianten.
|
||||
|
||||
Hinweis zur einheitlichen Relevanz-Skala:
|
||||
${RELEVANCE_RUBRIC}
|
||||
|
||||
${STRICTNESS}`;
|
||||
},
|
||||
|
||||
scenarioAnalysis: (isWorkflow: boolean, userQuery: string) => {
|
||||
const analysisType = isWorkflow ? 'Szenario' : 'Problem';
|
||||
const focus = isWorkflow ?
|
||||
'Angriffsvektoren, betroffene Systeme, Zeitkritikalität' :
|
||||
'Kernherausforderung, verfügbare Daten, methodische Anforderungen';
|
||||
const focus = isWorkflow
|
||||
? 'Angriffsvektoren, betroffene Systeme, Zeitkritikalität'
|
||||
: 'Kernherausforderung, verfügbare Daten, methodische Anforderungen';
|
||||
|
||||
return `DFIR-Experte: Analysiere das ${analysisType}.
|
||||
|
||||
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
|
||||
|
||||
Fokus: ${focus}
|
||||
|
||||
Antwort: Fließtext ohne Listen, max 100 Wörter.`;
|
||||
Antwort: Fließtext, max 100 Wörter. Keine Liste, keine Einleitung.`;
|
||||
},
|
||||
|
||||
investigationApproach: (isWorkflow: boolean, userQuery: string) => {
|
||||
const approachType = isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz';
|
||||
const focus = isWorkflow ?
|
||||
'Triage-Prioritäten, Phasenabfolge, Kontaminationsvermeidung' :
|
||||
'Methodenauswahl, Validierung, Integration';
|
||||
const focus = isWorkflow
|
||||
? 'Triage-Prioritäten, Phasenabfolge, Kontaminationsvermeidung'
|
||||
: 'Methodenauswahl, Validierung, Integration';
|
||||
|
||||
return `Entwickle einen ${approachType}.
|
||||
|
||||
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
|
||||
|
||||
Fokus: ${focus}
|
||||
|
||||
Antwort: Fließtext ohne Listen, max 100 Wörter.`;
|
||||
Antwort: Fließtext, max 100 Wörter.`;
|
||||
},
|
||||
|
||||
criticalConsiderations: (isWorkflow: boolean, userQuery: string) => {
|
||||
const focus = isWorkflow ?
|
||||
'Beweissicherung vs. Gründlichkeit, Chain of Custody' :
|
||||
'Tool-Validierung, False Positives/Negatives, Qualifikationen';
|
||||
const focus = isWorkflow
|
||||
? 'Beweissicherung vs. Gründlichkeit, Chain of Custody'
|
||||
: 'Tool-Validierung, False Positives/Negatives, Qualifikationen';
|
||||
|
||||
return `Identifiziere kritische Überlegungen.
|
||||
|
||||
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
|
||||
|
||||
Fokus: ${focus}
|
||||
|
||||
Antwort: Fließtext ohne Listen, max 100 Wörter.`;
|
||||
Antwort: Fließtext, max 100 Wörter.`;
|
||||
},
|
||||
|
||||
phaseToolSelection: (userQuery: string, phase: any, phaseTools: any[]) => {
|
||||
const methods = phaseTools.filter(t => t.type === 'method');
|
||||
const tools = phaseTools.filter(t => t.type === 'software');
|
||||
|
||||
|
||||
if (phaseTools.length === 0) {
|
||||
return `Keine Methoden/Tools für Phase "${phase.name}" verfügbar. Antworte mit leerem Array: []`;
|
||||
}
|
||||
|
||||
return `Du bist ein DFIR-Experte. Wähle die 2-3 BESTEN Items für Phase "${phase.name}".
|
||||
|
||||
return `Wähle die 2–3 BESTEN Items für Phase "${phase.name}".
|
||||
|
||||
SZENARIO: "${userQuery}"
|
||||
PHASE: ${phase.name} - ${phase.description || ''}
|
||||
PHASE: ${phase.name} — ${phase.description || ''}
|
||||
|
||||
VERFÜGBARE ITEMS (bereits von KI vorausgewählt):
|
||||
VERFÜGBARE ITEMS:
|
||||
${methods.length > 0 ? `
|
||||
METHODEN (${methods.length}):
|
||||
${methods.map((method: any) =>
|
||||
`- ${method.name}
|
||||
Typ: ${method.type}
|
||||
Beschreibung: ${method.description}
|
||||
Domains: ${method.domains?.join(', ') || 'N/A'}
|
||||
Skill Level: ${method.skillLevel}`
|
||||
${methods.map((m: any) =>
|
||||
`- ${m.name}
|
||||
Typ: ${m.type}
|
||||
Beschreibung: ${m.description}
|
||||
Domains: ${m.domains?.join(', ') || 'N/A'}
|
||||
Skill Level: ${m.skillLevel}`
|
||||
).join('\n\n')}
|
||||
` : 'Keine Methoden verfügbar'}
|
||||
|
||||
${tools.length > 0 ? `
|
||||
SOFTWARE TOOLS (${tools.length}):
|
||||
${tools.map((tool: any) =>
|
||||
`- ${tool.name}
|
||||
Typ: ${tool.type}
|
||||
Beschreibung: ${tool.description}
|
||||
Plattformen: ${tool.platforms?.join(', ') || 'N/A'}
|
||||
Skill Level: ${tool.skillLevel}`
|
||||
SOFTWARE (${tools.length}):
|
||||
${tools.map((t: any) =>
|
||||
`- ${t.name}
|
||||
Typ: ${t.type}
|
||||
Beschreibung: ${t.description}
|
||||
Plattformen: ${t.platforms?.join(', ') || 'N/A'}
|
||||
Skill Level: ${t.skillLevel}`
|
||||
).join('\n\n')}
|
||||
` : 'Keine Software-Tools verfügbar'}
|
||||
|
||||
AUSWAHLREGELN FÜR PHASE "${phase.name}":
|
||||
1. Wähle die 2-3 BESTEN Items für diese spezifische Phase
|
||||
2. Priorisiere Items, die DIREKT für "${phase.name}" relevant sind
|
||||
3. Mindestens 1 Methode wenn verfügbar, Rest Software-Tools
|
||||
4. Begründe WARUM jedes Item für diese Phase optimal ist
|
||||
REGELN:
|
||||
1) 2–3 Items, direkt phasenrelevant; mind. 1 Methode, falls verfügbar
|
||||
2) Begründung pro Item (präzise, anwendungsbezogen)
|
||||
3) Verwende EXAKTE Namen aus den Listen. Keine Erfindungen.
|
||||
|
||||
WICHTIG: Verwende EXAKT die Namen wie oben aufgelistet (ohne Präfixe wie M1./T2.)!
|
||||
${RELEVANCE_RUBRIC}
|
||||
|
||||
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB:
|
||||
${STRICTNESS}
|
||||
|
||||
ANTWORT (NUR JSON):
|
||||
[
|
||||
{
|
||||
"toolName": "Exakter Name aus der Liste oben",
|
||||
"taskRelevance": 85,
|
||||
"justification": "Detaillierte Begründung (60-80 Wörter) warum optimal für ${phase.name} - erkläre Anwendung, Vorteile und spezifische Relevanz",
|
||||
"limitations": ["Mögliche Einschränkung für diese Phase"]
|
||||
"toolName": "Exakter Name",
|
||||
"taskRelevance": 0,
|
||||
"justification": "60–80 Wörter zur phasenspezifischen Eignung",
|
||||
"limitations": ["Optionale spezifische Einschränkung"]
|
||||
}
|
||||
]`;
|
||||
},
|
||||
|
||||
toolEvaluation: (userQuery: string, tool: any, rank: number, taskRelevance: number) => {
|
||||
toolEvaluation: (userQuery: string, tool: any, rank: number) => {
|
||||
const itemType = tool.type === 'method' ? 'Methode' : 'Tool';
|
||||
|
||||
return `Erkläre die Anwendung dieser/dieses ${itemType}.
|
||||
|
||||
return `Bewerte diese/diesen ${itemType} ausschließlich bzgl. des PROBLEMS.
|
||||
|
||||
PROBLEM: "${userQuery}"
|
||||
${itemType.toUpperCase()}: ${tool.name} (${taskRelevance}% Eignung)
|
||||
${itemType.toUpperCase()}: ${tool.name}
|
||||
TYP: ${tool.type}
|
||||
|
||||
Bereits als Rang ${rank} bewertet.
|
||||
ANWEISUNGEN:
|
||||
- Nur vorhandene Metadaten nutzen (keine Annahmen, keine Websuche).
|
||||
- "taskRelevance" als GANZZAHL 0–100 nach einheitlicher Skala vergeben.
|
||||
- Realistische Scores i.d.R. 60–80, >85 nur bei nahezu perfektem Fit.
|
||||
- Keine Texte außerhalb des JSON.
|
||||
|
||||
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB DER JSON-STRUKTUR:
|
||||
${RELEVANCE_RUBRIC}
|
||||
${STRICTNESS}
|
||||
|
||||
ANTWORT (NUR JSON):
|
||||
{
|
||||
"detailed_explanation": "Warum und wie einsetzen",
|
||||
"implementation_approach": "Konkrete Schritte",
|
||||
"pros": ["Vorteil 1", "Vorteil 2"],
|
||||
"limitations": ["Einschränkung 1"],
|
||||
"alternatives": "Alternative Ansätze"
|
||||
"alternatives": "Kurz zu sinnvollen Alternativen",
|
||||
"taskRelevance": 0
|
||||
}`;
|
||||
},
|
||||
|
||||
backgroundKnowledgeSelection: (userQuery: string, mode: string, selectedToolNames: string[], availableConcepts: any[]) => {
|
||||
return `Wähle 2-4 relevante Konzepte.
|
||||
return `Wähle 2–4 Konzepte, die das Verständnis/den Einsatz der ausgewählten Tools verbessern.
|
||||
|
||||
${mode === 'workflow' ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
|
||||
AUSGEWÄHLTE TOOLS: ${selectedToolNames.join(', ')}
|
||||
|
||||
VERFÜGBARE KONZEPTE (${availableConcepts.length} KI-kuratiert):
|
||||
${availableConcepts.map((c: any) =>
|
||||
`- ${c.name}: ${c.description}...`
|
||||
).join('\n')}
|
||||
VERFÜGBARE KONZEPTE (${availableConcepts.length}):
|
||||
${availableConcepts.map((c: any) => `- ${c.name}: ${c.description}...`).join('\n')}
|
||||
|
||||
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB DER JSON-STRUKTUR:
|
||||
REGELN:
|
||||
- Nur Konzepte aus obiger Liste wählen.
|
||||
- Relevanz kurz und konkret begründen.
|
||||
|
||||
${STRICTNESS}
|
||||
|
||||
ANTWORT (NUR JSON):
|
||||
[
|
||||
{
|
||||
"conceptName": "Name",
|
||||
"relevance": "Warum kritisch für Methodik"
|
||||
"conceptName": "Exakter Name",
|
||||
"relevance": "Warum dieses Konzept hier methodisch wichtig ist"
|
||||
}
|
||||
]`;
|
||||
},
|
||||
@@ -209,27 +269,14 @@ ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB DER JSON-S
|
||||
tool: any,
|
||||
completionContext: string
|
||||
) => {
|
||||
return `Du bist ein DFIR-Experte. Erkläre warum dieses Tool nachträglich zur Vervollständigung hinzugefügt wurde.
|
||||
|
||||
KONTEXT DER NACHTRÄGLICHEN ERGÄNZUNG:
|
||||
- Ursprüngliche KI-Auswahl war zu spezifisch/eng gefasst
|
||||
- Phase "${phase.name}" war unterrepräsentiert in der initialen Auswahl
|
||||
- Semantische Suche fand zusätzlich relevante Tools für diese Phase
|
||||
- Tool wird nachträglich hinzugefügt um Vollständigkeit zu gewährleisten
|
||||
return `Begründe knapp die Nachergänzung für Phase "${phase.name}".
|
||||
|
||||
URSPRÜNGLICHE ANFRAGE: "${originalQuery}"
|
||||
PHASE ZU VERVOLLSTÄNDIGEN: ${phase.name} - ${phase.description || ''}
|
||||
PHASE: ${phase.name} — ${phase.description || ''}
|
||||
HINZUGEFÜGTES TOOL: ${selectedToolName} (${tool.type})
|
||||
TOOL-BESCHREIBUNG: ${tool.description}
|
||||
KONTEXT: ${completionContext}
|
||||
|
||||
BEGRÜNDUNGSKONTEXT: ${completionContext}
|
||||
|
||||
Erstelle eine präzise Begründung (max. 40 Wörter), die erklärt:
|
||||
1. WARUM dieses Tool nachträglich hinzugefügt wurde
|
||||
2. WIE es die ${phase.name}-Phase ergänzt
|
||||
3. DASS es die ursprünglich zu spezifische Auswahl erweitert
|
||||
|
||||
Antwort: Prägnanter Fließtext, knappe Begründung für Nachergänzung. Vermeide Begriffe wie "Das Tool" und gib keinen einleitenden Text wie "Begründung (40 Wörter):" an.`;
|
||||
Antwort: Prägnanter Fließtext, max 40 Wörter, keine Einleitung, keine Liste.`;
|
||||
},
|
||||
|
||||
generatePhaseCompletionPrompt(
|
||||
@@ -238,47 +285,48 @@ Antwort: Prägnanter Fließtext, knappe Begründung für Nachergänzung. Vermeid
|
||||
candidateTools: any[],
|
||||
candidateConcepts: any[]
|
||||
): string {
|
||||
return `Du bist ein DFIR-Experte. Die initiale KI-Auswahl war zu spezifisch - die Phase "${phase.name}" ist unterrepräsentiert.
|
||||
return `Unterrepräsentierte Phase: "${phase.name}". Ergänze 1–2 passende Items aus der semantischen Nachsuche.
|
||||
|
||||
KONTEXT: Die Hauptauswahl hat zu wenige Tools für "${phase.name}" identifiziert. Wähle jetzt ergänzende Tools aus semantischer Nachsuche.
|
||||
ORIGINALANFRAGE: "${originalQuery}"
|
||||
PHASE: ${phase.name} — ${phase.description || ''}
|
||||
|
||||
ORIGINAL ANFRAGE: "${originalQuery}"
|
||||
UNTERREPRÄSENTIERTE PHASE: ${phase.name} - ${phase.description || ''}
|
||||
|
||||
SEMANTISCH GEFUNDENE KANDIDATEN für Nachergänzung:
|
||||
|
||||
VERFÜGBARE TOOLS (${candidateTools.length}):
|
||||
${candidateTools.map((tool: any) => `
|
||||
- ${tool.name} (${tool.type})
|
||||
Beschreibung: ${tool.description}
|
||||
Skill Level: ${tool.skillLevel}
|
||||
KANDIDATEN — TOOLS (${candidateTools.length}):
|
||||
${candidateTools.map((t: any) => `
|
||||
- ${t.name} (${t.type})
|
||||
Beschreibung: ${t.description}
|
||||
Skill Level: ${t.skillLevel}
|
||||
`).join('')}
|
||||
|
||||
${candidateConcepts.length > 0 ? `
|
||||
VERFÜGBARE KONZEPTE (${candidateConcepts.length}):
|
||||
${candidateConcepts.map((concept: any) => `
|
||||
- ${concept.name}
|
||||
Beschreibung: ${concept.description}
|
||||
KANDIDATEN — KONZEPTE (${candidateConcepts.length}):
|
||||
${candidateConcepts.map((c: any) => `
|
||||
- ${c.name}
|
||||
Beschreibung: ${c.description}
|
||||
`).join('')}
|
||||
` : ''}
|
||||
|
||||
AUSWAHLREGELN FÜR NACHERGÄNZUNG:
|
||||
1. Wähle 1-2 BESTE Methoden/Tools die die ${phase.name}-Phase optimal ergänzen
|
||||
2. Methoden/Tools müssen für die ursprüngliche Anfrage relevant sein
|
||||
3. Ergänzen, nicht ersetzen - erweitere die zu spezifische Erstauswahl
|
||||
REGELN:
|
||||
- Wähle 1–2 Tools/Methoden, die ${phase.name} sinnvoll ergänzen (keine Ersetzung).
|
||||
- Nur aus obigen Kandidaten wählen; exakte Namen verwenden.
|
||||
- Kurze Begründung, warum diese Ergänzung nötig ist.
|
||||
|
||||
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
|
||||
Skalenhinweis (einheitlich):
|
||||
${RELEVANCE_RUBRIC}
|
||||
|
||||
${STRICTNESS}
|
||||
|
||||
ANTWORT (NUR JSON):
|
||||
{
|
||||
"selectedTools": ["ToolName1", "ToolName2"],
|
||||
"selectedConcepts": ["ConceptName1"],
|
||||
"completionReasoning": "Kurze Erklärung warum diese Nachergänzung für ${phase.name} notwendig war"
|
||||
"completionReasoning": "Kurze Erklärung zur Ergänzung der ${phase.name}-Phase"
|
||||
}`;
|
||||
},
|
||||
|
||||
finalRecommendations: (isWorkflow: boolean, userQuery: string, selectedToolNames: string[]) => {
|
||||
const focus = isWorkflow ?
|
||||
'Workflow-Schritte, Best Practices, Objektivität' :
|
||||
'Methodische Überlegungen, Validierung, Qualitätssicherung';
|
||||
const focus = isWorkflow
|
||||
? 'Knappe Workflow-Schritte & Best Practices; neutral formulieren'
|
||||
: 'Methodische Überlegungen, Validierung, Qualitätssicherung';
|
||||
|
||||
return `Erstelle ${isWorkflow ? 'Workflow-Empfehlung' : 'methodische Überlegungen'}.
|
||||
|
||||
@@ -286,33 +334,31 @@ ${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
|
||||
AUSGEWÄHLT: ${selectedToolNames.join(', ')}${selectedToolNames.length > 5 ? '...' : ''}
|
||||
|
||||
Fokus: ${focus}
|
||||
|
||||
Antwort: Fließtext ohne Listen, max ${isWorkflow ? '100' : '80'} Wörter.`;
|
||||
Antwort: Fließtext, max ${isWorkflow ? '100' : '80'} Wörter. Keine Liste.`;
|
||||
}
|
||||
} as const;
|
||||
|
||||
export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number): string;
|
||||
export function getPrompt(key: 'enhancementQuestions', input: string): string;
|
||||
export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, maxSelectedItems: number): string;
|
||||
export function getPrompt(key: 'toolSelectionWithData', basePrompt: string, toolsToSend: any[], conceptsToSend: any[]): string;
|
||||
export function getPrompt(key: 'scenarioAnalysis', isWorkflow: boolean, userQuery: string): string;
|
||||
export function getPrompt(key: 'investigationApproach', isWorkflow: boolean, userQuery: string): string;
|
||||
export function getPrompt(key: 'criticalConsiderations', isWorkflow: boolean, userQuery: string): string;
|
||||
export function getPrompt(key: 'phaseToolSelection', userQuery: string, phase: any, phaseTools: any[]): string;
|
||||
export function getPrompt(key: 'toolEvaluation', userQuery: string, tool: any, rank: number, taskRelevance: number): string;
|
||||
export function getPrompt(key: 'toolEvaluation', userQuery: string, tool: any, rank: number): string;
|
||||
export function getPrompt(key: 'backgroundKnowledgeSelection', userQuery: string, mode: string, selectedToolNames: string[], availableConcepts: any[]): string;
|
||||
export function getPrompt(key: 'phaseCompletionReasoning', originalQuery: string, phase: any, selectedToolName: string, tool: any, completionContext: string): string;
|
||||
export function getPrompt(key: 'finalRecommendations', isWorkflow: boolean, userQuery: string, selectedToolNames: string[]): string;
|
||||
export function getPrompt(key: 'generatePhaseCompletionPrompt', originalQuery: string, phase: any, candidateTools: any[], candidateConcepts: any[]): string;
|
||||
|
||||
export function getPrompt(promptKey: keyof typeof AI_PROMPTS, ...args: any[]): string {
|
||||
try {
|
||||
const promptFunction = AI_PROMPTS[promptKey];
|
||||
if (typeof promptFunction === 'function') {
|
||||
return (promptFunction as (...args: any[]) => string)(...args);
|
||||
} else {
|
||||
console.error(`[PROMPTS] Invalid prompt key: ${promptKey}`);
|
||||
return 'Error: Invalid prompt configuration';
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`[PROMPTS] Error generating prompt ${promptKey}:`, error);
|
||||
const f = AI_PROMPTS[promptKey];
|
||||
if (typeof f === 'function') return (f as (...a: any[]) => string)(...args);
|
||||
console.error(`[PROMPTS] Invalid prompt key: ${promptKey}`);
|
||||
return 'Error: Invalid prompt configuration';
|
||||
} catch (err) {
|
||||
console.error(`[PROMPTS] Error generating prompt ${promptKey}:`, err);
|
||||
return 'Error: Failed to generate prompt';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -121,9 +121,9 @@ vol -f memory.dmp windows.info
|
||||
vol -f memory.dmp windows.pslist
|
||||
```
|
||||
|
||||
## Video-Einbindung
|
||||
## Video-Demonstration
|
||||
|
||||
<video src="/path/to/video.mp4" title="Volatility Demo" controls></video>
|
||||
<video src="/videos/volatility-basics.mp4" title="Volatility Grundlagen Tutorial" controls preload="metadata"></video>
|
||||
|
||||
## Weiterführende Links
|
||||
|
||||
@@ -143,20 +143,181 @@ vol -f memory.dmp windows.pslist
|
||||
|
||||
### Video-Einbindung
|
||||
|
||||
Videos können direkt in Markdown eingebettet werden:
|
||||
Videos können direkt in Markdown eingebettet werden und werden automatisch mit responsiven Containern erweitert:
|
||||
|
||||
#### Basis-Video-Einbindung
|
||||
|
||||
```html
|
||||
<video src="/pfad/zum/video.mp4" title="Beschreibung" controls></video>
|
||||
<video src="/videos/demo.mp4" title="Tool-Demonstration" controls></video>
|
||||
```
|
||||
|
||||
Unterstützte Attribute:
|
||||
- `src`: Pfad zur Videodatei
|
||||
- `title`: Titel für Metadaten
|
||||
- `controls`: Zeigt Player-Steuerung
|
||||
- `autoplay`: Automatisches Abspielen
|
||||
- `muted`: Stummgeschaltet
|
||||
#### Vollständige Video-Konfiguration
|
||||
|
||||
```html
|
||||
<video
|
||||
src="/videos/advanced-tutorial.mp4"
|
||||
title="Erweiterte Analysefunktionen"
|
||||
controls
|
||||
preload="metadata"
|
||||
width="720"
|
||||
height="405"
|
||||
muted
|
||||
poster="/images/video-thumbnail.jpg"
|
||||
>
|
||||
<p>Ihr Browser unterstützt das Video-Element nicht.</p>
|
||||
</video>
|
||||
```
|
||||
|
||||
#### Unterstützte Video-Attribute
|
||||
|
||||
**Basis-Attribute:**
|
||||
- `src`: **Erforderlich** - Pfad zur Videodatei (relativ zu `/public/`)
|
||||
- `title`: **Empfohlen** - Beschreibung für Metadaten und Accessibility
|
||||
- `controls`: Zeigt Player-Steuerung (Standard-Empfehlung)
|
||||
|
||||
**Erweiterte Attribute:**
|
||||
- `autoplay`: Automatisches Abspielen (nicht empfohlen für UX)
|
||||
- `muted`: Stummgeschaltet (erforderlich für Autoplay in den meisten Browsern)
|
||||
- `loop`: Endlosschleife
|
||||
- `preload`: "none", "metadata", "auto"
|
||||
- `preload`: `"none"` | `"metadata"` | `"auto"` (Standard: `"metadata"`)
|
||||
- `poster`: Vorschaubild-URL
|
||||
- `width`/`height`: Feste Dimensionen (Optional, responsive Container anpasst sich automatisch)
|
||||
|
||||
**Accessibility-Attribute:**
|
||||
- `aria-label`: Alternative Beschreibung
|
||||
- `aria-describedby`: ID eines Elements mit detaillierter Beschreibung
|
||||
|
||||
#### iframe-Einbindung (YouTube, Vimeo, etc.)
|
||||
|
||||
```html
|
||||
<iframe
|
||||
src="https://www.youtube.com/embed/VIDEO_ID"
|
||||
title="YouTube-Tutorial: Forensic Analysis mit Tool XYZ"
|
||||
width="560"
|
||||
height="315"
|
||||
frameborder="0"
|
||||
allowfullscreen
|
||||
></iframe>
|
||||
```
|
||||
|
||||
**iframe-Attribute:**
|
||||
- `src`: **Erforderlich** - Embed-URL des Video-Dienstes
|
||||
- `title`: **Erforderlich** - Beschreibung für Accessibility
|
||||
- `width`/`height`: Empfohlene Dimensionen (werden responsive angepasst)
|
||||
- `frameborder`: Auf `"0"` setzen für modernen Look
|
||||
- `allowfullscreen`: Vollbild-Modus erlauben
|
||||
- `loading`: `"lazy"` für Performance-Optimierung
|
||||
|
||||
### Automatische Video-Verarbeitung
|
||||
|
||||
Das System erweitert Video-Tags automatisch:
|
||||
|
||||
**Input:**
|
||||
```html
|
||||
<video src="/videos/demo.mp4" title="Demo" controls></video>
|
||||
```
|
||||
|
||||
**Output (automatisch generiert):**
|
||||
```html
|
||||
<div class="video-container">
|
||||
<video
|
||||
src="/videos/demo.mp4"
|
||||
title="Demo"
|
||||
controls
|
||||
preload="metadata"
|
||||
data-video-title="Demo"
|
||||
>
|
||||
<p>Your browser does not support the video element.</p>
|
||||
</video>
|
||||
<div class="video-metadata">
|
||||
<div class="video-title">Demo</div>
|
||||
</div>
|
||||
</div>
|
||||
```
|
||||
|
||||
### Firefox-Kompatibilität
|
||||
|
||||
**Wichtiger Hinweis:** Videos müssen in Firefox-kompatiblen Formaten bereitgestellt werden:
|
||||
|
||||
#### Empfohlene Formate
|
||||
|
||||
**Primäre Formate (höchste Kompatibilität):**
|
||||
- **MP4 (H.264/AVC)**: `.mp4` - Beste Kompatibilität across Browser
|
||||
- **WebM (VP8/VP9)**: `.webm` - Moderne Browser, gute Kompression
|
||||
|
||||
**Sekundäre Formate:**
|
||||
- **OGG Theora**: `.ogv` - Fallback für ältere Firefox-Versionen
|
||||
|
||||
#### Format-Konvertierung
|
||||
|
||||
```bash
|
||||
# Mit ffmpeg zu Firefox-kompatiblem MP4 konvertieren
|
||||
ffmpeg -i input.mov -c:v libx264 -c:a aac -movflags +faststart output.mp4
|
||||
|
||||
# Mit ffmpeg zu WebM konvertieren
|
||||
ffmpeg -i input.mov -c:v libvpx-vp9 -c:a libvorbis output.webm
|
||||
|
||||
# Multi-Format-Bereitstellung
|
||||
<video title="Demo" controls>
|
||||
<source src="/videos/demo.mp4" type="video/mp4">
|
||||
<source src="/videos/demo.webm" type="video/webm">
|
||||
<p>Ihr Browser unterstützt das Video-Element nicht.</p>
|
||||
</video>
|
||||
```
|
||||
|
||||
#### Firefox-spezifische Probleme
|
||||
|
||||
Das System erkennt automatisch Firefox und implementiert Error-Recovery:
|
||||
|
||||
- **Automatische Fehlererkennung** für nicht unterstützte Formate
|
||||
- **Fallback-Mechanismen** bei Codec-Problemen
|
||||
- **Erweiterte Logging** für Debugging
|
||||
|
||||
**Bekannte Firefox-Probleme:**
|
||||
- H.265/HEVC nicht unterstützt
|
||||
- Proprietäre Codecs teilweise eingeschränkt
|
||||
- MIME-Type-Sensitivität höher als bei Chrome
|
||||
|
||||
### Video-Datei-Management
|
||||
|
||||
#### Dateistruktur
|
||||
|
||||
```
|
||||
public/
|
||||
├── videos/
|
||||
│ ├── tools/
|
||||
│ │ ├── autopsy-basics.mp4
|
||||
│ │ ├── volatility-tutorial.webm
|
||||
│ │ └── yara-rules-demo.mp4
|
||||
│ ├── methods/
|
||||
│ │ ├── timeline-analysis.mp4
|
||||
│ │ └── disk-imaging.mp4
|
||||
│ └── concepts/
|
||||
│ ├── hash-functions.mp4
|
||||
│ └── chain-custody.mp4
|
||||
└── images/
|
||||
└── video-thumbnails/
|
||||
├── autopsy-thumb.jpg
|
||||
└── volatility-thumb.jpg
|
||||
```
|
||||
|
||||
#### Dateigröße-Empfehlungen
|
||||
|
||||
- **Streaming-Qualität**: 5-15 MB/Minute (720p)
|
||||
- **High-Quality Tutorials**: 20-40 MB/Minute (1080p)
|
||||
- **Mobile-Optimiert**: 2-8 MB/Minute (480p)
|
||||
|
||||
#### Konventionen
|
||||
|
||||
**Dateinamen:**
|
||||
- Lowercase mit Bindestrichen: `tool-autopsy-installation.mp4`
|
||||
- Präfix nach Kategorie: `tool-`, `method-`, `concept-`
|
||||
- Beschreibender Suffix: `-basics`, `-advanced`, `-troubleshooting`
|
||||
|
||||
**Video-Titel:**
|
||||
- Beschreibend und suchfreundlich
|
||||
- Tool/Methode im Titel erwähnen
|
||||
- Skill-Level angeben: "Grundlagen", "Erweitert", "Expertenlevel"
|
||||
|
||||
### Code-Blöcke
|
||||
|
||||
@@ -173,10 +334,10 @@ import volatility.registry as registry
|
||||
|
||||
### Tabellen
|
||||
|
||||
| Plugin | Beschreibung | Beispiel |
|
||||
|--------|--------------|----------|
|
||||
| pslist | Prozesse auflisten | `vol -f dump.raw windows.pslist` |
|
||||
| malfind | Malware finden | `vol -f dump.raw windows.malfind` |
|
||||
| Plugin | Beschreibung | Video-Tutorial |
|
||||
|--------|--------------|----------------|
|
||||
| pslist | Prozesse auflisten | [Tutorial ansehen](/videos/pslist-demo.mp4) |
|
||||
| malfind | Malware finden | [Demo](/videos/malfind-basics.mp4) |
|
||||
|
||||
## Artikel-Typen
|
||||
|
||||
@@ -274,13 +435,20 @@ Das System validiert automatisch:
|
||||
- Broken Links werden geloggt (development)
|
||||
- Dateinamen-Präfixe helfen bei der Organisation und Verknüpfung
|
||||
|
||||
### Video-Validierung
|
||||
|
||||
- Dateipfade auf Existenz geprüft (development)
|
||||
- Format-Kompatibilität gewarnt
|
||||
- Firefox-spezifische Warnings bei problematischen Formaten
|
||||
|
||||
## Deployment
|
||||
|
||||
1. Artikel von Nextcloud-Share herunterladen: https://cloud.cc24.dev/f/47971
|
||||
2. Artikel in `src/content/knowledgebase/` ablegen (flache Struktur mit Präfixen)
|
||||
3. Frontmatter nach Schema überprüfen/anpassen
|
||||
4. Build-Prozess validiert automatisch
|
||||
5. Artikel erscheint in Knowledgebase-Übersicht
|
||||
2. Videos manuell in `public/videos/` bereitstellen (siehe `public/videos/README.md`)
|
||||
3. Artikel in `src/content/knowledgebase/` ablegen (flache Struktur mit Präfixen)
|
||||
4. Frontmatter nach Schema überprüfen/anpassen
|
||||
5. Build-Prozess validiert automatisch
|
||||
6. Artikel erscheint in Knowledgebase-Übersicht
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
@@ -295,9 +463,16 @@ Das System validiert automatisch:
|
||||
- Groß-/Kleinschreibung beachten
|
||||
|
||||
**Video lädt nicht:**
|
||||
- Pfad korrekt?
|
||||
- Datei im `public/` Ordner?
|
||||
- Pfad korrekt? (beginnt mit `/videos/`)
|
||||
- Datei im `public/videos/` Ordner?
|
||||
- Unterstütztes Format? (mp4, webm, ogg)
|
||||
- Firefox-kompatibel? (H.264/AVC für MP4)
|
||||
|
||||
**Firefox-Video-Probleme:**
|
||||
- H.265/HEVC-Codecs vermeiden
|
||||
- Multiple `<source>`-Tags für Fallbacks nutzen
|
||||
- Browser-Console auf Codec-Fehler prüfen
|
||||
- MIME-Types korrekt gesetzt?
|
||||
|
||||
## Beispiel-Ordnerstruktur
|
||||
|
||||
@@ -311,4 +486,16 @@ src/content/knowledgebase/
|
||||
├── concept-hash-functions-digital-signatures.md
|
||||
├── concept-regex-pattern-matching.md
|
||||
└── concept-chain-of-custody.md
|
||||
|
||||
public/videos/
|
||||
├── tools/
|
||||
│ ├── autopsy-timeline-tutorial.mp4
|
||||
│ ├── volatility-basics-demo.mp4
|
||||
│ └── yara-rules-advanced.webm
|
||||
├── methods/
|
||||
│ ├── timeline-analysis-walkthrough.mp4
|
||||
│ └── disk-imaging-best-practices.mp4
|
||||
└── concepts/
|
||||
├── hash-functions-explained.mp4
|
||||
└── chain-custody-procedures.mp4
|
||||
```
|
||||
@@ -16,7 +16,7 @@ const knowledgebaseCollection = defineCollection({
|
||||
tags: z.array(z.string()).default([]),
|
||||
|
||||
published: z.boolean().default(true),
|
||||
gated_content: z.boolean().default(false), // NEW: Gated content flag
|
||||
gated_content: z.boolean().default(false),
|
||||
|
||||
})
|
||||
});
|
||||
|
||||
@@ -57,6 +57,44 @@ tools:
|
||||
accessType: download
|
||||
license: Apache-2.0
|
||||
knowledgebase: false
|
||||
- name: Thorium
|
||||
icon: ⚛️
|
||||
type: software
|
||||
description: >-
|
||||
CISAs portable Hybrid-Analyse-Tool für die schnelle Untersuchung von Windows-
|
||||
Systemen auf bösartige Aktivitäten. Scannt mit kuratierten YARA- und
|
||||
Sigma-Regeln Arbeitsspeicher, Prozesse, Dateisystem, Netzwerkverbindungen und
|
||||
Systemprotokolle. Ideal für schnelle Triage im Incident Response, sowohl live als auch
|
||||
auf gemounteten Images. Die Ausgabe erfolgt in strukturierten JSON-Reports.
|
||||
domains:
|
||||
- incident-response
|
||||
- malware-analysis
|
||||
phases:
|
||||
- examination
|
||||
- analysis
|
||||
platforms:
|
||||
- Linux
|
||||
related_software:
|
||||
- Loki
|
||||
- YARA
|
||||
- Velociraptor
|
||||
skillLevel: intermediate
|
||||
accessType: download
|
||||
url: https://github.com/cisagov/thorium
|
||||
license: MIT
|
||||
knowledgebase: false
|
||||
tags:
|
||||
- cli
|
||||
- triage
|
||||
- fast-scan
|
||||
- ioc-matching
|
||||
- yara-scan
|
||||
- sigma-rules
|
||||
- memory-analysis
|
||||
- process-analysis
|
||||
- filesystem-scanning
|
||||
- log-analysis
|
||||
- portable
|
||||
- name: Volatility 3
|
||||
type: software
|
||||
description: >-
|
||||
@@ -119,9 +157,8 @@ tools:
|
||||
Kill-Chain-Phasen. Föderierte Architektur ermöglicht selektives
|
||||
Intelligence-Sharing zwischen vertrauenswürdigen Partnern durch
|
||||
Tagging-System. Correlation-Engine findet automatisch Zusammenhänge
|
||||
zwischen scheinbar unabhängigen Incidents. ZeroMQ-Feed pusht IOCs in
|
||||
Echtzeit an Firewalls, SIEMs und Detection-Systeme für automatisierte
|
||||
Response.
|
||||
zwischen scheinbar unabhängigen Incidents. Integriert mit Firewalls ind
|
||||
SIEMS, die mit MISP-Anreicherungen gefüttert werden können.
|
||||
url: https://misp-project.org/
|
||||
skillLevel: intermediate
|
||||
domains:
|
||||
@@ -157,6 +194,7 @@ tools:
|
||||
- OpenCTI
|
||||
icon: 🌐
|
||||
projectUrl: https://misp.cc24.dev
|
||||
statusUrl: https://status.mikoshi.de/api/badge/34/status
|
||||
license: AGPL-3.0
|
||||
accessType: server-based
|
||||
knowledgebase: true
|
||||
@@ -221,18 +259,16 @@ tools:
|
||||
- name: Timesketch
|
||||
type: software
|
||||
description: >-
|
||||
Google's Collaborative Timeline-Analyse-Platform meistert Millionen von
|
||||
korrelierten Events durch hochperformante
|
||||
Elasticsearch-Backend-Architektur für Enterprise-Scale-Investigations.
|
||||
Plaso-Integration parst automatisch über 300 verschiedene Log-Formate in
|
||||
einheitliche Super-Timeline mit standardisierten Attributen. Interactive
|
||||
Timeline-Explorer mit dynamischen Heatmaps, Activity-Graphen und
|
||||
Statistical-Analysis für Advanced-Pattern-Recognition. Sigma-Rules werden
|
||||
direkt auf Timelines angewendet für Automated-Threat-Detection,
|
||||
Machine-Learning-Analyzers erkennen Login-Brute-Force, Lateral-Movement
|
||||
und Data-Exfiltration-Patterns. Collaborative-Features: Shared-Sketches,
|
||||
Analyst-Comments, Saved-Searches und narrative Stories für
|
||||
Management-Reporting.
|
||||
Google's Timeline-Analyse-Platform meistert Millionen von korrelierten
|
||||
Events durch skalierende Elasticsearch-Backend-Architektur für
|
||||
umfangreiche Zeitlinienanalysen. Plaso-Integration parst automatisch über
|
||||
300 verschiedene Log-Formate in einheitliche Timeline mit standardisierten
|
||||
Attributen. Statistische Analysen und Plugins zur Datenanreicherung wie
|
||||
maxming GeoIP und MISP sind verfügbar. Sigma-Rules werden direkt auf
|
||||
Timelines angewendet für automatisierte Detektion von Anomalien,
|
||||
Login-Brute-Force, Lateral-Movement und Data-Exfiltration-Patterns.
|
||||
Kollaborative Funktionen: Gemeinsames Bearbeiten, Analystenkommentare,
|
||||
"Stories" für Management-Berichterstattung.
|
||||
url: https://timesketch.org/
|
||||
skillLevel: intermediate
|
||||
domains:
|
||||
@@ -269,6 +305,7 @@ tools:
|
||||
- Kibana
|
||||
icon: ⏱️
|
||||
projectUrl: https://timesketch.cc24.dev
|
||||
statusUrl: https://status.mikoshi.de/api/badge/37/status
|
||||
license: Apache-2.0
|
||||
accessType: server-based
|
||||
- name: Wireshark
|
||||
@@ -922,18 +959,20 @@ tools:
|
||||
- name: Neo4j
|
||||
type: software
|
||||
description: >-
|
||||
Native Graph-Datenbank transformiert komplexe Relationship-Data in
|
||||
intuitive Visualisierungen durch Cypher-Query-Language für forensische
|
||||
Pattern-Detection. Graph-Algorithmen finden kürzeste Pfade zwischen
|
||||
Entities, Community-Detection identifiziert Fraud-Rings und
|
||||
Criminal-Networks automatisch. Visual-Graph-Explorer macht verborgene
|
||||
Multi-Hop-Connections sichtbar für Money-Laundering, Social-Engineering
|
||||
und Organized-Crime-Investigations. APOC-Bibliothek bietet 450+
|
||||
spezialisierte Procedures für Advanced-Analytics: Centrality-Measures,
|
||||
PageRank, Clustering-Coefficients. Bloom-Visualization-Tool für
|
||||
nicht-technische Stakeholder mit Point-and-Click-Exploration. Import aus
|
||||
CSV, JSON und relationalen Datenbanken, Elasticsearch-Integration für
|
||||
Hybrid-Search-Scenarios.
|
||||
Graph-Datenbank transformiert komplexe relationale Daten in intuitive
|
||||
Visualisierungen. Die SQL-ähnliche Cypher-Query-Language ist nach einer
|
||||
gewissen Lernkurve intuitiv und bietet viele Möglichkeiten.
|
||||
Cypher-Algorithmen finden kürzeste Pfade zwischen Entitäten, viele weitere
|
||||
Automatisierungen sind möglich. Die Anwendbarkeiten sind wegen der
|
||||
abstrakten Struktur von Neo4J daher unbegrenzt und in allen Domänen
|
||||
(hauptsichlich Netzwerkforensik, Finanztransaktionsanalysen,
|
||||
Kriminalermittlungen gegen organisiertes Verbrechen) zur Visualisierung
|
||||
und ggf. auch zur Analyse einsetzbar. Die APOC-Bibliothek bietet darüber
|
||||
hinaus noch zahlreiche weitere Plugins. Import aus CSV, JSON und
|
||||
relationalen Datenbanken.
|
||||
|
||||
Leider versteckt Neo4J einige seiner Funktionen mittlerweile hinter einem
|
||||
Premium-Modell und entfernt sich so vom Open-Source-Konzept.
|
||||
url: https://neo4j.com/
|
||||
skillLevel: intermediate
|
||||
domains:
|
||||
@@ -971,6 +1010,7 @@ tools:
|
||||
- Linkurious
|
||||
icon: 🕸️
|
||||
projectUrl: https://graph.cc24.dev
|
||||
statusUrl: https://status.mikoshi.de/api/badge/32/status
|
||||
license: GPL-3.0 / Commercial
|
||||
accessType: server-based
|
||||
- name: QGIS
|
||||
@@ -2141,23 +2181,25 @@ tools:
|
||||
related_concepts:
|
||||
- Digital Evidence Chain of Custody
|
||||
- name: Aftermath
|
||||
icon: 🎯
|
||||
type: software
|
||||
description: >-
|
||||
Jamfs Open-Source-Juwel für macOS-Forensik sammelt systematisch Artefakte
|
||||
ohne Full-System-Image. Optimiert für Incident-Response mit minimalem
|
||||
System-Impact. Extrahiert kritische Daten: laufende Prozesse, Netzwerk-
|
||||
verbindungen, installierte Software, Persistence-Mechanismen. Besonders
|
||||
wertvoll: Unified-Log-Parser für System-Events, Browser-Artefakte aller
|
||||
Major-Browser, Quick-Look-Thumbnails, FSEvents für Dateiaktivitäten. Die
|
||||
modulare Architektur erlaubt selektive Sammlung. Output in strukturierten
|
||||
JSON/CSV für einfache Analyse. Zeitstempel-Normalisierung für
|
||||
Timeline-Erstellung. Unterstützt moderne macOS-Security-Features:
|
||||
TCC-Permissions, Code-Signing-Status, XProtect-Matches. Die Remote-
|
||||
Collection via MDM/SSH skaliert auf Unternehmensflotten. Besonders clever:
|
||||
Sammlung von Cloud-Synchronisations-Artefakten (iCloud, Dropbox).
|
||||
Regelmäßige Updates für neue macOS-Versionen. Die Alternative zu teuren
|
||||
kommerziellen Mac-Forensik-Suiten.
|
||||
Jamfs Open-Source-Software für macOS-Forensik sammelt systematisch
|
||||
Artefakte, ohne zuvor ein Full-System-Image zu ziehen. Optimiert für
|
||||
Incident-Response mit minimalem Systemeingriff. Extrahiert kritische
|
||||
Daten: laufende Prozesse, Netzwerkverbindungen, installierte Software,
|
||||
Persistenzmechanismen. Besonders wertvoll: Unified-Log-Parser für
|
||||
System-Events, Browser-Artefakte aller größeren Browser,
|
||||
Quick-Look-Thumbnails, FSEvents für Dateiaktivitäten. Die modulare
|
||||
Architektur erlaubt selektive Sammlung. Output in strukturierten JSON/CSV
|
||||
für einfache Analyse. Zeitstempel-Normalisierung für Timeline-Erstellung.
|
||||
Unterstützt moderne macOS-Sicherheitsfeatures: TCC-Permissions,
|
||||
Code-Signing-Status, XProtect-Matches. Die Remote-Collection via MDM/SSH
|
||||
skaliert auf Unternehmensflotten. Besonders clever: Sammlung von
|
||||
Cloud-Synchronisations-Artefakten (iCloud, Dropbox). Regelmäßige Updates
|
||||
für neue macOS-Versionen. Die Alternative zu teuren kommerziellen
|
||||
Mac-Forensik-Suiten.
|
||||
url: https://github.com/jamf/aftermath/
|
||||
skillLevel: intermediate
|
||||
domains:
|
||||
- incident-response
|
||||
- static-investigations
|
||||
@@ -2167,14 +2209,6 @@ tools:
|
||||
- examination
|
||||
platforms:
|
||||
- macOS
|
||||
related_software:
|
||||
- osquery
|
||||
- KAPE
|
||||
skillLevel: intermediate
|
||||
accessType: download
|
||||
url: https://github.com/jamf/aftermath/
|
||||
license: Apache-2.0
|
||||
knowledgebase: false
|
||||
tags:
|
||||
- cli
|
||||
- triage
|
||||
@@ -2190,6 +2224,12 @@ tools:
|
||||
- json-export
|
||||
related_concepts:
|
||||
- Digital Evidence Chain of Custody
|
||||
related_software:
|
||||
- osquery
|
||||
- KAPE
|
||||
icon: 🎯
|
||||
license: Apache-2.0
|
||||
accessType: download
|
||||
- name: RegRipper
|
||||
type: software
|
||||
description: >-
|
||||
@@ -2280,17 +2320,15 @@ tools:
|
||||
- name: PhotoRec
|
||||
type: software
|
||||
description: >-
|
||||
Signature-Based File-Carving-Tool rekonstruiert gelöschte Files durch
|
||||
Signatur-basiertes File-Carving-Tool rekonstruiert gelöschte Daten durch
|
||||
Header/Footer-Pattern-Matching unabhängig vom Dateisystem-Zustand oder
|
||||
Partition-Table-Corruption. Unterstützt über 300 File-Formats: Images
|
||||
(JPEG, PNG, TIFF), Documents (PDF, DOC, XLS), Archives (ZIP, RAR), Videos
|
||||
(AVI, MP4) und Custom-Signatures. Read-Only-Operation gewährleistet
|
||||
forensische Evidence-Integrity, funktioniert bei beschädigten,
|
||||
formatierten oder korrupten Dateisystemen. Paranoid-Mode scannt jeden
|
||||
einzelnen Sektor für Maximum-Recovery-Rate bei fragmentierten Files.
|
||||
Konfigurierbare File-Extensions und Custom-Signature-Development für
|
||||
proprietäre Formats. Companion-Software TestDisk repariert
|
||||
Partition-Tables und Boot-Sectors für Filesystem-Recovery-Scenarios.
|
||||
Korruption des Dateisystems. Unterstützt über 300 Datei-Formate: Bilder
|
||||
(JPEG, PNG, TIFF), Dokumente (PDF, DOC, XLS), Archive (ZIP, RAR), Videos
|
||||
(AVI, MP4) und selbstdefinierte Dateisignaturen. Read-Only gewährleistet
|
||||
forensische Integrität, funktioniert bei beschädigten, formatierten oder
|
||||
korrupten Dateisystemen. Paranoid-Mode scannt jeden einzelnen Sektor für
|
||||
maximale Anzahl wiederhergestellter Daten. Integrierbar mit Software wie
|
||||
TestDisk.
|
||||
url: https://www.cgsecurity.org/wiki/PhotoRec
|
||||
skillLevel: beginner
|
||||
domains:
|
||||
@@ -2299,6 +2337,7 @@ tools:
|
||||
- fraud-investigation
|
||||
phases:
|
||||
- examination
|
||||
- data-collection
|
||||
platforms:
|
||||
- Windows
|
||||
- Linux
|
||||
@@ -2689,42 +2728,6 @@ tools:
|
||||
icon: 🔍
|
||||
license: Proprietary
|
||||
accessType: commercial
|
||||
- name: FRED
|
||||
type: software
|
||||
description: >-
|
||||
Hardware-Forensik-Workstation ermöglicht simultanes Imaging von 8
|
||||
Evidenzen durch Hot-Swap-UltraBay
|
||||
und integrierte Write-Blocker für SATA/IDE/USB/FireWire. Hardware-Hash-Acceleration beschleunigt
|
||||
MD5/SHA-Verifizierung, Touchscreen-Konsole steuert Parallel-Processing ohne Host-System-Belastung.
|
||||
Field-Kit-Version mit 4-Bay-Kapazität für Vor-Ort-Akquisition, modulares Design erlaubt
|
||||
RAID-Controller-Upgrades für NAS-Forensik.
|
||||
url: https://www.digitalintelligence.com/products/fred/
|
||||
skillLevel: intermediate
|
||||
domains:
|
||||
- static-investigations
|
||||
- incident-response
|
||||
phases:
|
||||
- data-collection
|
||||
platforms:
|
||||
- Hardware
|
||||
tags:
|
||||
- gui
|
||||
- commercial
|
||||
- write-blocker
|
||||
- physical-copy
|
||||
- scenario:disk_imaging
|
||||
- multithreaded
|
||||
- hardware-solution
|
||||
- hot-swap
|
||||
- raid-recovery
|
||||
- parallel-imaging
|
||||
- touch-control
|
||||
- lab-equipment
|
||||
related_concepts:
|
||||
- Digital Evidence Chain of Custody
|
||||
icon: 🖥️
|
||||
license: Proprietary
|
||||
accessType: commercial
|
||||
- name: GraphSense
|
||||
icon: 📊
|
||||
type: software
|
||||
@@ -3388,26 +3391,25 @@ tools:
|
||||
description: >-
|
||||
Die kommerzielle Blockchain-Analytics-Plattform konkurriert mit
|
||||
Chainalysis durch erweiterte Compliance-Features und RegTech-Integration.
|
||||
Clustering- Algorithmen identifizieren Services durch
|
||||
Transaction-Pattern-Analysis: Exchanges, Darknet-Markets, Mixers,
|
||||
Ransomware-Wallets. Die Compliance- Suite bietet Real-Time-Screening gegen
|
||||
OFAC/EU-Sanctions-Listen. Besonders stark: DeFi-Protocol-Analysis
|
||||
dekodiert Smart-Contract- Interactions, Cross-Chain-Tracking folgt Funds
|
||||
über Bridges, Investigation-Tools für Complex-Money-Laundering-Schemes.
|
||||
API-Integration ermöglicht Automated-AML-Workflows. Die Typology-Library
|
||||
kategorisiert Verdachtsmuster nach FATF-Standards. Court-Ready-Reports
|
||||
mit Blockchain- Evidence-Chain. Training-Programme zertifizieren
|
||||
Investigators. Unterstützt Bitcoin, Ethereum, und 15+ andere Blockchains.
|
||||
Enterprise- Deployment für Banken, Exchanges und Strafverfolgung. Der
|
||||
Clustering-Algorithmen identifizieren Dienstleister durch
|
||||
Transaktionsmusteranalyse: Exchanges, Darknet-Markets, Mixer,
|
||||
Ransomware-Wallets. Die Compliance-Suite bietet Echtzeitüberwachung von
|
||||
OFAC/EU-Sanktionslisten. Besonders stark: DeFi-Protokollanalyse dekodiert
|
||||
Smart-Contract-Interaktionen, Cross-Chain-Tracking folgt Geldern über
|
||||
verschiedene Blockchains hinweg, Ermittlungswerkzeuge für komplexe
|
||||
Geldwäsche-Schemata. API-Integration ermöglicht programmatische
|
||||
Integration. Unterstützt Bitcoin, Ethereum, und 15+ andere Blockchains.
|
||||
Enterprise-Deployment für Banken, Exchanges und Strafverfolgung. Der
|
||||
europäische Fokus macht es zur Alternative für EU-basierte
|
||||
Organisationen.
|
||||
skillLevel: intermediate
|
||||
url: https://www.elliptic.co
|
||||
icon: ₿
|
||||
skillLevel: intermediate
|
||||
domains:
|
||||
- fraud-investigation
|
||||
phases:
|
||||
- analysis
|
||||
platforms:
|
||||
- Web
|
||||
tags:
|
||||
- blockchain-analysis
|
||||
- compliance-screening
|
||||
@@ -3416,11 +3418,8 @@ tools:
|
||||
- cross-chain-tracking
|
||||
- aml-workflows
|
||||
- court-reporting
|
||||
platforms:
|
||||
- Web
|
||||
accessType: cloud
|
||||
license: Subscription
|
||||
knowledgebase: false
|
||||
icon: ₿
|
||||
license: Proprietary
|
||||
- name: FACT
|
||||
type: software
|
||||
description: >-
|
||||
@@ -4329,13 +4328,13 @@ tools:
|
||||
- name: ADB
|
||||
type: software
|
||||
description: >-
|
||||
Kommuniziert mit Android-Geräten für forensische Datenextraktion über USB
|
||||
oder Netzwerk ohne Root-Zugriff. Erstellt logische Backups von App-Daten,
|
||||
installiert forensische Analysewerkzeuge, erfasst Live-Logcats für
|
||||
Incident-Response. Port-Weiterleitung ermöglicht sichere Remote-Analyse.
|
||||
File-Transfer-Funktionen extrahieren Beweise direkt vom Gerät.
|
||||
Shell-Access für erweiterte Forensik-Kommandos. Unverzichtbar für
|
||||
Mobile-Incident-Response und App-Entwicklungs-Forensik.
|
||||
Die "Android Debug Bridge" ist grundsätzlich ein Werkzeug für
|
||||
Android-Entwickler, wird aber auch gern in der Mobile-Forensik genutzt.
|
||||
|
||||
Sie ermöglicht bei Android-Geräten forensische Datenextraktion über USB
|
||||
oder Netzwerk teilweise ohne Root-Zugriff, besonders einfach bei älteren
|
||||
Geräten. Erstellt logische Backups von App-Daten, installiert forensische
|
||||
Analysewerkzeuge.
|
||||
url: https://developer.android.com/tools/adb
|
||||
skillLevel: intermediate
|
||||
domains:
|
||||
@@ -4523,8 +4522,8 @@ tools:
|
||||
Deauth-Frames für Handshake-Erfassung. WEP-Schlüssel-Rekonstruktion in
|
||||
Minuten, WPA2-PSK-Recovery mit Dictionary-Angriffen.
|
||||
Rogue-Access-Point-Erkennung und Client-Probing-Analyse für
|
||||
Bewegungsprofile. GPU-Beschleunigung via hashcat für moderne
|
||||
Verschlüsselungsstandards.
|
||||
Bewegungsprofile. Ein sehr etabliertes Tool, das immer noch seine Relevanz
|
||||
vor allem auch im Pentesting besitzt.
|
||||
url: https://www.aircrack-ng.org/
|
||||
skillLevel: advanced
|
||||
domains:
|
||||
@@ -7160,6 +7159,97 @@ tools:
|
||||
- kernel-analysis
|
||||
related_concepts:
|
||||
- Memory Forensics & Process Analysis
|
||||
- name: ChipWhisperer
|
||||
type: software
|
||||
description: >-
|
||||
Hardware-Sicherheitsanalyse-Plattform für Firmware-Extraktion aus
|
||||
eingebetteten Systemen durch Stromverbrauchsanalysen. Automatisierte
|
||||
Differential-Power-Analysis (DPA) und Correlation-Power-Analysis (CPA)
|
||||
brechen AES-Implementierungen und extrahieren Verschlüsselungsschlüssel
|
||||
aus Mikrocontrollern. Fehlereinschleusung umgeht Bootloader-Überprüfung
|
||||
und Secure-Boot-Mechanismen. Besonders wertvoll für IoT-Geräte-Forensik:
|
||||
Umgehung von Hardware-Security-Modulen, Clock-Glitching für
|
||||
Code-Ausführungs-Übernahme, Spannungsfehler für
|
||||
Authentifizierungs-Umgehung. Python-API automatisiert Angriffsszenarien,
|
||||
CW-Lite/Pro-Hardware skaliert von Hobby bis professionelle
|
||||
Penetrationstests. Standardplattform für Hardware-Hacking und eingebettete
|
||||
Systemforensik.
|
||||
url: https://www.newae.com/chipwhisperer
|
||||
skillLevel: expert
|
||||
domains:
|
||||
- ics-forensics
|
||||
- static-investigations
|
||||
phases:
|
||||
- data-collection
|
||||
- analysis
|
||||
platforms:
|
||||
- Windows
|
||||
- Linux
|
||||
- macOS
|
||||
tags:
|
||||
- hardware-analysis
|
||||
- side-channel-attack
|
||||
- power-analysis
|
||||
- fault-injection
|
||||
- embedded-security
|
||||
- firmware-extraction
|
||||
- iot-forensics
|
||||
- hardware-hacking
|
||||
- encryption-bypass
|
||||
- python-api
|
||||
related_concepts:
|
||||
- Hash Functions & Digital Signatures
|
||||
related_software:
|
||||
- Binwalk
|
||||
- Ghidra
|
||||
- ICSpector
|
||||
icon: 🫓
|
||||
license: GPL-3.0
|
||||
accessType: download
|
||||
- name: JTAG-Analyse
|
||||
type: method
|
||||
description: >-
|
||||
Direkter Hardware-Schnittstellenzugriff auf eingebettete Systeme über
|
||||
Joint Test Action Group Debug-Schnittstelle für Firmware-Extraktion und
|
||||
Systemanalyse. Boundary-Scan-Verfahren identifiziert verfügbare JTAG-Pins
|
||||
auch bei undokumentierten Geräten durch systematische Pin-Tests.
|
||||
Flash-Speicher-Abzüge umgehen Software-Schutzmaßnahmen und extrahieren
|
||||
komplette Firmware-Abbilder inklusive verschlüsselter Bereiche.
|
||||
|
||||
Debug-Port-Ausnutzung ermöglicht Live-Speicherzugriff,
|
||||
Register-Manipulation und Code-Injection in laufende Systeme. Besonders
|
||||
kritisch für IoT-Forensik: Router-Hintertüren, intelligente
|
||||
Geräte-Manipulationen, Industriesteuerungsanlagen-Kompromittierungen.
|
||||
Kombiniert mit Chip-Off-Techniken für maximale Datenwiederherstellung bei
|
||||
sicherheitsgehärteten Geräten. Standard-Methodik für Hardware-Forensik.
|
||||
url: https://www.jtag.com/what-is-jtag-testing-of-electronics-tutorial/#
|
||||
skillLevel: expert
|
||||
domains:
|
||||
- ics-forensics
|
||||
- mobile-forensics
|
||||
- static-investigations
|
||||
phases:
|
||||
- data-collection
|
||||
- examination
|
||||
tags:
|
||||
- hardware-interface
|
||||
- firmware-extraction
|
||||
- debug-access
|
||||
- boundary-scan
|
||||
- embedded-analysis
|
||||
- iot-forensics
|
||||
- flash-memory
|
||||
- system-exploitation
|
||||
- hardware-forensics
|
||||
- pin-identification
|
||||
related_concepts:
|
||||
- Digital Evidence Chain of Custody
|
||||
related_software:
|
||||
- ChipWhisperer
|
||||
- Binwalk
|
||||
- OpenOCD
|
||||
icon: 💳
|
||||
knowledgebase: true
|
||||
domains:
|
||||
- id: incident-response
|
||||
name: Incident Response & Breach-Untersuchung
|
||||
@@ -7228,3 +7318,4 @@ scenarios:
|
||||
- id: scenario:windows-registry
|
||||
icon: 📜
|
||||
friendly_name: Windows Registry analysieren
|
||||
skill_levels: {}
|
||||
|
||||
@@ -184,7 +184,7 @@ import BaseLayout from '../layouts/BaseLayout.astro';
|
||||
|
||||
<div style="display: grid; gap: 1.25rem;">
|
||||
<div style="background-color: var(--color-bg-secondary); padding: 1.25rem; border-radius: 0.5rem;">
|
||||
<h4 style="margin: 0 0 0.5rem 0; color: var(--color-accent);">🔍 Vorschläge</h4>
|
||||
<h4 style="margin: 0 0 0.5rem 0; color: var(--color-accent);">📝 Vorschläge</h4>
|
||||
<p style="margin: 0;">
|
||||
Du hast eine Idee, wie wir den Hub erweitern können? Reiche deinen Vorschlag unkompliziert
|
||||
über unsere <a href="/contribute#vorschlaege">/contribute</a>-Seite ein.
|
||||
@@ -210,15 +210,54 @@ import BaseLayout from '../layouts/BaseLayout.astro';
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
|
||||
<path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z" />
|
||||
</svg>
|
||||
Git‑Repository besuchen
|
||||
Git-Repository besuchen
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Lightning Support Section with simple-boost integration -->
|
||||
<div style="background-color: var(--color-bg-secondary); padding: 1.25rem; border-radius: 0.5rem;">
|
||||
<h4 style="margin: 0 0 0.5rem 0; color: var(--color-accent);">⚡ Unterstützung</h4>
|
||||
<p style="margin: 0;">
|
||||
Kleine Spenden zur Infrastruktur-Finanzierung nehme ich auch gerne an, wenn es sein muss.
|
||||
Fragt einfach nach der Lightning-Adresse oder BTC-Adresse!
|
||||
<h4 style="margin: 0 0 0.75rem 0; color: var(--color-accent); display: flex; align-items: center; gap: 0.5rem;">
|
||||
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<polygon points="13,2 3,14 12,14 11,22 21,10 12,10 13,2"/>
|
||||
</svg>
|
||||
⚡ Unterstützung
|
||||
</h4>
|
||||
<p style="margin: 0 0 1rem 0; font-size: 0.875rem; line-height: 1.5;">
|
||||
Kleine Spenden zur Server-Finanzierung sind willkommen.
|
||||
</p>
|
||||
|
||||
<div style="margin-bottom: 1rem;">
|
||||
<!-- Simple EUR Payment -->
|
||||
<div style="display: flex; gap: 0.75rem; align-items: center; justify-content: center; max-width: 300px; margin: 0 auto;">
|
||||
<input
|
||||
type="number"
|
||||
id="eur-amount"
|
||||
min="0.01"
|
||||
step="0.01"
|
||||
placeholder="0,50"
|
||||
value="0.5"
|
||||
style="width: 80px; padding: 0.5rem; border: 1px solid var(--color-border); border-radius: 0.375rem; font-size: 0.875rem; text-align: center;">
|
||||
<span style="font-size: 0.875rem; color: var(--color-text-secondary);">€</span>
|
||||
<simple-boost
|
||||
id="eur-boost"
|
||||
class="bootstrap"
|
||||
nwc="nostr+walletconnect://4fe05896e1faf09d1902ea24ef589f65a9606d1710420a9574ce331e3c7f486b?relay=wss://nostr.mikoshi.de&secret=bdfc861fe71e8d9e375b7a2484052e92def7caf4b317d8f6537b784d3cd6eb3b"
|
||||
amount="0.5"
|
||||
currency="eur"
|
||||
memo="ForensicPathways Unterstützung - Vielen Dank!"
|
||||
style="background-color: var(--color-accent); color: white; border: none; border-radius: 0.375rem; padding: 0.5rem 1rem; font-size: 0.875rem; cursor: pointer;">
|
||||
⚡ Senden
|
||||
</simple-boost>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div style="margin-top: 1rem; padding: 0.75rem; background-color: var(--color-bg); border-radius: 0.375rem; border-left: 3px solid var(--color-accent);">
|
||||
<p style="margin: 0; font-size: 0.75rem; color: var(--color-text-secondary); line-height: 1.4; text-align: center;">
|
||||
<strong>⚡ Lightning-Unterstützung:</strong> Betrag eingeben und senden.
|
||||
Benötigt eine Lightning-Wallet wie <a href="https://getalby.com" target="_blank" rel="noopener" style="color: var(--color-accent);">Alby</a> oder
|
||||
<a href="https://phoenix.acinq.co" target="_blank" rel="noopener" style="color: var(--color-accent);">Phoenix</a>.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -231,4 +270,70 @@ import BaseLayout from '../layouts/BaseLayout.astro';
|
||||
</p>
|
||||
</div>
|
||||
</section>
|
||||
</BaseLayout>
|
||||
</BaseLayout>
|
||||
|
||||
<script>
|
||||
// TODO: cleanup
|
||||
import('simple-boost').then(() => {
|
||||
console.log('Simple-boost loaded successfully from local dependencies');
|
||||
|
||||
setupDynamicAmounts();
|
||||
}).catch(error => {
|
||||
console.error('Failed to load simple-boost:', error);
|
||||
const script = document.createElement('script');
|
||||
script.type = 'module';
|
||||
script.src = '/node_modules/simple-boost/dist/simple-boost.js';
|
||||
script.onload = () => {
|
||||
console.log('Simple-boost fallback loaded');
|
||||
setupDynamicAmounts();
|
||||
};
|
||||
script.onerror = () => console.error('Simple-boost fallback failed');
|
||||
document.head.appendChild(script);
|
||||
});
|
||||
|
||||
function setupDynamicAmounts() {
|
||||
const eurBoost = document.getElementById('eur-boost');
|
||||
const eurInput = document.getElementById('eur-amount') as HTMLInputElement;
|
||||
|
||||
if (eurBoost && eurInput) {
|
||||
eurBoost.addEventListener('click', (e) => {
|
||||
const amount = parseFloat(eurInput.value) || 0.5;
|
||||
eurBoost.setAttribute('amount', amount.toString());
|
||||
console.log('EUR amount set to:', amount);
|
||||
});
|
||||
|
||||
eurInput.addEventListener('input', () => {
|
||||
const amount = parseFloat(eurInput.value) || 0.5;
|
||||
eurBoost.setAttribute('amount', amount.toString());
|
||||
});
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<style>
|
||||
simple-boost {
|
||||
--simple-boost-primary: var(--color-warning);
|
||||
--simple-boost-primary-hover: var(--color-accent);
|
||||
--simple-boost-text: white;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
simple-boost:hover {
|
||||
transform: translateY(-1px);
|
||||
box-shadow: 0 4px 8px rgba(0,0,0,0.15) !important;
|
||||
}
|
||||
|
||||
simple-boost .simple-boost-button {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
font-family: inherit;
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
/* Loading state styling */
|
||||
simple-boost[loading] {
|
||||
opacity: 0.7;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
</style>
|
||||
@@ -1,16 +1,18 @@
|
||||
// src/pages/api/ai/embeddings-status.ts
|
||||
import type { APIRoute } from 'astro';
|
||||
import { embeddingsService } from '../../../utils/embeddings.js';
|
||||
|
||||
export const prerender = false;
|
||||
|
||||
export const GET: APIRoute = async () => {
|
||||
try {
|
||||
const { embeddingsService } = await import('../../../utils/embeddings.js');
|
||||
await embeddingsService.waitForInitialization();
|
||||
|
||||
const stats = embeddingsService.getStats();
|
||||
const status = stats.enabled && stats.initialized ? 'ready' :
|
||||
stats.enabled && !stats.initialized ? 'initializing' : 'disabled';
|
||||
const status = stats.initialized ? 'ready' :
|
||||
!stats.initialized ? 'initializing' : 'disabled';
|
||||
|
||||
console.log(`[EMBEDDINGS-STATUS-API] Service status: ${status}, stats:`, stats);
|
||||
|
||||
return new Response(JSON.stringify({
|
||||
success: true,
|
||||
@@ -23,6 +25,8 @@ export const GET: APIRoute = async () => {
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
console.error('[EMBEDDINGS-STATUS-API] Error checking embeddings status:', error);
|
||||
|
||||
return new Response(JSON.stringify({
|
||||
success: false,
|
||||
embeddings: { enabled: false, initialized: false, count: 0 },
|
||||
|
||||
@@ -1,28 +1,57 @@
|
||||
// src/pages/api/ai/enhance-input.ts - Enhanced AI service compatibility
|
||||
|
||||
// src/pages/api/ai/enhance-input.ts
|
||||
import type { APIRoute } from 'astro';
|
||||
import { withAPIAuth } from '../../../utils/auth.js';
|
||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
||||
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';
|
||||
import { aiService } from '../../../utils/aiService.js';
|
||||
import { JSONParser } from '../../../utils/jsonUtils.js';
|
||||
import { getPrompt } from '../../../config/prompts.js';
|
||||
|
||||
export const prerender = false;
|
||||
|
||||
function getEnv(key: string): string {
|
||||
const value = process.env[key];
|
||||
if (!value) {
|
||||
throw new Error(`Missing environment variable: ${key}`);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
const RATE_LIMIT_WINDOW_MS =
|
||||
Number.isFinite(parseInt(process.env.RATE_LIMIT_WINDOW_MS ?? '', 10))
|
||||
? parseInt(process.env.RATE_LIMIT_WINDOW_MS!, 10)
|
||||
: 60_000;
|
||||
|
||||
const AI_ENDPOINT = getEnv('AI_ANALYZER_ENDPOINT');
|
||||
const AI_ANALYZER_API_KEY = getEnv('AI_ANALYZER_API_KEY');
|
||||
const AI_ANALYZER_MODEL = getEnv('AI_ANALYZER_MODEL');
|
||||
const RATE_LIMIT_MAX =
|
||||
Number.isFinite(parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS ?? '', 10))
|
||||
? parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS!, 10)
|
||||
: 5;
|
||||
|
||||
const INPUT_MIN_CHARS = 40;
|
||||
const INPUT_MAX_CHARS = 1000;
|
||||
const Q_MIN_LEN = 15;
|
||||
const Q_MAX_LEN = 160;
|
||||
const Q_MAX_COUNT = 3;
|
||||
const AI_TEMPERATURE = 0.3;
|
||||
const CLEANER_TEMPERATURE = 0.0;
|
||||
|
||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
|
||||
const RATE_LIMIT_WINDOW = 60 * 1000;
|
||||
const RATE_LIMIT_MAX = 5;
|
||||
|
||||
function checkRateLimit(userId: string): boolean {
|
||||
const now = Date.now();
|
||||
const entry = rateLimitStore.get(userId);
|
||||
if (!entry || now > entry.resetTime) {
|
||||
rateLimitStore.set(userId, { count: 1, resetTime: now + RATE_LIMIT_WINDOW_MS });
|
||||
return true;
|
||||
}
|
||||
if (entry.count >= RATE_LIMIT_MAX) return false;
|
||||
entry.count++;
|
||||
return true;
|
||||
}
|
||||
|
||||
function cleanupExpiredRateLimits(): void {
|
||||
const now = Date.now();
|
||||
for (const [userId, entry] of rateLimitStore.entries()) {
|
||||
if (now > entry.resetTime) rateLimitStore.delete(userId);
|
||||
}
|
||||
}
|
||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
||||
|
||||
/**
|
||||
* Helpers
|
||||
*/
|
||||
function sanitizeInput(input: string): string {
|
||||
return input
|
||||
.replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
|
||||
@@ -30,112 +59,24 @@ function sanitizeInput(input: string): string {
|
||||
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
|
||||
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
|
||||
.trim()
|
||||
.slice(0, 1000);
|
||||
.slice(0, INPUT_MAX_CHARS);
|
||||
}
|
||||
|
||||
function checkRateLimit(userId: string): boolean {
|
||||
const now = Date.now();
|
||||
const userLimit = rateLimitStore.get(userId);
|
||||
|
||||
if (!userLimit || now > userLimit.resetTime) {
|
||||
rateLimitStore.set(userId, { count: 1, resetTime: now + RATE_LIMIT_WINDOW });
|
||||
return true;
|
||||
}
|
||||
|
||||
if (userLimit.count >= RATE_LIMIT_MAX) {
|
||||
return false;
|
||||
}
|
||||
|
||||
userLimit.count++;
|
||||
return true;
|
||||
}
|
||||
|
||||
function cleanupExpiredRateLimits() {
|
||||
const now = Date.now();
|
||||
for (const [userId, limit] of rateLimitStore.entries()) {
|
||||
if (now > limit.resetTime) {
|
||||
rateLimitStore.delete(userId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
||||
|
||||
function createEnhancementPrompt(input: string): string {
|
||||
return `Sie sind ein DFIR-Experte mit Spezialisierung auf forensische Methodik. Ein Nutzer beschreibt ein forensisches Szenario oder Problem. Analysieren Sie die Eingabe auf Vollständigkeit für eine wissenschaftlich fundierte forensische Untersuchung.
|
||||
|
||||
ANALYSIEREN SIE DIESE FORENSISCHEN KATEGORIEN:
|
||||
1. **Incident Context**: Was ist passiert? Welche Angriffsvektoren oder technischen Probleme liegen vor?
|
||||
2. **Affected Systems**: Welche spezifischen Technologien/Plattformen sind betroffen? (Windows/Linux/ICS/SCADA/Mobile/Cloud/Network Infrastructure)
|
||||
3. **Available Evidence**: Welche forensischen Datenquellen stehen zur Verfügung? (RAM-Dumps, Disk-Images, Log-Files, Network-Captures, Registry-Hives)
|
||||
4. **Investigation Objectives**: Was soll erreicht werden? (IOC-Extraktion, Timeline-Rekonstruktion, Attribution, Impact-Assessment)
|
||||
5. **Timeline Constraints**: Wie zeitkritisch ist die Untersuchung?
|
||||
6. **Legal & Compliance**: Rechtliche Anforderungen, Chain of Custody, Compliance-Rahmen (DSGVO, sector-specific regulations)
|
||||
7. **Technical Constraints**: Verfügbare Ressourcen, Skills, Infrastrukturbeschränkungen
|
||||
|
||||
WENN die Beschreibung alle kritischen forensischen Aspekte abdeckt: Geben Sie eine leere Liste [] zurück.
|
||||
|
||||
WENN wichtige forensische Details fehlen: Formulieren Sie 2-3 präzise Fragen, die die kritischsten Lücken für eine wissenschaftlich fundierte forensische Analyse schließen.
|
||||
|
||||
QUALITÄTSKRITERIEN FÜR FRAGEN:
|
||||
- Forensisch spezifisch, nicht allgemein (NICHT: "Mehr Details?")
|
||||
- Methodisch relevant (NICHT: "Wann passierte das?")
|
||||
- Priorisiert nach Auswirkung auf die forensische Untersuchungsqualität
|
||||
- Die Frage soll maximal 20 Wörter umfassen
|
||||
|
||||
ANTWORTFORMAT (NUR JSON, KEIN ZUSÄTZLICHER TEXT):
|
||||
[
|
||||
"spezifische Frage 1?",
|
||||
"spezifische Frage 2?",
|
||||
"spezifische Frage 3?"
|
||||
]
|
||||
|
||||
NUTZER-EINGABE:
|
||||
${input}
|
||||
`.trim();
|
||||
}
|
||||
|
||||
async function callAIService(prompt: string): Promise<Response> {
|
||||
const endpoint = AI_ENDPOINT;
|
||||
const apiKey = AI_ANALYZER_API_KEY;
|
||||
const model = AI_ANALYZER_MODEL;
|
||||
|
||||
let headers: Record<string, string> = {
|
||||
'Content-Type': 'application/json'
|
||||
};
|
||||
|
||||
if (apiKey) {
|
||||
headers['Authorization'] = `Bearer ${apiKey}`;
|
||||
console.log('[ENHANCE API] Using API key authentication');
|
||||
} else {
|
||||
console.log('[ENHANCE API] No API key - making request without authentication');
|
||||
}
|
||||
|
||||
const requestBody = {
|
||||
model,
|
||||
messages: [{ role: 'user', content: prompt }],
|
||||
max_tokens: 300,
|
||||
temperature: 0.7,
|
||||
top_p: 0.9,
|
||||
frequency_penalty: 0.2,
|
||||
presence_penalty: 0.1
|
||||
};
|
||||
|
||||
return fetch(`${endpoint}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify(requestBody)
|
||||
});
|
||||
function stripJsonFences(s: string): string {
|
||||
return s.replace(/^```json\s*/i, '')
|
||||
.replace(/^```\s*/i, '')
|
||||
.replace(/\s*```\s*$/, '')
|
||||
.trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Handler
|
||||
*/
|
||||
export const POST: APIRoute = async ({ request }) => {
|
||||
try {
|
||||
const authResult = await withAPIAuth(request, 'ai');
|
||||
if (!authResult.authenticated) {
|
||||
return createAuthErrorResponse();
|
||||
}
|
||||
|
||||
const userId = authResult.userId;
|
||||
const auth = await withAPIAuth(request, 'ai');
|
||||
if (!auth.authenticated) return createAuthErrorResponse();
|
||||
const userId = auth.userId;
|
||||
|
||||
if (!checkRateLimit(userId)) {
|
||||
return apiError.rateLimit('Enhancement rate limit exceeded');
|
||||
@@ -144,79 +85,53 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
const body = await request.json();
|
||||
const { input } = body;
|
||||
|
||||
if (!input || typeof input !== 'string' || input.length < 40) {
|
||||
return apiError.badRequest('Input too short for enhancement (minimum 40 characters)');
|
||||
if (!input || typeof input !== 'string' || input.length < INPUT_MIN_CHARS) {
|
||||
return apiError.badRequest(`Input too short for enhancement (minimum ${INPUT_MIN_CHARS} characters)`);
|
||||
}
|
||||
|
||||
const sanitizedInput = sanitizeInput(input);
|
||||
if (sanitizedInput.length < 40) {
|
||||
if (sanitizedInput.length < INPUT_MIN_CHARS) {
|
||||
return apiError.badRequest('Input too short after sanitization');
|
||||
}
|
||||
|
||||
const systemPrompt = createEnhancementPrompt(sanitizedInput);
|
||||
const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
|
||||
|
||||
const aiResponse = await enqueueApiCall(() => callAIService(systemPrompt), taskId);
|
||||
const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`;
|
||||
const questionsPrompt = getPrompt('enhancementQuestions', sanitizedInput);
|
||||
|
||||
if (!aiResponse.ok) {
|
||||
const errorText = await aiResponse.text();
|
||||
console.error('[ENHANCE API] AI enhancement error:', errorText, 'Status:', aiResponse.status);
|
||||
return apiServerError.unavailable('Enhancement service unavailable');
|
||||
}
|
||||
console.log(`[ENHANCE-API] Processing enhancement request for user: ${userId}`);
|
||||
|
||||
const aiData = await aiResponse.json();
|
||||
const aiContent = aiData.choices?.[0]?.message?.content;
|
||||
const aiResponse = await enqueueApiCall(
|
||||
() => aiService.callAI(questionsPrompt, { temperature: AI_TEMPERATURE }),
|
||||
taskId
|
||||
);
|
||||
|
||||
if (!aiContent) {
|
||||
if (!aiResponse?.content) {
|
||||
return apiServerError.unavailable('No enhancement response');
|
||||
}
|
||||
|
||||
let questions;
|
||||
try {
|
||||
const cleanedContent = aiContent
|
||||
.replace(/^```json\s*/i, '')
|
||||
.replace(/\s*```\s*$/, '')
|
||||
.trim();
|
||||
questions = JSON.parse(cleanedContent);
|
||||
|
||||
if (!Array.isArray(questions)) {
|
||||
throw new Error('Response is not an array');
|
||||
}
|
||||
|
||||
questions = questions
|
||||
.filter(q => typeof q === 'string' && q.length > 20 && q.length < 200)
|
||||
.filter(q => q.includes('?'))
|
||||
.filter(q => {
|
||||
const forensicsTerms = ['forensisch', 'log', 'dump', 'image', 'artefakt', 'evidence', 'incident', 'system', 'netzwerk', 'zeitraum', 'verfügbar'];
|
||||
const lowerQ = q.toLowerCase();
|
||||
return forensicsTerms.some(term => lowerQ.includes(term));
|
||||
})
|
||||
.map(q => q.trim())
|
||||
.slice(0, 3);
|
||||
|
||||
if (questions.length === 0) {
|
||||
questions = [];
|
||||
}
|
||||
let parsed: unknown = JSONParser.safeParseJSON(stripJsonFences(aiResponse.content), null);
|
||||
|
||||
} catch (error) {
|
||||
console.error('Failed to parse enhancement response:', aiContent);
|
||||
questions = [];
|
||||
}
|
||||
let questions: string[] = Array.isArray(parsed) ? parsed : [];
|
||||
questions = questions
|
||||
.filter(q => typeof q === 'string')
|
||||
.map(q => q.trim())
|
||||
.filter(q => q.endsWith('?'))
|
||||
.filter(q => q.length >= Q_MIN_LEN && q.length <= Q_MAX_LEN)
|
||||
.slice(0, Q_MAX_COUNT);
|
||||
|
||||
console.log(`[ENHANCE API] User: ${userId}, Forensics Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
|
||||
console.log(`[ENHANCE-API] User: ${userId}, Questions generated: ${questions.length}, Input length: ${sanitizedInput.length}`);
|
||||
|
||||
return new Response(JSON.stringify({
|
||||
success: true,
|
||||
questions,
|
||||
taskId,
|
||||
inputComplete: questions.length === 0
|
||||
inputComplete: questions.length === 0
|
||||
}), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
console.error('Enhancement error:', error);
|
||||
} catch (err) {
|
||||
console.error('[ENHANCE-API] Enhancement error:', err);
|
||||
return apiServerError.internal('Enhancement processing failed');
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
// src/pages/api/ai/query.ts
|
||||
|
||||
import type { APIRoute } from 'astro';
|
||||
import { withAPIAuth } from '../../../utils/auth.js';
|
||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
||||
@@ -21,15 +20,14 @@ const MAIN_RATE_LIMIT_MAX = parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS || '
|
||||
const MICRO_TASK_TOTAL_LIMIT = parseInt(process.env.AI_MICRO_TASK_TOTAL_LIMIT || '50', 10);
|
||||
|
||||
function sanitizeInput(input: string): string {
|
||||
let sanitized = input
|
||||
return input
|
||||
.replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
|
||||
.replace(/\<\/?[^>]+(>|$)/g, '')
|
||||
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
|
||||
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
|
||||
.trim();
|
||||
|
||||
sanitized = sanitized.slice(0, 2000).replace(/\s+/g, ' ');
|
||||
return sanitized;
|
||||
.trim()
|
||||
.slice(0, 2000)
|
||||
.replace(/\s+/g, ' ');
|
||||
}
|
||||
|
||||
function checkRateLimit(userId: string): { allowed: boolean; reason?: string; microTasksRemaining?: number } {
|
||||
@@ -78,7 +76,7 @@ function incrementMicroTaskCount(userId: string, aiCallsMade: number): void {
|
||||
}
|
||||
}
|
||||
|
||||
function cleanupExpiredRateLimits() {
|
||||
function cleanupExpiredRateLimits(): void {
|
||||
const now = Date.now();
|
||||
const maxStoreSize = 1000;
|
||||
|
||||
@@ -118,51 +116,52 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
const body = await request.json();
|
||||
const { query, mode = 'workflow', taskId: clientTaskId } = body;
|
||||
|
||||
console.log(`[MICRO-TASK API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
|
||||
console.log(`[MICRO-TASK API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);
|
||||
console.log(`[AI-API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
|
||||
console.log(`[AI-API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);
|
||||
|
||||
if (!query || typeof query !== 'string') {
|
||||
console.log(`[MICRO-TASK API] Invalid query for task ${clientTaskId}`);
|
||||
console.log(`[AI-API] Invalid query for task ${clientTaskId}`);
|
||||
return apiError.badRequest('Query required');
|
||||
}
|
||||
|
||||
if (!['workflow', 'tool'].includes(mode)) {
|
||||
console.log(`[MICRO-TASK API] Invalid mode for task ${clientTaskId}: ${mode}`);
|
||||
console.log(`[AI-API] Invalid mode for task ${clientTaskId}: ${mode}`);
|
||||
return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
|
||||
}
|
||||
|
||||
const sanitizedQuery = sanitizeInput(query);
|
||||
if (sanitizedQuery.includes('[FILTERED]')) {
|
||||
console.log(`[MICRO-TASK API] Filtered input detected for task ${clientTaskId}`);
|
||||
console.log(`[AI-API] Filtered input detected for task ${clientTaskId}`);
|
||||
return apiError.badRequest('Invalid input detected');
|
||||
}
|
||||
|
||||
const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
|
||||
|
||||
console.log(`[MICRO-TASK API] About to enqueue micro-task pipeline ${taskId}`);
|
||||
console.log(`[AI-API] Enqueueing pipeline task ${taskId}`);
|
||||
|
||||
const result = await enqueueApiCall(() =>
|
||||
aiPipeline.processQuery(sanitizedQuery, mode)
|
||||
, taskId);
|
||||
|
||||
if (!result || !result.recommendation) {
|
||||
return apiServerError.unavailable('No response from micro-task AI pipeline');
|
||||
return apiServerError.unavailable('No response from AI pipeline');
|
||||
}
|
||||
|
||||
const stats = result.processingStats;
|
||||
const estimatedAICallsMade = stats.microTasksCompleted + stats.microTasksFailed;
|
||||
incrementMicroTaskCount(userId, estimatedAICallsMade);
|
||||
|
||||
console.log(`[MICRO-TASK API] Pipeline completed for ${taskId}:`);
|
||||
console.log(` - Mode: ${mode}`);
|
||||
console.log(` - User: ${userId}`);
|
||||
console.log(` - Query length: ${sanitizedQuery.length}`);
|
||||
console.log(` - Processing time: ${stats.processingTimeMs}ms`);
|
||||
console.log(` - Micro-tasks completed: ${stats.microTasksCompleted}`);
|
||||
console.log(` - Micro-tasks failed: ${stats.microTasksFailed}`);
|
||||
console.log(` - Estimated AI calls: ${estimatedAICallsMade}`);
|
||||
console.log(` - Embeddings used: ${stats.embeddingsUsed}`);
|
||||
console.log(` - Final items: ${stats.finalSelectedItems}`);
|
||||
console.log(`[AI-API] Pipeline completed for ${taskId}:`, {
|
||||
mode,
|
||||
user: userId,
|
||||
queryLength: sanitizedQuery.length,
|
||||
processingTime: stats.processingTimeMs,
|
||||
microTasksCompleted: stats.microTasksCompleted,
|
||||
microTasksFailed: stats.microTasksFailed,
|
||||
estimatedAICalls: estimatedAICallsMade,
|
||||
embeddingsUsed: stats.embeddingsUsed,
|
||||
finalItems: stats.finalSelectedItems
|
||||
});
|
||||
|
||||
const currentLimit = rateLimitStore.get(userId);
|
||||
const remainingMicroTasks = currentLimit ?
|
||||
@@ -176,7 +175,7 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
query: sanitizedQuery,
|
||||
processingStats: {
|
||||
...result.processingStats,
|
||||
pipelineType: 'micro-task',
|
||||
pipelineType: 'refactored',
|
||||
microTasksSuccessRate: stats.microTasksCompleted / (stats.microTasksCompleted + stats.microTasksFailed),
|
||||
averageTaskTime: stats.processingTimeMs / (stats.microTasksCompleted + stats.microTasksFailed),
|
||||
estimatedAICallsMade
|
||||
@@ -192,18 +191,16 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
console.error('[MICRO-TASK API] Pipeline error:', error);
|
||||
console.error('[AI-API] Pipeline error:', error);
|
||||
|
||||
if (error.message.includes('embeddings')) {
|
||||
return apiServerError.unavailable('Embeddings service error - using AI fallback');
|
||||
} else if (error.message.includes('micro-task')) {
|
||||
return apiServerError.unavailable('Micro-task pipeline error - some analysis steps failed');
|
||||
} else if (error.message.includes('selector')) {
|
||||
return apiServerError.unavailable('AI selector service error');
|
||||
return apiServerError.unavailable('Embeddings service error');
|
||||
} else if (error.message.includes('AI')) {
|
||||
return apiServerError.unavailable('AI service error');
|
||||
} else if (error.message.includes('rate limit')) {
|
||||
return apiError.rateLimit('AI service rate limits exceeded during micro-task processing');
|
||||
return apiError.rateLimit('AI service rate limits exceeded');
|
||||
} else {
|
||||
return apiServerError.internal('Micro-task AI pipeline error');
|
||||
return apiServerError.internal('AI pipeline error');
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -180,7 +180,6 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
return apiSpecial.invalidJSON();
|
||||
}
|
||||
|
||||
// Preprocess form data to handle autocomplete inputs
|
||||
body = preprocessFormData(body);
|
||||
|
||||
const sanitizedBody = sanitizeInput(body);
|
||||
|
||||
@@ -37,13 +37,6 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
|
||||
const { embeddingsService } = await import('../../../utils/embeddings.js');
|
||||
|
||||
if (!embeddingsService.isEnabled()) {
|
||||
return new Response(
|
||||
JSON.stringify({ success: false, error: 'Semantic search not available' }),
|
||||
{ status: 400, headers: { 'Content-Type': 'application/json' } }
|
||||
);
|
||||
}
|
||||
|
||||
await embeddingsService.waitForInitialization();
|
||||
|
||||
const similarItems = await embeddingsService.findSimilar(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
// src/pages/contribute/index.astro - Consolidated Auth
|
||||
// src/pages/contribute/index.astro
|
||||
import BaseLayout from '../../layouts/BaseLayout.astro';
|
||||
import { withAuth } from '../../utils/auth.js';
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
---
|
||||
//src/pages/index.astro
|
||||
import BaseLayout from '../layouts/BaseLayout.astro';
|
||||
import ToolCard from '../components/ToolCard.astro';
|
||||
import ToolFilters from '../components/ToolFilters.astro';
|
||||
@@ -509,9 +510,7 @@ if (aiAuthRequired) {
|
||||
}, 500);
|
||||
};
|
||||
|
||||
function handleSharedURL() {
|
||||
console.log('[SHARE] Handling shared URL:', window.location.search);
|
||||
|
||||
function handleSharedURL() {
|
||||
const urlParams = new URLSearchParams(window.location.search);
|
||||
const toolParam = urlParams.get('tool');
|
||||
const viewParam = urlParams.get('view');
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
---
|
||||
//src/pages/knowledgebase.astro
|
||||
import BaseLayout from '../layouts/BaseLayout.astro';
|
||||
import { getCollection } from 'astro:content';
|
||||
import { getToolsData } from '../utils/dataService.js';
|
||||
|
||||
@@ -588,10 +588,8 @@ const currentUrl = Astro.url.href;
|
||||
});
|
||||
}
|
||||
|
||||
// Make generateTOCContent available globally for the auth check script
|
||||
window.generateTOCContent = generateTOCContent;
|
||||
|
||||
// Initialize everything on page load
|
||||
document.addEventListener('DOMContentLoaded', () => {
|
||||
calculateReadingTime();
|
||||
generateSidebarTOC();
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
---
|
||||
//src/pages/status.astro
|
||||
import BaseLayout from '../layouts/BaseLayout.astro';
|
||||
import { getToolsData } from '../utils/dataService.js';
|
||||
|
||||
|
||||
@@ -675,6 +675,7 @@ input[type="checkbox"] {
|
||||
border-radius: 0.25rem;
|
||||
font-size: 0.75rem;
|
||||
margin: 0.125rem;
|
||||
max-height: 1.5rem;
|
||||
}
|
||||
|
||||
/* ===================================================================
|
||||
@@ -1806,11 +1807,44 @@ input[type="checkbox"] {
|
||||
.ai-textarea-section {
|
||||
flex: 1;
|
||||
min-width: 0;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.ai-textarea-section textarea {
|
||||
width: 100%;
|
||||
height: 180px;
|
||||
min-height: 180px;
|
||||
max-height: 300px;
|
||||
resize: vertical;
|
||||
font-size: 0.9375rem;
|
||||
line-height: 1.5;
|
||||
padding: 0.75rem;
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: 0.375rem;
|
||||
background-color: var(--color-bg);
|
||||
color: var(--color-text);
|
||||
transition: var(--transition-fast);
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
.confidence-tooltip {
|
||||
background: var(--color-bg) !important;
|
||||
border: 2px solid var(--color-border) !important;
|
||||
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15) !important;
|
||||
z-index: 2000 !important;
|
||||
}
|
||||
|
||||
.ai-textarea-section textarea:focus {
|
||||
outline: none;
|
||||
border-color: var(--color-primary);
|
||||
box-shadow: 0 0 0 3px rgb(37 99 235 / 10%);
|
||||
}
|
||||
|
||||
.ai-suggestions-section {
|
||||
flex: 0 0 320px;
|
||||
min-height: 120px;
|
||||
min-height: 180px;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
.ai-input-container textarea {
|
||||
@@ -1887,7 +1921,6 @@ input[type="checkbox"] {
|
||||
box-shadow: 0 2px 4px 0 rgb(255 255 255 / 10%);
|
||||
}
|
||||
|
||||
/* Enhanced contextual analysis cards */
|
||||
.contextual-analysis-card {
|
||||
margin-bottom: 2rem;
|
||||
border-left: 4px solid;
|
||||
@@ -1984,7 +2017,6 @@ input[type="checkbox"] {
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
/* Enhanced queue status for micro-tasks */
|
||||
.queue-status-card.micro-task-mode {
|
||||
border-left: 4px solid var(--color-primary);
|
||||
}
|
||||
@@ -1997,7 +2029,6 @@ input[type="checkbox"] {
|
||||
border-radius: 0.5rem 0.5rem 0 0;
|
||||
}
|
||||
|
||||
/* Mobile responsive adjustments */
|
||||
@media (max-width: 768px) {
|
||||
.micro-task-steps {
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
@@ -2189,12 +2220,20 @@ input[type="checkbox"] {
|
||||
border-radius: 1rem;
|
||||
font-weight: 500;
|
||||
text-transform: uppercase;
|
||||
position: relative;
|
||||
z-index: 1;
|
||||
}
|
||||
|
||||
.tool-rec-priority.high { background-color: var(--color-error); color: white; }
|
||||
.tool-rec-priority.medium { background-color: var(--color-warning); color: white; }
|
||||
.tool-rec-priority.low { background-color: var(--color-accent); color: white; }
|
||||
|
||||
[data-theme="dark"] .confidence-tooltip {
|
||||
background: var(--color-bg-secondary) !important;
|
||||
border-color: var(--color-border) !important;
|
||||
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.4) !important;
|
||||
}
|
||||
|
||||
.tool-rec-justification {
|
||||
font-size: 0.875rem;
|
||||
line-height: 1.5;
|
||||
@@ -2613,7 +2652,8 @@ footer {
|
||||
================================================================= */
|
||||
|
||||
.smart-prompting-container {
|
||||
height: 100%;
|
||||
height: auto;
|
||||
min-height: 180px;
|
||||
animation: smartPromptSlideIn 0.4s cubic-bezier(0.4, 0, 0.2, 1);
|
||||
}
|
||||
|
||||
@@ -2622,8 +2662,10 @@ footer {
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: 0.5rem;
|
||||
padding: 1rem;
|
||||
height: 100%;
|
||||
min-height: 120px;
|
||||
height: auto;
|
||||
min-height: 180px;
|
||||
max-height: 400px;
|
||||
overflow-y: auto;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
opacity: 0.85;
|
||||
@@ -2663,8 +2705,8 @@ footer {
|
||||
|
||||
/* Smart Prompting Hint */
|
||||
.smart-prompting-hint {
|
||||
height: 100%;
|
||||
min-height: 120px;
|
||||
height: 180px;
|
||||
min-height: 180px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
animation: hintFadeIn 0.3s ease-in-out;
|
||||
@@ -3378,8 +3420,8 @@ footer {
|
||||
|
||||
.ai-suggestions-section {
|
||||
flex: 0 0 auto;
|
||||
width: 100%;
|
||||
max-width: none;
|
||||
height: auto;
|
||||
min-height: 120px;
|
||||
}
|
||||
|
||||
.ai-textarea-section {
|
||||
@@ -3389,6 +3431,11 @@ footer {
|
||||
min-height: 100px;
|
||||
}
|
||||
|
||||
.ai-textarea-section textarea {
|
||||
height: 150px;
|
||||
min-height: 150px;
|
||||
}
|
||||
|
||||
.ai-spotlight-content {
|
||||
flex-direction: column;
|
||||
gap: 0.75rem;
|
||||
|
||||
@@ -691,12 +691,11 @@
|
||||
|
||||
|
||||
/* ==========================================================================
|
||||
VIDEO EMBEDDING - Add to knowledgebase.css
|
||||
VIDEO EMBEDDING - ULTRA SIMPLE: Just full width, natural aspect ratios
|
||||
========================================================================== */
|
||||
|
||||
/* Video Container and Responsive Wrapper */
|
||||
/* Video Container - just a styled wrapper */
|
||||
:where(.markdown-content) .video-container {
|
||||
position: relative;
|
||||
width: 100%;
|
||||
margin: 2rem 0;
|
||||
border-radius: var(--radius-lg, 0.75rem);
|
||||
@@ -705,84 +704,34 @@
|
||||
box-shadow: var(--shadow-lg, 0 12px 30px rgba(0,0,0,0.16));
|
||||
}
|
||||
|
||||
/* Responsive 16:9 aspect ratio by default */
|
||||
:where(.markdown-content) .video-container.aspect-16-9 {
|
||||
aspect-ratio: 16 / 9;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-container.aspect-4-3 {
|
||||
aspect-ratio: 4 / 3;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-container.aspect-1-1 {
|
||||
aspect-ratio: 1 / 1;
|
||||
}
|
||||
|
||||
/* Video Element Styling */
|
||||
/* Video Element - full width, natural aspect ratio */
|
||||
:where(.markdown-content) .video-container video {
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
object-fit: contain;
|
||||
height: auto;
|
||||
display: block;
|
||||
background-color: #000;
|
||||
border: none;
|
||||
outline: none;
|
||||
}
|
||||
|
||||
/* Custom Video Controls Enhancement */
|
||||
:where(.markdown-content) video::-webkit-media-controls-panel {
|
||||
background-color: rgba(0, 0, 0, 0.8);
|
||||
/* YouTube iframe - full width, preserve embedded dimensions ratio */
|
||||
:where(.markdown-content) .video-container iframe {
|
||||
width: 100%;
|
||||
height: auto;
|
||||
aspect-ratio: 16 / 9; /* Only for iframes since they don't have intrinsic ratio */
|
||||
display: block;
|
||||
border: none;
|
||||
outline: none;
|
||||
}
|
||||
|
||||
:where(.markdown-content) video::-webkit-media-controls-current-time-display,
|
||||
:where(.markdown-content) video::-webkit-media-controls-time-remaining-display {
|
||||
color: white;
|
||||
text-shadow: none;
|
||||
/* Focus states for accessibility */
|
||||
:where(.markdown-content) .video-container video:focus,
|
||||
:where(.markdown-content) .video-container iframe:focus {
|
||||
outline: 3px solid var(--color-primary);
|
||||
outline-offset: 3px;
|
||||
}
|
||||
|
||||
/* Video Loading State */
|
||||
:where(.markdown-content) .video-container .video-loading {
|
||||
position: absolute;
|
||||
top: 50%;
|
||||
left: 50%;
|
||||
transform: translate(-50%, -50%);
|
||||
color: var(--color-text-secondary);
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-container .video-loading .spinner {
|
||||
width: 2rem;
|
||||
height: 2rem;
|
||||
border: 3px solid var(--color-border);
|
||||
border-top: 3px solid var(--color-primary);
|
||||
border-radius: 50%;
|
||||
animation: spin 1s linear infinite;
|
||||
}
|
||||
|
||||
@keyframes spin {
|
||||
0% { transform: rotate(0deg); }
|
||||
100% { transform: rotate(360deg); }
|
||||
}
|
||||
|
||||
/* Video Error State */
|
||||
:where(.markdown-content) .video-container .video-error {
|
||||
position: absolute;
|
||||
top: 50%;
|
||||
left: 50%;
|
||||
transform: translate(-50%, -50%);
|
||||
text-align: center;
|
||||
color: var(--color-error, #dc3545);
|
||||
padding: 2rem;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-container .video-error .error-icon {
|
||||
font-size: 3rem;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
/* Video Metadata Overlay */
|
||||
/* Video Metadata */
|
||||
:where(.markdown-content) .video-metadata {
|
||||
background-color: var(--color-bg-secondary);
|
||||
border: 1px solid var(--color-border);
|
||||
@@ -796,69 +745,13 @@
|
||||
:where(.markdown-content) .video-metadata .video-title {
|
||||
font-weight: 600;
|
||||
color: var(--color-text);
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-metadata .video-info {
|
||||
display: flex;
|
||||
gap: 1rem;
|
||||
flex-wrap: wrap;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-metadata .video-duration,
|
||||
:where(.markdown-content) .video-metadata .video-size,
|
||||
:where(.markdown-content) .video-metadata .video-format {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.25rem;
|
||||
}
|
||||
|
||||
/* Fullscreen Support */
|
||||
:where(.markdown-content) .video-container video:fullscreen {
|
||||
background-color: #000;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-container video:-webkit-full-screen {
|
||||
background-color: #000;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-container video:-moz-full-screen {
|
||||
background-color: #000;
|
||||
}
|
||||
|
||||
/* Video Thumbnail/Poster Styling */
|
||||
:where(.markdown-content) .video-container video[poster] {
|
||||
object-fit: cover;
|
||||
}
|
||||
|
||||
/* Protected Video Overlay */
|
||||
:where(.markdown-content) .video-container .video-protected {
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
bottom: 0;
|
||||
background-color: rgba(0, 0, 0, 0.8);
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
color: white;
|
||||
text-align: center;
|
||||
padding: 2rem;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-container .video-protected .lock-icon {
|
||||
font-size: 3rem;
|
||||
margin-bottom: 1rem;
|
||||
opacity: 0.8;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
/* Responsive Design */
|
||||
@media (max-width: 768px) {
|
||||
:where(.markdown-content) .video-container {
|
||||
margin: 1.5rem -0.5rem; /* Extend to edges on mobile */
|
||||
margin: 1.5rem -0.5rem;
|
||||
border-radius: 0;
|
||||
}
|
||||
|
||||
@@ -867,15 +760,9 @@
|
||||
font-size: 0.8125rem;
|
||||
border-radius: 0;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-metadata .video-info {
|
||||
flex-direction: column;
|
||||
gap: 0.5rem;
|
||||
align-items: flex-start;
|
||||
}
|
||||
}
|
||||
|
||||
/* Dark Theme Adjustments */
|
||||
/* Dark Theme */
|
||||
[data-theme="dark"] :where(.markdown-content) .video-container {
|
||||
box-shadow: 0 12px 30px rgba(0,0,0,0.4);
|
||||
}
|
||||
@@ -885,48 +772,23 @@
|
||||
border-color: color-mix(in srgb, var(--color-border) 60%, transparent);
|
||||
}
|
||||
|
||||
/* Video Caption/Description Support */
|
||||
:where(.markdown-content) .video-caption {
|
||||
margin-top: 1rem;
|
||||
font-size: 0.9375rem;
|
||||
color: var(--color-text-secondary);
|
||||
text-align: center;
|
||||
font-style: italic;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
/* Video Gallery Support (multiple videos) */
|
||||
:where(.markdown-content) .video-gallery {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
||||
gap: 2rem;
|
||||
margin: 2rem 0;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-gallery .video-container {
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
/* Accessibility Improvements */
|
||||
:where(.markdown-content) .video-container video:focus {
|
||||
outline: 3px solid var(--color-primary);
|
||||
outline-offset: 3px;
|
||||
}
|
||||
|
||||
/* Print Media - Hide Videos */
|
||||
/* Print Media */
|
||||
@media print {
|
||||
:where(.markdown-content) .video-container {
|
||||
border: 2px solid #ddd;
|
||||
background-color: #f5f5f5;
|
||||
padding: 2rem;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-container video,
|
||||
:where(.markdown-content) .video-container iframe {
|
||||
display: none !important;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-container::after {
|
||||
content: "[Video: " attr(data-video-title, "Embedded Video") "]";
|
||||
:where(.markdown-content) .video-container::before {
|
||||
content: "📹 Video: " attr(data-video-title, "Embedded Video");
|
||||
display: block;
|
||||
padding: 1rem;
|
||||
background-color: #f5f5f5;
|
||||
border: 1px solid #ddd;
|
||||
text-align: center;
|
||||
font-style: italic;
|
||||
color: #666;
|
||||
font-weight: 600;
|
||||
}
|
||||
}
|
||||
@@ -1,62 +1,72 @@
|
||||
/* PALETTE OPTION 1: BLUEPRINT & AMBER */
|
||||
:root {
|
||||
/* Light Theme Colors */
|
||||
--color-bg: #fff;
|
||||
--color-bg-secondary: #f8fafc;
|
||||
--color-bg-tertiary: #e2e8f0;
|
||||
--color-text: #1e293b;
|
||||
--color-text-secondary: #64748b;
|
||||
--color-border: #cbd5e1;
|
||||
--color-primary: #2563eb;
|
||||
--color-primary-hover: #1d4ed8;
|
||||
--color-accent: #059669;
|
||||
--color-accent-hover: #047857;
|
||||
/* Light Theme */
|
||||
--color-bg: #ffffff;
|
||||
--color-bg-secondary: #f1f5f9; /* Slate 100 */
|
||||
--color-bg-tertiary: #e2e8f0; /* Slate 200 */
|
||||
--color-text: #0f172a; /* Slate 900 */
|
||||
--color-text-secondary: #475569; /* Slate 600 */
|
||||
--color-border: #cbd5e1; /* Slate 300 */
|
||||
|
||||
--color-primary: #334155; /* Slate 700 - A strong, serious primary */
|
||||
--color-primary-hover: #1e293b; /* Slate 800 */
|
||||
|
||||
--color-accent: #b45309; /* A sharp, focused amber for highlights */
|
||||
--color-accent-hover: #92400e;
|
||||
|
||||
--color-warning: #d97706;
|
||||
--color-error: #dc2626;
|
||||
|
||||
/* Enhanced card type colors */
|
||||
--color-hosted: #7c3aed;
|
||||
--color-hosted-bg: #f3f0ff;
|
||||
--color-oss: #059669;
|
||||
--color-oss-bg: #ecfdf5;
|
||||
--color-method: #0891b2;
|
||||
--color-method-bg: #f0f9ff;
|
||||
--color-concept: #ea580c;
|
||||
--color-error: #be123c; /* A deeper, more serious red */
|
||||
|
||||
/* Card/Tag Category Colors */
|
||||
--color-hosted: #4f46e5; /* Indigo */
|
||||
--color-hosted-bg: #eef2ff;
|
||||
--color-oss: #0d9488; /* Teal */
|
||||
--color-oss-bg: #f0fdfa;
|
||||
--color-method: #0891b2; /* Cyan */
|
||||
--color-method-bg: #ecfeff;
|
||||
--color-concept: #c2410c; /* Orange */
|
||||
--color-concept-bg: #fff7ed;
|
||||
|
||||
/* Shadows */
|
||||
--shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 5%);
|
||||
--shadow-md: 0 4px 6px -1px rgb(0 0 0 / 10%);
|
||||
--shadow-lg: 0 10px 15px -3px rgb(0 0 0 / 10%);
|
||||
|
||||
|
||||
/* Shadows (Crisper) */
|
||||
--shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 6%);
|
||||
--shadow-md: 0 3px 5px -1px rgb(0 0 0 / 8%);
|
||||
--shadow-lg: 0 8px 12px -3px rgb(0 0 0 / 10%);
|
||||
|
||||
/* Transitions */
|
||||
--transition-fast: all 0.2s ease;
|
||||
--transition-medium: all 0.3s ease;
|
||||
}
|
||||
|
||||
[data-theme="dark"] {
|
||||
--color-bg: #0f172a;
|
||||
--color-bg-secondary: #1e293b;
|
||||
--color-bg-tertiary: #334155;
|
||||
--color-text: #f1f5f9;
|
||||
--color-text-secondary: #94a3b8;
|
||||
--color-border: #475569;
|
||||
--color-primary: #3b82f6;
|
||||
--color-primary-hover: #60a5fa;
|
||||
--color-accent: #10b981;
|
||||
--color-accent-hover: #34d399;
|
||||
/* Dark Theme */
|
||||
--color-bg: #0f172a; /* Slate 900 */
|
||||
--color-bg-secondary: #1e293b; /* Slate 800 */
|
||||
--color-bg-tertiary: #334155; /* Slate 700 */
|
||||
--color-text: #f1f5f9; /* Slate 100 */
|
||||
--color-text-secondary: #94a3b8; /* Slate 400 */
|
||||
--color-border: #475569; /* Slate 600 */
|
||||
|
||||
--color-primary: #64748b; /* Slate 500 */
|
||||
--color-primary-hover: #94a3b8; /* Slate 400 */
|
||||
|
||||
--color-accent: #f59e0b; /* A brighter amber for dark mode contrast */
|
||||
--color-accent-hover: #fbbf24;
|
||||
|
||||
--color-warning: #f59e0b;
|
||||
--color-error: #f87171;
|
||||
|
||||
--color-hosted: #a855f7;
|
||||
--color-hosted-bg: #2e1065;
|
||||
--color-oss: #10b981;
|
||||
--color-oss-bg: #064e3b;
|
||||
--color-method: #0891b2;
|
||||
--color-error: #f43f5e;
|
||||
|
||||
/* Card/Tag Category Colors */
|
||||
--color-hosted: #818cf8; /* Indigo */
|
||||
--color-hosted-bg: #3730a3;
|
||||
--color-oss: #2dd4bf; /* Teal */
|
||||
--color-oss-bg: #115e59;
|
||||
--color-method: #22d3ee; /* Cyan */
|
||||
--color-method-bg: #164e63;
|
||||
--color-concept: #f97316;
|
||||
--color-concept: #fb923c; /* Orange */
|
||||
--color-concept-bg: #7c2d12;
|
||||
|
||||
--shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 30%);
|
||||
--shadow-md: 0 4px 6px -1px rgb(0 0 0 / 40%);
|
||||
--shadow-lg: 0 10px 15px -3px rgb(0 0 0 / 50%);
|
||||
|
||||
/* Shadows (Subtler for dark mode) */
|
||||
--shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 20%);
|
||||
--shadow-md: 0 4px 6px -1px rgb(0 0 0 / 30%);
|
||||
--shadow-lg: 0 10px 15px -3px rgb(0 0 0 / 40%);
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
137
src/utils/aiService.ts
Normal file
137
src/utils/aiService.ts
Normal file
@@ -0,0 +1,137 @@
|
||||
// src/utils/aiService.ts
|
||||
import 'dotenv/config';
|
||||
|
||||
export interface AIServiceConfig {
|
||||
endpoint: string;
|
||||
apiKey: string;
|
||||
model: string;
|
||||
}
|
||||
|
||||
export interface AICallOptions {
|
||||
temperature?: number;
|
||||
timeout?: number;
|
||||
}
|
||||
|
||||
export interface AIResponse {
|
||||
content: string;
|
||||
usage?: {
|
||||
promptTokens: number;
|
||||
completionTokens: number;
|
||||
totalTokens: number;
|
||||
};
|
||||
}
|
||||
|
||||
class AIService {
|
||||
private config: AIServiceConfig;
|
||||
private defaultOptions: AICallOptions;
|
||||
|
||||
constructor() {
|
||||
this.config = {
|
||||
endpoint: this.getRequiredEnv('AI_ANALYZER_ENDPOINT'),
|
||||
apiKey: this.getRequiredEnv('AI_ANALYZER_API_KEY'),
|
||||
model: this.getRequiredEnv('AI_ANALYZER_MODEL')
|
||||
};
|
||||
|
||||
this.defaultOptions = {
|
||||
temperature: 0.3,
|
||||
timeout: 60000
|
||||
};
|
||||
|
||||
console.log('[AI-SERVICE] Initialized with model:', this.config.model);
|
||||
}
|
||||
|
||||
private getRequiredEnv(key: string): string {
|
||||
const value = process.env[key];
|
||||
if (!value) {
|
||||
throw new Error(`Missing required environment variable: ${key}`);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
async callAI(prompt: string, options: AICallOptions = {}): Promise<AIResponse> {
|
||||
const mergedOptions = { ...this.defaultOptions, ...options };
|
||||
|
||||
console.log('[AI-SERVICE] Making API call:', {
|
||||
promptLength: prompt.length,
|
||||
temperature: mergedOptions.temperature
|
||||
});
|
||||
|
||||
const headers: Record<string, string> = {
|
||||
'Content-Type': 'application/json'
|
||||
};
|
||||
|
||||
if (this.config.apiKey) {
|
||||
headers['Authorization'] = `Bearer ${this.config.apiKey}`;
|
||||
}
|
||||
|
||||
const requestBody = {
|
||||
model: this.config.model,
|
||||
messages: [{ role: 'user', content: prompt }],
|
||||
temperature: mergedOptions.temperature
|
||||
};
|
||||
|
||||
try {
|
||||
const controller = new AbortController();
|
||||
const timeoutId = setTimeout(() => controller.abort(), mergedOptions.timeout);
|
||||
|
||||
const response = await fetch(`${this.config.endpoint}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify(requestBody),
|
||||
signal: controller.signal
|
||||
});
|
||||
|
||||
clearTimeout(timeoutId);
|
||||
|
||||
if (!response.ok) {
|
||||
const errorText = await response.text();
|
||||
console.error('[AI-SERVICE] API Error:', response.status, errorText);
|
||||
throw new Error(`AI API error: ${response.status} - ${errorText}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
const content = data.choices?.[0]?.message?.content;
|
||||
|
||||
if (!content) {
|
||||
console.error('[AI-SERVICE] No response content from AI model');
|
||||
throw new Error('No response from AI model');
|
||||
}
|
||||
|
||||
console.log('[AI-SERVICE] API call successful:', {
|
||||
responseLength: content.length,
|
||||
usage: data.usage
|
||||
});
|
||||
|
||||
return {
|
||||
content: content.trim(),
|
||||
usage: data.usage
|
||||
};
|
||||
|
||||
} catch (error) {
|
||||
if (error.name === 'AbortError') {
|
||||
console.error('[AI-SERVICE] Request timeout');
|
||||
throw new Error('AI request timeout');
|
||||
}
|
||||
|
||||
console.error('[AI-SERVICE] API call failed:', error.message);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
async callMicroTaskAI(prompt: string): Promise<AIResponse> {
|
||||
return this.callAI(prompt, {
|
||||
temperature: 0.3,
|
||||
timeout: 30000
|
||||
});
|
||||
}
|
||||
|
||||
estimateTokens(text: string): number {
|
||||
return Math.ceil(text.length / 4);
|
||||
}
|
||||
|
||||
getConfig(): AIServiceConfig {
|
||||
return { ...this.config };
|
||||
}
|
||||
}
|
||||
|
||||
// Module-level singleton; constructing it eagerly validates the AI_ANALYZER_* env vars.
export const aiService = new AIService();
|
||||
@@ -83,26 +83,21 @@ export const apiServerError = {
|
||||
};
|
||||
|
||||
export const apiSpecial = {
|
||||
// JSON parsing error
|
||||
invalidJSON: (): Response =>
|
||||
apiError.badRequest('Invalid JSON in request body'),
|
||||
|
||||
// Missing required fields
|
||||
missingRequired: (fields: string[]): Response =>
|
||||
apiError.badRequest(`Missing required fields: ${fields.join(', ')}`),
|
||||
|
||||
// Empty request body
|
||||
emptyBody: (): Response =>
|
||||
apiError.badRequest('Request body cannot be empty'),
|
||||
|
||||
// File upload responses
|
||||
uploadSuccess: (data: { url: string; filename: string; size: number; storage: string }): Response =>
|
||||
apiResponse.created(data),
|
||||
|
||||
uploadFailed: (error: string): Response =>
|
||||
apiServerError.internal(`Upload failed: ${error}`),
|
||||
|
||||
// Contribution responses
|
||||
contributionSuccess: (data: { prUrl?: string; branchName?: string; message: string }): Response =>
|
||||
apiResponse.created({ success: true, ...data }),
|
||||
|
||||
@@ -114,7 +109,6 @@ export const apiWithHeaders = {
|
||||
successWithHeaders: (data: any, headers: Record<string, string>): Response =>
|
||||
createAPIResponse(data, 200, headers),
|
||||
|
||||
// Redirect response
|
||||
redirect: (location: string, temporary: boolean = true): Response =>
|
||||
new Response(null, {
|
||||
status: temporary ? 302 : 301,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -52,22 +52,17 @@ function getEnv(key: string): string {
|
||||
|
||||
export function getSessionFromRequest(request: Request): string | null {
|
||||
const cookieHeader = request.headers.get('cookie');
|
||||
console.log('[DEBUG] Cookie header:', cookieHeader ? 'present' : 'missing');
|
||||
|
||||
if (!cookieHeader) return null;
|
||||
|
||||
const cookies = parseCookie(cookieHeader);
|
||||
console.log('[DEBUG] Parsed cookies:', Object.keys(cookies));
|
||||
console.log('[DEBUG] Session cookie found:', !!cookies.session);
|
||||
|
||||
return cookies.session || null;
|
||||
}
|
||||
|
||||
export async function verifySession(sessionToken: string): Promise<SessionData | null> {
|
||||
try {
|
||||
console.log('[DEBUG] Verifying session token, length:', sessionToken.length);
|
||||
const { payload } = await jwtVerify(sessionToken, SECRET_KEY);
|
||||
console.log('[DEBUG] JWT verification successful, payload keys:', Object.keys(payload));
|
||||
|
||||
if (
|
||||
typeof payload.userId === 'string' &&
|
||||
@@ -75,7 +70,6 @@ export async function verifySession(sessionToken: string): Promise<SessionData |
|
||||
typeof payload.authenticated === 'boolean' &&
|
||||
typeof payload.exp === 'number'
|
||||
) {
|
||||
console.log('[DEBUG] Session validation successful for user:', payload.userId);
|
||||
return {
|
||||
userId: payload.userId,
|
||||
email: payload.email,
|
||||
@@ -84,17 +78,14 @@ export async function verifySession(sessionToken: string): Promise<SessionData |
|
||||
};
|
||||
}
|
||||
|
||||
console.log('[DEBUG] Session payload validation failed, payload:', payload);
|
||||
return null;
|
||||
} catch (error) {
|
||||
console.log('[DEBUG] Session verification failed:', error.message);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export async function createSession(userId: string, email: string): Promise<string> {
|
||||
const exp = Math.floor(Date.now() / 1000) + SESSION_DURATION;
|
||||
console.log('[DEBUG] Creating session for user:', userId, 'exp:', exp);
|
||||
|
||||
const token = await new SignJWT({
|
||||
userId,
|
||||
@@ -106,7 +97,6 @@ export async function createSession(userId: string, email: string): Promise<stri
|
||||
.setExpirationTime(exp)
|
||||
.sign(SECRET_KEY);
|
||||
|
||||
console.log('[DEBUG] Session token created, length:', token.length);
|
||||
return token;
|
||||
}
|
||||
|
||||
@@ -123,7 +113,6 @@ export function createSessionCookie(sessionToken: string): string {
|
||||
path: '/'
|
||||
});
|
||||
|
||||
console.log('[DEBUG] Created session cookie:', cookie.substring(0, 100) + '...');
|
||||
return cookie;
|
||||
}
|
||||
|
||||
@@ -292,8 +281,6 @@ export async function createSessionWithCookie(userInfo: UserInfo): Promise<{
|
||||
|
||||
export async function withAuth(Astro: AstroGlobal, context: AuthContextType = 'general'): Promise<AuthContext | Response> {
|
||||
const authRequired = getAuthRequirement(context);
|
||||
console.log(`[DEBUG PAGE] Auth required for ${context}:`, authRequired);
|
||||
console.log('[DEBUG PAGE] Request URL:', Astro.url.toString());
|
||||
|
||||
if (!authRequired) {
|
||||
return {
|
||||
@@ -305,10 +292,8 @@ export async function withAuth(Astro: AstroGlobal, context: AuthContextType = 'g
|
||||
}
|
||||
|
||||
const sessionToken = getSessionFromRequest(Astro.request);
|
||||
console.log('[DEBUG PAGE] Session token found:', !!sessionToken);
|
||||
|
||||
if (!sessionToken) {
|
||||
console.log('[DEBUG PAGE] No session token, redirecting to login');
|
||||
const loginUrl = `/api/auth/login?returnTo=${encodeURIComponent(Astro.url.toString())}`;
|
||||
return new Response(null, {
|
||||
status: 302,
|
||||
@@ -317,10 +302,8 @@ export async function withAuth(Astro: AstroGlobal, context: AuthContextType = 'g
|
||||
}
|
||||
|
||||
const session = await verifySession(sessionToken);
|
||||
console.log('[DEBUG PAGE] Session verification result:', !!session);
|
||||
|
||||
if (!session) {
|
||||
console.log('[DEBUG PAGE] Session verification failed, redirecting to login');
|
||||
const loginUrl = `/api/auth/login?returnTo=${encodeURIComponent(Astro.url.toString())}`;
|
||||
return new Response(null, {
|
||||
status: 302,
|
||||
@@ -328,7 +311,6 @@ export async function withAuth(Astro: AstroGlobal, context: AuthContextType = 'g
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`[DEBUG PAGE] Page authentication successful for ${context}:`, session.userId);
|
||||
return {
|
||||
authenticated: true,
|
||||
session,
|
||||
@@ -354,10 +336,8 @@ export async function withAPIAuth(request: Request, context: AuthContextType = '
|
||||
}
|
||||
|
||||
const sessionToken = getSessionFromRequest(request);
|
||||
console.log(`[DEBUG API] Session token found for ${context}:`, !!sessionToken);
|
||||
|
||||
if (!sessionToken) {
|
||||
console.log(`[DEBUG API] No session token found for ${context}`);
|
||||
return {
|
||||
authenticated: false,
|
||||
userId: '',
|
||||
@@ -366,10 +346,8 @@ export async function withAPIAuth(request: Request, context: AuthContextType = '
|
||||
}
|
||||
|
||||
const session = await verifySession(sessionToken);
|
||||
console.log(`[DEBUG API] Session verification result for ${context}:`, !!session);
|
||||
|
||||
if (!session) {
|
||||
console.log(`[DEBUG API] Session verification failed for ${context}`);
|
||||
return {
|
||||
authenticated: false,
|
||||
userId: '',
|
||||
@@ -377,7 +355,6 @@ export async function withAPIAuth(request: Request, context: AuthContextType = '
|
||||
};
|
||||
}
|
||||
|
||||
console.log(`[DEBUG API] Authentication successful for ${context}:`, session.userId);
|
||||
return {
|
||||
authenticated: true,
|
||||
userId: session.userId,
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
// src/utils/clientUtils.ts
|
||||
|
||||
|
||||
export function createToolSlug(toolName: string): string {
|
||||
if (!toolName || typeof toolName !== 'string') {
|
||||
console.warn('[toolHelpers] Invalid toolName provided to createToolSlug:', toolName);
|
||||
console.warn('[CLIENT-UTILS] Invalid toolName provided to createToolSlug:', toolName);
|
||||
return '';
|
||||
}
|
||||
|
||||
@@ -30,6 +29,81 @@ export function isToolHosted(tool: any): boolean {
|
||||
tool.projectUrl.trim() !== "";
|
||||
}
|
||||
|
||||
export function sanitizeText(text: string): string {
|
||||
if (typeof text !== 'string') return '';
|
||||
|
||||
return text
|
||||
.replace(/^#{1,6}\s+/gm, '')
|
||||
.replace(/^\s*[-*+]\s+/gm, '')
|
||||
.replace(/^\s*\d+\.\s+/gm, '')
|
||||
.replace(/\*\*(.+?)\*\*/g, '$1')
|
||||
.replace(/\*(.+?)\*/g, '$1')
|
||||
.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
|
||||
.replace(/```[\s\S]*?```/g, '[CODE BLOCK]')
|
||||
.replace(/`([^`]+)`/g, '$1')
|
||||
.replace(/<[^>]+>/g, '')
|
||||
.replace(/\n\s*\n\s*\n/g, '\n\n')
|
||||
.trim();
|
||||
}
|
||||
|
||||
export function escapeHtml(text: string): string {
|
||||
if (typeof text !== 'string') return String(text);
|
||||
const div = document.createElement('div');
|
||||
div.textContent = text;
|
||||
return div.innerHTML;
|
||||
}
|
||||
|
||||
export function truncateText(text: string, maxLength: number): string {
|
||||
if (!text || text.length <= maxLength) return text;
|
||||
return text.slice(0, maxLength) + '...';
|
||||
}
|
||||
|
||||
export function summarizeData(data: any): string {
|
||||
if (data === null || data === undefined) return 'null';
|
||||
if (typeof data === 'string') {
|
||||
return data.length > 100 ? data.slice(0, 100) + '...' : data;
|
||||
}
|
||||
if (typeof data === 'number' || typeof data === 'boolean') {
|
||||
return data.toString();
|
||||
}
|
||||
if (Array.isArray(data)) {
|
||||
if (data.length === 0) return '[]';
|
||||
if (data.length <= 3) return JSON.stringify(data);
|
||||
return `[${data.slice(0, 3).map(i => typeof i === 'string' ? i : JSON.stringify(i)).join(', ')}, ...+${data.length - 3}]`;
|
||||
}
|
||||
if (typeof data === 'object') {
|
||||
const keys = Object.keys(data);
|
||||
if (keys.length === 0) return '{}';
|
||||
if (keys.length <= 3) {
|
||||
return '{' + keys.map(k => `${k}: ${typeof data[k] === 'string' ? data[k].slice(0, 20) + (data[k].length > 20 ? '...' : '') : JSON.stringify(data[k])}`).join(', ') + '}';
|
||||
}
|
||||
return `{${keys.slice(0, 3).join(', ')}, ...+${keys.length - 3} keys}`;
|
||||
}
|
||||
return String(data);
|
||||
}
|
||||
|
||||
export function formatDuration(ms: number): string {
|
||||
if (ms < 1000) return '< 1s';
|
||||
if (ms < 60000) return `${Math.ceil(ms / 1000)}s`;
|
||||
const minutes = Math.floor(ms / 60000);
|
||||
const seconds = Math.ceil((ms % 60000) / 1000);
|
||||
return seconds > 0 ? `${minutes}m ${seconds}s` : `${minutes}m`;
|
||||
}
|
||||
|
||||
export function showElement(element: HTMLElement | null): void {
|
||||
if (element) {
|
||||
element.style.display = 'block';
|
||||
element.classList.remove('hidden');
|
||||
}
|
||||
}
|
||||
|
||||
export function hideElement(element: HTMLElement | null): void {
|
||||
if (element) {
|
||||
element.style.display = 'none';
|
||||
element.classList.add('hidden');
|
||||
}
|
||||
}
|
||||
|
||||
interface AutocompleteOptions {
|
||||
minLength?: number;
|
||||
maxResults?: number;
|
||||
@@ -202,7 +276,7 @@ export class AutocompleteManager {
|
||||
|
||||
defaultRender(item: any): string {
|
||||
const text = typeof item === 'string' ? item : item.name || item.label || item.toString();
|
||||
return `<div class="autocomplete-item">${this.escapeHtml(text)}</div>`;
|
||||
return `<div class="autocomplete-item">${escapeHtml(text)}</div>`;
|
||||
}
|
||||
|
||||
renderDropdown(): void {
|
||||
@@ -284,8 +358,8 @@ export class AutocompleteManager {
|
||||
align-items: center;
|
||||
gap: 0.25rem;
|
||||
">
|
||||
${this.escapeHtml(item)}
|
||||
<button type="button" class="autocomplete-remove" data-item="${this.escapeHtml(item)}" style="
|
||||
${escapeHtml(item)}
|
||||
<button type="button" class="autocomplete-remove" data-item="${escapeHtml(item)}" style="
|
||||
background: none;
|
||||
border: none;
|
||||
color: white;
|
||||
@@ -327,12 +401,6 @@ export class AutocompleteManager {
|
||||
this.selectedIndex = -1;
|
||||
}
|
||||
|
||||
escapeHtml(text: string): string {
|
||||
const div = document.createElement('div');
|
||||
div.textContent = text;
|
||||
return div.innerHTML;
|
||||
}
|
||||
|
||||
setDataSource(newDataSource: any[]): void {
|
||||
this.dataSource = newDataSource;
|
||||
}
|
||||
|
||||
225
src/utils/confidenceScoring.ts
Normal file
225
src/utils/confidenceScoring.ts
Normal file
@@ -0,0 +1,225 @@
|
||||
// src/utils/confidenceScoring.ts
|
||||
import { isToolHosted } from './clientUtils.js';
|
||||
import 'dotenv/config';
|
||||
|
||||
export interface ConfidenceMetrics {
|
||||
overall: number;
|
||||
semanticRelevance: number;
|
||||
taskSuitability: number;
|
||||
uncertaintyFactors: string[];
|
||||
strengthIndicators: string[];
|
||||
}
|
||||
|
||||
export interface ConfidenceConfig {
|
||||
semanticWeight: number;
|
||||
suitabilityWeight: number;
|
||||
minimumThreshold: number;
|
||||
mediumThreshold: number;
|
||||
highThreshold: number;
|
||||
}
|
||||
|
||||
export interface AnalysisContext {
|
||||
userQuery: string;
|
||||
mode: string;
|
||||
embeddingsSimilarities: Map<string, number>;
|
||||
selectedTools?: Array<{
|
||||
tool: any;
|
||||
phase: string;
|
||||
priority: string;
|
||||
justification?: string;
|
||||
taskRelevance?: number;
|
||||
limitations?: string[];
|
||||
}>;
|
||||
}
|
||||
|
||||
class ConfidenceScoring {
|
||||
private config: ConfidenceConfig;
|
||||
|
||||
constructor() {
|
||||
this.config = {
|
||||
semanticWeight: this.getEnvFloat('CONFIDENCE_SEMANTIC_WEIGHT', 0.3),
|
||||
suitabilityWeight: this.getEnvFloat('CONFIDENCE_SUITABILITY_WEIGHT', 0.7),
|
||||
minimumThreshold: this.getEnvInt('CONFIDENCE_MINIMUM_THRESHOLD', 40),
|
||||
mediumThreshold: this.getEnvInt('CONFIDENCE_MEDIUM_THRESHOLD', 60),
|
||||
highThreshold: this.getEnvInt('CONFIDENCE_HIGH_THRESHOLD', 80)
|
||||
};
|
||||
|
||||
console.log('[CONFIDENCE-SCORING] Initialized with restored config:', this.config);
|
||||
}
|
||||
|
||||
private getEnvFloat(key: string, defaultValue: number): number {
|
||||
const value = process.env[key];
|
||||
return value ? parseFloat(value) : defaultValue;
|
||||
}
|
||||
|
||||
private getEnvInt(key: string, defaultValue: number): number {
|
||||
const value = process.env[key];
|
||||
return value ? parseInt(value, 10) : defaultValue;
|
||||
}
|
||||
|
||||
calculateRecommendationConfidence(
|
||||
tool: any,
|
||||
context: AnalysisContext,
|
||||
taskRelevance: number = 70,
|
||||
limitations: string[] = []
|
||||
): ConfidenceMetrics {
|
||||
console.log('[CONFIDENCE-SCORING] Calculating confidence for tool:', tool.name);
|
||||
|
||||
const rawSemanticRelevance = context.embeddingsSimilarities.has(tool.name) ?
|
||||
context.embeddingsSimilarities.get(tool.name)! * 100 : 50;
|
||||
|
||||
let enhancedTaskSuitability = taskRelevance;
|
||||
|
||||
if (context.mode === 'workflow') {
|
||||
const toolSelection = context.selectedTools?.find((st: any) => st.tool && st.tool.name === tool.name);
|
||||
if (toolSelection && tool.phases && Array.isArray(tool.phases) && tool.phases.includes(toolSelection.phase)) {
|
||||
const phaseBonus = Math.min(15, 100 - taskRelevance);
|
||||
enhancedTaskSuitability = Math.min(100, taskRelevance + phaseBonus);
|
||||
console.log('[CONFIDENCE-SCORING] Phase alignment bonus applied:', phaseBonus);
|
||||
}
|
||||
}
|
||||
|
||||
const overall = (
|
||||
rawSemanticRelevance * this.config.semanticWeight +
|
||||
enhancedTaskSuitability * this.config.suitabilityWeight
|
||||
);
|
||||
|
||||
const uncertaintyFactors = this.identifyUncertaintyFactors(tool, context, limitations, overall);
|
||||
const strengthIndicators = this.identifyStrengthIndicators(tool, context, overall);
|
||||
|
||||
const result = {
|
||||
overall: Math.round(overall),
|
||||
semanticRelevance: Math.round(rawSemanticRelevance),
|
||||
taskSuitability: Math.round(enhancedTaskSuitability),
|
||||
uncertaintyFactors,
|
||||
strengthIndicators
|
||||
};
|
||||
|
||||
console.log('[CONFIDENCE-SCORING] Confidence calculated:', {
|
||||
tool: tool.name,
|
||||
overall: result.overall,
|
||||
semantic: result.semanticRelevance,
|
||||
task: result.taskSuitability,
|
||||
uncertaintyCount: uncertaintyFactors.length,
|
||||
strengthCount: strengthIndicators.length
|
||||
});
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private identifyUncertaintyFactors(
|
||||
tool: any,
|
||||
context: AnalysisContext,
|
||||
limitations: string[],
|
||||
confidence: number
|
||||
): string[] {
|
||||
const factors: string[] = [];
|
||||
|
||||
if (limitations?.length > 0) {
|
||||
factors.push(...limitations.slice(0, 2));
|
||||
}
|
||||
|
||||
const similarity = context.embeddingsSimilarities.get(tool.name) || 0.5;
|
||||
if (similarity < 0.7) {
|
||||
factors.push('Geringe semantische Ähnlichkeit zur Anfrage');
|
||||
}
|
||||
|
||||
if (tool.skillLevel === 'expert' && /schnell|rapid|triage|urgent|sofort/i.test(context.userQuery)) {
|
||||
factors.push('Experten-Tool für zeitkritisches Szenario');
|
||||
}
|
||||
|
||||
if (tool.skillLevel === 'novice' && /komplex|erweitert|tiefgehend|advanced|forensisch/i.test(context.userQuery)) {
|
||||
factors.push('Einsteiger-Tool für komplexe Analyse');
|
||||
}
|
||||
|
||||
if (tool.type === 'software' && !isToolHosted(tool) && tool.accessType === 'download') {
|
||||
factors.push('Installation und Setup erforderlich');
|
||||
}
|
||||
|
||||
if (tool.license === 'Proprietary') {
|
||||
factors.push('Kommerzielle Software - Lizenzkosten zu beachten');
|
||||
}
|
||||
|
||||
if (confidence < 60) {
|
||||
factors.push('Moderate Gesamtbewertung - alternative Ansätze empfohlen');
|
||||
}
|
||||
|
||||
return factors.slice(0, 4);
|
||||
}
|
||||
|
||||
private identifyStrengthIndicators(tool: any, context: AnalysisContext, confidence: number): string[] {
|
||||
const indicators: string[] = [];
|
||||
|
||||
const similarity = context.embeddingsSimilarities.get(tool.name) || 0.5;
|
||||
if (similarity >= 0.7) {
|
||||
indicators.push('Sehr gute semantische Übereinstimmung mit Ihrer Anfrage');
|
||||
}
|
||||
|
||||
if (tool.knowledgebase === true) {
|
||||
indicators.push('Umfassende Dokumentation und Wissensbasis verfügbar');
|
||||
}
|
||||
|
||||
if (isToolHosted(tool)) {
|
||||
indicators.push('Sofort verfügbar über gehostete Lösung');
|
||||
}
|
||||
|
||||
if (tool.skillLevel === 'intermediate' || tool.skillLevel === 'advanced') {
|
||||
indicators.push('Ausgewogenes Verhältnis zwischen Funktionalität und Benutzerfreundlichkeit');
|
||||
}
|
||||
|
||||
if (tool.type === 'method' && /methodik|vorgehen|prozess|ansatz/i.test(context.userQuery)) {
|
||||
indicators.push('Methodischer Ansatz passt zu Ihrer prozeduralen Anfrage');
|
||||
}
|
||||
|
||||
return indicators.slice(0, 4);
|
||||
}
|
||||
|
||||
calculateSelectionConfidence(result: any, candidateCount: number): number {
|
||||
if (!result?.selectedTools) {
|
||||
console.log('[CONFIDENCE-SCORING] No selected tools for confidence calculation');
|
||||
return 30;
|
||||
}
|
||||
|
||||
const selectionRatio = result.selectedTools.length / candidateCount;
|
||||
const hasReasoning = result.reasoning && result.reasoning.length > 50;
|
||||
|
||||
let confidence = 60;
|
||||
|
||||
if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
|
||||
else if (selectionRatio <= 0.05) confidence -= 10;
|
||||
else confidence -= 15;
|
||||
|
||||
if (hasReasoning) confidence += 15;
|
||||
if (result.selectedConcepts?.length > 0) confidence += 5;
|
||||
|
||||
const finalConfidence = Math.min(95, Math.max(25, confidence));
|
||||
|
||||
console.log('[CONFIDENCE-SCORING] Selection confidence calculated:', {
|
||||
candidateCount,
|
||||
selectedCount: result.selectedTools.length,
|
||||
selectionRatio: selectionRatio.toFixed(3),
|
||||
hasReasoning,
|
||||
confidence: finalConfidence
|
||||
});
|
||||
|
||||
return finalConfidence;
|
||||
}
|
||||
|
||||
getConfidenceLevel(confidence: number): 'weak' | 'moderate' | 'strong' {
|
||||
if (confidence >= this.config.highThreshold) return 'strong';
|
||||
if (confidence >= this.config.mediumThreshold) return 'moderate';
|
||||
return 'weak';
|
||||
}
|
||||
|
||||
getConfidenceColor(confidence: number): string {
|
||||
if (confidence >= this.config.highThreshold) return 'var(--color-accent)';
|
||||
if (confidence >= this.config.mediumThreshold) return 'var(--color-warning)';
|
||||
return 'var(--color-error)';
|
||||
}
|
||||
|
||||
getConfig(): ConfidenceConfig {
|
||||
return { ...this.config };
|
||||
}
|
||||
}
|
||||
|
||||
// Shared singleton used by the analysis pipeline; CONFIDENCE_* env config is read at module load.
export const confidenceScoring = new ConfidenceScoring();
|
||||
@@ -1,4 +1,4 @@
|
||||
// src/utils/dataService.ts - Enhanced for micro-task AI pipeline
|
||||
// src/utils/dataService.ts
|
||||
import { promises as fs } from 'fs';
|
||||
import { load } from 'js-yaml';
|
||||
import path from 'path';
|
||||
@@ -85,7 +85,7 @@ let cachedData: ToolsData | null = null;
|
||||
let cachedRandomizedData: ToolsData | null = null;
|
||||
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
|
||||
let lastRandomizationDate: string | null = null;
|
||||
let dataVersion: string | null = null;
|
||||
let cachedToolsHash: string | null = null;
|
||||
|
||||
function seededRandom(seed: number): () => number {
|
||||
let x = Math.sin(seed) * 10000;
|
||||
@@ -110,17 +110,6 @@ function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
|
||||
return shuffled;
|
||||
}
|
||||
|
||||
function generateDataVersion(data: any): string {
|
||||
const str = JSON.stringify(data, Object.keys(data).sort());
|
||||
let hash = 0;
|
||||
for (let i = 0; i < str.length; i++) {
|
||||
const char = str.charCodeAt(i);
|
||||
hash = ((hash << 5) - hash) + char;
|
||||
hash = hash & hash;
|
||||
}
|
||||
return Math.abs(hash).toString(36);
|
||||
}
|
||||
|
||||
async function loadRawData(): Promise<ToolsData> {
|
||||
if (!cachedData) {
|
||||
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
|
||||
@@ -142,8 +131,9 @@ async function loadRawData(): Promise<ToolsData> {
|
||||
};
|
||||
}
|
||||
|
||||
dataVersion = generateDataVersion(cachedData);
|
||||
console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);
|
||||
const { getToolsFileHash } = await import('./hashUtils.js');
|
||||
cachedToolsHash = await getToolsFileHash();
|
||||
console.log(`[DATA SERVICE] Loaded data with hash: ${cachedToolsHash.slice(0, 12)}...`);
|
||||
|
||||
} catch (error) {
|
||||
if (error instanceof z.ZodError) {
|
||||
@@ -234,7 +224,7 @@ export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedT
|
||||
}
|
||||
|
||||
export function getDataVersion(): string | null {
|
||||
return dataVersion;
|
||||
return cachedToolsHash;
|
||||
}
|
||||
|
||||
export function clearCache(): void {
|
||||
@@ -242,7 +232,7 @@ export function clearCache(): void {
|
||||
cachedRandomizedData = null;
|
||||
cachedCompressedData = null;
|
||||
lastRandomizationDate = null;
|
||||
dataVersion = null;
|
||||
cachedToolsHash = null;
|
||||
|
||||
console.log('[DATA SERVICE] Enhanced cache cleared');
|
||||
}
|
||||
@@ -1,11 +1,11 @@
|
||||
// src/utils/embeddings.ts
|
||||
// src/utils/embeddings.ts - Refactored
|
||||
import { promises as fs } from 'fs';
|
||||
import path from 'path';
|
||||
import { getCompressedToolsDataForAI } from './dataService.js';
|
||||
import 'dotenv/config';
|
||||
import crypto from 'crypto';
|
||||
|
||||
interface EmbeddingData {
|
||||
export interface EmbeddingData {
|
||||
id: string;
|
||||
type: 'tool' | 'concept';
|
||||
name: string;
|
||||
@@ -20,14 +20,22 @@ interface EmbeddingData {
|
||||
};
|
||||
}
|
||||
|
||||
export interface SimilarityResult extends EmbeddingData {
|
||||
similarity: number;
|
||||
}
|
||||
|
||||
interface EmbeddingsDatabase {
|
||||
version: string;
|
||||
lastUpdated: number;
|
||||
embeddings: EmbeddingData[];
|
||||
}
|
||||
|
||||
interface SimilarityResult extends EmbeddingData {
|
||||
similarity: number;
|
||||
interface EmbeddingsConfig {
|
||||
endpoint?: string;
|
||||
apiKey?: string;
|
||||
model?: string;
|
||||
batchSize: number;
|
||||
batchDelay: number;
|
||||
}
|
||||
|
||||
class EmbeddingsService {
|
||||
@@ -35,48 +43,30 @@ class EmbeddingsService {
|
||||
private isInitialized = false;
|
||||
private initializationPromise: Promise<void> | null = null;
|
||||
private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
|
||||
private readonly batchSize: number;
|
||||
private readonly batchDelay: number;
|
||||
private enabled: boolean = false;
|
||||
private config: EmbeddingsConfig;
|
||||
|
||||
constructor() {
|
||||
this.batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20', 10);
|
||||
this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
|
||||
|
||||
this.enabled = true;
|
||||
this.config = this.loadConfig();
|
||||
console.log('[EMBEDDINGS-SERVICE] Initialized:', {
|
||||
hasEndpoint: !!this.config.endpoint,
|
||||
hasModel: !!this.config.model
|
||||
});
|
||||
}
|
||||
|
||||
private async checkEnabledStatus(): Promise<void> {
|
||||
try {
|
||||
const envEnabled = process.env.AI_EMBEDDINGS_ENABLED;
|
||||
|
||||
if (envEnabled === 'true') {
|
||||
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
|
||||
const model = process.env.AI_EMBEDDINGS_MODEL;
|
||||
|
||||
if (!endpoint || !model) {
|
||||
console.warn('[EMBEDDINGS] Embeddings enabled but API configuration missing - disabling');
|
||||
this.enabled = false;
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('[EMBEDDINGS] All requirements met - enabling embeddings');
|
||||
this.enabled = true;
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
await fs.stat(this.embeddingsPath);
|
||||
console.log('[EMBEDDINGS] Existing embeddings file found - enabling');
|
||||
this.enabled = true;
|
||||
} catch {
|
||||
console.log('[EMBEDDINGS] Embeddings not explicitly enabled - disabling');
|
||||
this.enabled = false;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('[EMBEDDINGS] Error checking enabled status:', error);
|
||||
this.enabled = false;
|
||||
}
|
||||
private loadConfig(): EmbeddingsConfig {
|
||||
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
|
||||
const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
|
||||
const model = process.env.AI_EMBEDDINGS_MODEL;
|
||||
const batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20', 10);
|
||||
const batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
|
||||
|
||||
return {
|
||||
endpoint,
|
||||
apiKey,
|
||||
model,
|
||||
batchSize,
|
||||
batchDelay
|
||||
};
|
||||
}
|
||||
|
||||
async initialize(): Promise<void> {
|
||||
@@ -93,63 +83,55 @@ class EmbeddingsService {
|
||||
}
|
||||
|
||||
private async performInitialization(): Promise<void> {
|
||||
await this.checkEnabledStatus();
|
||||
if (!this.enabled) {
|
||||
console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
|
||||
return;
|
||||
}
|
||||
|
||||
const initStart = Date.now();
|
||||
|
||||
try {
|
||||
console.log('[EMBEDDINGS] Initializing embeddings system…');
|
||||
console.log('[EMBEDDINGS-SERVICE] Starting initialization');
|
||||
|
||||
/*if (!this.config.enabled) {
|
||||
console.log('[EMBEDDINGS-SERVICE] Service disabled via configuration');
|
||||
return;
|
||||
}*/
|
||||
|
||||
await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true });
|
||||
|
||||
const toolsData = await getCompressedToolsDataForAI();
|
||||
const currentDataHash = await this.hashToolsFile();
|
||||
const toolsData = await getCompressedToolsDataForAI();
|
||||
const { getToolsFileHash } = await import('./hashUtils.js');
|
||||
const currentDataHash = await getToolsFileHash();
|
||||
|
||||
const existing = await this.loadEmbeddings();
|
||||
console.log('[EMBEDDINGS] Current hash:', currentDataHash);
|
||||
console.log('[EMBEDDINGS] Existing file version:', existing?.version);
|
||||
console.log('[EMBEDDINGS] Existing embeddings length:', existing?.embeddings?.length);
|
||||
|
||||
const cacheIsUsable =
|
||||
existing &&
|
||||
const existing = await this.loadEmbeddings();
|
||||
|
||||
const cacheIsUsable = existing &&
|
||||
existing.version === currentDataHash &&
|
||||
Array.isArray(existing.embeddings) &&
|
||||
existing.embeddings.length > 0;
|
||||
|
||||
if (cacheIsUsable) {
|
||||
console.log('[EMBEDDINGS] Using cached embeddings');
|
||||
this.embeddings = existing.embeddings;
|
||||
console.log('[EMBEDDINGS-SERVICE] Using cached embeddings');
|
||||
this.embeddings = existing.embeddings;
|
||||
} else {
|
||||
console.log('[EMBEDDINGS] Generating new embeddings…');
|
||||
console.log('[EMBEDDINGS-SERVICE] Generating new embeddings');
|
||||
await this.generateEmbeddings(toolsData, currentDataHash);
|
||||
}
|
||||
|
||||
this.isInitialized = true;
|
||||
console.log(`[EMBEDDINGS] Initialized with ${this.embeddings.length} embeddings in ${Date.now() - initStart} ms`);
|
||||
} catch (err) {
|
||||
console.error('[EMBEDDINGS] Failed to initialize:', err);
|
||||
console.log(`[EMBEDDINGS-SERVICE] Initialized successfully with ${this.embeddings.length} embeddings in ${Date.now() - initStart}ms`);
|
||||
|
||||
} catch (error) {
|
||||
console.error('[EMBEDDINGS-SERVICE] Initialization failed:', error);
|
||||
this.isInitialized = false;
|
||||
throw err;
|
||||
throw error;
|
||||
} finally {
|
||||
this.initializationPromise = null;
|
||||
}
|
||||
}
|
||||
|
||||
private async hashToolsFile(): Promise<string> {
|
||||
const file = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
|
||||
const raw = await fs.readFile(file, 'utf8');
|
||||
return crypto.createHash('sha256').update(raw).digest('hex');
|
||||
}
|
||||
|
||||
private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> {
|
||||
try {
|
||||
const data = await fs.readFile(this.embeddingsPath, 'utf8');
|
||||
return JSON.parse(data);
|
||||
} catch (error) {
|
||||
console.log('[EMBEDDINGS] No existing embeddings found');
|
||||
console.log('[EMBEDDINGS-SERVICE] No existing embeddings file found');
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@@ -162,7 +144,7 @@ class EmbeddingsService {
|
||||
};
|
||||
|
||||
await fs.writeFile(this.embeddingsPath, JSON.stringify(database, null, 2));
|
||||
console.log(`[EMBEDDINGS] Saved ${this.embeddings.length} embeddings to disk`);
|
||||
console.log(`[EMBEDDINGS-SERVICE] Saved ${this.embeddings.length} embeddings to disk`);
|
||||
}
|
||||
|
||||
private createContentString(item: any): string {
|
||||
@@ -178,30 +160,23 @@ class EmbeddingsService {
|
||||
}
|
||||
|
||||
private async generateEmbeddingsBatch(contents: string[]): Promise<number[][]> {
|
||||
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
|
||||
const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
|
||||
const model = process.env.AI_EMBEDDINGS_MODEL;
|
||||
|
||||
if (!endpoint || !model) {
|
||||
const missing: string[] = [];
|
||||
if (!endpoint) missing.push('AI_EMBEDDINGS_ENDPOINT');
|
||||
if (!model) missing.push('AI_EMBEDDINGS_MODEL');
|
||||
throw new Error(`Missing embeddings API configuration: ${missing.join(', ')}`);
|
||||
if (!this.config.endpoint || !this.config.model) {
|
||||
throw new Error('Missing embeddings API configuration');
|
||||
}
|
||||
|
||||
const headers: Record<string, string> = {
|
||||
'Content-Type': 'application/json'
|
||||
};
|
||||
|
||||
if (apiKey) {
|
||||
headers['Authorization'] = `Bearer ${apiKey}`;
|
||||
if (this.config.apiKey) {
|
||||
headers['Authorization'] = `Bearer ${this.config.apiKey}`;
|
||||
}
|
||||
|
||||
const response = await fetch(endpoint, {
|
||||
const response = await fetch(this.config.endpoint, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
model: this.config.model,
|
||||
input: contents
|
||||
})
|
||||
});
|
||||
@@ -233,11 +208,16 @@ class EmbeddingsService {
|
||||
const contents = allItems.map(item => this.createContentString(item));
|
||||
this.embeddings = [];
|
||||
|
||||
for (let i = 0; i < contents.length; i += this.batchSize) {
|
||||
const batch = contents.slice(i, i + this.batchSize);
|
||||
const batchItems = allItems.slice(i, i + this.batchSize);
|
||||
console.log(`[EMBEDDINGS-SERVICE] Generating embeddings for ${contents.length} items`);
|
||||
|
||||
for (let i = 0; i < contents.length; i += this.config.batchSize) {
|
||||
const batch = contents.slice(i, i + this.config.batchSize);
|
||||
const batchItems = allItems.slice(i, i + this.config.batchSize);
|
||||
|
||||
console.log(`[EMBEDDINGS] Processing batch ${Math.ceil((i + 1) / this.batchSize)} of ${Math.ceil(contents.length / this.batchSize)}`);
|
||||
const batchNumber = Math.ceil((i + 1) / this.config.batchSize);
|
||||
const totalBatches = Math.ceil(contents.length / this.config.batchSize);
|
||||
|
||||
console.log(`[EMBEDDINGS-SERVICE] Processing batch ${batchNumber}/${totalBatches}`);
|
||||
|
||||
try {
|
||||
const embeddings = await this.generateEmbeddingsBatch(batch);
|
||||
@@ -260,12 +240,12 @@ class EmbeddingsService {
|
||||
});
|
||||
});
|
||||
|
||||
if (i + this.batchSize < contents.length) {
|
||||
await new Promise(resolve => setTimeout(resolve, this.batchDelay));
|
||||
if (i + this.config.batchSize < contents.length) {
|
||||
await new Promise(resolve => setTimeout(resolve, this.config.batchDelay));
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.error(`[EMBEDDINGS] Failed to process batch ${Math.ceil((i + 1) / this.batchSize)}:`, error);
|
||||
console.error(`[EMBEDDINGS-SERVICE] Batch ${batchNumber} failed:`, error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
@@ -273,18 +253,17 @@ class EmbeddingsService {
|
||||
await this.saveEmbeddings(version);
|
||||
}
|
||||
|
||||
public async embedText(text: string): Promise<number[]> {
|
||||
if (!this.enabled || !this.isInitialized) {
|
||||
async embedText(text: string): Promise<number[]> {
|
||||
if (!this.isInitialized) {
|
||||
throw new Error('Embeddings service not available');
|
||||
}
|
||||
|
||||
const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
|
||||
return embedding;
|
||||
}
|
||||
|
||||
async waitForInitialization(): Promise<void> {
|
||||
await this.checkEnabledStatus();
|
||||
|
||||
if (!this.enabled || this.isInitialized) {
|
||||
if (this.isInitialized) {
|
||||
return Promise.resolve();
|
||||
}
|
||||
|
||||
@@ -296,13 +275,6 @@ class EmbeddingsService {
|
||||
return this.initialize();
|
||||
}
|
||||
|
||||
async forceRecheckEnvironment(): Promise<void> {
|
||||
this.enabled = false;
|
||||
this.isInitialized = false;
|
||||
await this.checkEnabledStatus();
|
||||
console.log('[EMBEDDINGS] Environment status re-checked, enabled:', this.enabled);
|
||||
}
|
||||
|
||||
private cosineSimilarity(a: number[], b: number[]): number {
|
||||
let dotProduct = 0;
|
||||
let normA = 0;
|
||||
@@ -318,145 +290,62 @@ class EmbeddingsService {
|
||||
}
|
||||
|
||||
async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<SimilarityResult[]> {
|
||||
if (!this.enabled) {
|
||||
console.log('[EMBEDDINGS] Service disabled for similarity search');
|
||||
/*if (!this.config.enabled) {
|
||||
console.log('[EMBEDDINGS-SERVICE] Service disabled, returning empty results');
|
||||
return [];
|
||||
}*/
|
||||
|
||||
if (!this.isInitialized || this.embeddings.length === 0) {
|
||||
console.log('[EMBEDDINGS-SERVICE] Not initialized or no embeddings available');
|
||||
return [];
|
||||
}
|
||||
|
||||
try {
|
||||
if (this.isInitialized && this.embeddings.length > 0) {
|
||||
console.log(`[EMBEDDINGS] Using embeddings data for similarity search: ${query}`);
|
||||
|
||||
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
|
||||
const queryEmbedding = queryEmbeddings[0];
|
||||
console.log(`[EMBEDDINGS-SERVICE] Finding similar items for query: "${query}"`);
|
||||
|
||||
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
|
||||
const queryEmbedding = queryEmbeddings[0];
|
||||
|
||||
console.log(`[EMBEDDINGS] Computing similarities for ${this.embeddings.length} items`);
|
||||
const similarities: SimilarityResult[] = this.embeddings.map(item => ({
|
||||
...item,
|
||||
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
|
||||
}));
|
||||
|
||||
const similarities: SimilarityResult[] = this.embeddings.map(item => ({
|
||||
...item,
|
||||
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
|
||||
}));
|
||||
const topScore = Math.max(...similarities.map(s => s.similarity));
|
||||
const dynamicThreshold = Math.max(threshold, topScore * 0.85);
|
||||
|
||||
const topScore = Math.max(...similarities.map(s => s.similarity));
|
||||
const dynamicCutOff = Math.max(threshold, topScore * 0.85);
|
||||
const results = similarities
|
||||
.filter(item => item.similarity >= dynamicThreshold)
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, maxResults);
|
||||
|
||||
const results = similarities
|
||||
.filter(item => item.similarity >= dynamicCutOff)
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, maxResults);
|
||||
|
||||
|
||||
const orderingValid = results.every((item, index) => {
|
||||
if (index === 0) return true;
|
||||
return item.similarity <= results[index - 1].similarity;
|
||||
console.log(`[EMBEDDINGS-SERVICE] Found ${results.length} similar items (threshold: ${dynamicThreshold.toFixed(3)})`);
|
||||
|
||||
if (results.length > 0) {
|
||||
console.log('[EMBEDDINGS-SERVICE] Top 5 matches:');
|
||||
results.slice(0, 5).forEach((item, idx) => {
|
||||
console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`);
|
||||
});
|
||||
|
||||
if (!orderingValid) {
|
||||
console.error('[EMBEDDINGS] CRITICAL: Similarity ordering is broken!');
|
||||
results.forEach((item, idx) => {
|
||||
console.error(` ${idx}: ${item.name} = ${item.similarity.toFixed(4)}`);
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`[EMBEDDINGS] Found ${results.length} similar items (threshold: ${threshold})`);
|
||||
if (results.length > 0) {
|
||||
console.log('[EMBEDDINGS] Top 10 similarity matches:');
|
||||
results.slice(0, 10).forEach((item, idx) => {
|
||||
console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`);
|
||||
});
|
||||
|
||||
const topSimilarity = results[0].similarity;
|
||||
const hasHigherSimilarity = results.some(item => item.similarity > topSimilarity);
|
||||
if (hasHigherSimilarity) {
|
||||
console.error('[EMBEDDINGS] CRITICAL: Top result is not actually the highest similarity!');
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
|
||||
} else {
|
||||
console.log(`[EMBEDDINGS] No embeddings data, using fallback text matching: ${query}`);
|
||||
|
||||
const { getToolsData } = await import('./dataService.js');
|
||||
const toolsData = await getToolsData();
|
||||
|
||||
const queryLower = query.toLowerCase();
|
||||
const queryWords = queryLower.split(/\s+/).filter(w => w.length > 2);
|
||||
|
||||
const similarities: SimilarityResult[] = toolsData.tools
|
||||
.map((tool: any) => {
|
||||
let similarity = 0;
|
||||
|
||||
if (tool.name.toLowerCase().includes(queryLower)) {
|
||||
similarity += 0.8;
|
||||
}
|
||||
|
||||
if (tool.description && tool.description.toLowerCase().includes(queryLower)) {
|
||||
similarity += 0.6;
|
||||
}
|
||||
|
||||
if (tool.tags && Array.isArray(tool.tags)) {
|
||||
const matchingTags = tool.tags.filter((tag: string) =>
|
||||
tag.toLowerCase().includes(queryLower) || queryLower.includes(tag.toLowerCase())
|
||||
);
|
||||
if (tool.tags.length > 0) {
|
||||
similarity += (matchingTags.length / tool.tags.length) * 0.4;
|
||||
}
|
||||
}
|
||||
|
||||
const toolText = `${tool.name} ${tool.description || ''} ${(tool.tags || []).join(' ')}`.toLowerCase();
|
||||
const matchingWords = queryWords.filter(word => toolText.includes(word));
|
||||
if (queryWords.length > 0) {
|
||||
similarity += (matchingWords.length / queryWords.length) * 0.3;
|
||||
}
|
||||
|
||||
return {
|
||||
id: `tool_${tool.name.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase()}`,
|
||||
type: 'tool' as const,
|
||||
name: tool.name,
|
||||
content: toolText,
|
||||
embedding: [],
|
||||
metadata: {
|
||||
domains: tool.domains || [],
|
||||
phases: tool.phases || [],
|
||||
tags: tool.tags || [],
|
||||
skillLevel: tool.skillLevel,
|
||||
type: tool.type
|
||||
},
|
||||
similarity: Math.min(similarity, 1.0)
|
||||
};
|
||||
})
|
||||
.filter(item => item.similarity >= threshold)
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, maxResults);
|
||||
|
||||
console.log(`[EMBEDDINGS] Fallback found ${similarities.length} similar items`);
|
||||
return similarities;
|
||||
}
|
||||
|
||||
return results;
|
||||
|
||||
} catch (error) {
|
||||
console.error('[EMBEDDINGS] Failed to find similar items:', error);
|
||||
console.error('[EMBEDDINGS-SERVICE] Similarity search failed:', error);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
isEnabled(): boolean {
|
||||
if (!this.enabled && !this.isInitialized) {
|
||||
this.checkEnabledStatus().catch(console.error);
|
||||
}
|
||||
|
||||
return this.enabled;
|
||||
}
|
||||
|
||||
getStats(): { enabled: boolean; initialized: boolean; count: number } {
|
||||
getStats(): {initialized: boolean; count: number } {
|
||||
return {
|
||||
enabled: this.enabled,
|
||||
initialized: this.isInitialized,
|
||||
count: this.embeddings.length
|
||||
};
|
||||
}
|
||||
|
||||
getConfig(): EmbeddingsConfig {
|
||||
return { ...this.config };
|
||||
}
|
||||
}
|
||||
|
||||
const embeddingsService = new EmbeddingsService();
|
||||
|
||||
export { embeddingsService, type EmbeddingData, type SimilarityResult };
|
||||
export const embeddingsService = new EmbeddingsService();
|
||||
20
src/utils/hashUtils.ts
Normal file
20
src/utils/hashUtils.ts
Normal file
@@ -0,0 +1,20 @@
|
||||
// src/utils/hashUtils.ts
|
||||
import { promises as fs } from 'fs';
|
||||
import path from 'path';
|
||||
import crypto from 'crypto';
|
||||
|
||||
export async function getToolsFileHash(): Promise<string> {
|
||||
const file = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
|
||||
const raw = await fs.readFile(file, 'utf8');
|
||||
return crypto.createHash('sha256').update(raw).digest('hex');
|
||||
}
|
||||
|
||||
export function getToolsFileHashSync(): string | null {
|
||||
try {
|
||||
const file = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
|
||||
const raw = require('fs').readFileSync(file, 'utf8');
|
||||
return crypto.createHash('sha256').update(raw).digest('hex');
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
356
src/utils/jsonUtils.ts
Normal file
356
src/utils/jsonUtils.ts
Normal file
@@ -0,0 +1,356 @@
|
||||
// src/utils/jsonUtils.ts
|
||||
export class JSONParser {
|
||||
static safeParseJSON(jsonString: string, fallback: any = null): any {
|
||||
try {
|
||||
let cleaned = jsonString.trim();
|
||||
|
||||
const jsonBlockPatterns = [
|
||||
/```json\s*([\s\S]*?)\s*```/i,
|
||||
/```\s*([\s\S]*?)\s*```/i,
|
||||
/\{[\s\S]*\}/,
|
||||
];
|
||||
|
||||
for (const pattern of jsonBlockPatterns) {
|
||||
const match = cleaned.match(pattern);
|
||||
if (match) {
|
||||
cleaned = match[1] || match[0];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!cleaned.endsWith('}') && !cleaned.endsWith(']')) {
|
||||
console.warn('[JSON-PARSER] JSON appears truncated, attempting recovery');
|
||||
cleaned = this.repairTruncatedJSON(cleaned);
|
||||
}
|
||||
|
||||
const parsed = JSON.parse(cleaned);
|
||||
|
||||
if (parsed && typeof parsed === 'object') {
|
||||
if (!parsed.selectedTools) parsed.selectedTools = [];
|
||||
if (!parsed.selectedConcepts) parsed.selectedConcepts = [];
|
||||
if (!Array.isArray(parsed.selectedTools)) parsed.selectedTools = [];
|
||||
if (!Array.isArray(parsed.selectedConcepts)) parsed.selectedConcepts = [];
|
||||
}
|
||||
|
||||
return parsed;
|
||||
|
||||
} catch (error) {
|
||||
console.warn('[JSON-PARSER] JSON parsing failed:', error.message);
|
||||
return fallback;
|
||||
}
|
||||
}
|
||||
|
||||
private static repairTruncatedJSON(cleaned: string): string {
|
||||
let braceCount = 0;
|
||||
let bracketCount = 0;
|
||||
let inString = false;
|
||||
let escaped = false;
|
||||
let lastCompleteStructure = '';
|
||||
|
||||
for (let i = 0; i < cleaned.length; i++) {
|
||||
const char = cleaned[i];
|
||||
|
||||
if (escaped) {
|
||||
escaped = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (char === '\\') {
|
||||
escaped = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (char === '"' && !escaped) {
|
||||
inString = !inString;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!inString) {
|
||||
if (char === '{') braceCount++;
|
||||
if (char === '}') braceCount--;
|
||||
if (char === '[') bracketCount++;
|
||||
if (char === ']') bracketCount--;
|
||||
|
||||
if (braceCount === 0 && bracketCount === 0 && (char === '}' || char === ']')) {
|
||||
lastCompleteStructure = cleaned.substring(0, i + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (lastCompleteStructure) {
|
||||
return lastCompleteStructure;
|
||||
} else {
|
||||
if (braceCount > 0) cleaned += '}';
|
||||
if (bracketCount > 0) cleaned += ']';
|
||||
return cleaned;
|
||||
}
|
||||
}
|
||||
|
||||
static extractToolsFromMalformedJSON(jsonString: string): { selectedTools: string[]; selectedConcepts: string[] } {
|
||||
const selectedTools: string[] = [];
|
||||
const selectedConcepts: string[] = [];
|
||||
|
||||
const toolsMatch = jsonString.match(/"selectedTools"\s*:\s*\[([\s\S]*?)\]/i);
|
||||
if (toolsMatch) {
|
||||
const toolMatches = toolsMatch[1].match(/"([^"]+)"/g);
|
||||
if (toolMatches) {
|
||||
selectedTools.push(...toolMatches.map(match => match.replace(/"/g, '')));
|
||||
}
|
||||
}
|
||||
|
||||
const conceptsMatch = jsonString.match(/"selectedConcepts"\s*:\s*\[([\s\S]*?)\]/i);
|
||||
if (conceptsMatch) {
|
||||
const conceptMatches = conceptsMatch[1].match(/"([^"]+)"/g);
|
||||
if (conceptMatches) {
|
||||
selectedConcepts.push(...conceptMatches.map(match => match.replace(/"/g, '')));
|
||||
}
|
||||
}
|
||||
|
||||
if (selectedTools.length === 0 && selectedConcepts.length === 0) {
|
||||
const allMatches = jsonString.match(/"([^"]+)"/g);
|
||||
if (allMatches) {
|
||||
const possibleNames = allMatches
|
||||
.map(match => match.replace(/"/g, ''))
|
||||
.filter(name =>
|
||||
name.length > 2 &&
|
||||
!['selectedTools', 'selectedConcepts', 'reasoning'].includes(name) &&
|
||||
!name.includes(':') &&
|
||||
!name.match(/^\d+$/)
|
||||
)
|
||||
.slice(0, 15);
|
||||
|
||||
selectedTools.push(...possibleNames);
|
||||
}
|
||||
}
|
||||
|
||||
return { selectedTools, selectedConcepts };
|
||||
}
|
||||
|
||||
static secureParseJSON(jsonString: string, maxSize: number = 10 * 1024 * 1024): any {
|
||||
if (typeof jsonString !== 'string') {
|
||||
throw new Error('Input must be a string');
|
||||
}
|
||||
|
||||
if (jsonString.length > maxSize) {
|
||||
throw new Error(`JSON string too large (${jsonString.length} bytes, max ${maxSize})`);
|
||||
}
|
||||
|
||||
const suspiciousPatterns = [
|
||||
/<script/i,
|
||||
/javascript:/i,
|
||||
/eval\(/i,
|
||||
/function\s*\(/i,
|
||||
/__proto__/i,
|
||||
/constructor/i
|
||||
];
|
||||
|
||||
for (const pattern of suspiciousPatterns) {
|
||||
if (pattern.test(jsonString)) {
|
||||
throw new Error('Potentially malicious content detected in JSON');
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
const parsed = JSON.parse(jsonString);
|
||||
|
||||
if (typeof parsed !== 'object' || parsed === null) {
|
||||
throw new Error('JSON must be an object');
|
||||
}
|
||||
|
||||
return parsed;
|
||||
} catch (error) {
|
||||
if (error instanceof SyntaxError) {
|
||||
throw new Error(`Invalid JSON syntax: ${error.message}`);
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
static sanitizeForAudit(obj: any, maxDepth: number = 5, currentDepth: number = 0): any {
|
||||
if (currentDepth >= maxDepth) {
|
||||
return '[Max depth reached]';
|
||||
}
|
||||
|
||||
if (obj === null || obj === undefined) {
|
||||
return obj;
|
||||
}
|
||||
|
||||
if (typeof obj === 'string') {
|
||||
if (obj.length > 500) {
|
||||
return obj.slice(0, 500) + '...[truncated]';
|
||||
}
|
||||
return obj.replace(/<script[\s\S]*?<\/script>/gi, '[script removed]');
|
||||
}
|
||||
|
||||
if (typeof obj === 'number' || typeof obj === 'boolean') {
|
||||
return obj;
|
||||
}
|
||||
|
||||
if (Array.isArray(obj)) {
|
||||
if (obj.length > 20) {
|
||||
return [
|
||||
...obj.slice(0, 20).map(item => this.sanitizeForAudit(item, maxDepth, currentDepth + 1)),
|
||||
`...[${obj.length - 20} more items]`
|
||||
];
|
||||
}
|
||||
return obj.map(item => this.sanitizeForAudit(item, maxDepth, currentDepth + 1));
|
||||
}
|
||||
|
||||
if (typeof obj === 'object') {
|
||||
const keys = Object.keys(obj);
|
||||
if (keys.length > 50) {
|
||||
const sanitized: any = {};
|
||||
keys.slice(0, 50).forEach(key => {
|
||||
sanitized[key] = this.sanitizeForAudit(obj[key], maxDepth, currentDepth + 1);
|
||||
});
|
||||
sanitized['[truncated]'] = `${keys.length - 50} more properties`;
|
||||
return sanitized;
|
||||
}
|
||||
|
||||
const sanitized: any = {};
|
||||
keys.forEach(key => {
|
||||
if (['__proto__', 'constructor', 'prototype'].includes(key)) {
|
||||
return;
|
||||
}
|
||||
sanitized[key] = this.sanitizeForAudit(obj[key], maxDepth, currentDepth + 1);
|
||||
});
|
||||
return sanitized;
|
||||
}
|
||||
|
||||
return String(obj);
|
||||
}
|
||||
|
||||
static validateAuditExportStructure(data: any): { isValid: boolean; errors: string[] } {
|
||||
const errors: string[] = [];
|
||||
|
||||
if (!data || typeof data !== 'object') {
|
||||
errors.push('Export data must be an object');
|
||||
return { isValid: false, errors };
|
||||
}
|
||||
|
||||
const requiredProps = ['metadata', 'recommendation', 'auditTrail'];
|
||||
for (const prop of requiredProps) {
|
||||
if (!(prop in data)) {
|
||||
errors.push(`Missing required property: ${prop}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (data.metadata && typeof data.metadata === 'object') {
|
||||
const requiredMetadataProps = ['timestamp', 'version', 'userQuery', 'mode'];
|
||||
for (const prop of requiredMetadataProps) {
|
||||
if (!(prop in data.metadata)) {
|
||||
errors.push(`Missing required metadata property: ${prop}`);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
errors.push('Invalid metadata structure');
|
||||
}
|
||||
|
||||
if (!Array.isArray(data.auditTrail)) {
|
||||
errors.push('auditTrail must be an array');
|
||||
} else {
|
||||
data.auditTrail.forEach((entry: any, index: number) => {
|
||||
if (!entry || typeof entry !== 'object') {
|
||||
errors.push(`Audit entry ${index} is not a valid object`);
|
||||
return;
|
||||
}
|
||||
|
||||
const requiredEntryProps = ['timestamp', 'phase', 'action', 'confidence', 'processingTimeMs'];
|
||||
for (const prop of requiredEntryProps) {
|
||||
if (!(prop in entry)) {
|
||||
errors.push(`Audit entry ${index} missing required property: ${prop}`);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
isValid: errors.length === 0,
|
||||
errors
|
||||
};
|
||||
}
|
||||
|
||||
static prepareAuditExport(
|
||||
recommendation: any,
|
||||
userQuery: string,
|
||||
mode: string,
|
||||
auditTrail: any[] = [],
|
||||
additionalMetadata: any = {}
|
||||
): any {
|
||||
return {
|
||||
metadata: {
|
||||
timestamp: new Date().toISOString(),
|
||||
version: "1.0",
|
||||
userQuery: userQuery.slice(0, 1000),
|
||||
mode,
|
||||
exportedBy: 'ForensicPathways',
|
||||
toolsDataHash: additionalMetadata.toolsDataHash || 'unknown',
|
||||
aiModel: additionalMetadata.aiModel || 'unknown',
|
||||
aiParameters: additionalMetadata.aiParameters || {},
|
||||
processingStats: additionalMetadata.processingStats || {}
|
||||
},
|
||||
recommendation: this.sanitizeForAudit(recommendation, 6),
|
||||
auditTrail: auditTrail.map(entry => this.sanitizeForAudit(entry, 4)),
|
||||
rawContext: {
|
||||
selectedTools: additionalMetadata.selectedTools || [],
|
||||
backgroundKnowledge: additionalMetadata.backgroundKnowledge || [],
|
||||
contextHistory: additionalMetadata.contextHistory || [],
|
||||
embeddingsSimilarities: additionalMetadata.embeddingsSimilarities || {}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
static validateUploadedAnalysis(data: any): { isValid: boolean; issues: string[]; warnings: string[] } {
|
||||
const issues: string[] = [];
|
||||
const warnings: string[] = [];
|
||||
|
||||
const structureValidation = this.validateAuditExportStructure(data);
|
||||
if (!structureValidation.isValid) {
|
||||
issues.push(...structureValidation.errors);
|
||||
return { isValid: false, issues, warnings };
|
||||
}
|
||||
|
||||
if (data.metadata) {
|
||||
const timestamp = new Date(data.metadata.timestamp);
|
||||
if (isNaN(timestamp.getTime())) {
|
||||
warnings.push('Invalid timestamp in metadata');
|
||||
} else {
|
||||
const age = Date.now() - timestamp.getTime();
|
||||
const maxAge = 30 * 24 * 60 * 60 * 1000; // 30 days
|
||||
if (age > maxAge) {
|
||||
warnings.push(`Analysis is ${Math.floor(age / (24 * 60 * 60 * 1000))} days old`);
|
||||
}
|
||||
}
|
||||
|
||||
if (!['workflow', 'tool'].includes(data.metadata.mode)) {
|
||||
warnings.push(`Unknown analysis mode: ${data.metadata.mode}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (Array.isArray(data.auditTrail)) {
|
||||
const aiDecisions = data.auditTrail.filter(e => e.action === 'ai-decision').length;
|
||||
const toolSelections = data.auditTrail.filter(e => e.action === 'selection-decision').length;
|
||||
|
||||
if (aiDecisions === 0) {
|
||||
warnings.push('No AI decisions found in audit trail');
|
||||
}
|
||||
|
||||
if (toolSelections === 0) {
|
||||
warnings.push('No tool selections found in audit trail');
|
||||
}
|
||||
|
||||
const entriesWithConfidence = data.auditTrail.filter(e => typeof e.confidence === 'number').length;
|
||||
const confidenceRatio = entriesWithConfidence / data.auditTrail.length;
|
||||
|
||||
if (confidenceRatio < 0.8) {
|
||||
warnings.push(`Only ${Math.round(confidenceRatio * 100)}% of audit entries have confidence scores`);
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
isValid: issues.length === 0,
|
||||
issues,
|
||||
warnings
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,4 @@
|
||||
// src/utils/nextcloud.ts
|
||||
import { promises as fs } from 'fs';
|
||||
import path from 'path';
|
||||
import crypto from 'crypto';
|
||||
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
// src/utils/rateLimitedQueue.ts
|
||||
|
||||
import dotenv from "dotenv";
|
||||
|
||||
dotenv.config();
|
||||
|
||||
@@ -3,37 +3,44 @@ import { visit } from 'unist-util-visit';
|
||||
import type { Plugin } from 'unified';
|
||||
import type { Root } from 'hast';
|
||||
|
||||
function escapeHtml(unsafe: string): string {
|
||||
if (typeof unsafe !== 'string') return '';
|
||||
|
||||
return unsafe
|
||||
.replace(/&/g, "&")
|
||||
.replace(/</g, "<")
|
||||
.replace(/>/g, ">")
|
||||
.replace(/"/g, """)
|
||||
.replace(/'/g, "'");
|
||||
}
|
||||
|
||||
export const remarkVideoPlugin: Plugin<[], Root> = () => {
|
||||
return (tree: Root) => {
|
||||
visit(tree, 'html', (node: any, index: number | undefined, parent: any) => {
|
||||
if (node.value && node.value.includes('<video') && typeof index === 'number') {
|
||||
|
||||
const srcMatch = node.value.match(/src=["']([^"']+)["']/);
|
||||
const titleMatch = node.value.match(/title=["']([^"']+)["']/);
|
||||
|
||||
if (srcMatch) {
|
||||
const originalSrc = srcMatch[1];
|
||||
const title = titleMatch?.[1] || 'Video';
|
||||
|
||||
|
||||
const hasControls = node.value.includes('controls');
|
||||
const hasAutoplay = node.value.includes('autoplay');
|
||||
const hasMuted = node.value.includes('muted');
|
||||
const hasLoop = node.value.includes('loop');
|
||||
const hasPreload = node.value.match(/preload=["']([^"']+)["']/);
|
||||
const preloadMatch = node.value.match(/preload=["']([^"']+)["']/);
|
||||
|
||||
const enhancedHTML = `
|
||||
<div class="video-container aspect-16-9">
|
||||
<div class="video-container">
|
||||
<video
|
||||
src="${escapeHtml(originalSrc)}"
|
||||
${hasControls ? 'controls' : ''}
|
||||
${hasAutoplay ? 'autoplay' : ''}
|
||||
${hasMuted ? 'muted' : ''}
|
||||
${hasLoop ? 'loop' : ''}
|
||||
${hasPreload ? `preload="${hasPreload[1]}"` : 'preload="metadata"'}
|
||||
style="width: 100%; height: 100%;"
|
||||
${preloadMatch ? `preload="${preloadMatch[1]}"` : 'preload="metadata"'}
|
||||
data-video-title="${escapeHtml(title)}"
|
||||
data-original-src="${escapeHtml(originalSrc)}"
|
||||
>
|
||||
<p>Your browser does not support the video element.</p>
|
||||
</video>
|
||||
@@ -46,23 +53,31 @@ export const remarkVideoPlugin: Plugin<[], Root> = () => {
|
||||
`.trim();
|
||||
|
||||
parent.children[index] = { type: 'html', value: enhancedHTML };
|
||||
|
||||
console.log(`[VIDEO] Processed: ${title}`);
|
||||
console.log(`[VIDEO] Final URL: ${originalSrc}`);
|
||||
console.log(`[VIDEO] Enhanced: ${title} (${originalSrc})`);
|
||||
}
|
||||
}
|
||||
|
||||
if (node.value && node.value.includes('<iframe') && typeof index === 'number' && parent) {
|
||||
|
||||
if (node.value.includes('video-container')) {
|
||||
return;
|
||||
}
|
||||
|
||||
const titleMatch = node.value.match(/title=["']([^"']+)["']/);
|
||||
const title = titleMatch?.[1] || 'Embedded Video';
|
||||
|
||||
const enhancedHTML = `
|
||||
<div class="video-container">
|
||||
${node.value}
|
||||
</div>
|
||||
<div class="video-metadata">
|
||||
<div class="video-title">${escapeHtml(title)}</div>
|
||||
</div>
|
||||
`.trim();
|
||||
|
||||
parent.children[index] = { type: 'html', value: enhancedHTML };
|
||||
console.log(`[VIDEO] Enhanced iframe: ${title}`);
|
||||
}
|
||||
});
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
function escapeHtml(unsafe: string): string {
|
||||
if (typeof unsafe !== 'string') return '';
|
||||
|
||||
return unsafe
|
||||
.replace(/&/g, "&")
|
||||
.replace(/</g, "<")
|
||||
.replace(/>/g, ">")
|
||||
.replace(/"/g, """)
|
||||
.replace(/'/g, "'");
|
||||
}
|
||||
};
|
||||
@@ -1,22 +0,0 @@
|
||||
// src/utils/toolHelpers.ts
|
||||
|
||||
export interface Tool {
|
||||
name: string;
|
||||
type?: 'software' | 'method' | 'concept';
|
||||
projectUrl?: string | null;
|
||||
license?: string;
|
||||
knowledgebase?: boolean;
|
||||
domains?: string[];
|
||||
phases?: string[];
|
||||
platforms?: string[];
|
||||
skillLevel?: string;
|
||||
description?: string;
|
||||
tags?: string[];
|
||||
related_concepts?: string[];
|
||||
}
|
||||
|
||||
export {
|
||||
createToolSlug,
|
||||
findToolByIdentifier,
|
||||
isToolHosted
|
||||
} from './clientUtils.js';
|
||||
372
src/utils/toolSelector.ts
Normal file
372
src/utils/toolSelector.ts
Normal file
@@ -0,0 +1,372 @@
|
||||
// src/utils/toolSelector.ts
|
||||
import { aiService } from './aiService.js';
|
||||
import { embeddingsService, type SimilarityResult } from './embeddings.js';
|
||||
import { confidenceScoring } from './confidenceScoring.js';
|
||||
import { JSONParser } from './jsonUtils.js';
|
||||
import { getPrompt } from '../config/prompts.js';
|
||||
import 'dotenv/config';
|
||||
|
||||
export interface ToolSelectionConfig {
|
||||
maxSelectedItems: number;
|
||||
embeddingCandidates: number;
|
||||
similarityThreshold: number;
|
||||
embeddingSelectionLimit: number;
|
||||
embeddingConceptsLimit: number;
|
||||
embeddingsMinTools: number;
|
||||
embeddingsMaxReductionRatio: number;
|
||||
methodSelectionRatio: number;
|
||||
softwareSelectionRatio: number;
|
||||
}
|
||||
|
||||
|
||||
export interface SelectionContext {
|
||||
userQuery: string;
|
||||
mode: string;
|
||||
embeddingsSimilarities: Map<string, number>;
|
||||
seenToolNames: Set<string>;
|
||||
selectedTools?: Array<{
|
||||
tool: any;
|
||||
phase: string;
|
||||
priority: string;
|
||||
justification?: string;
|
||||
taskRelevance?: number;
|
||||
limitations?: string[];
|
||||
}>;
|
||||
}
|
||||
|
||||
export interface ToolSelectionResult {
|
||||
selectedTools: any[];
|
||||
selectedConcepts: any[];
|
||||
confidence: number;
|
||||
}
|
||||
|
||||
class ToolSelector {
|
||||
private config: ToolSelectionConfig;
|
||||
|
||||
constructor() {
|
||||
this.config = {
|
||||
maxSelectedItems: this.getEnvInt('AI_MAX_SELECTED_ITEMS', 25),
|
||||
embeddingCandidates: this.getEnvInt('AI_EMBEDDING_CANDIDATES', 50),
|
||||
similarityThreshold: this.getEnvFloat('AI_SIMILARITY_THRESHOLD', 0.3),
|
||||
embeddingSelectionLimit: this.getEnvInt('AI_EMBEDDING_SELECTION_LIMIT', 30),
|
||||
embeddingConceptsLimit: this.getEnvInt('AI_EMBEDDING_CONCEPTS_LIMIT', 15),
|
||||
embeddingsMinTools: this.getEnvInt('AI_EMBEDDINGS_MIN_TOOLS', 8),
|
||||
embeddingsMaxReductionRatio: this.getEnvFloat('AI_EMBEDDINGS_MAX_REDUCTION_RATIO', 0.75),
|
||||
methodSelectionRatio: this.getEnvFloat('AI_METHOD_SELECTION_RATIO', 0.4),
|
||||
softwareSelectionRatio: this.getEnvFloat('AI_SOFTWARE_SELECTION_RATIO', 0.5),
|
||||
};
|
||||
console.log('[TOOL-SELECTOR] Initialized with config:', this.config);
|
||||
}
|
||||
|
||||
private getEnvInt(key: string, defaultValue: number): number {
|
||||
const value = process.env[key];
|
||||
return value ? parseInt(value, 10) : defaultValue;
|
||||
}
|
||||
|
||||
private getEnvFloat(key: string, defaultValue: number): number {
|
||||
const value = process.env[key];
|
||||
return value ? parseFloat(value) : defaultValue;
|
||||
}
|
||||
|
||||
async getIntelligentCandidates(
|
||||
userQuery: string,
|
||||
toolsData: any,
|
||||
mode: string,
|
||||
context: SelectionContext
|
||||
): Promise<{
|
||||
tools: any[];
|
||||
concepts: any[];
|
||||
domains: any[];
|
||||
phases: any[];
|
||||
'domain-agnostic-software': any[];
|
||||
}> {
|
||||
console.log('[TOOL-SELECTOR] Getting intelligent candidates for query');
|
||||
|
||||
let candidateTools: any[] = [];
|
||||
let candidateConcepts: any[] = [];
|
||||
|
||||
context.embeddingsSimilarities.clear();
|
||||
|
||||
try {
|
||||
await embeddingsService.waitForInitialization();
|
||||
} catch (error) {
|
||||
console.error('[TOOL-SELECTOR] Embeddings initialization failed:', error);
|
||||
}
|
||||
|
||||
console.log('[TOOL-SELECTOR] Using embeddings for candidate selection');
|
||||
|
||||
const embeddingsSearchStart = Date.now();
|
||||
|
||||
const similarItems = await embeddingsService.findSimilar(
|
||||
userQuery,
|
||||
this.config.embeddingCandidates,
|
||||
this.config.similarityThreshold
|
||||
) as SimilarityResult[];
|
||||
|
||||
console.log('[TOOL-SELECTOR] Embeddings found', similarItems.length, 'similar items');
|
||||
|
||||
const { auditService } = await import('./auditService.js');
|
||||
const { getDataVersion } = await import('./dataService.js');
|
||||
|
||||
const toolsDataHash = getDataVersion() || 'unknown';
|
||||
|
||||
auditService.addEmbeddingsSearch(
|
||||
userQuery,
|
||||
similarItems,
|
||||
this.config.similarityThreshold,
|
||||
embeddingsSearchStart,
|
||||
{
|
||||
toolsDataHash: toolsDataHash,
|
||||
selectionPhase: 'initial-candidate-selection',
|
||||
candidateLimit: this.config.embeddingCandidates,
|
||||
mode: mode,
|
||||
reasoning: `Initiale semantische Suche für ${mode}-Modus - Reduzierung der ${toolsData.tools.length} verfügbaren Tools auf ${similarItems.length} relevante Kandidaten`
|
||||
}
|
||||
);
|
||||
|
||||
similarItems.forEach(item => {
|
||||
context.embeddingsSimilarities.set(item.name, item.similarity);
|
||||
});
|
||||
|
||||
const toolsMap = new Map(toolsData.tools.map((tool: any) => [tool.name, tool]));
|
||||
const conceptsMap = new Map(toolsData.concepts.map((concept: any) => [concept.name, concept]));
|
||||
|
||||
const similarTools = similarItems
|
||||
.filter((item: any) => item.type === 'tool')
|
||||
.map((item: any) => toolsMap.get(item.name))
|
||||
.filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null);
|
||||
|
||||
const similarConcepts = similarItems
|
||||
.filter((item: any) => item.type === 'concept')
|
||||
.map((item: any) => conceptsMap.get(item.name))
|
||||
.filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null);
|
||||
|
||||
const totalAvailableTools = toolsData.tools.length;
|
||||
const reductionRatio = similarTools.length / totalAvailableTools;
|
||||
|
||||
if (similarTools.length >= this.config.embeddingsMinTools && reductionRatio <= this.config.embeddingsMaxReductionRatio) {
|
||||
candidateTools = similarTools;
|
||||
candidateConcepts = similarConcepts;
|
||||
|
||||
console.log('[TOOL-SELECTOR] Using embeddings filtering:', totalAvailableTools, '→', similarTools.length, 'tools');
|
||||
} else {
|
||||
console.log('[TOOL-SELECTOR] Embeddings filtering insufficient, using full dataset');
|
||||
candidateTools = toolsData.tools;
|
||||
candidateConcepts = toolsData.concepts;
|
||||
}
|
||||
|
||||
const selection = await this.performAISelection(
|
||||
userQuery,
|
||||
candidateTools,
|
||||
candidateConcepts,
|
||||
mode,
|
||||
context
|
||||
);
|
||||
|
||||
return {
|
||||
tools: selection.selectedTools,
|
||||
concepts: selection.selectedConcepts,
|
||||
domains: toolsData.domains,
|
||||
phases: toolsData.phases,
|
||||
'domain-agnostic-software': toolsData['domain-agnostic-software']
|
||||
};
|
||||
}
|
||||
|
||||
private async performAISelection(
|
||||
userQuery: string,
|
||||
candidateTools: any[],
|
||||
candidateConcepts: any[],
|
||||
mode: string,
|
||||
context: SelectionContext
|
||||
): Promise<ToolSelectionResult> {
|
||||
console.log('[TOOL-SELECTOR] Performing AI selection');
|
||||
|
||||
const candidateMethods = candidateTools.filter((t: any) => t && t.type === 'method');
|
||||
const candidateSoftware = candidateTools.filter((t: any) => t && t.type === 'software');
|
||||
|
||||
console.log('[TOOL-SELECTOR] Candidates:',
|
||||
candidateMethods.length, 'methods,',
|
||||
candidateSoftware.length, 'software,',
|
||||
candidateConcepts.length, 'concepts'
|
||||
);
|
||||
|
||||
const methodsWithFullData = candidateMethods.map(this.createToolData);
|
||||
const softwareWithFullData = candidateSoftware.map(this.createToolData);
|
||||
const conceptsWithFullData = candidateConcepts.map(this.createConceptData);
|
||||
|
||||
const maxTools = Math.min(this.config.embeddingSelectionLimit, candidateTools.length);
|
||||
const maxConcepts = Math.min(this.config.embeddingConceptsLimit, candidateConcepts.length);
|
||||
|
||||
const methodRatio = Math.max(0, Math.min(1, this.config.methodSelectionRatio));
|
||||
const softwareRatio = Math.max(0, Math.min(1, this.config.softwareSelectionRatio));
|
||||
|
||||
let methodLimit = Math.round(maxTools * methodRatio);
|
||||
let softwareLimit = Math.round(maxTools * softwareRatio);
|
||||
|
||||
if (methodLimit + softwareLimit > maxTools) {
|
||||
const scale = maxTools / (methodLimit + softwareLimit);
|
||||
methodLimit = Math.floor(methodLimit * scale);
|
||||
softwareLimit = Math.floor(softwareLimit * scale);
|
||||
}
|
||||
|
||||
const methodsPrimary = methodsWithFullData.slice(0, methodLimit);
|
||||
const softwarePrimary = softwareWithFullData.slice(0, softwareLimit);
|
||||
|
||||
const toolsToSend: any[] = [...methodsPrimary, ...softwarePrimary];
|
||||
|
||||
let mIdx = methodsPrimary.length;
|
||||
let sIdx = softwarePrimary.length;
|
||||
|
||||
while (toolsToSend.length < maxTools && (mIdx < methodsWithFullData.length || sIdx < softwareWithFullData.length)) {
|
||||
const remM = methodsWithFullData.length - mIdx;
|
||||
const remS = softwareWithFullData.length - sIdx;
|
||||
|
||||
if (remS >= remM && sIdx < softwareWithFullData.length) {
|
||||
toolsToSend.push(softwareWithFullData[sIdx++]);
|
||||
} else if (mIdx < methodsWithFullData.length) {
|
||||
toolsToSend.push(methodsWithFullData[mIdx++]);
|
||||
} else if (sIdx < softwareWithFullData.length) {
|
||||
toolsToSend.push(softwareWithFullData[sIdx++]);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);
|
||||
|
||||
console.log('[TOOL-SELECTOR-DEBUG] maxTools:', maxTools, 'maxConcepts:', maxConcepts);
|
||||
console.log('[TOOL-SELECTOR] Sending to AI:',
|
||||
toolsToSend.filter((t: any) => t.type === 'method').length, 'methods,',
|
||||
toolsToSend.filter((t: any) => t.type === 'software').length, 'software,',
|
||||
conceptsToSend.length, 'concepts'
|
||||
);
|
||||
|
||||
const basePrompt = getPrompt('toolSelection', mode, userQuery, this.config.maxSelectedItems);
|
||||
const prompt = getPrompt('toolSelectionWithData', basePrompt, toolsToSend, conceptsToSend);
|
||||
|
||||
try {
|
||||
const response = await aiService.callAI(prompt);
|
||||
const result = JSONParser.safeParseJSON(response.content, null);
|
||||
|
||||
if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
|
||||
console.error('[TOOL-SELECTOR] AI selection returned invalid structure');
|
||||
throw new Error('AI selection failed to return valid tool and concept selection');
|
||||
}
|
||||
|
||||
const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
|
||||
if (totalSelected === 0) {
|
||||
throw new Error('AI selection returned empty selection');
|
||||
}
|
||||
|
||||
const toolsMap = new Map(candidateTools.map((tool: any) => [tool.name, tool]));
|
||||
const conceptsMap = new Map(candidateConcepts.map((concept: any) => [concept.name, concept]));
|
||||
|
||||
const selectedTools = result.selectedTools
|
||||
.map((name: string) => toolsMap.get(name))
|
||||
.filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null);
|
||||
|
||||
const selectedConcepts = result.selectedConcepts
|
||||
.map((name: string) => conceptsMap.get(name))
|
||||
.filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null);
|
||||
|
||||
const selectedMethods = selectedTools.filter((t: any) => t && t.type === 'method');
|
||||
const selectedSoftware = selectedTools.filter((t: any) => t && t.type === 'software');
|
||||
|
||||
console.log('[TOOL-SELECTOR] AI selected:',
|
||||
selectedMethods.length, 'methods,',
|
||||
selectedSoftware.length, 'software,',
|
||||
selectedConcepts.length, 'concepts'
|
||||
);
|
||||
|
||||
const confidence = confidenceScoring.calculateSelectionConfidence(
|
||||
result,
|
||||
candidateTools.length + candidateConcepts.length
|
||||
);
|
||||
|
||||
return { selectedTools, selectedConcepts, confidence };
|
||||
} catch (error) {
|
||||
console.error('[TOOL-SELECTOR] AI selection failed:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
async selectToolsForPhase(
|
||||
userQuery: string,
|
||||
phase: any,
|
||||
availableTools: any[],
|
||||
context: SelectionContext
|
||||
): Promise<Array<{
|
||||
toolName: string;
|
||||
taskRelevance: number;
|
||||
justification: string;
|
||||
limitations: string[];
|
||||
}>> {
|
||||
console.log('[TOOL-SELECTOR] Selecting tools for phase:', phase.id);
|
||||
|
||||
if (availableTools.length === 0) {
|
||||
console.log('[TOOL-SELECTOR] No tools available for phase:', phase.id);
|
||||
return [];
|
||||
}
|
||||
|
||||
const prompt = getPrompt('phaseToolSelection', userQuery, phase, availableTools);
|
||||
|
||||
try {
|
||||
const response = await aiService.callMicroTaskAI(prompt);
|
||||
const selections = JSONParser.safeParseJSON(response.content, []);
|
||||
|
||||
if (Array.isArray(selections)) {
|
||||
const validSelections = selections.filter((sel: any) => {
|
||||
const matchingTool = availableTools.find((tool: any) => tool && tool.name === sel.toolName);
|
||||
if (!matchingTool) {
|
||||
console.warn('[TOOL-SELECTOR] Invalid tool selection for phase:', phase.id, sel.toolName);
|
||||
}
|
||||
return !!matchingTool;
|
||||
});
|
||||
|
||||
console.log('[TOOL-SELECTOR] Valid selections for phase:', phase.id, validSelections.length);
|
||||
return validSelections;
|
||||
}
|
||||
|
||||
return [];
|
||||
|
||||
} catch (error) {
|
||||
console.error('[TOOL-SELECTOR] Phase tool selection failed:', error);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
private createToolData = (tool: any) => ({
|
||||
name: tool.name,
|
||||
type: tool.type,
|
||||
description: tool.description,
|
||||
domains: tool.domains,
|
||||
phases: tool.phases,
|
||||
platforms: tool.platforms || [],
|
||||
tags: tool.tags || [],
|
||||
skillLevel: tool.skillLevel,
|
||||
license: tool.license,
|
||||
accessType: tool.accessType,
|
||||
projectUrl: tool.projectUrl,
|
||||
knowledgebase: tool.knowledgebase,
|
||||
related_concepts: tool.related_concepts || [],
|
||||
related_software: tool.related_software || []
|
||||
});
|
||||
|
||||
private createConceptData = (concept: any) => ({
|
||||
name: concept.name,
|
||||
type: 'concept',
|
||||
description: concept.description,
|
||||
domains: concept.domains,
|
||||
phases: concept.phases,
|
||||
tags: concept.tags || [],
|
||||
skillLevel: concept.skillLevel,
|
||||
related_concepts: concept.related_concepts || [],
|
||||
related_software: concept.related_software || []
|
||||
});
|
||||
|
||||
getConfig(): ToolSelectionConfig {
|
||||
return { ...this.config };
|
||||
}
|
||||
}
|
||||
|
||||
export const toolSelector = new ToolSelector();
|
||||
@@ -1,115 +0,0 @@
|
||||
// src/utils/videoUtils.ts - SIMPLIFIED - Basic utilities only
|
||||
import 'dotenv/config';
|
||||
|
||||
|
||||
export interface SimpleVideoMetadata {
|
||||
title?: string;
|
||||
description?: string;
|
||||
}
|
||||
|
||||
export function getVideoMimeType(url: string): string {
|
||||
let extension: string | undefined;
|
||||
try {
|
||||
const pathname = new URL(url).pathname;
|
||||
extension = pathname.split('.').pop()?.toLowerCase();
|
||||
} catch {
|
||||
extension = url.split('?')[0].split('.').pop()?.toLowerCase();
|
||||
}
|
||||
|
||||
const mimeTypes: Record<string, string> = {
|
||||
mp4: 'video/mp4',
|
||||
webm: 'video/webm',
|
||||
ogg: 'video/ogg',
|
||||
mov: 'video/quicktime',
|
||||
avi: 'video/x-msvideo',
|
||||
m4v: 'video/m4v',
|
||||
mkv: 'video/x-matroska',
|
||||
flv: 'video/x-flv'
|
||||
};
|
||||
|
||||
return (extension && mimeTypes[extension]) || 'video/mp4';
|
||||
}
|
||||
|
||||
export function formatDuration(seconds: number): string {
|
||||
const hours = Math.floor(seconds / 3600);
|
||||
const minutes = Math.floor((seconds % 3600) / 60);
|
||||
const remainingSeconds = Math.floor(seconds % 60);
|
||||
|
||||
if (hours > 0) {
|
||||
return `${hours}:${minutes.toString().padStart(2, '0')}:${remainingSeconds.toString().padStart(2, '0')}`;
|
||||
}
|
||||
|
||||
return `${minutes}:${remainingSeconds.toString().padStart(2, '0')}`;
|
||||
}
|
||||
|
||||
export function formatFileSize(bytes: number): string {
|
||||
if (bytes < 1024) return `${bytes} B`;
|
||||
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
|
||||
if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
|
||||
return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
|
||||
}
|
||||
|
||||
export function escapeHtml(unsafe: string): string {
|
||||
if (typeof unsafe !== 'string') return '';
|
||||
|
||||
return unsafe
|
||||
.replace(/&/g, "&")
|
||||
.replace(/</g, "<")
|
||||
.replace(/>/g, ">")
|
||||
.replace(/"/g, """)
|
||||
.replace(/'/g, "'");
|
||||
}
|
||||
|
||||
export function generateVideoHTML(
|
||||
src: string,
|
||||
options: {
|
||||
title?: string;
|
||||
controls?: boolean;
|
||||
autoplay?: boolean;
|
||||
muted?: boolean;
|
||||
loop?: boolean;
|
||||
preload?: 'none' | 'metadata' | 'auto';
|
||||
aspectRatio?: '16:9' | '4:3' | '1:1';
|
||||
showMetadata?: boolean;
|
||||
} = {}
|
||||
): string {
|
||||
const {
|
||||
title = 'Video',
|
||||
controls = true,
|
||||
autoplay = false,
|
||||
muted = false,
|
||||
loop = false,
|
||||
preload = 'metadata',
|
||||
aspectRatio = '16:9',
|
||||
showMetadata = true
|
||||
} = options;
|
||||
|
||||
const aspectClass = `aspect-${aspectRatio.replace(':', '-')}`;
|
||||
const videoAttributes = [
|
||||
controls ? 'controls' : '',
|
||||
autoplay ? 'autoplay' : '',
|
||||
muted ? 'muted' : '',
|
||||
loop ? 'loop' : '',
|
||||
`preload="${preload}"`
|
||||
].filter(Boolean).join(' ');
|
||||
|
||||
const metadataHTML = showMetadata && title !== 'Video' ? `
|
||||
<div class="video-metadata">
|
||||
<div class="video-title">${escapeHtml(title)}</div>
|
||||
</div>
|
||||
` : '';
|
||||
|
||||
return `
|
||||
<div class="video-container ${aspectClass}">
|
||||
<video
|
||||
src="${escapeHtml(src)}"
|
||||
${videoAttributes}
|
||||
style="width: 100%; height: 100%;"
|
||||
data-video-title="${escapeHtml(title)}"
|
||||
>
|
||||
<p>Your browser does not support the video element.</p>
|
||||
</video>
|
||||
${metadataHTML}
|
||||
</div>
|
||||
`.trim();
|
||||
}
|
||||
Reference in New Issue
Block a user