Compare commits
67 Commits
d6760d0f84...main

| Author | SHA1 | Date |
|---|---|---|
| | bdee77f459 | |
| | 8a6d9d3324 | |
| | dc9f52fb7c | |
| | b17458d153 | |
| | b14ca1d243 | |
| | 4ee1cc4984 | |
| | bbe1b12251 | |
| | d569b74a20 | |
| | a2d3d3170a | |
| | 3823407d49 | |
| | 496f2a5b43 | |
| | 20a4c71d02 | |
| | dad5e5ea0c | |
| | b689f24502 | |
| | 630fc1643e | |
| | 1d750307c4 | |
| | 05d957324a | |
| | 6160620e24 | |
| | 1d91dbf478 | |
| | 76694e003c | |
| | 28af56d6ef | |
| | 3d5d2506e9 | |
| | 6b09eb062f | |
| | 70fb012d63 | |
| | 2cb25d1dd6 | |
| | bcd92af8a0 | |
| | 5ecbabea90 | |
| | 07c8f707df | |
| | e63ec367a5 | |
| | 5c3c308225 | |
| | dd26d45a21 | |
| | afbd8d2cd3 | |
| | 8bba0eefa9 | |
| | 170638a5fa | |
| | c60730b4aa | |
| | b9964685f9 | |
| | 5d72549bb7 | |
| | 15d302031e | |
| | 48209c4639 | |
| | 6d08dbdcd0 | |
| | 77f09ed399 | |
| | 0c7c502b03 | |
| | 1d98dd3257 | |
| | 3ad0d8120a | |
| | 88cf682790 | |
| | 182b9d01f9 | |
| | 12368ed7c8 | |
| | c4c52f6064 | |
| | e93f394263 | |
| | 75410e2b84 | |
| | 88e79d7780 | |
| | 8283b71b8c | |
| | b630668897 | |
| | 479075e485 | |
| | b6b3dfce8d | |
| | 9c2e43af22 | |
| | 6656c28ae0 | |
| | 6e9b7b4ea1 | |
| | be76f2be5a | |
| | 4fd257cbd6 | |
| | d1c297189d | |
| | e8daa37d08 | |
| | 27b94edcfa | |
| | b291492e2d | |
| | 0e3d654a58 | |
| | 2d920391ad | |
| | f159f904f0 | |
File diff suppressed because one or more lines are too long
@@ -1,5 +1,5 @@
{
  "_variables": {
    "lastUpdateCheck": 1754571688630
    "lastUpdateCheck": 1755901660216
  }
}
.env.example
23
.env.example
@@ -59,8 +59,7 @@ FORENSIC_AUDIT_RETENTION_HOURS=24
FORENSIC_AUDIT_MAX_ENTRIES=50

# === AI SEMANTIC SEARCH ===
# Enable semantic search (highly recommended for better results)
AI_EMBEDDINGS_ENABLED=true
# semantic search
AI_EMBEDDINGS_ENDPOINT=https://api.mistral.ai/v1/embeddings
AI_EMBEDDINGS_API_KEY=your-embeddings-api-key-here
AI_EMBEDDINGS_MODEL=mistral-embed
@@ -68,6 +67,16 @@ AI_EMBEDDINGS_MODEL=mistral-embed
# User rate limiting (queries per minute)
AI_RATE_LIMIT_MAX_REQUESTS=4

# ============================================================================
# CACHING BEHAVIOR
# ============================================================================
# - Videos downloaded once, cached permanently
# - No time-based expiration
# - Dramatically improves loading times after first download
# - Emergency cleanup only when approaching disk space limit
# - Perfect for manually curated forensics training content
# ============================================================================

# ============================================================================
# 🎛️ PERFORMANCE TUNING - SENSIBLE DEFAULTS PROVIDED
# ============================================================================
@@ -91,17 +100,11 @@ AI_SOFTWARE_SELECTION_RATIO=0.5 # 50% software tools (increase for more tool re

# AI selection limits
AI_MAX_SELECTED_ITEMS=25
AI_MAX_TOOLS_TO_ANALYZE=20
AI_MAX_CONCEPTS_TO_ANALYZE=10

# Efficiency thresholds
AI_EMBEDDINGS_MIN_TOOLS=8
AI_EMBEDDINGS_MAX_REDUCTION_RATIO=0.75

# Fallback limits when embeddings are disabled
AI_NO_EMBEDDINGS_TOOL_LIMIT=25
AI_NO_EMBEDDINGS_CONCEPT_LIMIT=10

# === Rate Limiting & Timing ===
AI_MICRO_TASK_TOTAL_LIMIT=30
AI_MICRO_TASK_DELAY_MS=500
@@ -111,10 +114,6 @@ AI_RATE_LIMIT_DELAY_MS=2000
AI_EMBEDDINGS_BATCH_SIZE=10
AI_EMBEDDINGS_BATCH_DELAY_MS=1000

# === Context Management ===
AI_MAX_CONTEXT_TOKENS=4000
AI_MAX_PROMPT_TOKENS=2500

# === Confidence Scoring ===
CONFIDENCE_SEMANTIC_WEIGHT=0.5
CONFIDENCE_SUITABILITY_WEIGHT=0.5
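These AI_* settings are plain environment variables. As a minimal sketch (assuming the dotenv package already listed in package.json; the helper below is illustrative, not the project's actual config loader), a Node module could surface them like this:

```js
// ai-config-sketch.mjs — illustrative only, not the repository's real loader
import 'dotenv/config';

// Parse an integer env var, falling back to a default when unset or invalid.
const toInt = (value, fallback) => {
  const n = Number.parseInt(value ?? '', 10);
  return Number.isNaN(n) ? fallback : n;
};

export const aiConfig = {
  embeddingsEnabled: process.env.AI_EMBEDDINGS_ENABLED === 'true',
  embeddingsEndpoint: process.env.AI_EMBEDDINGS_ENDPOINT,
  embeddingsModel: process.env.AI_EMBEDDINGS_MODEL,
  rateLimitMaxRequests: toInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS, 4),
  maxSelectedItems: toInt(process.env.AI_MAX_SELECTED_ITEMS, 25),
  maxContextTokens: toInt(process.env.AI_MAX_CONTEXT_TOKENS, 4000),
  confidenceSemanticWeight: Number(process.env.CONFIDENCE_SEMANTIC_WEIGHT ?? 0.5),
};
```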
3 .gitignore (vendored)
@@ -87,3 +87,6 @@ temp/
.astro/content.d.ts
prompt.md
.astro/settings.json

src/content/knowledgebase/
public/videos
16 README.md
@@ -250,10 +250,26 @@ Your article content here...
2. Run the installer
3. Configure settings

## Video Demonstration
<video src="/videos/setup-tutorial.mp4" title="Setup-Tutorial" controls></video>

## Common Issues
Solutions to typical problems...
```

### Video Integration

Knowledgebase articles support embedded videos for practical demonstrations:

```html
<video src="/videos/demo.mp4" title="Tool-Demonstration" controls></video>
```

**Important notes**:
- Videos must be provided manually in `public/videos/` (they are not part of the Git repository)
- Use Firefox-compatible formats (MP4 H.264, WebM VP9)
- Detailed video documentation: see `src/content/knowledgebase/README.md`

### Article Structure Guidelines

**Required fields**:
@@ -1,5 +1,6 @@
import { defineConfig } from 'astro/config';
import node from '@astrojs/node';
import { remarkVideoPlugin } from './src/utils/remarkVideoPlugin.ts';

export default defineConfig({
  output: 'server',
@@ -7,6 +8,13 @@ export default defineConfig({
    mode: 'standalone'
  }),

  markdown: {
    remarkPlugins: [
      remarkVideoPlugin
    ],
    extendDefaultPlugins: true
  },

  build: {
    assets: '_astro'
  },
@@ -16,4 +24,4 @@ export default defineConfig({
    host: true
  },
  allowImportingTsExtensions: true
});
});
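The markdown block above wires a remarkVideoPlugin into Astro's remark pipeline; the plugin's own source (src/utils/remarkVideoPlugin.ts) is not part of this diff. As a rough sketch of what such a plugin typically does — assumed behavior, not the project's actual implementation — it could rewrite paragraphs that contain only a /videos/... path into the <video> element described in the README:

```js
// remark-video-plugin-sketch.mjs — assumed shape, for illustration only
const VIDEO_PATH = /^\/videos\/\S+\.(mp4|webm|ogv)$/;

export function remarkVideoPlugin() {
  // A remark plugin returns a transformer that walks and mutates the mdast tree.
  const transform = (node) => {
    if (!node || !Array.isArray(node.children)) return;
    node.children = node.children.map((child) => {
      const only = child.children?.length === 1 ? child.children[0] : null;
      if (child.type === 'paragraph' && only?.type === 'text' && VIDEO_PATH.test(only.value.trim())) {
        const src = only.value.trim();
        // Replace the paragraph with a raw HTML <video> element.
        return { type: 'html', value: `<video src="${src}" controls></video>` };
      }
      transform(child);
      return child;
    });
  };
  return (tree) => transform(tree);
}
```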
381146 data/embeddings.json
File diff suppressed because it is too large
83 embedding-test-config.json (new file)
@@ -0,0 +1,83 @@
{
  "toolsYamlPath": "./src/data/tools.yaml",
  "models": [
    {
      "name": "granite-embedding:278m",
      "type": "ollama",
      "endpoint": "http://192.168.178.100:11434/api/embeddings",
      "rateLimit": false,
      "contextSize": 512
    },
    {
      "name": "paraphrase-multilingual:latest",
      "type": "ollama",
      "endpoint": "http://192.168.178.100:11434/api/embeddings",
      "rateLimit": false,
      "contextSize": 128
    },
    {
      "name": "bge-large:latest",
      "type": "ollama",
      "endpoint": "http://192.168.178.100:11434/api/embeddings",
      "rateLimit": false,
      "contextSize": 512
    },
    {
      "name": "snowflake-arctic-embed2:latest",
      "type": "ollama",
      "endpoint": "http://192.168.178.100:11434/api/embeddings",
      "rateLimit": false,
      "contextSize": 8192
    },
    {
      "name": "snowflake-arctic-embed:latest",
      "type": "ollama",
      "endpoint": "http://192.168.178.100:11434/api/embeddings",
      "rateLimit": false,
      "contextSize": 512
    },
    {
      "name": "all-minilm:latest",
      "type": "ollama",
      "endpoint": "http://192.168.178.100:11434/api/embeddings",
      "rateLimit": false,
      "contextSize": 256
    },
    {
      "name": "bge-m3:latest",
      "type": "ollama",
      "endpoint": "http://192.168.178.100:11434/api/embeddings",
      "rateLimit": false,
      "contextSize": 8192
    },
    {
      "name": "mxbai-embed-large:latest",
      "type": "ollama",
      "endpoint": "http://192.168.178.100:11434/api/embeddings",
      "rateLimit": false,
      "contextSize": 512
    },
    {
      "name": "nomic-embed-text:latest",
      "type": "ollama",
      "endpoint": "http://192.168.178.100:11434/api/embeddings",
      "rateLimit": false,
      "contextSize": 2048
    },
    {
      "name": "mistral-embed",
      "type": "mistral",
      "endpoint": "https://api.mistral.ai/v1/embeddings",
      "apiKey": "${AI_EMBEDDINGS_API_KEY}",
      "rateLimit": true,
      "rateLimitDelayMs": 2000,
      "contextSize": 8192
    }
  ],
  "testSettings": {
    "maxToolsPerCategory": 6,
    "maxNegativeExamples": 4,
    "contextSizeTests": true,
    "performanceIterations": 3
  }
}
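All Ollama entries above point at the same local /api/embeddings endpoint. A minimal single-request sketch (request and response shape taken from the comparison script below, which posts { model, prompt } and reads data.embedding; the host is the one configured above):

```js
// ollama-embed-sketch.mjs — one-off request against a model from the config
const endpoint = 'http://192.168.178.100:11434/api/embeddings';

async function embed(model, text) {
  const response = await fetch(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model, prompt: text }),
  });
  if (!response.ok) throw new Error(`API error ${response.status}: ${await response.text()}`);
  const data = await response.json();
  return data.embedding; // number[]
}

const vector = await embed('nomic-embed-text:latest', 'memory forensics ram analysis');
console.log(`${vector.length}-dimensional embedding`);
```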
897 embeddings-comparison.js (new file)
@@ -0,0 +1,897 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// efficient-embedding-comparison.js
|
||||
// Proper embedding model evaluation with batch processing and vector search
|
||||
// Run with: node efficient-embedding-comparison.js --config=config.json
|
||||
|
||||
import fs from 'fs/promises';
|
||||
import yaml from 'js-yaml';
|
||||
import path from 'path';
|
||||
import crypto from 'crypto';
|
||||
|
||||
class EmbeddingCache {
|
||||
constructor(cacheDir = './embedding-cache') {
|
||||
this.cacheDir = cacheDir;
|
||||
}
|
||||
|
||||
async ensureCacheDir() {
|
||||
try {
|
||||
await fs.access(this.cacheDir);
|
||||
} catch {
|
||||
await fs.mkdir(this.cacheDir, { recursive: true });
|
||||
}
|
||||
}
|
||||
|
||||
getCacheKey(model, text) {
|
||||
const content = `${model.name}:${text}`;
|
||||
return crypto.createHash('md5').update(content).digest('hex');
|
||||
}
|
||||
|
||||
async getCachedEmbedding(model, text) {
|
||||
await this.ensureCacheDir();
|
||||
const key = this.getCacheKey(model, text);
|
||||
const cachePath = path.join(this.cacheDir, `${key}.json`);
|
||||
|
||||
try {
|
||||
const data = await fs.readFile(cachePath, 'utf8');
|
||||
return JSON.parse(data);
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
async setCachedEmbedding(model, text, embedding) {
|
||||
await this.ensureCacheDir();
|
||||
const key = this.getCacheKey(model, text);
|
||||
const cachePath = path.join(this.cacheDir, `${key}.json`);
|
||||
|
||||
await fs.writeFile(cachePath, JSON.stringify(embedding));
|
||||
}
|
||||
|
||||
async getCacheStats(model) {
|
||||
await this.ensureCacheDir();
|
||||
const files = await fs.readdir(this.cacheDir);
|
||||
const modelFiles = files.filter(f => f.includes(model.name.replace(/[^a-zA-Z0-9]/g, '_')));
|
||||
return { cached: modelFiles.length, total: files.length };
|
||||
}
|
||||
}
|
||||
|
||||
class SearchEvaluator {
|
||||
constructor() {
|
||||
this.cache = new EmbeddingCache();
|
||||
}
|
||||
|
||||
async rateLimitedDelay(model) {
|
||||
if (model.rateLimit && model.rateLimitDelayMs) {
|
||||
await new Promise(resolve => setTimeout(resolve, model.rateLimitDelayMs));
|
||||
}
|
||||
}
|
||||
|
||||
async getEmbedding(text, model) {
|
||||
// Check cache first
|
||||
const cached = await this.cache.getCachedEmbedding(model, text);
|
||||
if (cached) return cached;
|
||||
|
||||
const headers = { 'Content-Type': 'application/json' };
|
||||
let body, endpoint;
|
||||
|
||||
if (model.type === 'mistral') {
|
||||
if (model.apiKey) {
|
||||
headers['Authorization'] = `Bearer ${model.apiKey.replace('${AI_EMBEDDINGS_API_KEY}', process.env.AI_EMBEDDINGS_API_KEY || '')}`;
|
||||
}
|
||||
body = { model: model.name, input: [text] };
|
||||
endpoint = model.endpoint;
|
||||
} else {
|
||||
body = { model: model.name, prompt: text };
|
||||
endpoint = model.endpoint;
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await fetch(endpoint, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify(body)
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
if (response.status === 429 && model.rateLimit) {
|
||||
console.log(` ⚠️ Rate limited, waiting...`);
|
||||
await new Promise(resolve => setTimeout(resolve, 10000));
|
||||
return this.getEmbedding(text, model);
|
||||
}
|
||||
throw new Error(`API error ${response.status}: ${await response.text()}`);
|
||||
}
|
||||
|
||||
const data = await response.json();
|
||||
const embedding = model.type === 'mistral' ? data.data[0].embedding : data.embedding;
|
||||
|
||||
// Cache the result
|
||||
await this.cache.setCachedEmbedding(model, text, embedding);
|
||||
return embedding;
|
||||
|
||||
} catch (error) {
|
||||
console.error(`❌ Failed to get embedding: ${error.message}`);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
constructToolText(item, maxLength = null) {
|
||||
if (typeof item === 'string') {
|
||||
// Even for string inputs, don't truncate to match real app behavior
|
||||
return item.toLowerCase();
|
||||
}
|
||||
|
||||
// EXACT match to embeddings.ts createContentString() - NO TRUNCATION
|
||||
const parts = [
|
||||
item.name,
|
||||
item.description || '',
|
||||
...(item.tags || []),
|
||||
...(item.domains || []),
|
||||
...(item.phases || [])
|
||||
];
|
||||
|
||||
const contentString = parts.filter(Boolean).join(' ').toLowerCase();
|
||||
|
||||
// CRITICAL: No truncation! Return full content like real app
|
||||
return contentString;
|
||||
}
|
||||
|
||||
calculateOptimalBatchSize(model) {
|
||||
// Factors that ACTUALLY matter for batching individual API calls:
|
||||
|
||||
// 1. Rate limiting aggressiveness
|
||||
if (model.rateLimit && model.rateLimitDelayMs > 2000) {
|
||||
return 5; // Conservative batching for heavily rate-limited APIs
|
||||
}
|
||||
|
||||
// 2. API latency expectations
|
||||
if (model.type === 'ollama') {
|
||||
return 15; // Local APIs are fast, can handle larger batches
|
||||
} else if (model.type === 'mistral') {
|
||||
return 10; // Remote APIs might be slower, medium batches
|
||||
}
|
||||
|
||||
// 3. Progress reporting frequency preference
|
||||
// For 185 tools:
|
||||
// - Batch size 10 = 19 progress updates
|
||||
// - Batch size 15 = 13 progress updates
|
||||
// - Batch size 20 = 10 progress updates
|
||||
|
||||
return 15; // Good balance for ~13 progress updates
|
||||
}
|
||||
|
||||
async createBatchEmbeddings(items, model) {
|
||||
const batchSize = this.calculateOptimalBatchSize(model);
|
||||
const contextSize = model.contextSize || 2000; // Only for display/info
|
||||
|
||||
console.log(` 📦 Creating embeddings for ${items.length} items`);
|
||||
console.log(` 📏 Model context: ${contextSize} chars (for reference - NOT truncating)`);
|
||||
console.log(` 📋 Batch size: ${batchSize} (for progress reporting)`);
|
||||
|
||||
const embeddings = new Map();
|
||||
let apiCalls = 0;
|
||||
let cacheHits = 0;
|
||||
const totalBatches = Math.ceil(items.length / batchSize);
|
||||
|
||||
for (let i = 0; i < items.length; i += batchSize) {
|
||||
const batch = items.slice(i, i + batchSize);
|
||||
const batchNum = Math.floor(i/batchSize) + 1;
|
||||
|
||||
console.log(` 📋 Processing batch ${batchNum}/${totalBatches} (${batch.length} tools)`);
|
||||
|
||||
for (const item of batch) {
|
||||
// Get FULL content (no truncation)
|
||||
const text = this.constructToolText(item);
|
||||
|
||||
// Show actual text length for first few tools (full length!)
|
||||
if (i < batchSize && batch.indexOf(item) < 3) {
|
||||
const truncatedDisplay = text.length > 100 ? text.slice(0, 100) + '...' : text;
|
||||
console.log(` 📝 ${item.name}: ${text.length} chars (full) - "${truncatedDisplay}"`);
|
||||
}
|
||||
|
||||
try {
|
||||
const embedding = await this.getEmbedding(text, model);
|
||||
embeddings.set(item.id || item.name || text, {
|
||||
text,
|
||||
embedding,
|
||||
metadata: item
|
||||
});
|
||||
|
||||
const cached = await this.cache.getCachedEmbedding(model, text);
|
||||
if (cached) cacheHits++; else apiCalls++;
|
||||
|
||||
await this.rateLimitedDelay(model);
|
||||
} catch (error) {
|
||||
console.warn(` ⚠️ Failed to embed: ${item.name || text.slice(0, 50)}...`);
|
||||
// Log the error for debugging
|
||||
if (text.length > 8000) {
|
||||
console.warn(` 📏 Text was ${text.length} chars - may exceed model limits`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Show content length statistics
|
||||
const lengths = Array.from(embeddings.values()).map(e => e.text.length);
|
||||
const avgLength = lengths.reduce((a, b) => a + b, 0) / lengths.length;
|
||||
const maxLength = Math.max(...lengths);
|
||||
const minLength = Math.min(...lengths);
|
||||
|
||||
console.log(` 📊 Content stats: avg ${avgLength.toFixed(0)} chars, range ${minLength}-${maxLength} chars`);
|
||||
console.log(` ✅ Created ${embeddings.size} embeddings (${apiCalls} API calls, ${cacheHits} cache hits)`);
|
||||
|
||||
return embeddings;
|
||||
}
|
||||
|
||||
cosineSimilarity(a, b) {
|
||||
if (!a || !b || a.length === 0 || b.length === 0) return 0;
|
||||
|
||||
let dotProduct = 0;
|
||||
let normA = 0;
|
||||
let normB = 0;
|
||||
const minLength = Math.min(a.length, b.length);
|
||||
|
||||
for (let i = 0; i < minLength; i++) {
|
||||
dotProduct += a[i] * b[i];
|
||||
normA += a[i] * a[i];
|
||||
normB += b[i] * b[i];
|
||||
}
|
||||
|
||||
if (normA === 0 || normB === 0) return 0;
|
||||
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
|
||||
}
|
||||
|
||||
searchSimilar(queryEmbedding, toolEmbeddings, topK = 10) {
|
||||
const similarities = [];
|
||||
|
||||
for (const [id, data] of toolEmbeddings) {
|
||||
const similarity = this.cosineSimilarity(queryEmbedding, data.embedding);
|
||||
similarities.push({
|
||||
id,
|
||||
similarity,
|
||||
metadata: data.metadata,
|
||||
text: data.text
|
||||
});
|
||||
}
|
||||
|
||||
return similarities
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, topK);
|
||||
}
|
||||
|
||||
calculateRetrievalMetrics(results, relevantIds, k = 10) {
|
||||
const topK = results.slice(0, k);
|
||||
const retrievedIds = new Set(topK.map(r => r.id));
|
||||
const relevantSet = new Set(relevantIds);
|
||||
|
||||
// Precision@K
|
||||
const relevantRetrieved = topK.filter(r => relevantSet.has(r.id)).length;
|
||||
const precisionAtK = topK.length > 0 ? relevantRetrieved / topK.length : 0;
|
||||
|
||||
// Recall@K
|
||||
const recallAtK = relevantIds.length > 0 ? relevantRetrieved / relevantIds.length : 0;
|
||||
|
||||
// F1@K
|
||||
const f1AtK = (precisionAtK + recallAtK) > 0 ?
|
||||
2 * (precisionAtK * recallAtK) / (precisionAtK + recallAtK) : 0;
|
||||
|
||||
// Mean Reciprocal Rank (MRR)
|
||||
let mrr = 0;
|
||||
for (let i = 0; i < topK.length; i++) {
|
||||
if (relevantSet.has(topK[i].id)) {
|
||||
mrr = 1 / (i + 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// NDCG@K (simplified binary relevance)
|
||||
let dcg = 0;
|
||||
let idcg = 0;
|
||||
|
||||
for (let i = 0; i < k; i++) {
|
||||
const rank = i + 1;
|
||||
const discount = Math.log2(rank + 1);
|
||||
|
||||
// DCG
|
||||
if (i < topK.length && relevantSet.has(topK[i].id)) {
|
||||
dcg += 1 / discount;
|
||||
}
|
||||
|
||||
// IDCG (ideal ranking)
|
||||
if (i < relevantIds.length) {
|
||||
idcg += 1 / discount;
|
||||
}
|
||||
}
|
||||
|
||||
const ndcgAtK = idcg > 0 ? dcg / idcg : 0;
|
||||
|
||||
return {
|
||||
precisionAtK,
|
||||
recallAtK,
|
||||
f1AtK,
|
||||
mrr,
|
||||
ndcgAtK,
|
||||
relevantRetrieved,
|
||||
totalRelevant: relevantIds.length
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
class EfficientEmbeddingComparison {
|
||||
constructor(configPath = './embedding-test-config.json') {
|
||||
this.configPath = configPath;
|
||||
this.config = null;
|
||||
this.tools = [];
|
||||
this.evaluator = new SearchEvaluator();
|
||||
|
||||
// Test queries tailored to the actual tools.yaml content
|
||||
this.testQueries = [
|
||||
{
|
||||
query: "memory forensics RAM analysis",
|
||||
keywords: ["memory", "forensics", "volatility", "ram", "dump", "analysis"],
|
||||
category: "memory_analysis"
|
||||
},
|
||||
{
|
||||
query: "network packet capture traffic analysis",
|
||||
keywords: ["network", "packet", "pcap", "wireshark", "traffic", "capture"],
|
||||
category: "network_analysis"
|
||||
},
|
||||
{
|
||||
query: "malware reverse engineering binary analysis",
|
||||
keywords: ["malware", "reverse", "engineering", "ghidra", "binary", "disassemble"],
|
||||
category: "malware_analysis"
|
||||
},
|
||||
{
|
||||
query: "digital forensics disk imaging",
|
||||
keywords: ["forensics", "disk", "imaging", "autopsy", "investigation", "evidence"],
|
||||
category: "disk_forensics"
|
||||
},
|
||||
{
|
||||
query: "incident response threat hunting",
|
||||
keywords: ["incident", "response", "threat", "hunting", "investigation", "compromise"],
|
||||
category: "incident_response"
|
||||
},
|
||||
{
|
||||
query: "mobile device smartphone forensics",
|
||||
keywords: ["mobile", "smartphone", "android", "ios", "device", "cellebrite"],
|
||||
category: "mobile_forensics"
|
||||
},
|
||||
{
|
||||
query: "timeline analysis event correlation",
|
||||
keywords: ["timeline", "analysis", "correlation", "events", "plaso", "timesketch"],
|
||||
category: "timeline_analysis"
|
||||
},
|
||||
{
|
||||
query: "registry analysis windows artifacts",
|
||||
keywords: ["registry", "windows", "artifacts", "regripper", "hives", "keys"],
|
||||
category: "registry_analysis"
|
||||
},
|
||||
{
|
||||
query: "cloud forensics container analysis",
|
||||
keywords: ["cloud", "container", "docker", "virtualization", "aws", "azure"],
|
||||
category: "cloud_forensics"
|
||||
},
|
||||
{
|
||||
query: "blockchain cryptocurrency investigation",
|
||||
keywords: ["blockchain", "cryptocurrency", "bitcoin", "chainalysis", "transaction"],
|
||||
category: "blockchain_analysis"
|
||||
}
|
||||
];
|
||||
|
||||
console.log('[INIT] Efficient embedding comparison initialized');
|
||||
}
|
||||
|
||||
async loadConfig() {
|
||||
try {
|
||||
const configData = await fs.readFile(this.configPath, 'utf8');
|
||||
this.config = JSON.parse(configData);
|
||||
console.log(`[CONFIG] Loaded ${this.config.models.length} models`);
|
||||
} catch (error) {
|
||||
console.error('[CONFIG] Failed to load configuration:', error.message);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
async loadTools() {
|
||||
try {
|
||||
const yamlContent = await fs.readFile(this.config.toolsYamlPath, 'utf8');
|
||||
const data = yaml.load(yamlContent);
|
||||
|
||||
// Extract tools (flexible - handle different YAML structures)
|
||||
this.tools = data.tools || data.entries || data.applications || data;
|
||||
if (!Array.isArray(this.tools)) {
|
||||
this.tools = Object.values(this.tools);
|
||||
}
|
||||
|
||||
// Filter out concepts and ensure required fields
|
||||
this.tools = this.tools.filter(tool =>
|
||||
tool &&
|
||||
tool.type !== 'concept' &&
|
||||
(tool.name || tool.title) &&
|
||||
(tool.description || tool.summary)
|
||||
);
|
||||
|
||||
// Normalize tool structure
|
||||
this.tools = this.tools.map((tool, index) => ({
|
||||
id: tool.id || tool.name || tool.title || `tool_${index}`,
|
||||
name: tool.name || tool.title,
|
||||
description: tool.description || tool.summary || '',
|
||||
tags: tool.tags || [],
|
||||
domains: tool.domains || tool.categories || [],
|
||||
phases: tool.phases || [],
|
||||
platforms: tool.platforms || [],
|
||||
type: tool.type || 'tool',
|
||||
skillLevel: tool.skillLevel,
|
||||
license: tool.license
|
||||
}));
|
||||
|
||||
console.log(`[DATA] Loaded ${this.tools.length} tools from ${this.config.toolsYamlPath}`);
|
||||
|
||||
// Show some statistics
|
||||
const domainCounts = {};
|
||||
const tagCounts = {};
|
||||
|
||||
this.tools.forEach(tool => {
|
||||
(tool.domains || []).forEach(domain => {
|
||||
domainCounts[domain] = (domainCounts[domain] || 0) + 1;
|
||||
});
|
||||
(tool.tags || []).forEach(tag => {
|
||||
tagCounts[tag] = (tagCounts[tag] || 0) + 1;
|
||||
});
|
||||
});
|
||||
|
||||
const topDomains = Object.entries(domainCounts)
|
||||
.sort(([,a], [,b]) => b - a)
|
||||
.slice(0, 5)
|
||||
.map(([domain, count]) => `${domain}(${count})`)
|
||||
.join(', ');
|
||||
|
||||
console.log(`[DATA] Top domains: ${topDomains}`);
|
||||
console.log(`[DATA] Sample tools: ${this.tools.slice(0, 3).map(t => t.name).join(', ')}`);
|
||||
|
||||
if (this.tools.length === 0) {
|
||||
throw new Error('No valid tools found in YAML file');
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.error('[DATA] Failed to load tools:', error.message);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
findRelevantTools(query) {
|
||||
const queryLower = query.query.toLowerCase();
|
||||
const keywords = query.keywords.map(k => k.toLowerCase());
|
||||
|
||||
const relevantTools = this.tools.filter(tool => {
|
||||
// Build searchable text from all tool metadata
|
||||
const searchableFields = [
|
||||
tool.name || '',
|
||||
tool.description || '',
|
||||
(tool.tags || []).join(' '),
|
||||
(tool.domains || []).join(' '),
|
||||
(tool.phases || []).join(' '),
|
||||
(tool.platforms || []).join(' ')
|
||||
];
|
||||
|
||||
const toolText = searchableFields.join(' ').toLowerCase();
|
||||
|
||||
// Check for keyword matches
|
||||
const hasKeywordMatch = keywords.some(keyword => toolText.includes(keyword));
|
||||
|
||||
// Check for query word matches (words longer than 3 chars)
|
||||
const queryWords = queryLower.split(' ').filter(word => word.length > 3);
|
||||
const hasQueryWordMatch = queryWords.some(word => toolText.includes(word));
|
||||
|
||||
// Check for domain-specific matches
|
||||
const isDomainRelevant = query.category && tool.domains &&
|
||||
tool.domains.some(domain => domain.includes(query.category.replace('_', '-')));
|
||||
|
||||
return hasKeywordMatch || hasQueryWordMatch || isDomainRelevant;
|
||||
});
|
||||
|
||||
console.log(` 🎯 Found ${relevantTools.length} relevant tools for "${query.query}"`);
|
||||
|
||||
// Log some examples for debugging
|
||||
if (relevantTools.length > 0) {
|
||||
console.log(` 📋 Examples: ${relevantTools.slice(0, 3).map(t => t.name).join(', ')}`);
|
||||
}
|
||||
|
||||
return relevantTools.map(tool => tool.id || tool.name);
|
||||
}
|
||||
|
||||
async testSearchPerformance(model) {
|
||||
console.log(` 🔍 Testing search performance...`);
|
||||
|
||||
// Create embeddings for all tools
|
||||
const toolEmbeddings = await this.evaluator.createBatchEmbeddings(this.tools, model);
|
||||
|
||||
const results = [];
|
||||
let totalApiCalls = 0;
|
||||
|
||||
for (const testQuery of this.testQueries) {
|
||||
console.log(` 📋 Query: "${testQuery.query}"`);
|
||||
|
||||
// Get query embedding
|
||||
const queryEmbedding = await this.evaluator.getEmbedding(testQuery.query, model);
|
||||
totalApiCalls++;
|
||||
await this.evaluator.rateLimitedDelay(model);
|
||||
|
||||
// Find relevant tools for this query
|
||||
const relevantIds = this.findRelevantTools(testQuery);
|
||||
console.log(` 📊 Found ${relevantIds.length} relevant tools`);
|
||||
|
||||
if (relevantIds.length === 0) {
|
||||
console.log(` ⚠️ No relevant tools found, skipping metrics calculation`);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Perform search
|
||||
const searchResults = this.evaluator.searchSimilar(queryEmbedding, toolEmbeddings, 20);
|
||||
|
||||
// Calculate metrics for different k values
|
||||
const metrics = {};
|
||||
for (const k of [1, 3, 5, 10]) {
|
||||
metrics[`k${k}`] = this.evaluator.calculateRetrievalMetrics(searchResults, relevantIds, k);
|
||||
}
|
||||
|
||||
results.push({
|
||||
query: testQuery.query,
|
||||
category: testQuery.category,
|
||||
relevantCount: relevantIds.length,
|
||||
searchResults: searchResults.slice(0, 5), // Top 5 for display
|
||||
metrics
|
||||
});
|
||||
|
||||
// Display results
|
||||
console.log(` 🎯 Top results:`);
|
||||
searchResults.slice(0, 3).forEach((result, i) => {
|
||||
const isRelevant = relevantIds.includes(result.id) ? '✓' : '✗';
|
||||
console.log(` ${i+1}. ${isRelevant} ${result.metadata.name} (${(result.similarity*100).toFixed(1)}%)`);
|
||||
});
|
||||
|
||||
console.log(` 📈 P@5: ${(metrics.k5.precisionAtK*100).toFixed(1)}% | R@5: ${(metrics.k5.recallAtK*100).toFixed(1)}% | NDCG@5: ${(metrics.k5.ndcgAtK*100).toFixed(1)}%`);
|
||||
}
|
||||
|
||||
return { results, totalApiCalls };
|
||||
}
|
||||
|
||||
async testSemanticUnderstanding(model) {
|
||||
console.log(` 🧠 Testing semantic understanding...`);
|
||||
|
||||
const semanticTests = [
|
||||
{
|
||||
primary: "memory forensics",
|
||||
synonyms: ["RAM analysis", "volatile memory examination", "memory dump investigation"],
|
||||
unrelated: ["file compression", "web browser", "text editor"]
|
||||
},
|
||||
{
|
||||
primary: "network analysis",
|
||||
synonyms: ["packet inspection", "traffic monitoring", "protocol analysis"],
|
||||
unrelated: ["image editing", "music player", "calculator"]
|
||||
},
|
||||
{
|
||||
primary: "malware detection",
|
||||
synonyms: ["virus scanning", "threat identification", "malicious code analysis"],
|
||||
unrelated: ["video converter", "password manager", "calendar app"]
|
||||
}
|
||||
];
|
||||
|
||||
let totalCorrect = 0;
|
||||
let totalTests = 0;
|
||||
let apiCalls = 0;
|
||||
|
||||
for (const test of semanticTests) {
|
||||
console.log(` 🔤 Testing: "${test.primary}"`);
|
||||
|
||||
const primaryEmbedding = await this.evaluator.getEmbedding(test.primary, model);
|
||||
apiCalls++;
|
||||
await this.evaluator.rateLimitedDelay(model);
|
||||
|
||||
// Test synonyms (should be similar)
|
||||
for (const synonym of test.synonyms) {
|
||||
const synonymEmbedding = await this.evaluator.getEmbedding(synonym, model);
|
||||
apiCalls++;
|
||||
|
||||
const synonymSim = this.evaluator.cosineSimilarity(primaryEmbedding, synonymEmbedding);
|
||||
console.log(` ✓ "${synonym}": ${(synonymSim*100).toFixed(1)}%`);
|
||||
|
||||
await this.evaluator.rateLimitedDelay(model);
|
||||
}
|
||||
|
||||
// Test unrelated terms (should be dissimilar)
|
||||
for (const unrelated of test.unrelated) {
|
||||
const unrelatedEmbedding = await this.evaluator.getEmbedding(unrelated, model);
|
||||
apiCalls++;
|
||||
|
||||
const unrelatedSim = this.evaluator.cosineSimilarity(primaryEmbedding, unrelatedEmbedding);
|
||||
console.log(` ✗ "${unrelated}": ${(unrelatedSim*100).toFixed(1)}%`);
|
||||
|
||||
await this.evaluator.rateLimitedDelay(model);
|
||||
}
|
||||
|
||||
// Calculate semantic coherence
|
||||
const avgSynonymSim = await this.calculateAvgSimilarity(primaryEmbedding, test.synonyms, model);
|
||||
const avgUnrelatedSim = await this.calculateAvgSimilarity(primaryEmbedding, test.unrelated, model);
|
||||
|
||||
const isCorrect = avgSynonymSim > avgUnrelatedSim;
|
||||
if (isCorrect) totalCorrect++;
|
||||
totalTests++;
|
||||
|
||||
console.log(` 📊 Synonyms: ${(avgSynonymSim*100).toFixed(1)}% | Unrelated: ${(avgUnrelatedSim*100).toFixed(1)}% ${isCorrect ? '✓' : '✗'}`);
|
||||
}
|
||||
|
||||
return {
|
||||
accuracy: totalCorrect / totalTests,
|
||||
correctTests: totalCorrect,
|
||||
totalTests,
|
||||
apiCalls
|
||||
};
|
||||
}
|
||||
|
||||
async calculateAvgSimilarity(baseEmbedding, terms, model) {
|
||||
let totalSim = 0;
|
||||
|
||||
for (const term of terms) {
|
||||
const embedding = await this.evaluator.getEmbedding(term, model);
|
||||
const sim = this.evaluator.cosineSimilarity(baseEmbedding, embedding);
|
||||
totalSim += sim;
|
||||
await this.evaluator.rateLimitedDelay(model);
|
||||
}
|
||||
|
||||
return totalSim / terms.length;
|
||||
}
|
||||
|
||||
async benchmarkPerformance(model) {
|
||||
console.log(` ⚡ Benchmarking performance...`);
|
||||
|
||||
const testTexts = this.tools.slice(0, 10).map(tool => `${tool.name} ${tool.description}`.slice(0, 500));
|
||||
const times = [];
|
||||
let apiCalls = 0;
|
||||
|
||||
console.log(` 🏃 Processing ${testTexts.length} texts...`);
|
||||
|
||||
for (const text of testTexts) {
|
||||
const start = Date.now();
|
||||
await this.evaluator.getEmbedding(text, model);
|
||||
const time = Date.now() - start;
|
||||
times.push(time);
|
||||
apiCalls++;
|
||||
|
||||
await this.evaluator.rateLimitedDelay(model);
|
||||
}
|
||||
|
||||
const avgTime = times.reduce((a, b) => a + b, 0) / times.length;
|
||||
const minTime = Math.min(...times);
|
||||
const maxTime = Math.max(...times);
|
||||
|
||||
console.log(` 📊 Avg: ${avgTime.toFixed(0)}ms | Min: ${minTime}ms | Max: ${maxTime}ms`);
|
||||
|
||||
return {
|
||||
avgLatency: avgTime,
|
||||
minLatency: minTime,
|
||||
maxLatency: maxTime,
|
||||
throughput: 1000 / avgTime, // requests per second
|
||||
apiCalls
|
||||
};
|
||||
}
|
||||
|
||||
async testModel(model) {
|
||||
console.log(`\n🧪 Testing ${model.name} (${model.type})...`);
|
||||
|
||||
const startTime = Date.now();
|
||||
let totalApiCalls = 0;
|
||||
|
||||
try {
|
||||
// 1. Search Performance Testing
|
||||
const searchResults = await this.testSearchPerformance(model);
|
||||
totalApiCalls += searchResults.totalApiCalls;
|
||||
|
||||
// 2. Semantic Understanding Testing
|
||||
const semanticResults = await this.testSemanticUnderstanding(model);
|
||||
totalApiCalls += semanticResults.apiCalls;
|
||||
|
||||
// 3. Performance Benchmarking
|
||||
const perfResults = await this.benchmarkPerformance(model);
|
||||
totalApiCalls += perfResults.apiCalls;
|
||||
|
||||
const totalTime = Date.now() - startTime;
|
||||
|
||||
console.log(` ✅ ${model.name} completed in ${(totalTime/1000).toFixed(1)}s (${totalApiCalls} API calls)`);
|
||||
|
||||
return {
|
||||
searchPerformance: searchResults.results,
|
||||
semanticUnderstanding: semanticResults,
|
||||
performance: perfResults,
|
||||
totalTime,
|
||||
totalApiCalls
|
||||
};
|
||||
|
||||
} catch (error) {
|
||||
console.error(` ❌ ${model.name} failed:`, error.message);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
calculateOverallScore(results) {
|
||||
// Calculate average metrics across all queries
|
||||
const searchMetrics = results.searchPerformance.filter(r => r.metrics && Object.keys(r.metrics).length > 0);
|
||||
|
||||
if (searchMetrics.length === 0) {
|
||||
console.warn('⚠️ No search metrics available for scoring - may indicate relevance matching issues');
|
||||
return {
|
||||
overall: 0,
|
||||
components: {
|
||||
precision5: 0,
|
||||
recall5: 0,
|
||||
ndcg5: 0,
|
||||
mrr: 0,
|
||||
semanticAccuracy: results.semanticUnderstanding?.accuracy || 0,
|
||||
throughput: results.performance?.throughput || 0
|
||||
},
|
||||
warning: 'No search metrics available'
|
||||
};
|
||||
}
|
||||
|
||||
console.log(`📊 Calculating score from ${searchMetrics.length} valid search results`);
|
||||
|
||||
const avgPrecision5 = searchMetrics.reduce((sum, r) => sum + (r.metrics.k5?.precisionAtK || 0), 0) / searchMetrics.length;
|
||||
const avgRecall5 = searchMetrics.reduce((sum, r) => sum + (r.metrics.k5?.recallAtK || 0), 0) / searchMetrics.length;
|
||||
const avgNDCG5 = searchMetrics.reduce((sum, r) => sum + (r.metrics.k5?.ndcgAtK || 0), 0) / searchMetrics.length;
|
||||
const avgMRR = searchMetrics.reduce((sum, r) => sum + (r.metrics.k5?.mrr || 0), 0) / searchMetrics.length;
|
||||
|
||||
const semanticAccuracy = results.semanticUnderstanding?.accuracy || 0;
|
||||
const throughput = results.performance?.throughput || 0;
|
||||
|
||||
// Weighted overall score
|
||||
const weights = {
|
||||
precision: 0.25,
|
||||
recall: 0.25,
|
||||
ndcg: 0.20,
|
||||
semantic: 0.20,
|
||||
speed: 0.10
|
||||
};
|
||||
|
||||
const normalizedThroughput = Math.min(throughput / 10, 1); // Normalize to 0-1 (10 req/s = 1.0)
|
||||
|
||||
const overall = (
|
||||
avgPrecision5 * weights.precision +
|
||||
avgRecall5 * weights.recall +
|
||||
avgNDCG5 * weights.ndcg +
|
||||
semanticAccuracy * weights.semantic +
|
||||
normalizedThroughput * weights.speed
|
||||
);
|
||||
|
||||
return {
|
||||
overall,
|
||||
components: {
|
||||
precision5: avgPrecision5,
|
||||
recall5: avgRecall5,
|
||||
ndcg5: avgNDCG5,
|
||||
mrr: avgMRR,
|
||||
semanticAccuracy,
|
||||
throughput
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
printResults(modelResults) {
|
||||
console.log(`\n${'='.repeat(80)}`);
|
||||
console.log("🏆 EFFICIENT EMBEDDING MODEL COMPARISON RESULTS");
|
||||
console.log(`${'='.repeat(80)}`);
|
||||
|
||||
const scores = modelResults.map(mr => ({
|
||||
model: mr.model,
|
||||
score: this.calculateOverallScore(mr.results),
|
||||
results: mr.results
|
||||
})).sort((a, b) => b.score.overall - a.score.overall);
|
||||
|
||||
console.log(`\n🥇 OVERALL RANKINGS:`);
|
||||
scores.forEach((score, index) => {
|
||||
console.log(` ${index + 1}. ${score.model.name}: ${(score.score.overall * 100).toFixed(1)}% overall`);
|
||||
});
|
||||
|
||||
console.log(`\n📊 DETAILED METRICS:`);
|
||||
|
||||
console.log(`\n 🎯 Search Performance (Precision@5):`);
|
||||
scores.forEach(score => {
|
||||
console.log(` ${score.model.name}: ${(score.score.components.precision5 * 100).toFixed(1)}%`);
|
||||
});
|
||||
|
||||
console.log(`\n 🔍 Search Performance (Recall@5):`);
|
||||
scores.forEach(score => {
|
||||
console.log(` ${score.model.name}: ${(score.score.components.recall5 * 100).toFixed(1)}%`);
|
||||
});
|
||||
|
||||
console.log(`\n 📈 Search Quality (NDCG@5):`);
|
||||
scores.forEach(score => {
|
||||
console.log(` ${score.model.name}: ${(score.score.components.ndcg5 * 100).toFixed(1)}%`);
|
||||
});
|
||||
|
||||
console.log(`\n 🧠 Semantic Understanding:`);
|
||||
scores.forEach(score => {
|
||||
console.log(` ${score.model.name}: ${(score.score.components.semanticAccuracy * 100).toFixed(1)}%`);
|
||||
});
|
||||
|
||||
console.log(`\n ⚡ Performance (req/s):`);
|
||||
scores.forEach(score => {
|
||||
console.log(` ${score.model.name}: ${score.score.components.throughput.toFixed(1)} req/s`);
|
||||
});
|
||||
|
||||
// Winner analysis
|
||||
const winner = scores[0];
|
||||
console.log(`\n🏆 WINNER: ${winner.model.name}`);
|
||||
console.log(` Overall Score: ${(winner.score.overall * 100).toFixed(1)}%`);
|
||||
console.log(` Best for: ${this.getBestUseCase(winner.score.components)}`);
|
||||
|
||||
// Summary stats
|
||||
const totalQueries = modelResults[0]?.results.searchPerformance.length || 0;
|
||||
const totalTools = this.tools.length;
|
||||
|
||||
console.log(`\n📋 Test Summary:`);
|
||||
console.log(` Tools tested: ${totalTools}`);
|
||||
console.log(` Search queries: ${totalQueries}`);
|
||||
console.log(` Models compared: ${scores.length}`);
|
||||
console.log(` Total API calls: ${modelResults.reduce((sum, mr) => sum + mr.results.totalApiCalls, 0)}`);
|
||||
}
|
||||
|
||||
getBestUseCase(components) {
|
||||
const strengths = [];
|
||||
if (components.precision5 > 0.7) strengths.push("High precision");
|
||||
if (components.recall5 > 0.7) strengths.push("High recall");
|
||||
if (components.semanticAccuracy > 0.8) strengths.push("Semantic understanding");
|
||||
if (components.throughput > 5) strengths.push("High performance");
|
||||
|
||||
return strengths.length > 0 ? strengths.join(", ") : "General purpose";
|
||||
}
|
||||
|
||||
async run() {
|
||||
try {
|
||||
console.log("🚀 EFFICIENT EMBEDDING MODEL COMPARISON");
|
||||
console.log("=====================================");
|
||||
|
||||
await this.loadConfig();
|
||||
await this.loadTools();
|
||||
|
||||
console.log(`\n📋 Test Overview:`);
|
||||
console.log(` Models: ${this.config.models.length}`);
|
||||
console.log(` Tools: ${this.tools.length}`);
|
||||
console.log(` Search queries: ${this.testQueries.length}`);
|
||||
console.log(` Cache: ${this.evaluator.cache.cacheDir}`);
|
||||
|
||||
const modelResults = [];
|
||||
|
||||
for (const model of this.config.models) {
|
||||
try {
|
||||
const results = await this.testModel(model);
|
||||
modelResults.push({ model, results });
|
||||
} catch (error) {
|
||||
console.error(`❌ Skipping ${model.name}: ${error.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
if (modelResults.length === 0) {
|
||||
throw new Error('No models completed testing successfully');
|
||||
}
|
||||
|
||||
this.printResults(modelResults);
|
||||
|
||||
} catch (error) {
|
||||
console.error('\n❌ Test failed:', error.message);
|
||||
console.log('\nDebugging steps:');
|
||||
console.log('1. Verify tools.yaml exists and contains valid tool data');
|
||||
console.log('2. Check model endpoints are accessible');
|
||||
console.log('3. For Ollama: ensure models are pulled and ollama serve is running');
|
||||
console.log('4. For Mistral: verify AI_EMBEDDINGS_API_KEY environment variable');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Execute
|
||||
const configArg = process.argv.find(arg => arg.startsWith('--config='));
|
||||
const configPath = configArg ? configArg.split('=')[1] : './embedding-test-config.json';
|
||||
|
||||
(async () => {
|
||||
const comparison = new EfficientEmbeddingComparison(configPath);
|
||||
await comparison.run();
|
||||
})().catch(console.error);
|
||||
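The script above scores each model in calculateRetrievalMetrics() with standard binary-relevance retrieval metrics and then combines them in calculateOverallScore() as 0.25·P@5 + 0.25·R@5 + 0.20·NDCG@5 + 0.20·semantic accuracy + 0.10·normalized throughput (min(throughput/10, 1)). For reference, the metric definitions the code implements are:

```latex
\mathrm{P@}k = \frac{|\mathrm{Rel} \cap \mathrm{Ret}_k|}{|\mathrm{Ret}_k|}, \qquad
\mathrm{R@}k = \frac{|\mathrm{Rel} \cap \mathrm{Ret}_k|}{|\mathrm{Rel}|}, \qquad
\mathrm{F1@}k = \frac{2\,\mathrm{P@}k\,\mathrm{R@}k}{\mathrm{P@}k + \mathrm{R@}k}

\mathrm{MRR} = \frac{1}{\text{rank of first relevant hit}}, \qquad
\mathrm{NDCG@}k = \frac{\sum_{i=1}^{k} \mathrm{rel}_i / \log_2(i+1)}{\sum_{i=1}^{\min(k,\,|\mathrm{Rel}|)} 1 / \log_2(i+1)}
```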
333 find-duplicates.mjs (new file)
@@ -0,0 +1,333 @@
|
||||
#!/usr/bin/env node
|
||||
// find-duplicate-functions.mjs
|
||||
// Usage:
|
||||
// node find-duplicate-functions.mjs [rootDir] [--mode exact|struct] [--min-lines N] [--json]
|
||||
// Example:
|
||||
// node find-duplicate-functions.mjs . --mode struct --min-lines 3
|
||||
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import * as url from "url";
|
||||
import ts from "typescript";
|
||||
|
||||
const __dirname = path.dirname(url.fileURLToPath(import.meta.url));
|
||||
|
||||
/** -------- CLI OPTIONS -------- */
|
||||
const args = process.argv.slice(2);
|
||||
let rootDir = ".";
|
||||
let mode = "struct"; // "exact" | "struct"
|
||||
let minLines = 3;
|
||||
let outputJson = false;
|
||||
|
||||
for (let i = 0; i < args.length; i++) {
|
||||
const a = args[i];
|
||||
if (!a.startsWith("--") && rootDir === ".") {
|
||||
rootDir = a;
|
||||
} else if (a === "--mode") {
|
||||
mode = (args[++i] || "struct").toLowerCase();
|
||||
if (!["exact", "struct"].includes(mode)) {
|
||||
console.error("Invalid --mode. Use 'exact' or 'struct'.");
|
||||
process.exit(1);
|
||||
}
|
||||
} else if (a === "--min-lines") {
|
||||
minLines = parseInt(args[++i] || "3", 10);
|
||||
} else if (a === "--json") {
|
||||
outputJson = true;
|
||||
}
|
||||
}
|
||||
|
||||
/** -------- FILE DISCOVERY -------- */
|
||||
const DEFAULT_IGNORES = new Set([
|
||||
"node_modules",
|
||||
".git",
|
||||
".next",
|
||||
".vercel",
|
||||
"dist",
|
||||
"build",
|
||||
".astro", // Astro's generated cache dir
|
||||
]);
|
||||
|
||||
const VALID_EXTS = new Set([".ts", ".tsx", ".astro", ".mts", ".cts"]);
|
||||
|
||||
function walk(dir) {
|
||||
/** @type {string[]} */
|
||||
const out = [];
|
||||
const entries = fs.readdirSync(dir, { withFileTypes: true });
|
||||
for (const e of entries) {
|
||||
const p = path.join(dir, e.name);
|
||||
if (e.isDirectory()) {
|
||||
if (DEFAULT_IGNORES.has(e.name)) continue;
|
||||
out.push(...walk(p));
|
||||
} else if (e.isFile() && VALID_EXTS.has(path.extname(e.name))) {
|
||||
out.push(p);
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/** -------- ASTRO CODE EXTRACTION --------
|
||||
* Extract TS/JS code from:
|
||||
* - frontmatter: --- ... ---
|
||||
* - <script ...> ... </script>
|
||||
*/
|
||||
function extractCodeFromAstro(source) {
|
||||
/** @type {{code:string, offset:number}[]} */
|
||||
const blocks = [];
|
||||
|
||||
// Frontmatter (must be at top in Astro)
|
||||
// Match the FIRST pair of --- ... ---
|
||||
const fm = source.startsWith("---")
|
||||
? (() => {
|
||||
const end = source.indexOf("\n---", 3);
|
||||
if (end !== -1) {
|
||||
const front = source.slice(3, end + 1); // include trailing \n
|
||||
return { start: 0, end: end + 4, code: front };
|
||||
}
|
||||
return null;
|
||||
})()
|
||||
: null;
|
||||
if (fm) {
|
||||
// offset for line numbers is after the first '---\n'
|
||||
blocks.push({ code: fm.code, offset: 4 }); // rough; we’ll fix line numbers via positions later
|
||||
}
|
||||
|
||||
// <script ...> ... </script>
|
||||
const scriptRe = /<script\b[^>]*>([\s\S]*?)<\/script>/gi;
|
||||
let m;
|
||||
while ((m = scriptRe.exec(source))) {
|
||||
const code = m[1] || "";
|
||||
blocks.push({ code, offset: indexToLine(source, m.index) });
|
||||
}
|
||||
|
||||
return blocks;
|
||||
}
|
||||
|
||||
/** -------- UTIL: index -> 1-based line -------- */
|
||||
function indexToLine(text, idx) {
|
||||
let line = 1;
|
||||
for (let i = 0; i < idx && i < text.length; i++) {
|
||||
if (text.charCodeAt(i) === 10) line++;
|
||||
}
|
||||
return line;
|
||||
}
|
||||
|
||||
/** -------- AST HELPERS -------- */
|
||||
function createSourceFile(virtualPath, code) {
|
||||
return ts.createSourceFile(
|
||||
virtualPath,
|
||||
code,
|
||||
ts.ScriptTarget.Latest,
|
||||
/*setParentNodes*/ true,
|
||||
virtualPath.endsWith(".tsx") ? ts.ScriptKind.TSX : ts.ScriptKind.TS
|
||||
);
|
||||
}
|
||||
|
||||
// Normalize AST to a structural signature string
|
||||
function structuralSignature(node) {
|
||||
/** @type {string[]} */
|
||||
const parts = [];
|
||||
const visit = (n) => {
|
||||
// Skip trivia: comments/whitespace are already not in AST
|
||||
const kindName = ts.SyntaxKind[n.kind] || `K${n.kind}`;
|
||||
switch (n.kind) {
|
||||
case ts.SyntaxKind.Identifier:
|
||||
parts.push("Id");
|
||||
return;
|
||||
case ts.SyntaxKind.PrivateIdentifier:
|
||||
parts.push("PrivId");
|
||||
return;
|
||||
case ts.SyntaxKind.StringLiteral:
|
||||
case ts.SyntaxKind.NoSubstitutionTemplateLiteral:
|
||||
case ts.SyntaxKind.TemplateHead:
|
||||
case ts.SyntaxKind.TemplateMiddle:
|
||||
case ts.SyntaxKind.TemplateTail:
|
||||
parts.push("Str");
|
||||
return;
|
||||
case ts.SyntaxKind.NumericLiteral:
|
||||
parts.push("Num");
|
||||
return;
|
||||
case ts.SyntaxKind.TrueKeyword:
|
||||
case ts.SyntaxKind.FalseKeyword:
|
||||
parts.push("Bool");
|
||||
return;
|
||||
case ts.SyntaxKind.NullKeyword:
|
||||
case ts.SyntaxKind.UndefinedKeyword:
|
||||
parts.push("Nil");
|
||||
return;
|
||||
case ts.SyntaxKind.PropertyAssignment:
|
||||
case ts.SyntaxKind.ShorthandPropertyAssignment:
|
||||
case ts.SyntaxKind.MethodDeclaration:
|
||||
case ts.SyntaxKind.MethodSignature:
|
||||
parts.push("Prop");
|
||||
break;
|
||||
default:
|
||||
parts.push(kindName);
|
||||
}
|
||||
n.forEachChild(visit);
|
||||
};
|
||||
visit(node);
|
||||
return parts.join("|");
|
||||
}
|
||||
|
||||
function getFunctionInfo(sf, filePath) {
|
||||
/** @type {Array<{
|
||||
name: string,
|
||||
bodyText: string,
|
||||
structKey: string,
|
||||
start: number,
|
||||
end: number,
|
||||
startLine: number,
|
||||
endLine: number
|
||||
}>} */
|
||||
const out = [];
|
||||
|
||||
const addFunc = (nameNode, bodyNode) => {
|
||||
if (!bodyNode) return;
|
||||
const bodyText = bodyNode.getText(sf).trim();
|
||||
const start = bodyNode.getStart(sf);
|
||||
const end = bodyNode.getEnd();
|
||||
const { line: startLine } = sf.getLineAndCharacterOfPosition(start);
|
||||
const { line: endLine } = sf.getLineAndCharacterOfPosition(end);
|
||||
const name =
|
||||
nameNode && ts.isIdentifier(nameNode) ? nameNode.escapedText.toString() : "(anonymous)";
|
||||
|
||||
// min-lines filter
|
||||
const lines = bodyText.split(/\r?\n/).filter(Boolean);
|
||||
if (lines.length < minLines) return;
|
||||
|
||||
// structural signature from the body
|
||||
const structKey = structuralSignature(bodyNode);
|
||||
|
||||
out.push({
|
||||
name,
|
||||
bodyText,
|
||||
structKey,
|
||||
start,
|
||||
end,
|
||||
startLine: startLine + 1,
|
||||
endLine: endLine + 1,
|
||||
});
|
||||
};
|
||||
|
||||
const visit = (node) => {
|
||||
if (ts.isFunctionDeclaration(node) && node.body) {
|
||||
addFunc(node.name ?? null, node.body);
|
||||
} else if (
|
||||
ts.isFunctionExpression(node) ||
|
||||
ts.isArrowFunction(node)
|
||||
) {
|
||||
// find name if it’s assigned: const foo = () => {}
|
||||
let name = null;
|
||||
if (node.parent && ts.isVariableDeclaration(node.parent) && node.parent.name) {
|
||||
name = node.parent.name;
|
||||
} else if (
|
||||
node.parent &&
|
||||
ts.isPropertyAssignment(node.parent) &&
|
||||
ts.isIdentifier(node.parent.name)
|
||||
) {
|
||||
name = node.parent.name;
|
||||
} else if (node.name) {
|
||||
name = node.name;
|
||||
}
|
||||
if (node.body) addFunc(name, node.body);
|
||||
} else if (ts.isMethodDeclaration(node) && node.body) {
|
||||
addFunc(node.name, node.body);
|
||||
}
|
||||
node.forEachChild(visit);
|
||||
};
|
||||
|
||||
visit(sf);
|
||||
return out;
|
||||
}
|
||||
|
||||
/** -------- MAIN SCAN -------- */
|
||||
const files = walk(path.resolve(process.cwd(), rootDir));
|
||||
|
||||
/** Maps from hash -> occurrences */
|
||||
const groups = new Map();
|
||||
/** Helper for exact hash */
|
||||
import crypto from "crypto";
|
||||
const exactHash = (text) => crypto.createHash("sha1").update(text.replace(/\s+/g, " ").trim()).digest("hex");
|
||||
|
||||
for (const file of files) {
|
||||
try {
|
||||
const ext = path.extname(file).toLowerCase();
|
||||
const raw = fs.readFileSync(file, "utf8");
|
||||
|
||||
/** @type {Array<{virtualPath:string, code:string, lineOffset:number}>} */
|
||||
const codeUnits = [];
|
||||
|
||||
if (ext === ".astro") {
|
||||
const blocks = extractCodeFromAstro(raw);
|
||||
blocks.forEach((b, i) => {
|
||||
codeUnits.push({
|
||||
virtualPath: file + `#astro${i + 1}.ts`,
|
||||
code: b.code,
|
||||
lineOffset: b.offset || 1,
|
||||
});
|
||||
});
|
||||
} else {
|
||||
codeUnits.push({ virtualPath: file, code: raw, lineOffset: 1 });
|
||||
}
|
||||
|
||||
for (const { virtualPath, code, lineOffset } of codeUnits) {
|
||||
const sf = createSourceFile(virtualPath, code);
|
||||
const funcs = getFunctionInfo(sf, file);
|
||||
for (const f of funcs) {
|
||||
const key =
|
||||
mode === "exact" ? exactHash(f.bodyText) : crypto.createHash("sha1").update(f.structKey).digest("hex");
|
||||
const item = {
|
||||
file,
|
||||
where:
|
||||
ext === ".astro"
|
||||
? `${path.relative(process.cwd(), file)}:${f.startLine + lineOffset - 1}-${f.endLine + lineOffset - 1}`
|
||||
: `${path.relative(process.cwd(), file)}:${f.startLine}-${f.endLine}`,
|
||||
name: f.name,
|
||||
lines: f.endLine - f.startLine + 1,
|
||||
preview: f.bodyText.split(/\r?\n/).slice(0, 5).join("\n") + (f.endLine - f.startLine + 1 > 5 ? "\n..." : ""),
|
||||
};
|
||||
if (!groups.has(key)) groups.set(key, []);
|
||||
groups.get(key).push(item);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.warn(`⚠️ Skipping ${file}: ${e.message}`);
|
||||
}
|
||||
}
|
||||
|
||||
/** -------- REPORT -------- */
|
||||
const dupes = [...groups.entries()]
|
||||
.map(([key, arr]) => ({ key, items: arr }))
|
||||
.filter((g) => g.items.length > 1)
|
||||
.sort((a, b) => b.items.length - a.items.length);
|
||||
|
||||
if (outputJson) {
|
||||
console.log(JSON.stringify({ mode, minLines, groups: dupes }, null, 2));
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
if (dupes.length === 0) {
|
||||
console.log(`✅ No duplicate functions found (mode=${mode}, min-lines=${minLines}).`);
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
console.log(`\nFound ${dupes.length} duplicate group(s) (mode=${mode}, min-lines=${minLines}):\n`);
|
||||
dupes.forEach((g, i) => {
|
||||
console.log(`== Group ${i + 1} (${g.items.length} matches) ==`);
|
||||
const example = g.items[0];
|
||||
console.log(` Sample (${example.lines} lines) from ${example.where}${example.name ? ` [${example.name}]` : ""}`);
|
||||
console.log(" ---");
|
||||
console.log(indent(example.preview, " "));
|
||||
console.log(" ---");
|
||||
g.items.forEach((it) => {
|
||||
console.log(` • ${it.where}${it.name ? ` [${it.name}]` : ""} (${it.lines} lines)`);
|
||||
});
|
||||
console.log();
|
||||
});
|
||||
|
||||
function indent(s, pre) {
|
||||
return s
|
||||
.split("\n")
|
||||
.map((l) => pre + l)
|
||||
.join("\n");
|
||||
}
|
||||
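To illustrate what the struct mode above groups together (identifiers are normalized to Id, string and numeric literals to Str and Num), the following two function bodies — a made-up example, not code from this repository — yield the same structural signature and would be reported as one duplicate group:

```js
// duplicate-example.ts — hypothetical file for illustration only
import fs from "fs";
import path from "path";

// Different identifiers and string literals, identical structure.
function loadUserCache(dir) {
  const file = path.join(dir, "users.json");
  const raw = fs.readFileSync(file, "utf8");
  return JSON.parse(raw);
}

function loadToolIndex(folder) {
  const target = path.join(folder, "tools.json");
  const text = fs.readFileSync(target, "utf8");
  return JSON.parse(text);
}
```

Running `node find-duplicates.mjs src --mode struct --min-lines 3` against a folder containing such a file would list both locations in one group; in exact mode the differing literals keep them apart.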
@@ -10,13 +10,14 @@
    "astro": "astro"
  },
  "dependencies": {
    "@astrojs/node": "^9.3.0",
    "astro": "^5.12.3",
    "@astrojs/node": "^9.4.3",
    "astro": "^5.13.7",
    "cookie": "^1.0.2",
    "dotenv": "^16.4.5",
    "jose": "^5.2.0",
    "dotenv": "^16.6.1",
    "jose": "^5.10.0",
    "js-yaml": "^4.1.0",
    "jsonwebtoken": "^9.0.2",
    "simple-boost": "^2.0.2",
    "zod": "^3.25.76"
  },
  "devDependencies": {
405 public/videos/README.md (new file)
@@ -0,0 +1,405 @@
|
||||
# Video-Bereitstellung für ForensicPathways Knowledgebase
|
||||
|
||||
Videos müssen manuell in diesem Verzeichnis bereitgestellt werden, da sie aufgrund unterschiedlicher Lizenzierung nicht Bestandteil des Open-Source-Git-Repositorys sind.
|
||||
|
||||
## 🎥 Video-Quelle und Lizenzierung
|
||||
|
||||
**Video-Quelle:** https://cloud.cc24.dev/f/47971 (Interner Nextcloud-Share)
|
||||
**Kontakt bei Fragen:** mstoeck3@hs-mittweida.de
|
||||
|
||||
### Lizenzhinweise
|
||||
|
||||
- Videos können proprietäre Lizenzen haben
|
||||
- Nicht für öffentliche Redistribution geeignet
|
||||
- Nur für den internen Gebrauch in ForensicPathways
|
||||
- Urheberrechte beachten bei eigenen Video-Beiträgen
|
||||
|
||||
## 📁 Empfohlene Verzeichnisstruktur
|
||||
|
||||
```
|
||||
public/videos/
|
||||
├── tools/ # Tool-spezifische Tutorials
|
||||
│ ├── autopsy/
|
||||
│ │ ├── autopsy-installation.mp4
|
||||
│ │ ├── autopsy-basics.mp4
|
||||
│ │ └── autopsy-advanced-analysis.webm
|
||||
│ ├── volatility/
|
||||
│ │ ├── volatility-setup.mp4
|
||||
│ │ ├── volatility-pslist-demo.mp4
|
||||
│ │ └── volatility-malfind-tutorial.webm
|
||||
│ └── yara/
|
||||
│ ├── yara-rules-basics.mp4
|
||||
│ └── yara-advanced-hunting.mp4
|
||||
├── methods/ # Methodologie-Videos
|
||||
│ ├── timeline-analysis/
|
||||
│ │ ├── timeline-fundamentals.mp4
|
||||
│ │ └── timeline-correlation.webm
|
||||
│ ├── disk-imaging/
|
||||
│ │ ├── imaging-best-practices.mp4
|
||||
│ │ └── imaging-verification.mp4
|
||||
│ └── incident-response/
|
||||
│ ├── ir-methodology.mp4
|
||||
│ └── ir-documentation.webm
|
||||
├── concepts/ # Konzeptuelle Erklärungen
|
||||
│ ├── forensics-fundamentals/
|
||||
│ │ ├── hash-functions-explained.mp4
|
||||
│ │ ├── chain-of-custody.mp4
|
||||
│ │ └── evidence-handling.webm
|
||||
│ └── technical-concepts/
|
||||
│ ├── regex-patterns.mp4
|
||||
│ └── file-systems.webm
|
||||
└── shared/ # Übergreifende Inhalte
|
||||
├── nist-methodology.mp4
|
||||
├── legal-considerations.webm
|
||||
└── best-practices-overview.mp4
|
||||
```
|
||||
|
||||
## 🦊 Firefox-Kompatibilität (KRITISCH)
|
||||
|
||||
### **Wichtiger Hinweis**
|
||||
Videos **müssen** in Firefox-kompatiblen Formaten bereitgestellt werden, da das System automatische Firefox-Unterstützung implementiert. Nicht-kompatible Formate führen zu Fehlern!
|
||||
|
||||
### Unterstützte Formate
|
||||
|
||||
#### ✅ Empfohlene Formate (höchste Kompatibilität)
|
||||
|
||||
**MP4 (H.264/AVC + AAC):**
|
||||
```bash
|
||||
# Konvertierung mit ffmpeg
|
||||
ffmpeg -i input.mov \
|
||||
-c:v libx264 \
|
||||
-c:a aac \
|
||||
-profile:v baseline \
|
||||
-level 3.0 \
|
||||
-movflags +faststart \
|
||||
output.mp4
|
||||
```
|
||||
|
||||
**WebM (VP8/VP9 + Vorbis/Opus):**
|
||||
```bash
|
||||
# VP9 für beste Qualität
|
||||
ffmpeg -i input.mov \
|
||||
-c:v libvpx-vp9 \
|
||||
-c:a libopus \
|
||||
-b:v 1M \
|
||||
-b:a 128k \
|
||||
output.webm
|
||||
|
||||
# VP8 für breitere Kompatibilität
|
||||
ffmpeg -i input.mov \
|
||||
-c:v libvpx \
|
||||
-c:a libvorbis \
|
||||
-b:v 1M \
|
||||
-b:a 128k \
|
||||
output.webm
|
||||
```
|
||||
|
||||
#### ⚠️ Fallback-Format
|
||||
|
||||
**OGG Theora (für ältere Firefox-Versionen):**
|
||||
```bash
|
||||
ffmpeg -i input.mov \
|
||||
-c:v libtheora \
|
||||
-c:a libvorbis \
|
||||
-b:v 1M \
|
||||
-b:a 128k \
|
||||
output.ogv
|
||||
```
|
||||
|
||||
### ❌ Nicht unterstützte Formate in Firefox
|
||||
|
||||
- **H.265/HEVC** (.mp4, .mov) - Wird nicht dekodiert
|
||||
- **AV1** (.mp4, .webm) - Eingeschränkte Unterstützung
|
||||
- **Proprietäre Codecs** (.wmv, .avi mit proprietären Codecs)
|
||||
- **Apple-spezifische Formate** (.mov mit ProRes, .m4v)
|
||||
|
||||
### Multi-Format-Bereitstellung
|
||||
|
||||
Für maximale Kompatibilität mehrere Formate bereitstellen:
|
||||
|
||||
```html
|
||||
<video title="Autopsy Installation Tutorial" controls>
|
||||
<source src="/videos/tools/autopsy/installation.mp4" type="video/mp4">
|
||||
<source src="/videos/tools/autopsy/installation.webm" type="video/webm">
|
||||
<source src="/videos/tools/autopsy/installation.ogv" type="video/ogg">
|
||||
<p>Ihr Browser unterstützt das Video-Element nicht.</p>
|
||||
</video>
|
||||
```

## 🔧 Video Conversion and Optimization

### Quality guidelines

#### Resolution and bitrate

**720p (recommended for tutorials):**
```bash
ffmpeg -i input.mov \
  -vf scale=1280:720 \
  -c:v libx264 \
  -b:v 2M \
  -c:a aac \
  -b:a 128k \
  output.mp4
```

**1080p (for detailed demonstrations):**
```bash
ffmpeg -i input.mov \
  -vf scale=1920:1080 \
  -c:v libx264 \
  -b:v 4M \
  -c:a aac \
  -b:a 128k \
  output.mp4
```

**480p (mobile-optimized):**
```bash
ffmpeg -i input.mov \
  -vf scale=854:480 \
  -c:v libx264 \
  -b:v 1M \
  -c:a aac \
  -b:a 96k \
  output.mp4
```

### Optimization for web streaming

#### Fast start for progressive download
```bash
# Move the metadata to the beginning of the file
ffmpeg -i input.mp4 -c copy -movflags +faststart output.mp4
```

#### Optimizing the keyframe interval
```bash
# Keyframe every 2 seconds (at 30 fps) for smoother seeking
ffmpeg -i input.mov \
  -c:v libx264 \
  -g 60 \
  -keyint_min 60 \
  -sc_threshold 0 \
  output.mp4
```
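
To check whether an encoded file actually hits the 2-second target, the keyframe timestamps can be listed with ffprobe (a small sketch; `input.mp4` is a placeholder):

```bash
# Print the timestamps of all keyframes; the gaps should be <= 2 seconds
ffprobe -v error -select_streams v:0 \
  -show_entries packet=pts_time,flags -of csv=print_section=0 input.mp4 \
  | awk -F, '$2 ~ /K/ {print $1}'
```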

### Batch conversion

**Convert all videos in one directory:**
```bash
#!/bin/bash
# convert-all.sh
for file in *.mov *.avi *.mkv; do
  if [ -f "$file" ]; then
    name=$(basename "$file" | cut -d. -f1)

    # Create MP4
    ffmpeg -i "$file" \
      -c:v libx264 \
      -c:a aac \
      -b:v 2M \
      -b:a 128k \
      -movflags +faststart \
      "${name}.mp4"

    # Create WebM
    ffmpeg -i "$file" \
      -c:v libvpx-vp9 \
      -c:a libopus \
      -b:v 1.5M \
      -b:a 128k \
      "${name}.webm"
  fi
done
```
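
Assuming the script is saved alongside the raw footage, it can be run directly from that directory (a usage sketch):

```bash
chmod +x convert-all.sh
./convert-all.sh   # run inside the directory that contains the source files
```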

## 📊 File Size and Performance

### Size guidelines

**Streaming-optimized:**
- 720p: 5-15 MB/minute
- 1080p: 20-40 MB/minute
- 480p: 2-8 MB/minute

**Maximum file size:**
- Tutorial videos: < 100 MB
- Short demos: < 50 MB
- Concept explanations: < 30 MB
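
Whether an encoded file stays within these per-minute budgets can be estimated from its size and duration, for example with ffprobe (a minimal sketch; `input.mp4` is a placeholder):

```bash
# Rough MB-per-minute estimate for a finished file
ffprobe -v error -show_entries format=duration,size -of csv=p=0 input.mp4 \
  | awk -F, '{printf "%.1f MB/min\n", ($2 / 1048576) / ($1 / 60)}'
```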

### Compression settings

**Balanced quality/size:**
```bash
ffmpeg -i input.mov \
  -c:v libx264 \
  -preset medium \
  -crf 23 \
  -c:a aac \
  -b:a 128k \
  output.mp4
```

**High compression (smaller files):**
```bash
ffmpeg -i input.mov \
  -c:v libx264 \
  -preset slow \
  -crf 28 \
  -c:a aac \
  -b:a 96k \
  output.mp4
```

## 🎬 Video Thumbnail Generation

Automatic thumbnail creation:

```bash
# Thumbnail at the 10-second mark
ffmpeg -i input.mp4 -ss 00:00:10 -vframes 1 -q:v 2 thumbnail.jpg

# Several thumbnails to choose from
ffmpeg -i input.mp4 -vf fps=1/30 thumb_%03d.jpg
```

Store thumbnails in:
```
public/images/video-thumbnails/
├── autopsy-installation-thumb.jpg
├── volatility-basics-thumb.jpg
└── timeline-analysis-thumb.jpg
```
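
For larger batches, thumbnail generation can be scripted along the same lines (a sketch; the script name, the `-thumb.jpg` naming convention, and a globstar-capable bash are assumptions):

```bash
#!/bin/bash
# generate-thumbnails.sh - one JPEG per MP4, written to the shared thumbnail folder
shopt -s globstar nullglob
mkdir -p public/images/video-thumbnails

for video in public/videos/**/*.mp4; do
  name=$(basename "$video" .mp4)
  ffmpeg -y -i "$video" -ss 00:00:10 -vframes 1 -q:v 2 \
    "public/images/video-thumbnails/${name}-thumb.jpg"
done
```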

## 🔍 Quality Control

### Pre-upload checklist

**✅ Format compatibility:**
- [ ] MP4 with H.264/AVC video codec
- [ ] AAC audio codec
- [ ] Fast start enabled (`movflags +faststart`, see the check below)
- [ ] Keyframe interval ≤ 2 seconds
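
One common heuristic for the fast-start check is to look at the order of the `moov` and `mdat` boxes in ffprobe's trace output; if `moov` is listed first, fast start is in effect (a sketch relying on ffprobe's trace log format, not an official switch):

```bash
ffprobe -v trace -i input.mp4 2>&1 \
  | grep -o -m 2 "type:'moov'\|type:'mdat'"
# Expected with fast start: type:'moov' appears before type:'mdat'
```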

**✅ Firefox test:**
- [ ] Video loads in Firefox without errors
- [ ] Audio in sync with the video
- [ ] Controls work
- [ ] Seeking works smoothly

**✅ Technical quality:**
- [ ] Appropriate resolution (720p+ for GUI demos)
- [ ] Audio clear and intelligible
- [ ] No compression artifacts
- [ ] File size < 100 MB

**✅ Content quality:**
- [ ] Descriptive file name
- [ ] Appropriate length (< 10 minutes for tutorials)
- [ ] Clear demonstration of the functionality
- [ ] Visible UI elements

### Automated testing

```bash
#!/bin/bash
# video-check.sh - basic validation
shopt -s globstar nullglob   # required so the ** glob below recurses

for video in public/videos/**/*.mp4; do
  echo "Checking: $video"

  # Check the codec
  format=$(ffprobe -v quiet -select_streams v:0 -show_entries stream=codec_name -of csv=p=0 "$video")
  if [ "$format" != "h264" ]; then
    echo "❌ Wrong codec: $format (should be h264)"
  fi

  # Check the file size
  size=$(stat -c%s "$video")
  if [ "$size" -gt 104857600 ]; then # 100MB
    echo "⚠️ Large file: $((size / 1048576))MB"
  fi

  echo "✅ $video validated"
done
```

## 🚨 Troubleshooting

### Common Firefox problems

**Problem: video does not load**
```
Solution:
1. Check the codec: ffprobe -v quiet -show_format -show_streams video.mp4
2. Add a fallback format
3. Enable fast start
```

**Problem: audio/video out of sync**
```
Solution:
ffmpeg -i input.mp4 -c:v copy -c:a aac -avoid_negative_ts make_zero output.mp4
```

**Problem: seeking does not work**
```
Solution:
ffmpeg -i input.mp4 -c copy -movflags +faststart output.mp4
```

### Performance problems

**Problem: long loading times**
```
Possible fixes:
1. Reduce the bitrate
2. Lower the resolution
3. Optimize the keyframe interval
4. Enable progressive download
```

**Problem: high bandwidth usage**
```
Possible fixes:
1. Implement adaptive streaming
2. Provide multiple quality levels
3. Use preload="metadata"
```

## 📋 Deployment Checklist

**After uploading a video:**

1. **✅ Check the file structure**
   ```bash
   ls -la public/videos/tools/autopsy/
   ```

2. **✅ Set permissions**
   ```bash
   chmod 644 public/videos/**/*.mp4
   ```

3. **✅ Test article linking**
   - Video tags in Markdown work
   - Responsive containers are generated
   - Thumbnails load correctly

4. **✅ Browser compatibility**
   - Firefox: check codec support
   - Chrome: test performance
   - Safari: test fallback formats
   - Mobile: touch controls work

5. **✅ Build system**
   ```bash
   npm run build
   # No video-related errors in the console
   ```

If problems occur, contact mstoeck3@hs-mittweida.de and include:
- Browser and version
- Video file name and path
- Error messages from the browser console
- Screenshot of the problem

File diff suppressed because it is too large
@@ -1,5 +1,5 @@
|
||||
---
|
||||
// src/components/ContributionButton.astro - CLEANED: Removed duplicate auth script
|
||||
// src/components/ContributionButton.astro
|
||||
export interface Props {
|
||||
type: 'edit' | 'new' | 'write';
|
||||
toolName?: string;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
import { createToolSlug } from '../utils/toolHelpers.js';
|
||||
import { createToolSlug } from '../utils/clientUtils.js';
|
||||
|
||||
export interface Props {
|
||||
toolName: string;
|
||||
|
||||
@@ -4,7 +4,6 @@ import { getToolsData } from '../utils/dataService.js';
|
||||
const data = await getToolsData();
|
||||
const scenarios = data.scenarios || [];
|
||||
|
||||
// Configuration
|
||||
const maxDisplayed = 9;
|
||||
const displayedScenarios = scenarios.slice(0, maxDisplayed);
|
||||
---
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
---
|
||||
//src/components/ToolFilters.astro
|
||||
import { getToolsData } from '../utils/dataService.js';
|
||||
|
||||
const data = await getToolsData();
|
||||
@@ -54,7 +55,7 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
|
||||
<!-- Semantic Search Toggle - Inline -->
|
||||
<div id="semantic-search-container" class="semantic-search-inline hidden">
|
||||
<label class="semantic-toggle-wrapper" title="Semantische Suche verwendet Embeddings. Dadurch kann mit natürlicher Sprache/Begriffen gesucht werden, die Ergebnisse richten sich nach der euklidischen Distanz.">
|
||||
<label class="semantic-toggle-wrapper" title="Semantische Suche verwendet Embeddings. Dadurch kann mit natürlicher Sprache/Begriffen gesucht werden, die Ergebnisse richten sich nach der cosinus-Distanz.">
|
||||
<input type="checkbox" id="semantic-search-enabled" disabled/>
|
||||
<div class="semantic-checkbox-custom"></div>
|
||||
<span class="semantic-toggle-label">
|
||||
@@ -305,8 +306,7 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
</div>
|
||||
|
||||
<script define:vars={{ toolsData: data.tools, tagFrequency, sortedTags }}>
|
||||
window.toolsData = toolsData;
|
||||
|
||||
window.toolsData = toolsData;
|
||||
document.addEventListener('DOMContentLoaded', () => {
|
||||
const elements = {
|
||||
searchInput: document.getElementById('search-input'),
|
||||
@@ -358,7 +358,7 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
try {
|
||||
const res = await fetch('/api/ai/embeddings-status');
|
||||
const { embeddings } = await res.json();
|
||||
semanticSearchAvailable = embeddings?.enabled && embeddings?.initialized;
|
||||
semanticSearchAvailable = embeddings?.initialized;
|
||||
|
||||
if (semanticSearchAvailable) {
|
||||
elements.semanticContainer.classList.remove('hidden');
|
||||
@@ -392,6 +392,13 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function isToolHosted(tool) {
|
||||
return tool.projectUrl !== undefined &&
|
||||
tool.projectUrl !== null &&
|
||||
tool.projectUrl !== "" &&
|
||||
tool.projectUrl.trim() !== "";
|
||||
}
|
||||
|
||||
function toggleCollapsible(toggleBtn, content, storageKey) {
|
||||
const isCollapsed = toggleBtn.getAttribute('data-collapsed') === 'true';
|
||||
@@ -432,13 +439,6 @@ const sortedTags = Object.entries(tagFrequency)
|
||||
}
|
||||
}
|
||||
|
||||
function isToolHosted(tool) {
|
||||
return tool.projectUrl !== undefined &&
|
||||
tool.projectUrl !== null &&
|
||||
tool.projectUrl !== "" &&
|
||||
tool.projectUrl.trim() !== "";
|
||||
}
|
||||
|
||||
function initTagCloud() {
|
||||
const visibleCount = 20;
|
||||
elements.tagCloudItems.forEach((item, index) => {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
---
|
||||
//src/components/ToolMatrix.astro
|
||||
import { getToolsData } from '../utils/dataService.js';
|
||||
import ShareButton from './ShareButton.astro';
|
||||
|
||||
const data = await getToolsData();
|
||||
|
||||
@@ -193,7 +193,6 @@ domains.forEach((domain: any) => {
|
||||
</div>
|
||||
|
||||
<script define:vars={{ toolsData: tools, domainAgnosticSoftware, domainAgnosticTools }}>
|
||||
// Ensure isToolHosted is available
|
||||
if (!window.isToolHosted) {
|
||||
window.isToolHosted = function(tool) {
|
||||
return tool.projectUrl !== undefined &&
|
||||
@@ -765,14 +764,12 @@ domains.forEach((domain: any) => {
|
||||
hideToolDetails('both');
|
||||
}
|
||||
|
||||
// Register all functions globally
|
||||
window.showToolDetails = showToolDetails;
|
||||
window.hideToolDetails = hideToolDetails;
|
||||
window.hideAllToolDetails = hideAllToolDetails;
|
||||
window.toggleDomainAgnosticSection = toggleDomainAgnosticSection;
|
||||
window.showShareDialog = showShareDialog;
|
||||
|
||||
// Register matrix-prefixed versions for delegation
|
||||
window.matrixShowToolDetails = showToolDetails;
|
||||
window.matrixHideToolDetails = hideToolDetails;
|
||||
|
||||
|
||||
@@ -1,203 +1,263 @@
|
||||
// src/config/prompts.ts - Enhanced with phase completion reasoning
|
||||
// src/config/prompts.ts
|
||||
|
||||
const RELEVANCE_RUBRIC = `
|
||||
TASK RELEVANCE (INTEGER 0–100, NO %):
|
||||
- 55–65 = Basis/ok
|
||||
- 66–75 = Gut geeignet
|
||||
- 76–85 = Sehr gut geeignet
|
||||
- >85 = Nur bei nahezu perfekter Übereinstimmung
|
||||
`.trim();
|
||||
|
||||
const STRICTNESS = `
|
||||
STRICTNESS:
|
||||
- Output MUST be pure JSON (no prose, no code fences, no trailing commas).
|
||||
- Use EXACT item names as provided (casing/spelling must match).
|
||||
- Do NOT invent items or fields. If unsure, select fewer.
|
||||
`.trim();
|
||||
|
||||
export const AI_PROMPTS = {
|
||||
|
||||
toolSelection: (mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number) => {
|
||||
const modeInstruction = mode === 'workflow'
|
||||
? 'Workflow mit 15-25 Items über alle Phasen. PFLICHT: Mindestens 40% Methoden, Rest Tools/Konzepte.'
|
||||
: 'Spezifische Lösung mit 4-10 Items. PFLICHT: Mindestens 30% Methoden wenn verfügbar.';
|
||||
enhancementQuestions: (input: string) => {
|
||||
return `Sie sind DFIR-Experte. Ein Nutzer beschreibt unten ein Szenario/Problem.
|
||||
|
||||
return `Du bist ein DFIR-Experte. Wähle die BESTEN Items aus dem vorgefilterten Set.
|
||||
ZIEL:
|
||||
- Stellen Sie NUR dann 1–3 präzise Rückfragen, wenn entscheidende forensische Lücken die weitere Analyse/Toolauswahl PHASENREIHENFOLGE oder EVIDENCE-STRATEGIE wesentlich beeinflussen würden.
|
||||
- Wenn ausreichend abgedeckt: Geben Sie eine leere Liste [] zurück.
|
||||
|
||||
AUSWAHLMETHODE: ${selectionMethod}
|
||||
${selectionMethod === 'embeddings_candidates' ?
|
||||
'✓ Semantisch relevante Items bereits vorgefiltert\n✓ Wähle die BESTEN für die konkrete Aufgabe' :
|
||||
'✓ Vollständige Datenbank verfügbar\n✓ Wähle die relevantesten Items'}
|
||||
PRIORITÄT DER THEMEN (in dieser Reihenfolge prüfen):
|
||||
1) Available Evidence & Artefakte (z.B. RAM-Dump, Disk-Image, Logs, PCAP, Registry, Cloud/Audit-Logs)
|
||||
2) Scope/Systems (konkrete Plattformen/Assets/Identitäten/Netzsegmente)
|
||||
3) Investigation Objectives (Ziele: IOC-Extraktion, Timeline, Impact, Attribution)
|
||||
4) Timeline/Timeframe (kritische Zeitfenster, Erhalt flüchtiger Daten)
|
||||
5) Legal & Compliance (Chain of Custody, Aufbewahrung, DSGVO/Branchenvorgaben)
|
||||
6) Technical Constraints (Ressourcen, Zugriffsrechte, Tooling/EDR)
|
||||
|
||||
FRAGEN-QUALITÄT:
|
||||
- Forensisch spezifisch und entscheidungsrelevant (keine Allgemeinplätze).
|
||||
- Eine Frage pro Thema, keine Dopplungen.
|
||||
- Antwortbar vom Nutzer (keine Spekulation, keine “Beweise senden”-Aufforderungen).
|
||||
- Maximal 18 Wörter, endet mit "?".
|
||||
|
||||
VALIDIERUNG:
|
||||
- Stellen Sie NUR Fragen zu Themen, die im Nutzertext NICHT hinreichend konkret beantwortet sind (keine Wiederholung bereits gegebener Details).
|
||||
- Wenn alle priorisierten Themen ausreichend sind → [].
|
||||
|
||||
ANTWORTFORMAT (NUR JSON, KEIN ZUSÄTZLICHER TEXT):
|
||||
[
|
||||
"präzise Frage 1?",
|
||||
"präzise Frage 2?",
|
||||
"präzise Frage 3?"
|
||||
]
|
||||
|
||||
NUTZER-EINGABE:
|
||||
${input}`.trim();
|
||||
},
|
||||
toolSelection: (mode: string, userQuery: string, maxSelectedItems: number) => {
|
||||
const modeInstruction =
|
||||
mode === 'workflow'
|
||||
? 'Workflow mit 15–25 Items über alle Phasen. Pflicht: ~40% Methoden, Rest Software/Konzepte (falls verfügbar).'
|
||||
: 'Spezifische Lösung mit 4–10 Items. Pflicht: ≥30% Methoden (falls verfügbar).';
|
||||
|
||||
return `Du bist DFIR-Experte. Wähle die BESTEN Items aus dem bereits semantisch vorgefilterten Set für die konkrete Aufgabe.
|
||||
|
||||
${modeInstruction}
|
||||
|
||||
ANFRAGE: "${userQuery}"
|
||||
|
||||
VERFÜGBARE ITEM-TYPEN:
|
||||
- TOOLS (type: "software"/"method") → praktische Anwendungen und Vorgehensweisen
|
||||
- KONZEPTE (type: "concept") → theoretisches Wissen und Methodiken
|
||||
ITEM-TYPEN:
|
||||
- TOOLS (type: "software" | "method")
|
||||
- KONZEPTE (type: "concept")
|
||||
|
||||
AUSWAHLSTRATEGIE:
|
||||
1. **ERSTE PRIORITÄT: Relevanz zur Anfrage**
|
||||
- Direkt anwendbar auf das Problem
|
||||
- Löst die Kernherausforderung
|
||||
|
||||
2. **ZWEITE PRIORITÄT: Ausgewogene Mischung**
|
||||
- Tools/Methoden für praktische Umsetzung → selectedTools
|
||||
- Konzepte für methodisches Verständnis → selectedConcepts
|
||||
- WICHTIG: Auch Konzepte auswählen, nicht nur Tools!
|
||||
|
||||
3. **QUALITÄT > QUANTITÄT**
|
||||
- Lieber weniger perfekte Items als viele mittelmäßige
|
||||
- Jedes Item muss begründbar sein
|
||||
AUSWAHLPRINZIPIEN:
|
||||
1) Relevanz zur Anfrage (direkt anwendbar, adressiert Kernproblem)
|
||||
2) Ausgewogene Mischung (Praxis: selectedTools; Methodik: selectedConcepts)
|
||||
3) Qualität > Quantität (lieber weniger, dafür passgenau)
|
||||
4) Keine Erfindungen. Wenn etwas nicht passt, wähle weniger.
|
||||
|
||||
AUSWAHLREGELN:
|
||||
- Wähle ${mode === 'workflow' ? '15-25' : '4-10'} Items total, max ${maxSelectedItems}
|
||||
- BEIDE Arrays füllen: selectedTools UND selectedConcepts
|
||||
- Mindestens 1-2 Konzepte auswählen für methodische Fundierung
|
||||
- Tools: 40% Methoden (type="method"), Rest Software (type="software")
|
||||
- Wähle ${mode === 'workflow' ? '15–25' : '4–10'} Items total (max ${maxSelectedItems})
|
||||
- Fülle BEIDE Arrays: selectedTools UND selectedConcepts
|
||||
- Mindestens 1–2 Konzepte (falls verfügbar)
|
||||
- Bevorzugt ~40% Methoden (Workflow) bzw. ≥30% Methoden (Tool-Modus), sofern vorhanden
|
||||
- Sortiere selectedTools grob nach Eignung (bestes zuerst)
|
||||
|
||||
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
|
||||
Skalenhinweis (für spätere Schritte – einheitlich):
|
||||
${RELEVANCE_RUBRIC}
|
||||
|
||||
${STRICTNESS}
|
||||
|
||||
ANTWORT (NUR JSON):
|
||||
{
|
||||
"selectedTools": ["ToolName1", "MethodName1", ...],
|
||||
"selectedConcepts": ["ConceptName1", "ConceptName2", ...],
|
||||
"reasoning": "Kurze Begründung mit Erwähnung der Tool/Konzept-Balance"
|
||||
"selectedTools": ["ToolName1", "MethodName1", "..."],
|
||||
"selectedConcepts": ["ConceptName1", "ConceptName2", "..."],
|
||||
"reasoning": "Sehr kurz: Balance/Abdeckung begründen"
|
||||
}`;
|
||||
},
|
||||
|
||||
toolSelectionWithData: (basePrompt: string, toolsToSend: any[], conceptsToSend: any[]) => {
|
||||
return `${basePrompt}
|
||||
|
||||
VERFÜGBARE TOOLS (${toolsToSend.length} Items - Methoden und Software):
|
||||
VERFÜGBARE TOOLS (${toolsToSend.length}):
|
||||
${JSON.stringify(toolsToSend, null, 2)}
|
||||
|
||||
VERFÜGBARE KONZEPTE (${conceptsToSend.length} Items - theoretisches Wissen):
|
||||
VERFÜGBARE KONZEPTE (${conceptsToSend.length}):
|
||||
${JSON.stringify(conceptsToSend, null, 2)}
|
||||
|
||||
WICHTIGER HINWEIS: Wähle sowohl aus TOOLS als auch aus KONZEPTEN aus! Konzepte sind essentiell für methodische Fundierung.`;
|
||||
WICHTIG:
|
||||
- Wähle nur aus obigen Listen. Keine neuen Namen.
|
||||
- Nutze exakte Namen. Keine Synonyme/Varianten.
|
||||
|
||||
Hinweis zur einheitlichen Relevanz-Skala:
|
||||
${RELEVANCE_RUBRIC}
|
||||
|
||||
${STRICTNESS}`;
|
||||
},
|
||||
|
||||
scenarioAnalysis: (isWorkflow: boolean, userQuery: string) => {
|
||||
const analysisType = isWorkflow ? 'Szenario' : 'Problem';
|
||||
const focus = isWorkflow ?
|
||||
'Angriffsvektoren, betroffene Systeme, Zeitkritikalität' :
|
||||
'Kernherausforderung, verfügbare Daten, methodische Anforderungen';
|
||||
const focus = isWorkflow
|
||||
? 'Angriffsvektoren, betroffene Systeme, Zeitkritikalität'
|
||||
: 'Kernherausforderung, verfügbare Daten, methodische Anforderungen';
|
||||
|
||||
return `DFIR-Experte: Analysiere das ${analysisType}.
|
||||
|
||||
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
|
||||
|
||||
Fokus: ${focus}
|
||||
|
||||
Antwort: Fließtext ohne Listen, max 100 Wörter.`;
|
||||
Antwort: Fließtext, max 100 Wörter. Keine Liste, keine Einleitung.`;
|
||||
},
|
||||
|
||||
investigationApproach: (isWorkflow: boolean, userQuery: string) => {
|
||||
const approachType = isWorkflow ? 'Untersuchungsansatz' : 'Lösungsansatz';
|
||||
const focus = isWorkflow ?
|
||||
'Triage-Prioritäten, Phasenabfolge, Kontaminationsvermeidung' :
|
||||
'Methodenauswahl, Validierung, Integration';
|
||||
const focus = isWorkflow
|
||||
? 'Triage-Prioritäten, Phasenabfolge, Kontaminationsvermeidung'
|
||||
: 'Methodenauswahl, Validierung, Integration';
|
||||
|
||||
return `Entwickle einen ${approachType}.
|
||||
|
||||
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
|
||||
|
||||
Fokus: ${focus}
|
||||
|
||||
Antwort: Fließtext ohne Listen, max 100 Wörter.`;
|
||||
Antwort: Fließtext, max 100 Wörter.`;
|
||||
},
|
||||
|
||||
criticalConsiderations: (isWorkflow: boolean, userQuery: string) => {
|
||||
const focus = isWorkflow ?
|
||||
'Beweissicherung vs. Gründlichkeit, Chain of Custody' :
|
||||
'Tool-Validierung, False Positives/Negatives, Qualifikationen';
|
||||
const focus = isWorkflow
|
||||
? 'Beweissicherung vs. Gründlichkeit, Chain of Custody'
|
||||
: 'Tool-Validierung, False Positives/Negatives, Qualifikationen';
|
||||
|
||||
return `Identifiziere kritische Überlegungen.
|
||||
|
||||
${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
|
||||
|
||||
Fokus: ${focus}
|
||||
|
||||
Antwort: Fließtext ohne Listen, max 100 Wörter.`;
|
||||
Antwort: Fließtext, max 100 Wörter.`;
|
||||
},
|
||||
|
||||
phaseToolSelection: (userQuery: string, phase: any, phaseTools: any[]) => {
|
||||
const methods = phaseTools.filter(t => t.type === 'method');
|
||||
const tools = phaseTools.filter(t => t.type === 'software');
|
||||
|
||||
|
||||
if (phaseTools.length === 0) {
|
||||
return `Keine Methoden/Tools für Phase "${phase.name}" verfügbar. Antworte mit leerem Array: []`;
|
||||
}
|
||||
|
||||
return `Du bist ein DFIR-Experte. Wähle die 2-3 BESTEN Items für Phase "${phase.name}".
|
||||
|
||||
return `Wähle die 2–3 BESTEN Items für Phase "${phase.name}".
|
||||
|
||||
SZENARIO: "${userQuery}"
|
||||
PHASE: ${phase.name} - ${phase.description || ''}
|
||||
PHASE: ${phase.name} — ${phase.description || ''}
|
||||
|
||||
VERFÜGBARE ITEMS (bereits von KI vorausgewählt):
|
||||
VERFÜGBARE ITEMS:
|
||||
${methods.length > 0 ? `
|
||||
METHODEN (${methods.length}):
|
||||
${methods.map((method: any) =>
|
||||
`- ${method.name}
|
||||
Typ: ${method.type}
|
||||
Beschreibung: ${method.description}
|
||||
Domains: ${method.domains?.join(', ') || 'N/A'}
|
||||
Skill Level: ${method.skillLevel}`
|
||||
${methods.map((m: any) =>
|
||||
`- ${m.name}
|
||||
Typ: ${m.type}
|
||||
Beschreibung: ${m.description}
|
||||
Domains: ${m.domains?.join(', ') || 'N/A'}
|
||||
Skill Level: ${m.skillLevel}`
|
||||
).join('\n\n')}
|
||||
` : 'Keine Methoden verfügbar'}
|
||||
|
||||
${tools.length > 0 ? `
|
||||
SOFTWARE TOOLS (${tools.length}):
|
||||
${tools.map((tool: any) =>
|
||||
`- ${tool.name}
|
||||
Typ: ${tool.type}
|
||||
Beschreibung: ${tool.description}
|
||||
Plattformen: ${tool.platforms?.join(', ') || 'N/A'}
|
||||
Skill Level: ${tool.skillLevel}`
|
||||
SOFTWARE (${tools.length}):
|
||||
${tools.map((t: any) =>
|
||||
`- ${t.name}
|
||||
Typ: ${t.type}
|
||||
Beschreibung: ${t.description}
|
||||
Plattformen: ${t.platforms?.join(', ') || 'N/A'}
|
||||
Skill Level: ${t.skillLevel}`
|
||||
).join('\n\n')}
|
||||
` : 'Keine Software-Tools verfügbar'}
|
||||
|
||||
AUSWAHLREGELN FÜR PHASE "${phase.name}":
|
||||
1. Wähle die 2-3 BESTEN Items für diese spezifische Phase
|
||||
2. Priorisiere Items, die DIREKT für "${phase.name}" relevant sind
|
||||
3. Mindestens 1 Methode wenn verfügbar, Rest Software-Tools
|
||||
4. Begründe WARUM jedes Item für diese Phase optimal ist
|
||||
REGELN:
|
||||
1) 2–3 Items, direkt phasenrelevant; mind. 1 Methode, falls verfügbar
|
||||
2) Begründung pro Item (präzise, anwendungsbezogen)
|
||||
3) Verwende EXAKTE Namen aus den Listen. Keine Erfindungen.
|
||||
|
||||
WICHTIG: Verwende EXAKT die Namen wie oben aufgelistet (ohne Präfixe wie M1./T2.)!
|
||||
${RELEVANCE_RUBRIC}
|
||||
|
||||
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB:
|
||||
${STRICTNESS}
|
||||
|
||||
ANTWORT (NUR JSON):
|
||||
[
|
||||
{
|
||||
"toolName": "Exakter Name aus der Liste oben",
|
||||
"taskRelevance": 85,
|
||||
"justification": "Detaillierte Begründung (60-80 Wörter) warum optimal für ${phase.name} - erkläre Anwendung, Vorteile und spezifische Relevanz",
|
||||
"limitations": ["Mögliche Einschränkung für diese Phase"]
|
||||
"toolName": "Exakter Name",
|
||||
"taskRelevance": 0,
|
||||
"justification": "60–80 Wörter zur phasenspezifischen Eignung",
|
||||
"limitations": ["Optionale spezifische Einschränkung"]
|
||||
}
|
||||
]`;
|
||||
},
|
||||
|
||||
toolEvaluation: (userQuery: string, tool: any, rank: number, taskRelevance: number) => {
|
||||
toolEvaluation: (userQuery: string, tool: any, rank: number) => {
|
||||
const itemType = tool.type === 'method' ? 'Methode' : 'Tool';
|
||||
|
||||
return `Erkläre die Anwendung dieser/dieses ${itemType}.
|
||||
|
||||
return `Bewerte diese/diesen ${itemType} ausschließlich bzgl. des PROBLEMS.
|
||||
|
||||
PROBLEM: "${userQuery}"
|
||||
${itemType.toUpperCase()}: ${tool.name} (${taskRelevance}% Eignung)
|
||||
${itemType.toUpperCase()}: ${tool.name}
|
||||
TYP: ${tool.type}
|
||||
|
||||
Bereits als Rang ${rank} bewertet.
|
||||
ANWEISUNGEN:
|
||||
- Nur vorhandene Metadaten nutzen (keine Annahmen, keine Websuche).
|
||||
- "taskRelevance" als GANZZAHL 0–100 nach einheitlicher Skala vergeben.
|
||||
- Realistische Scores i.d.R. 60–80, >85 nur bei nahezu perfektem Fit.
|
||||
- Keine Texte außerhalb des JSON.
|
||||
|
||||
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB DER JSON-STRUKTUR:
|
||||
${RELEVANCE_RUBRIC}
|
||||
${STRICTNESS}
|
||||
|
||||
ANTWORT (NUR JSON):
|
||||
{
|
||||
"detailed_explanation": "Warum und wie einsetzen",
|
||||
"implementation_approach": "Konkrete Schritte",
|
||||
"pros": ["Vorteil 1", "Vorteil 2"],
|
||||
"limitations": ["Einschränkung 1"],
|
||||
"alternatives": "Alternative Ansätze"
|
||||
"alternatives": "Kurz zu sinnvollen Alternativen",
|
||||
"taskRelevance": 0
|
||||
}`;
|
||||
},
|
||||
|
||||
backgroundKnowledgeSelection: (userQuery: string, mode: string, selectedToolNames: string[], availableConcepts: any[]) => {
|
||||
return `Wähle 2-4 relevante Konzepte.
|
||||
return `Wähle 2–4 Konzepte, die das Verständnis/den Einsatz der ausgewählten Tools verbessern.
|
||||
|
||||
${mode === 'workflow' ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
|
||||
AUSGEWÄHLTE TOOLS: ${selectedToolNames.join(', ')}
|
||||
|
||||
VERFÜGBARE KONZEPTE (${availableConcepts.length} KI-kuratiert):
|
||||
${availableConcepts.map((c: any) =>
|
||||
`- ${c.name}: ${c.description}...`
|
||||
).join('\n')}
|
||||
VERFÜGBARE KONZEPTE (${availableConcepts.length}):
|
||||
${availableConcepts.map((c: any) => `- ${c.name}: ${c.description}...`).join('\n')}
|
||||
|
||||
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB DER JSON-STRUKTUR:
|
||||
REGELN:
|
||||
- Nur Konzepte aus obiger Liste wählen.
|
||||
- Relevanz kurz und konkret begründen.
|
||||
|
||||
${STRICTNESS}
|
||||
|
||||
ANTWORT (NUR JSON):
|
||||
[
|
||||
{
|
||||
"conceptName": "Name",
|
||||
"relevance": "Warum kritisch für Methodik"
|
||||
"conceptName": "Exakter Name",
|
||||
"relevance": "Warum dieses Konzept hier methodisch wichtig ist"
|
||||
}
|
||||
]`;
|
||||
},
|
||||
@@ -209,27 +269,14 @@ ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT OHNE JEGLICHEN TEXT AUSSERHALB DER JSON-S
|
||||
tool: any,
|
||||
completionContext: string
|
||||
) => {
|
||||
return `Du bist ein DFIR-Experte. Erkläre warum dieses Tool nachträglich zur Vervollständigung hinzugefügt wurde.
|
||||
|
||||
KONTEXT DER NACHTRÄGLICHEN ERGÄNZUNG:
|
||||
- Ursprüngliche KI-Auswahl war zu spezifisch/eng gefasst
|
||||
- Phase "${phase.name}" war unterrepräsentiert in der initialen Auswahl
|
||||
- Semantische Suche fand zusätzlich relevante Tools für diese Phase
|
||||
- Tool wird nachträglich hinzugefügt um Vollständigkeit zu gewährleisten
|
||||
return `Begründe knapp die Nachergänzung für Phase "${phase.name}".
|
||||
|
||||
URSPRÜNGLICHE ANFRAGE: "${originalQuery}"
|
||||
PHASE ZU VERVOLLSTÄNDIGEN: ${phase.name} - ${phase.description || ''}
|
||||
PHASE: ${phase.name} — ${phase.description || ''}
|
||||
HINZUGEFÜGTES TOOL: ${selectedToolName} (${tool.type})
|
||||
TOOL-BESCHREIBUNG: ${tool.description}
|
||||
KONTEXT: ${completionContext}
|
||||
|
||||
BEGRÜNDUNGSKONTEXT: ${completionContext}
|
||||
|
||||
Erstelle eine präzise Begründung (max. 40 Wörter), die erklärt:
|
||||
1. WARUM dieses Tool nachträglich hinzugefügt wurde
|
||||
2. WIE es die ${phase.name}-Phase ergänzt
|
||||
3. DASS es die ursprünglich zu spezifische Auswahl erweitert
|
||||
|
||||
Antwort: Prägnanter Fließtext, knappe Begründung für Nachergänzung. Vermeide Begriffe wie "Das Tool" und gib keinen einleitenden Text wie "Begründung (40 Wörter):" an.`;
|
||||
Antwort: Prägnanter Fließtext, max 40 Wörter, keine Einleitung, keine Liste.`;
|
||||
},
|
||||
|
||||
generatePhaseCompletionPrompt(
|
||||
@@ -238,47 +285,48 @@ Antwort: Prägnanter Fließtext, knappe Begründung für Nachergänzung. Vermeid
|
||||
candidateTools: any[],
|
||||
candidateConcepts: any[]
|
||||
): string {
|
||||
return `Du bist ein DFIR-Experte. Die initiale KI-Auswahl war zu spezifisch - die Phase "${phase.name}" ist unterrepräsentiert.
|
||||
return `Unterrepräsentierte Phase: "${phase.name}". Ergänze 1–2 passende Items aus der semantischen Nachsuche.
|
||||
|
||||
KONTEXT: Die Hauptauswahl hat zu wenige Tools für "${phase.name}" identifiziert. Wähle jetzt ergänzende Tools aus semantischer Nachsuche.
|
||||
ORIGINALANFRAGE: "${originalQuery}"
|
||||
PHASE: ${phase.name} — ${phase.description || ''}
|
||||
|
||||
ORIGINAL ANFRAGE: "${originalQuery}"
|
||||
UNTERREPRÄSENTIERTE PHASE: ${phase.name} - ${phase.description || ''}
|
||||
|
||||
SEMANTISCH GEFUNDENE KANDIDATEN für Nachergänzung:
|
||||
|
||||
VERFÜGBARE TOOLS (${candidateTools.length}):
|
||||
${candidateTools.map((tool: any) => `
|
||||
- ${tool.name} (${tool.type})
|
||||
Beschreibung: ${tool.description}
|
||||
Skill Level: ${tool.skillLevel}
|
||||
KANDIDATEN — TOOLS (${candidateTools.length}):
|
||||
${candidateTools.map((t: any) => `
|
||||
- ${t.name} (${t.type})
|
||||
Beschreibung: ${t.description}
|
||||
Skill Level: ${t.skillLevel}
|
||||
`).join('')}
|
||||
|
||||
${candidateConcepts.length > 0 ? `
|
||||
VERFÜGBARE KONZEPTE (${candidateConcepts.length}):
|
||||
${candidateConcepts.map((concept: any) => `
|
||||
- ${concept.name}
|
||||
Beschreibung: ${concept.description}
|
||||
KANDIDATEN — KONZEPTE (${candidateConcepts.length}):
|
||||
${candidateConcepts.map((c: any) => `
|
||||
- ${c.name}
|
||||
Beschreibung: ${c.description}
|
||||
`).join('')}
|
||||
` : ''}
|
||||
|
||||
AUSWAHLREGELN FÜR NACHERGÄNZUNG:
|
||||
1. Wähle 1-2 BESTE Methoden/Tools die die ${phase.name}-Phase optimal ergänzen
|
||||
2. Methoden/Tools müssen für die ursprüngliche Anfrage relevant sein
|
||||
3. Ergänzen, nicht ersetzen - erweitere die zu spezifische Erstauswahl
|
||||
REGELN:
|
||||
- Wähle 1–2 Tools/Methoden, die ${phase.name} sinnvoll ergänzen (keine Ersetzung).
|
||||
- Nur aus obigen Kandidaten wählen; exakte Namen verwenden.
|
||||
- Kurze Begründung, warum diese Ergänzung nötig ist.
|
||||
|
||||
ANTWORT AUSSCHLIESSLICH IM JSON-FORMAT:
|
||||
Skalenhinweis (einheitlich):
|
||||
${RELEVANCE_RUBRIC}
|
||||
|
||||
${STRICTNESS}
|
||||
|
||||
ANTWORT (NUR JSON):
|
||||
{
|
||||
"selectedTools": ["ToolName1", "ToolName2"],
|
||||
"selectedConcepts": ["ConceptName1"],
|
||||
"completionReasoning": "Kurze Erklärung warum diese Nachergänzung für ${phase.name} notwendig war"
|
||||
"completionReasoning": "Kurze Erklärung zur Ergänzung der ${phase.name}-Phase"
|
||||
}`;
|
||||
},
|
||||
|
||||
finalRecommendations: (isWorkflow: boolean, userQuery: string, selectedToolNames: string[]) => {
|
||||
const focus = isWorkflow ?
|
||||
'Workflow-Schritte, Best Practices, Objektivität' :
|
||||
'Methodische Überlegungen, Validierung, Qualitätssicherung';
|
||||
const focus = isWorkflow
|
||||
? 'Knappe Workflow-Schritte & Best Practices; neutral formulieren'
|
||||
: 'Methodische Überlegungen, Validierung, Qualitätssicherung';
|
||||
|
||||
return `Erstelle ${isWorkflow ? 'Workflow-Empfehlung' : 'methodische Überlegungen'}.
|
||||
|
||||
@@ -286,33 +334,31 @@ ${isWorkflow ? 'SZENARIO' : 'PROBLEM'}: "${userQuery}"
|
||||
AUSGEWÄHLT: ${selectedToolNames.join(', ')}${selectedToolNames.length > 5 ? '...' : ''}
|
||||
|
||||
Fokus: ${focus}
|
||||
|
||||
Antwort: Fließtext ohne Listen, max ${isWorkflow ? '100' : '80'} Wörter.`;
|
||||
Antwort: Fließtext, max ${isWorkflow ? '100' : '80'} Wörter. Keine Liste.`;
|
||||
}
|
||||
} as const;
|
||||
|
||||
export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, selectionMethod: string, maxSelectedItems: number): string;
|
||||
export function getPrompt(key: 'enhancementQuestions', input: string): string;
|
||||
export function getPrompt(key: 'toolSelection', mode: string, userQuery: string, maxSelectedItems: number): string;
|
||||
export function getPrompt(key: 'toolSelectionWithData', basePrompt: string, toolsToSend: any[], conceptsToSend: any[]): string;
|
||||
export function getPrompt(key: 'scenarioAnalysis', isWorkflow: boolean, userQuery: string): string;
|
||||
export function getPrompt(key: 'investigationApproach', isWorkflow: boolean, userQuery: string): string;
|
||||
export function getPrompt(key: 'criticalConsiderations', isWorkflow: boolean, userQuery: string): string;
|
||||
export function getPrompt(key: 'phaseToolSelection', userQuery: string, phase: any, phaseTools: any[]): string;
|
||||
export function getPrompt(key: 'toolEvaluation', userQuery: string, tool: any, rank: number, taskRelevance: number): string;
|
||||
export function getPrompt(key: 'toolEvaluation', userQuery: string, tool: any, rank: number): string;
|
||||
export function getPrompt(key: 'backgroundKnowledgeSelection', userQuery: string, mode: string, selectedToolNames: string[], availableConcepts: any[]): string;
|
||||
export function getPrompt(key: 'phaseCompletionReasoning', originalQuery: string, phase: any, selectedToolName: string, tool: any, completionContext: string): string;
|
||||
export function getPrompt(key: 'finalRecommendations', isWorkflow: boolean, userQuery: string, selectedToolNames: string[]): string;
|
||||
export function getPrompt(key: 'generatePhaseCompletionPrompt', originalQuery: string, phase: any, candidateTools: any[], candidateConcepts: any[]): string;
|
||||
|
||||
export function getPrompt(promptKey: keyof typeof AI_PROMPTS, ...args: any[]): string {
|
||||
try {
|
||||
const promptFunction = AI_PROMPTS[promptKey];
|
||||
if (typeof promptFunction === 'function') {
|
||||
return (promptFunction as (...args: any[]) => string)(...args);
|
||||
} else {
|
||||
console.error(`[PROMPTS] Invalid prompt key: ${promptKey}`);
|
||||
return 'Error: Invalid prompt configuration';
|
||||
}
|
||||
} catch (error) {
|
||||
console.error(`[PROMPTS] Error generating prompt ${promptKey}:`, error);
|
||||
const f = AI_PROMPTS[promptKey];
|
||||
if (typeof f === 'function') return (f as (...a: any[]) => string)(...args);
|
||||
console.error(`[PROMPTS] Invalid prompt key: ${promptKey}`);
|
||||
return 'Error: Invalid prompt configuration';
|
||||
} catch (err) {
|
||||
console.error(`[PROMPTS] Error generating prompt ${promptKey}:`, err);
|
||||
return 'Error: Failed to generate prompt';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
501
src/content/README.md
Normal file
@@ -0,0 +1,501 @@
|
||||
# Manuell hinzufügen
|
||||
|
||||
Hier müssen Artikel, die eingebettet werden sollen, manuell abgespeichert werden.
|
||||
Da diese anders lizenziert sein können, sind sie nicht Bestandteil des Open-Source-Repositorys.
|
||||
|
||||
**Artikel-Quelle:** https://cloud.cc24.dev/f/47971 (Interner Nextcloud-Share)
|
||||
|
||||
Bei Bedarf bitte Kontakt aufnehmen mit mstoeck3@hs-mittweida.de.
|
||||
|
||||
# Artikel-Schema
|
||||
|
||||
## Dateistruktur
|
||||
|
||||
Alle Artikel müssen als Markdown-Dateien im Format `src/content/knowledgebase/` gespeichert werden:
|
||||
|
||||
```
|
||||
src/content/knowledgebase/
|
||||
├── tool-autopsy-grundlagen.md
|
||||
├── tool-volatility-memory-analysis.md
|
||||
├── method-timeline-analysis.md
|
||||
└── concept-hash-functions.md
|
||||
```
|
||||
|
||||
### Namenskonventionen
|
||||
|
||||
- **Tool-Artikel**: `tool-{toolname}-{topic}.md`
|
||||
- **Methoden-Artikel**: `method-{methodname}-{topic}.md`
|
||||
- **Konzept-Artikel**: `concept-{conceptname}-{topic}.md`
|
||||
|
||||
## Frontmatter-Schema
|
||||
|
||||
Jeder Artikel muss einen YAML-Frontmatter-Header mit folgender Struktur haben:
|
||||
|
||||
### Pflichtfelder
|
||||
|
||||
```yaml
|
||||
---
|
||||
title: "Titel des Artikels"
|
||||
description: "Kurze Beschreibung für die Übersicht und SEO"
|
||||
last_updated: 2024-01-15 # Datum im YYYY-MM-DD Format
|
||||
author: "Name des Autors"
|
||||
published: true
|
||||
---
|
||||
```
|
||||
|
||||
### Optionale Felder
|
||||
|
||||
```yaml
|
||||
---
|
||||
# Tool-Verknüpfung
|
||||
tool_name: "Autopsy" # Exakter Name aus tools.yaml
|
||||
related_tools:
|
||||
- "Volatility 3"
|
||||
- "YARA"
|
||||
|
||||
# Klassifizierung
|
||||
difficulty: "intermediate" # novice, beginner, intermediate, advanced, expert
|
||||
categories:
|
||||
- "Tutorial"
|
||||
- "Best Practices"
|
||||
tags:
|
||||
- "memory-analysis"
|
||||
- "malware"
|
||||
- "windows"
|
||||
|
||||
# Zugriffskontrolle
|
||||
gated_content: false # true = Authentifizierung erforderlich
|
||||
---
|
||||
```
|
||||
|
||||
## Vollständiges Beispiel
|
||||
|
||||
**Dateiname:** `tool-volatility-memory-analysis-grundlagen.md`
|
||||
|
||||
```yaml
|
||||
---
|
||||
title: "Volatility 3 Memory Analysis Grundlagen"
|
||||
description: "Einführung in die RAM-Analyse mit Volatility 3 für Windows-Systeme"
|
||||
last_updated: 2024-01-15
|
||||
tool_name: "Volatility 3"
|
||||
related_tools:
|
||||
- "Autopsy"
|
||||
- "YARA"
|
||||
author: "Max Mustermann"
|
||||
difficulty: "intermediate"
|
||||
categories:
|
||||
- "Tutorial"
|
||||
- "Memory Analysis"
|
||||
tags:
|
||||
- "volatility"
|
||||
- "memory-dump"
|
||||
- "malware-analysis"
|
||||
- "incident-response"
|
||||
published: true
|
||||
gated_content: false
|
||||
---
|
||||
|
||||
# Volatility 3 Memory Analysis Grundlagen
|
||||
|
||||
Dieses Tutorial zeigt die Grundlagen der Speicher-Analyse...
|
||||
|
||||
## Installation
|
||||
|
||||
Volatility 3 kann über pip installiert werden:
|
||||
|
||||
```bash
|
||||
pip install volatility3
|
||||
```
|
||||
|
||||
## Erste Schritte
|
||||
|
||||
### Memory Dump laden
|
||||
|
||||
```bash
|
||||
vol -f memory.dmp windows.info
|
||||
```
|
||||
|
||||
### Prozesse auflisten
|
||||
|
||||
```bash
|
||||
vol -f memory.dmp windows.pslist
|
||||
```
|
||||
|
||||
## Video-Demonstration
|
||||
|
||||
<video src="/videos/volatility-basics.mp4" title="Volatility Grundlagen Tutorial" controls preload="metadata"></video>
|
||||
|
||||
## Weiterführende Links
|
||||
|
||||
- [Offizielle Dokumentation](https://volatility3.readthedocs.io/)
|
||||
- [Cheat Sheet](/downloads/volatility-cheat-sheet.pdf)
|
||||
```
|
||||
|
||||
## Content-Features
|
||||
|
||||
### Markdown-Unterstützung
|
||||
|
||||
- Standard Markdown-Syntax
|
||||
- Code-Blöcke mit Syntax-Highlighting
|
||||
- Tabellen
|
||||
- Listen und verschachtelte Inhalte
|
||||
- Automatische Inhaltsverzeichnis-Generierung
|
||||
|
||||
### Video-Einbindung
|
||||
|
||||
Videos können direkt in Markdown eingebettet werden und werden automatisch mit responsiven Containern erweitert:
|
||||
|
||||
#### Basis-Video-Einbindung
|
||||
|
||||
```html
|
||||
<video src="/videos/demo.mp4" title="Tool-Demonstration" controls></video>
|
||||
```
|
||||
|
||||
#### Vollständige Video-Konfiguration
|
||||
|
||||
```html
|
||||
<video
|
||||
src="/videos/advanced-tutorial.mp4"
|
||||
title="Erweiterte Analysefunktionen"
|
||||
controls
|
||||
preload="metadata"
|
||||
width="720"
|
||||
height="405"
|
||||
muted
|
||||
poster="/images/video-thumbnail.jpg"
|
||||
>
|
||||
<p>Ihr Browser unterstützt das Video-Element nicht.</p>
|
||||
</video>
|
||||
```
|
||||
|
||||
#### Unterstützte Video-Attribute
|
||||
|
||||
**Basis-Attribute:**
|
||||
- `src`: **Erforderlich** - Pfad zur Videodatei (relativ zu `/public/`)
|
||||
- `title`: **Empfohlen** - Beschreibung für Metadaten und Accessibility
|
||||
- `controls`: Zeigt Player-Steuerung (Standard-Empfehlung)
|
||||
|
||||
**Erweiterte Attribute:**
|
||||
- `autoplay`: Automatisches Abspielen (nicht empfohlen für UX)
|
||||
- `muted`: Stummgeschaltet (erforderlich für Autoplay in den meisten Browsern)
|
||||
- `loop`: Endlosschleife
|
||||
- `preload`: `"none"` | `"metadata"` | `"auto"` (Standard: `"metadata"`)
|
||||
- `poster`: Vorschaubild-URL
|
||||
- `width`/`height`: Feste Dimensionen (optional, der responsive Container passt sich automatisch an)
|
||||
|
||||
**Accessibility-Attribute:**
|
||||
- `aria-label`: Alternative Beschreibung
|
||||
- `aria-describedby`: ID eines Elements mit detaillierter Beschreibung
|
||||
|
||||
#### iframe-Einbindung (YouTube, Vimeo, etc.)
|
||||
|
||||
```html
|
||||
<iframe
|
||||
src="https://www.youtube.com/embed/VIDEO_ID"
|
||||
title="YouTube-Tutorial: Forensic Analysis mit Tool XYZ"
|
||||
width="560"
|
||||
height="315"
|
||||
frameborder="0"
|
||||
allowfullscreen
|
||||
></iframe>
|
||||
```
|
||||
|
||||
**iframe-Attribute:**
|
||||
- `src`: **Erforderlich** - Embed-URL des Video-Dienstes
|
||||
- `title`: **Erforderlich** - Beschreibung für Accessibility
|
||||
- `width`/`height`: Empfohlene Dimensionen (werden responsive angepasst)
|
||||
- `frameborder`: Auf `"0"` setzen für modernen Look
|
||||
- `allowfullscreen`: Vollbild-Modus erlauben
|
||||
- `loading`: `"lazy"` für Performance-Optimierung
|
||||
|
||||
### Automatische Video-Verarbeitung
|
||||
|
||||
Das System erweitert Video-Tags automatisch:
|
||||
|
||||
**Input:**
|
||||
```html
|
||||
<video src="/videos/demo.mp4" title="Demo" controls></video>
|
||||
```
|
||||
|
||||
**Output (automatisch generiert):**
|
||||
```html
|
||||
<div class="video-container">
|
||||
<video
|
||||
src="/videos/demo.mp4"
|
||||
title="Demo"
|
||||
controls
|
||||
preload="metadata"
|
||||
data-video-title="Demo"
|
||||
>
|
||||
<p>Your browser does not support the video element.</p>
|
||||
</video>
|
||||
<div class="video-metadata">
|
||||
<div class="video-title">Demo</div>
|
||||
</div>
|
||||
</div>
|
||||
```
|
||||
|
||||
### Firefox-Kompatibilität
|
||||
|
||||
**Wichtiger Hinweis:** Videos müssen in Firefox-kompatiblen Formaten bereitgestellt werden:
|
||||
|
||||
#### Empfohlene Formate
|
||||
|
||||
**Primäre Formate (höchste Kompatibilität):**
|
||||
- **MP4 (H.264/AVC)**: `.mp4` - Beste Kompatibilität über alle Browser hinweg
|
||||
- **WebM (VP8/VP9)**: `.webm` - Moderne Browser, gute Kompression
|
||||
|
||||
**Sekundäre Formate:**
|
||||
- **OGG Theora**: `.ogv` - Fallback für ältere Firefox-Versionen
|
||||
|
||||
#### Format-Konvertierung
|
||||
|
||||
```bash
|
||||
# Mit ffmpeg zu Firefox-kompatiblem MP4 konvertieren
|
||||
ffmpeg -i input.mov -c:v libx264 -c:a aac -movflags +faststart output.mp4
|
||||
|
||||
# Mit ffmpeg zu WebM konvertieren
|
||||
ffmpeg -i input.mov -c:v libvpx-vp9 -c:a libvorbis output.webm
|
||||
|
||||
# Multi-Format-Bereitstellung
|
||||
<video title="Demo" controls>
|
||||
<source src="/videos/demo.mp4" type="video/mp4">
|
||||
<source src="/videos/demo.webm" type="video/webm">
|
||||
<p>Ihr Browser unterstützt das Video-Element nicht.</p>
|
||||
</video>
|
||||
```
|
||||
|
||||
#### Firefox-spezifische Probleme
|
||||
|
||||
Das System erkennt automatisch Firefox und implementiert Error-Recovery:
|
||||
|
||||
- **Automatische Fehlererkennung** für nicht unterstützte Formate
|
||||
- **Fallback-Mechanismen** bei Codec-Problemen
|
||||
- **Erweitertes Logging** für Debugging
|
||||
|
||||
**Bekannte Firefox-Probleme:**
|
||||
- H.265/HEVC nicht unterstützt
|
||||
- Proprietäre Codecs teilweise eingeschränkt
|
||||
- MIME-Type-Sensitivität höher als bei Chrome
|
||||
|
||||
### Video-Datei-Management
|
||||
|
||||
#### Dateistruktur
|
||||
|
||||
```
|
||||
public/
|
||||
├── videos/
|
||||
│ ├── tools/
|
||||
│ │ ├── autopsy-basics.mp4
|
||||
│ │ ├── volatility-tutorial.webm
|
||||
│ │ └── yara-rules-demo.mp4
|
||||
│ ├── methods/
|
||||
│ │ ├── timeline-analysis.mp4
|
||||
│ │ └── disk-imaging.mp4
|
||||
│ └── concepts/
|
||||
│ ├── hash-functions.mp4
|
||||
│ └── chain-custody.mp4
|
||||
└── images/
|
||||
└── video-thumbnails/
|
||||
├── autopsy-thumb.jpg
|
||||
└── volatility-thumb.jpg
|
||||
```
|
||||
|
||||
#### Dateigröße-Empfehlungen
|
||||
|
||||
- **Streaming-Qualität**: 5-15 MB/Minute (720p)
|
||||
- **High-Quality Tutorials**: 20-40 MB/Minute (1080p)
|
||||
- **Mobile-Optimiert**: 2-8 MB/Minute (480p)
|
||||
|
||||
#### Konventionen
|
||||
|
||||
**Dateinamen:**
|
||||
- Lowercase mit Bindestrichen: `tool-autopsy-installation.mp4`
|
||||
- Präfix nach Kategorie: `tool-`, `method-`, `concept-`
|
||||
- Beschreibender Suffix: `-basics`, `-advanced`, `-troubleshooting`
|
||||
|
||||
**Video-Titel:**
|
||||
- Beschreibend und suchfreundlich
|
||||
- Tool/Methode im Titel erwähnen
|
||||
- Skill-Level angeben: "Grundlagen", "Erweitert", "Expertenlevel"
|
||||
|
||||
### Code-Blöcke
|
||||
|
||||
```bash
|
||||
# Bash-Beispiel
|
||||
volatility -f memory.dmp --profile=Win7SP1x64 pslist
|
||||
```
|
||||
|
||||
```python
|
||||
# Python-Beispiel
|
||||
import volatility.conf as conf
|
||||
import volatility.registry as registry
|
||||
```
|
||||
|
||||
### Tabellen
|
||||
|
||||
| Plugin | Beschreibung | Video-Tutorial |
|
||||
|--------|--------------|----------------|
|
||||
| pslist | Prozesse auflisten | [Tutorial ansehen](/videos/pslist-demo.mp4) |
|
||||
| malfind | Malware finden | [Demo](/videos/malfind-basics.mp4) |
|
||||
|
||||
## Artikel-Typen
|
||||
|
||||
### 1. Tool-spezifische Artikel (`tool-*.md`)
|
||||
|
||||
Artikel die einem konkreten Software-Tool zugeordnet sind:
|
||||
|
||||
```yaml
|
||||
tool_name: "Autopsy" # Muss exakt mit tools.yaml übereinstimmen
|
||||
```
|
||||
|
||||
### 2. Methoden-Artikel (`method-*.md`)
|
||||
|
||||
Artikel zu forensischen Methoden und Vorgehensweisen:
|
||||
|
||||
```yaml
|
||||
tool_name: "Timeline Analysis" # Verweis auf method-type in tools.yaml
|
||||
categories: ["Methodology", "Best Practices"]
|
||||
```
|
||||
|
||||
### 3. Konzept-Artikel (`concept-*.md`)
|
||||
|
||||
Artikel zu theoretischen Konzepten und Grundlagen:
|
||||
|
||||
```yaml
|
||||
tool_name: "Hash Functions & Digital Signatures" # Verweis auf concept-type in tools.yaml
|
||||
categories: ["Theory", "Fundamentals"]
|
||||
```
|
||||
|
||||
Alle Typen erscheinen:
|
||||
- In der Knowledgebase-Übersicht
|
||||
- Bei gesetztem `tool_name`: In der Tool-Detailansicht
|
||||
- Mit entsprechenden Icons und Badges
|
||||
|
||||
### 4. Geschützte Artikel
|
||||
|
||||
Unabhängig vom Typ können Artikel Authentifizierung erfordern:
|
||||
|
||||
```yaml
|
||||
gated_content: true
|
||||
```
|
||||
|
||||
Erscheinen mit 🔒-Symbol und erfordern Anmeldung.
|
||||
|
||||
## Verknüpfungen
|
||||
|
||||
### Related Tools
|
||||
|
||||
Tools aus dem `related_tools` Array werden automatisch verlinkt:
|
||||
|
||||
```yaml
|
||||
related_tools:
|
||||
- "YARA" # Wird zu Tool-Details verlinkt
|
||||
- "Wireshark" # Muss in tools.yaml existieren
|
||||
```
|
||||
|
||||
### Interne Links
|
||||
|
||||
```markdown
|
||||
- [Knowledgebase](/knowledgebase)
|
||||
- [Tool-Übersicht](/#tools-grid)
|
||||
- [Anderer Artikel](/knowledgebase/artikel-slug)
|
||||
```
|
||||
|
||||
## SEO und Metadaten
|
||||
|
||||
### Automatische Generierung
|
||||
|
||||
- URL: `/knowledgebase/{dateiname-ohne-extension}`
|
||||
- Meta-Description: Aus `description`-Feld
|
||||
- Breadcrumbs: Automatisch generiert
|
||||
- Reading-Time: Automatisch berechnet
|
||||
|
||||
### Social Sharing
|
||||
|
||||
Jeder Artikel erhält automatisch Share-Buttons für:
|
||||
- URL-Kopieren
|
||||
- Tool-spezifische Verlinkung
|
||||
|
||||
## Validierung
|
||||
|
||||
### Pflichtfeld-Prüfung
|
||||
|
||||
Das System validiert automatisch:
|
||||
- ✅ `title` ist gesetzt
|
||||
- ✅ `description` ist gesetzt
|
||||
- ✅ `last_updated` ist gültiges Datum
|
||||
- ✅ `difficulty` ist gültiger Wert
|
||||
- ✅ `tool_name` existiert in tools.yaml (falls gesetzt)
|
||||
|
||||
### Content-Validierung
|
||||
|
||||
- Automatische HTML-Escaping für Sicherheit
|
||||
- Video-URLs werden validiert
|
||||
- Broken Links werden geloggt (development)
|
||||
- Dateinamen-Präfixe helfen bei der Organisation und Verknüpfung
|
||||
|
||||
### Video-Validierung
|
||||
|
||||
- Dateipfade auf Existenz geprüft (development)
|
||||
- Warnung bei fehlender Format-Kompatibilität
|
||||
- Firefox-spezifische Warnings bei problematischen Formaten
|
||||
|
||||
## Deployment
|
||||
|
||||
1. Artikel von Nextcloud-Share herunterladen: https://cloud.cc24.dev/f/47971
|
||||
2. Videos manuell in `public/videos/` bereitstellen (siehe `public/videos/README.md`)
|
||||
3. Artikel in `src/content/knowledgebase/` ablegen (flache Struktur mit Präfixen)
|
||||
4. Frontmatter nach Schema überprüfen/anpassen
|
||||
5. Build-Prozess validiert automatisch
|
||||
6. Artikel erscheint in Knowledgebase-Übersicht
|
||||
|
||||
### Troubleshooting
|
||||
|
||||
**Artikel erscheint nicht:**
|
||||
- `published: true` gesetzt?
|
||||
- Frontmatter-Syntax korrekt?
|
||||
- Datei in `src/content/knowledgebase/` (flache Struktur)?
|
||||
- Dateiname folgt Konvention (`tool-*`, `method-*`, `concept-*`)?
|
||||
|
||||
**Tool-Verknüpfung funktioniert nicht:**
|
||||
- `tool_name` exakt wie in tools.yaml?
|
||||
- Groß-/Kleinschreibung beachten
|
||||
|
||||
**Video lädt nicht:**
|
||||
- Pfad korrekt? (beginnt mit `/videos/`)
|
||||
- Datei im `public/videos/` Ordner?
|
||||
- Unterstütztes Format? (mp4, webm, ogg)
|
||||
- Firefox-kompatibel? (H.264/AVC für MP4)
|
||||
|
||||
**Firefox-Video-Probleme:**
|
||||
- H.265/HEVC-Codecs vermeiden
|
||||
- Multiple `<source>`-Tags für Fallbacks nutzen
|
||||
- Browser-Console auf Codec-Fehler prüfen
|
||||
- MIME-Types korrekt gesetzt?
|
||||
|
||||
## Beispiel-Ordnerstruktur
|
||||
|
||||
```
|
||||
src/content/knowledgebase/
|
||||
├── tool-autopsy-timeline-analysis.md
|
||||
├── tool-volatility-basic-commands.md
|
||||
├── tool-yara-rule-writing.md
|
||||
├── method-timeline-analysis-fundamentals.md
|
||||
├── method-disk-imaging-best-practices.md
|
||||
├── concept-hash-functions-digital-signatures.md
|
||||
├── concept-regex-pattern-matching.md
|
||||
└── concept-chain-of-custody.md
|
||||
|
||||
public/videos/
|
||||
├── tools/
|
||||
│ ├── autopsy-timeline-tutorial.mp4
|
||||
│ ├── volatility-basics-demo.mp4
|
||||
│ └── yara-rules-advanced.webm
|
||||
├── methods/
|
||||
│ ├── timeline-analysis-walkthrough.mp4
|
||||
│ └── disk-imaging-best-practices.mp4
|
||||
└── concepts/
|
||||
├── hash-functions-explained.mp4
|
||||
└── chain-custody-procedures.mp4
|
||||
```
|
||||
@@ -16,7 +16,7 @@ const knowledgebaseCollection = defineCollection({
|
||||
tags: z.array(z.string()).default([]),
|
||||
|
||||
published: z.boolean().default(true),
|
||||
gated_content: z.boolean().default(false), // NEW: Gated content flag
|
||||
gated_content: z.boolean().default(false),
|
||||
|
||||
})
|
||||
});
|
||||
|
||||
@@ -1,616 +0,0 @@
|
||||
---
|
||||
title: "Digital Evidence Chain of Custody: Lückenlose Beweisführung in der digitalen Forensik"
|
||||
description: "Umfassender Leitfaden für die rechtssichere Dokumentation digitaler Beweise von der Sicherstellung bis zur Gerichtsverhandlung. Praktische Umsetzung von ISO 27037, Dokumentationsstandards und häufige Fallstricke."
|
||||
author: "Claude 4 Sonnett (Prompt: Mario Stöckl)"
|
||||
last_updated: 2025-08-10
|
||||
difficulty: advanced
|
||||
categories: ["standards", "documentation", "legal-compliance", "case-management"]
|
||||
tags: ["chain-of-custody", "iso-27037", "court-admissible", "audit-trail", "hash-verification", "tamper-evidence", "legal-compliance", "documentation", "process-management", "evidence-handling"]
|
||||
published: true
|
||||
---
|
||||
|
||||
# Digital Evidence Chain of Custody: Lückenlose Beweisführung in der digitalen Forensik
|
||||
|
||||
Die **Chain of Custody** (Beweiskette) ist das Rückgrat jeder forensischen Untersuchung und entscheidet oft über Erfolg oder Misserfolg vor Gericht. Dieser Leitfaden erklärt die rechtssicheren Verfahren für die lückenlose Dokumentation digitaler Beweise von der Sicherstellung bis zur Gerichtsverhandlung.
|
||||
|
||||
## Warum ist die Chain of Custody entscheidend?
|
||||
|
||||
In der digitalen Forensik können Beweise innerhalb von Sekunden manipuliert, gelöscht oder verfälscht werden. Eine ordnungsgemäße Chain of Custody gewährleistet:
|
||||
|
||||
- **Gerichtliche Verwertbarkeit** der Beweise
|
||||
- **Nachweis der Authentizität** und Integrität
|
||||
- **Schutz vor Manipulationsvorwürfen**
|
||||
- **Rechtssicherheit** für alle Beteiligten
|
||||
- **Compliance** mit internationalen Standards
|
||||
|
||||
> **Warnung**: Bereits kleine Fehler in der Beweiskette können zur kompletten Verwerfung der Beweise führen und jahrelange Ermittlungsarbeit zunichte machen.
|
||||
|
||||
## Rechtliche Grundlagen und Standards
|
||||
|
||||
### Internationale Standards
|
||||
|
||||
**ISO/IEC 27037:2012** - "Guidelines for identification, collection, acquisition and preservation of digital evidence"
|
||||
- Definiert Best Practices für digitale Beweismittel
|
||||
- International anerkannter Standard
|
||||
- Basis für nationale Implementierungen
|
||||
|
||||
**ISO/IEC 27041:2015** - "Guidance on assuring suitability and adequacy of incident investigative method"
|
||||
- Ergänzt ISO 27037 um Qualitätssicherung
|
||||
- Fokus auf Angemessenheit der Methoden
|
||||
|
||||
### Nationale Rahmenwerke
|
||||
|
||||
**Deutschland**:
|
||||
- § 81a StPO (Körperliche Untersuchung)
|
||||
- § 94 ff. StPO (Beschlagnahme)
|
||||
- BSI-Standards zur IT-Forensik
|
||||
|
||||
**USA**:
|
||||
- Federal Rules of Evidence (Rule 901, 902)
|
||||
- NIST Special Publication 800-86
|
||||
|
||||
**EU**:
|
||||
- GDPR-Compliance bei der Beweissicherung
|
||||
- eIDAS-Verordnung für digitale Signaturen
|
||||
|
||||
## Die vier Säulen der Chain of Custody
|
||||
|
||||
### 1. Authentizität (Echtheit)
|
||||
**Definition**: Nachweis, dass die Beweise tatsächlich von der behaupteten Quelle stammen.
|
||||
|
||||
**Praktische Umsetzung**:
|
||||
```bash
|
||||
# Cryptographic Hash Generation
|
||||
sha256sum /dev/sdb1 > evidence_hash.txt
|
||||
md5sum /dev/sdb1 >> evidence_hash.txt
|
||||
|
||||
# Mit Zeitstempel
|
||||
echo "$(date -u +%Y-%m-%dT%H:%M:%SZ): $(sha256sum /dev/sdb1)" >> chain_log.txt
|
||||
```
|
||||
|
||||
### 2. Integrität (Unversehrtheit)
|
||||
**Definition**: Sicherstellung, dass die Beweise seit der Sicherstellung unverändert geblieben sind.
|
||||
|
||||
**Maßnahmen**:
|
||||
- **Write-Blocker** bei allen Zugriffen
|
||||
- **Hash-Verifizierung** vor und nach jeder Bearbeitung
|
||||
- **Versionskontrolle** für alle Arbeitskopien
|
||||
|
||||
### 3. Nachvollziehbarkeit (Traceability)
|
||||
**Definition**: Lückenlose Dokumentation aller Personen, die Zugang zu den Beweisen hatten.
|
||||
|
||||
**Dokumentationspflicht**: Wer, Was, Wann, Wo, Warum
|
||||
|
||||
### 4. Nicht-Abstreitbarkeit (Non-Repudiation)
|
||||
**Definition**: Verhinderung, dass Beteiligte ihre Handlungen später abstreiten können.
|
||||
|
||||
**Technische Lösung**: Digitale Signaturen, Blockchain-Timestamping
|
||||
|
||||
## Praktische Implementierung: Schritt-für-Schritt
|
||||
|
||||
### Phase 1: Vorbereitung der Sicherstellung
|
||||
|
||||
**Equipment-Check**:
|
||||
```checklist
|
||||
□ Kalibrierte Write-Blocker
|
||||
□ Forensische Imaging-Tools
|
||||
□ Chain of Custody Formulare
|
||||
□ Tamper-evident Bags/Labels
|
||||
□ Digitalkamera für Dokumentation
|
||||
□ Messgeräte (falls erforderlich)
|
||||
□ Backup-Ausrüstung
|
||||
```
|
||||
|
||||
**Dokumentation vor Ort**:
|
||||
1. **Umgebungsfotografie** (360°-Dokumentation)
|
||||
2. **Hardware-Identifikation** (Seriennummern, Labels)
|
||||
3. **Netzwerkzustand** (aktive Verbindungen)
|
||||
4. **Bildschirmzustand** (Screenshots vor Herunterfahren)
|
||||
|
||||
### Phase 2: Sichere Akquisition
|
||||
|
||||
**Write-Blocker Setup**:
|
||||
```bash
|
||||
# Hardware Write-Blocker Verification
|
||||
lsblk -o NAME,SIZE,RO,TYPE,MOUNTPOINT
|
||||
# RO sollte "1" anzeigen für geschützte Devices
|
||||
|
||||
# Software Write-Blocker (Linux)
|
||||
blockdev --setro /dev/sdb
|
||||
blockdev --getro /dev/sdb # Should return 1
|
||||
```
|
||||
|
||||
**Imaging mit Integrity Check**:
|
||||
```bash
|
||||
# dd mit Hash-Berechnung
|
||||
dd if=/dev/sdb | tee >(sha256sum > image.sha256) | dd of=evidence.dd
|
||||
|
||||
# Oder mit dcfldd für bessere Forensik-Features
|
||||
dcfldd if=/dev/sdb of=evidence.dd hash=sha256,md5 hashlog=hashlog.txt bs=4096
|
||||
```
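
Ergänzend eine mögliche Verifikations-Skizze nach dem Imaging: Der frisch berechnete Hash des Images wird gegen das von dcfldd erzeugte hashlog.txt geprüft (Dateinamen wie oben angenommen).

```bash
# Hash des fertigen Images berechnen und gegen das dcfldd-Hashlog prüfen
image_hash=$(sha256sum evidence.dd | cut -d' ' -f1)
grep -iq "$image_hash" hashlog.txt \
    && echo "Image-Hash verifiziert: $image_hash" \
    || echo "WARNUNG: Hash-Abweichung - Akquisition prüfen!"
```
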
|
||||
|
||||
### Phase 3: Dokumentation und Versiegelung
|
||||
|
||||
**Chain of Custody Form - Kernelemente**:
|
||||
|
||||
```
|
||||
DIGITAL EVIDENCE CUSTODY FORM
|
||||
|
||||
Fall-ID: _______________ Datum: _______________
|
||||
Ermittler: _______________ Badge/ID: _______________
|
||||
|
||||
BEWEISMITTEL DETAILS:
|
||||
- Beschreibung: ________________________________
|
||||
- Seriennummer: _______________________________
|
||||
- Hersteller/Modell: ___________________________
|
||||
- Kapazität: __________________________________
|
||||
- Hash-Werte:
|
||||
* SHA256: ___________________________________
|
||||
* MD5: _____________________________________
|
||||
|
||||
CUSTODY CHAIN:
|
||||
[Datum/Zeit] [Übernommen von] [Übergeben an] [Zweck] [Unterschrift]
|
||||
_________________________________________________________________
|
||||
_________________________________________________________________
|
||||
|
||||
INTEGRITÄT BESTÄTIGT:
|
||||
□ Write-Blocker verwendet
|
||||
□ Hash-Werte verifiziert
|
||||
□ Tamper-evident versiegelt
|
||||
□ Fotos dokumentiert
|
||||
```
|
||||
|
||||
**Versiegelung**:
|
||||
```
|
||||
Tamper-Evident Label Nummer: ______________
|
||||
Siegeltyp: _______________________________
|
||||
Platzierung: _____________________________
|
||||
Foto-Referenz: ___________________________
|
||||
```
|
||||
|
||||
### Phase 4: Transport und Lagerung
|
||||
|
||||
**Sichere Aufbewahrung**:
|
||||
- **Klimakontrollierte Umgebung** (15-25°C, <60% Luftfeuchtigkeit)
|
||||
- **Elektromagnetische Abschirmung** (Faraday-Käfig)
|
||||
- **Zugangskontrolle** (Biometrie, Kartenleser)
|
||||
- **Überwachung** (24/7 Video, Alarme)
|
||||
|
||||
**Transport-Protokoll**:
|
||||
```
|
||||
TRANSPORT LOG
|
||||
|
||||
Von: ______________________ Nach: ______________________
|
||||
Datum/Zeit Start: _____________ Ankunft: _______________
|
||||
Transportmittel: ___________________________________
|
||||
Begleitpersonen: ___________________________________
|
||||
Spezielle Vorkehrungen: ____________________________
|
||||
|
||||
Integrität bei Ankunft:
|
||||
□ Siegel unversehrt
|
||||
□ Hash-Werte überprüft
|
||||
□ Keine physischen Schäden
|
||||
□ Dokumentation vollständig
|
||||
|
||||
Empfänger: _________________ Unterschrift: _____________
|
||||
```
|
||||
|
||||
## Digitale Chain of Custody Tools
|
||||
|
||||
### Laboratory Information Management Systems (LIMS)
|
||||
|
||||
**Kommerzielle Lösungen**:
|
||||
- **FRED (Forensic Recovery of Evidence Device)**
|
||||
- **CaseGuard** von AccessData
|
||||
- **EnCase Legal** von OpenText
|
||||
|
||||
**Open Source Alternativen**:
|
||||
```python
|
||||
# Beispiel: Python-basierte CoC Tracking
|
||||
import hashlib
|
||||
import datetime
|
||||
import json
|
||||
from cryptography.fernet import Fernet
|
||||
|
||||
class ChainOfCustody:
|
||||
def __init__(self):
|
||||
self.evidence_log = []
|
||||
self.key = Fernet.generate_key()
|
||||
self.cipher = Fernet(self.key)
|
||||
|
||||
def add_custody_event(self, evidence_id, handler, action, location):
|
||||
event = {
|
||||
'timestamp': datetime.datetime.utcnow().isoformat(),
|
||||
'evidence_id': evidence_id,
|
||||
'handler': handler,
|
||||
'action': action,
|
||||
'location': location,
|
||||
'hash': self.calculate_hash(evidence_id)
|
||||
}
|
||||
|
||||
# Encrypt sensitive data
|
||||
encrypted_event = self.cipher.encrypt(json.dumps(event).encode())
|
||||
self.evidence_log.append(encrypted_event)
|
||||
|
||||
return event
|
||||
|
||||
def calculate_hash(self, evidence_path):
|
||||
"""Calculate SHA256 hash of evidence file"""
|
||||
hash_sha256 = hashlib.sha256()
|
||||
with open(evidence_path, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(4096), b""):
|
||||
hash_sha256.update(chunk)
|
||||
return hash_sha256.hexdigest()
|
||||
```
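
Eine mögliche Verwendung der obigen Klasse als kurze Skizze; Pfad und Personalien sind erfunden. Da `calculate_hash` die Datei einliest, wird hier als `evidence_id` ein Dateipfad übergeben:

```python
coc = ChainOfCustody()
event = coc.add_custody_event(
    evidence_id="/secure/evidence/evidence.dd",  # Dateipfad, da calculate_hash die Datei einliest
    handler="Max Mustermann, Badge #12345",
    action="Übergabe an Forensik-Labor",
    location="Labor Raum 2.14",
)
print(event["timestamp"], event["hash"])
```
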
|
||||
|
||||
### Blockchain-basierte Lösungen
|
||||
|
||||
**Unveränderliche Timestamps**:
|
||||
```solidity
|
||||
// Ethereum Smart Contract Beispiel
|
||||
pragma solidity ^0.8.0;
|
||||
|
||||
contract EvidenceChain {
|
||||
struct CustodyEvent {
|
||||
uint256 timestamp;
|
||||
string evidenceId;
|
||||
string handler;
|
||||
string action;
|
||||
string hashValue;
|
||||
}
|
||||
|
||||
mapping(string => CustodyEvent[]) public evidenceChain;
|
||||
|
||||
event CustodyTransfer(
|
||||
string indexed evidenceId,
|
||||
string handler,
|
||||
uint256 timestamp
|
||||
);
|
||||
|
||||
function addCustodyEvent(
|
||||
string memory _evidenceId,
|
||||
string memory _handler,
|
||||
string memory _action,
|
||||
string memory _hashValue
|
||||
) public {
|
||||
evidenceChain[_evidenceId].push(CustodyEvent({
|
||||
timestamp: block.timestamp,
|
||||
evidenceId: _evidenceId,
|
||||
handler: _handler,
|
||||
action: _action,
|
||||
hashValue: _hashValue
|
||||
}));
|
||||
|
||||
emit CustodyTransfer(_evidenceId, _handler, block.timestamp);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Häufige Fehler und Fallstricke
|
||||
|
||||
### Kritische Dokumentationsfehler
|
||||
|
||||
**1. Unvollständige Handler-Information**
|
||||
```
|
||||
❌ Falsch: "IT-Abteilung"
|
||||
✅ Richtig: "Max Mustermann, IT-Administrator, Badge #12345, Abteilung IT-Security"
|
||||
```
|
||||
|
||||
**2. Unspezifische Aktionsbeschreibungen**
|
||||
```
|
||||
❌ Falsch: "Analyse durchgeführt"
|
||||
✅ Richtig: "Keyword-Suche nach 'vertraulich' mit EnCase v21.2,
|
||||
Read-Only Zugriff, Image Hash vor/nach verifiziert"
|
||||
```
|
||||
|
||||
**3. Lückenhafte Zeiterfassung**
|
||||
```
|
||||
❌ Falsch: "15:30"
|
||||
✅ Richtig: "2024-01-15T15:30:27Z (UTC), Zeitzone CET+1"
|
||||
```
|
||||
|
||||
### Technische Fallstricke
|
||||
|
||||
**Hash-Algorithmus Schwächen**:
|
||||
```bash
|
||||
# Vermeide MD5 für neue Fälle (Kollisionsanfällig)
|
||||
❌ md5sum evidence.dd
|
||||
|
||||
# Verwende stärkere Algorithmen
|
||||
✅ sha256sum evidence.dd
|
||||
✅ openssl dgst -sha3-256 evidence.dd   # SHA-3-Variante, z.B. über OpenSSL
|
||||
```
|
||||
|
||||
**Write-Blocker Bypass**:
|
||||
```bash
# Prüfe IMMER Write-Protection
# Hinweis: blockdev --getro schreibt den Status ("0"/"1") auf stdout; der Exit-Code ist auch bei "0" erfolgreich
if [ "$(blockdev --getro /dev/sdb)" -eq 1 ]; then
    echo "Write protection AKTIV"
else
    echo "WARNUNG: Write protection NICHT aktiv!"
    exit 1
fi
```
|
||||
|
||||
### Rechtliche Fallstricke
|
||||
|
||||
**GDPR-Compliance bei EU-Fällen**:
|
||||
- **Datenschutz-Folgenabschätzung** vor Imaging
|
||||
- **Zweckbindung** der Beweiserhebung
|
||||
- **Löschfristen** nach Verfahrensabschluss
|
||||
|
||||
**Jurisdiktionsprobleme**:
|
||||
- **Cloud-Evidence** in verschiedenen Ländern
|
||||
- **Verschiedene Beweisstandards** (Common Law vs. Civil Law)
|
||||
- **Internationale Rechtshilfe** erforderlich
|
||||
|
||||
## Qualitätssicherung und Audit
|
||||
|
||||
### Peer Review Verfahren
|
||||
|
||||
**4-Augen-Prinzip**:
|
||||
```
|
||||
Imaging-Protokoll:
|
||||
Techniker A: _________________ (Durchführung)
|
||||
Techniker B: _________________ (Verifikation)
|
||||
Supervisor: __________________ (Freigabe)
|
||||
```
|
||||
|
||||
**Hash-Verifikation Zeitplan**:
|
||||
```
|
||||
Initial: SHA256 bei Akquisition
|
||||
Transport: Hash-Check vor/nach Transport
|
||||
Labor: Hash-Check bei Laborankunft
|
||||
Analyse: Hash-Check vor jeder Analyse
|
||||
Archiv: Hash-Check bei Archivierung
|
||||
Vernichtung: Final Hash-Check vor Vernichtung
|
||||
```
|
||||
|
||||
### Continuous Monitoring
|
||||
|
||||
**Automated Integrity Checks**:
|
||||
```bash
#!/bin/bash
# integrity_monitor.sh

EVIDENCE_DIR="/secure/evidence"
LOG_FILE="/var/log/evidence_integrity.log"

for evidence_file in "$EVIDENCE_DIR"/*.dd; do
    stored_hash=$(cut -d' ' -f1 "${evidence_file}.sha256")   # nur den Hash-Wert, ohne Dateinamen
    current_hash=$(sha256sum "$evidence_file" | cut -d' ' -f1)

    if [ "$stored_hash" != "$current_hash" ]; then
        echo "ALERT: Integrity violation detected for $evidence_file" | \
            tee -a "$LOG_FILE"
        # Send immediate alert
        mail -s "Evidence Integrity Alert" admin@forensics.org < \
            "$LOG_FILE"
    fi
done
```
|
||||
|
||||
## Internationale Gerichtspraxis
|
||||
|
||||
### Deutschland - BGH Rechtsprechung
|
||||
|
||||
**BGH 1 StR 142/18** (2018):
|
||||
- Digitale Beweise müssen **nachvollziehbar erhoben** werden
|
||||
- **Hash-Werte allein** reichen nicht aus
|
||||
- **Gesamter Erhebungsprozess** muss dokumentiert sein
|
||||
|
||||
### USA - Federal Courts
|
||||
|
||||
**United States v. Tank (2018)**:
|
||||
- **Authentication** unter Federal Rule 901(b)(9)
|
||||
- **Best Practices** sind nicht immer **rechtlich erforderlich**
|
||||
- **Totality of circumstances** entscheidet
|
||||
|
||||
### EU - EuGH Rechtsprechung
|
||||
|
||||
**Rechtssache C-203/15** (2016):
|
||||
- **Grundrechte** vs. **Strafverfolgung**
|
||||
- **Verhältnismäßigkeit** der Beweiserhebung
|
||||
- **GDPR-Compliance** auch bei strafrechtlichen Ermittlungen
|
||||
|
||||
## Fallstudien aus der Praxis
|
||||
|
||||
### Case Study 1: Ransomware-Angriff Automobilhersteller
|
||||
|
||||
**Szenario**:
|
||||
Ransomware-Angriff auf Produktionssysteme, 50+ Systeme betroffen
|
||||
|
||||
**CoC-Herausforderungen**:
|
||||
- **Zeitdruck** durch Produktionsstillstand
|
||||
- **Verschiedene Standorte** (Deutschland, Tschechien, Mexiko)
|
||||
- **Rechtliche Anforderungen** in 3 Jurisdiktionen
|
||||
|
||||
**Lösung**:
|
||||
```
|
||||
Parallel Teams:
|
||||
- Team 1: Incident Response (Live-Analyse)
|
||||
- Team 2: Evidence Preservation (Imaging)
|
||||
- Team 3: Documentation (CoC-Protokoll)
|
||||
|
||||
Zentrale Koordination:
|
||||
- Shared CoC-Database (Cloud-basiert)
|
||||
- Video-Calls für Custody-Transfers
|
||||
- Digital Signatures für Remote-Bestätigung
|
||||
```
|
||||
|
||||
**Lessons Learned**:
|
||||
- **Vorab-Planung** für Multi-Jurisdiktion essentiell
|
||||
- **Remote-CoC-Verfahren** erforderlich
|
||||
- **24/7-Verfügbarkeit** der Dokumentationssysteme
|
||||
|
||||
### Case Study 2: Betrugsermittlung Finanzdienstleister
|
||||
|
||||
**Szenario**:
|
||||
Verdacht auf Insiderhandel, E-Mail-Analyse von 500+ Mitarbeitern
|
||||
|
||||
**CoC-Komplexität**:
|
||||
- **Privacy Laws** (GDPR, Bankengeheimnis)
|
||||
- **Privileged Communications** (Anwalt-Mandant)
|
||||
- **Regulatory Oversight** (BaFin, SEC)
|
||||
|
||||
**Chain of Custody Strategie**:
|
||||
```
|
||||
Segregated Processing:
|
||||
1. Initial Triage (Automated)
|
||||
2. Legal Review (Attorney-Client Privilege)
|
||||
3. Regulatory Notification (Compliance)
|
||||
4. Technical Analysis (Forensik-Team)
|
||||
|
||||
Access Controls:
|
||||
- Role-based Evidence Access
|
||||
- Need-to-know Principle
|
||||
- Audit Log for every Access
|
||||
```
|
||||
|
||||
## Technologie-Trends und Zukunftsausblick
|
||||
|
||||
### KI-basierte CoC-Automatisierung
|
||||
|
||||
**Machine Learning für Anomalie-Erkennung**:
|
||||
```python
from sklearn.ensemble import IsolationForest
import numpy as np
import pandas as pd

# CoC Event Anomaly Detection
def detect_custody_anomalies(custody_events):
    """
    Detect unusual patterns in custody transfers
    """
    features = pd.DataFrame(custody_events)

    # Feature Engineering
    features['timestamp'] = pd.to_datetime(features['timestamp'])
    features['time_delta'] = features['timestamp'].diff().dt.total_seconds()
    features['handler_changes'] = features['handler'].ne(features['handler'].shift()).astype(int)

    # Anomaly Detection (nur numerische Spalten verwenden)
    model = IsolationForest(contamination=0.1)
    numeric_features = features.select_dtypes(include=[np.number]).fillna(0)
    anomalies = model.fit_predict(numeric_features)

    return features[anomalies == -1]
```
|
||||
|
||||
### Quantum-Safe Cryptography
|
||||
|
||||
**Vorbereitung auf Post-Quantum Era**:
|
||||
```
|
||||
Current: RSA-2048, SHA-256
|
||||
Transitional: RSA-4096, SHA-3
|
||||
Future: Lattice-based, Hash-based Signatures
|
||||
```
|
||||
|
||||
### Cloud-Native Evidence Management
|
||||
|
||||
**Container-basierte Forensik-Pipelines**:
|
||||
```yaml
# docker-compose.yml für Forensik-Lab
version: '3.8'
services:
  evidence-intake:
    image: forensics/evidence-intake:v2.1
    volumes:
      - ./evidence:/data
    environment:
      - AUTO_HASH=true
      - BLOCKCHAIN_LOGGING=true

  chain-tracker:
    image: forensics/chain-tracker:v1.5
    depends_on:
      - postgres
    environment:
      - DATABASE_URL=postgresql://user:pass@postgres:5432/custody

  # Ohne definierten postgres-Dienst würde "depends_on" ins Leere laufen
  postgres:
    image: postgres:15-alpine
    environment:
      - POSTGRES_USER=user
      - POSTGRES_PASSWORD=pass
      - POSTGRES_DB=custody
```
|
||||
|
||||
## Best Practices Zusammenfassung
|
||||
|
||||
### Präventive Maßnahmen
|
||||
|
||||
**1. Standardisierte Verfahren**
|
||||
```
|
||||
□ SOPs für alle Custody-Schritte
|
||||
□ Regelmäßige Team-Schulungen
|
||||
□ Tool-Kalibrierung und -Wartung
|
||||
□ Backup-Verfahren für Ausfälle
|
||||
```
|
||||
|
||||
**2. Technische Safeguards**
|
||||
```
|
||||
□ Redundante Hash-Algorithmen
|
||||
□ Automated Integrity Monitoring
|
||||
□ Secure Transport Protocols
|
||||
□ Environmental Monitoring
|
||||
```
|
||||
|
||||
**3. Rechtliche Compliance**
|
||||
```
|
||||
□ Jurisdiction-spezifische SOPs
|
||||
□ Regular Legal Updates
|
||||
□ Attorney Consultation Process
|
||||
□ International Cooperation Agreements
|
||||
```
|
||||
|
||||
### Reaktive Maßnahmen
|
||||
|
||||
**Incident Response bei CoC-Verletzungen**:
|
||||
```
|
||||
1. Immediate Containment
|
||||
- Stop all evidence processing
|
||||
- Secure affected items
|
||||
- Document incident details
|
||||
|
||||
2. Impact Assessment
|
||||
- Determine scope of compromise
|
||||
- Identify affected cases
|
||||
- Assess legal implications
|
||||
|
||||
3. Remediation
|
||||
- Re-establish chain where possible
|
||||
- Alternative evidence strategies
|
||||
- Legal notification requirements
|
||||
|
||||
4. Prevention
|
||||
- Root cause analysis
|
||||
- Process improvements
|
||||
- Additional controls
|
||||
```
|
||||
|
||||
## Fazit
|
||||
|
||||
Die Chain of Custody ist mehr als eine administrative Pflicht - sie ist das **Fundament der digitalen Forensik**. Ohne ordnungsgemäße Beweiskette können selbst die stärksten technischen Beweise vor Gericht wertlos werden.
|
||||
|
||||
**Schlüsselprinzipien für den Erfolg**:
|
||||
|
||||
1. **Vorbereitung ist alles** - SOPs und Tools vor dem Incident
|
||||
2. **Dokumentation über alles** - Im Zweifel mehr dokumentieren
|
||||
3. **Technologie als Enabler** - Automatisierung wo möglich
|
||||
4. **Menschen im Fokus** - Training und Awareness entscheidend
|
||||
5. **Kontinuierliche Verbesserung** - Lessons Learned Integration
|
||||
|
||||
Die Investition in robuste Chain of Custody Verfahren zahlt sich langfristig aus - durch höhere Erfolgsraten vor Gericht, reduzierte Compliance-Risiken und erhöhte Glaubwürdigkeit der forensischen Arbeit.
|
||||
|
||||
> **Merksatz**: "Eine Kette ist nur so stark wie ihr schwächstes Glied - in der digitalen Forensik ist das oft die menschliche Komponente, nicht die technische."
|
||||
|
||||
## Weiterführende Ressourcen
|
||||
|
||||
**Standards und Guidelines**:
|
||||
- [ISO/IEC 27037:2012](https://www.iso.org/standard/44381.html) - Digital Evidence Guidelines
|
||||
- [NIST SP 800-86](https://csrc.nist.gov/publications/detail/sp/800-86/final) - Computer Forensics Guide
|
||||
- [RFC 3227](https://tools.ietf.org/html/rfc3227) - Evidence Collection Guidelines
|
||||
|
||||
**Training und Zertifizierung**:
|
||||
- SANS FOR500 (Windows Forensic Analysis)
|
||||
- SANS FOR508 (Advanced Incident Response)
|
||||
- IACIS Certified Forensic Computer Examiner (CFCE)
|
||||
- CISSP (Chain of Custody Domain)
|
||||
|
||||
**Tools und Software**:
|
||||
- [FTK Imager](https://www.exterro.com/digital-forensics-software/ftk-imager) - Free Imaging Tool
|
||||
- [Autopsy](https://www.sleuthkit.org/autopsy/) - Open Source Platform
|
||||
- [MSAB XRY](https://www.msab.com/) - Mobile Forensics
|
||||
- [Cellebrite UFED](https://www.cellebrite.com/) - Mobile Evidence Extraction
|
||||
@@ -1,471 +0,0 @@
|
||||
---
|
||||
title: "Dateisystem-Forensik: Von NTFS-Strukturen bis Cloud-Storage-Artefakten"
|
||||
description: "Umfassender Leitfaden zur forensischen Analyse von Dateisystemen - NTFS-Metadaten, ext4-Journaling, APFS-Snapshots und Cloud-Storage-Forensik für professionelle Datenrekonstruktion"
|
||||
author: "Claude 4 Sonnett (Prompt: Mario Stöckl)"
|
||||
last_updated: 2025-08-10
|
||||
difficulty: intermediate
|
||||
categories: ["analysis", "configuration", "troubleshooting"]
|
||||
tags: ["filesystem-analysis", "metadata-extraction", "deleted-data-recovery", "slack-space", "journaling-analysis", "timestamp-forensics", "partition-analysis", "cloud-storage", "ntfs", "ext4", "apfs", "data-carving"]
|
||||
tool_name: "File Systems & Storage Forensics"
|
||||
related_tools: ["Autopsy", "The Sleuth Kit", "FTK Imager", "Volatility", "X-Ways Forensics"]
|
||||
published: true
|
||||
---
|
||||
|
||||
# Dateisystem-Forensik: Von NTFS-Strukturen bis Cloud-Storage-Artefakten
|
||||
|
||||
Die forensische Analyse von Dateisystemen bildet das Fundament moderner Digital Forensics. Dieser umfassende Leitfaden behandelt die kritischen Aspekte der Dateisystem-Forensik von traditionellen lokalen Speichermedien bis hin zu modernen Cloud-Storage-Umgebungen.
|
||||
|
||||
## Grundlagen der Dateisystem-Forensik
|
||||
|
||||
### Was ist Dateisystem-Forensik?
|
||||
|
||||
Dateisystem-Forensik umfasst die systematische Untersuchung von Speicherstrukturen zur Rekonstruktion digitaler Beweise. Dabei werden nicht nur sichtbare Dateien analysiert, sondern auch Metadaten, gelöschte Inhalte und versteckte Artefakte untersucht.
|
||||
|
||||
### Zentrale forensische Konzepte
|
||||
|
||||
**Metadaten-Analyse**: Jedes Dateisystem speichert umfangreiche Metadaten über Dateien, Verzeichnisse und Systemaktivitäten. Diese Informationen sind oft aussagekräftiger als der eigentliche Dateiinhalt.
|
||||
|
||||
**Slack Space**: Der ungenutzte Bereich zwischen dem Ende einer Datei und dem Ende des zugewiesenen Clusters kann Reste vorheriger Dateien enthalten.
|
||||
|
||||
**Journaling**: Moderne Dateisysteme protokollieren Änderungen in Journal-Dateien, die wertvolle Timeline-Informationen liefern.
|
||||
|
||||
**Timeline-Rekonstruktion**: Durch Kombination verschiedener Timestamp-Quellen lassen sich detaillierte Aktivitätszeitlinien erstellen.
|
||||
|
||||
## NTFS-Forensik: Das Windows-Dateisystem im Detail
|
||||
|
||||
### Master File Table (MFT) Analyse
|
||||
|
||||
Die MFT ist das Herzstück von NTFS und enthält Einträge für jede Datei und jeden Ordner auf dem Volume.
|
||||
|
||||
**Struktur eines MFT-Eintrags:**
|
||||
```
|
||||
Offset 0x00: FILE-Signatur
|
||||
Offset 0x04: Update Sequence Array Offset
|
||||
Offset 0x06: Update Sequence Array Größe
|
||||
Offset 0x08: $LogFile Sequence Number (LSN)
|
||||
Offset 0x10: Sequence Number
|
||||
Offset 0x12: Hard Link Count
|
||||
Offset 0x14: Erste Attribut-Offset
|
||||
```
|
||||
|
||||
**Forensisch relevante Attribute:**
|
||||
- `$STANDARD_INFORMATION`: Timestamps, Dateiberechtigungen
|
||||
- `$FILE_NAME`: Dateiname, zusätzliche Timestamps
|
||||
- `$DATA`: Dateiinhalt oder Cluster-Referenzen
|
||||
- `$SECURITY_DESCRIPTOR`: Zugriffsberechtigungen
|
||||
|
||||
**Praktische Analyse-Techniken:**
|
||||
|
||||
1. **Gelöschte MFT-Einträge identifizieren**: Bei gelöschten Dateien ist im Record-Header das In-Use-Flag (Offset 0x16, Bit 0) nicht mehr gesetzt; der Eintrag bleibt bis zur Wiederverwendung samt Metadaten erhalten
|
||||
2. **Timeline-Anomalien erkennen**: Vergleich zwischen $STANDARD_INFORMATION und $FILE_NAME Timestamps
|
||||
3. **Resident vs. Non-Resident Data**: Kleine Dateien (< 700 Bytes) werden direkt in der MFT gespeichert
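
Eine kleine Python-Skizze verdeutlicht die Flag-Prüfung aus Punkt 1: Sie liest eine exportierte $MFT (Record-Größe von 1024 Byte angenommen) und listet Records, deren In-Use-Flag nicht gesetzt ist; der Dateiname ist frei gewählt.

```python
import struct

def list_deleted_records(mft_path, record_size=1024):
    """Listet MFT-Record-Nummern, deren In-Use-Flag (Offset 0x16, Bit 0) nicht gesetzt ist."""
    deleted = []
    with open(mft_path, "rb") as f:
        record_no = 0
        while True:
            record = f.read(record_size)
            if len(record) < record_size:
                break
            if record[0:4] == b"FILE":
                flags = struct.unpack_from("<H", record, 0x16)[0]
                if not flags & 0x0001:  # Bit 0 = Record in Benutzung
                    deleted.append(record_no)
            record_no += 1
    return deleted

print(list_deleted_records("MFT_export.bin")[:20])
```
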
|
||||
|
||||
### $LogFile Analyse für Aktivitäts-Tracking
|
||||
|
||||
Das NTFS-Journal protokolliert alle Dateisystem-Änderungen und ermöglicht detaillierte Aktivitäts-Rekonstruktion.
|
||||
|
||||
**Relevante Log-Record-Typen:**
|
||||
- `CreateFile`: Datei-/Ordnererstellung
|
||||
- `DeleteFile`: Löschvorgänge
|
||||
- `RenameFile`: Umbenennungen
|
||||
- `SetInformationFile`: Metadaten-Änderungen
|
||||
|
||||
**Analyse-Workflow:**
|
||||
```bash
|
||||
# Mit istat (Sleuth Kit) MFT-Eintrag analysieren
|
||||
istat /dev/sda1 5 # MFT-Eintrag 5 anzeigen
|
||||
|
||||
# Mit fls gelöschte Dateien auflisten
|
||||
fls -r -d /dev/sda1
|
||||
|
||||
# Mit tsk_recover gelöschte Dateien wiederherstellen
|
||||
tsk_recover /dev/sda1 /recovery/
|
||||
```
|
||||
|
||||
### Alternate Data Streams (ADS) Detection
|
||||
|
||||
ADS können zur Datenverbergung missbraucht werden und sind oft übersehen.
|
||||
|
||||
**Erkennungsstrategien:**
|
||||
1. **MFT-Analyse auf mehrere $DATA-Attribute**: Dateien mit ADS haben multiple $DATA-Einträge
|
||||
2. **Powershell-Erkennung**: `Get-Item -Path C:\file.txt -Stream *`
|
||||
3. **Forensik-Tools**: Autopsy zeigt ADS automatisch in der File-Analyse
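
Ergänzend eine mögliche PowerShell-Skizze, um ADS unterhalb eines Verzeichnisses aufzulisten und auszulesen; Pfade und Stream-Namen sind Beispiel-Annahmen:

```powershell
# Alle alternativen Streams finden (":$DATA" ist der reguläre Hauptstream)
Get-ChildItem -Path C:\Daten -Recurse -File |
    ForEach-Object { Get-Item -LiteralPath $_.FullName -Stream * } |
    Where-Object Stream -ne ':$DATA' |
    Select-Object FileName, Stream, Length

# Inhalt eines konkreten ADS auslesen
Get-Content -Path C:\Daten\report.txt -Stream 'versteckt.txt'
```
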
|
||||
|
||||
### Volume Shadow Copies für Timeline-Rekonstruktion
|
||||
|
||||
VSCs bieten Snapshots des Dateisystems zu verschiedenen Zeitpunkten.
|
||||
|
||||
**Forensische Relevanz:**
|
||||
- Wiederherstellung gelöschter/überschriebener Dateien
|
||||
- Timeline-Rekonstruktion über längere Zeiträume
|
||||
- Registry-Hive-Vergleiche zwischen Snapshots
|
||||
|
||||
**Zugriff auf VSCs:**
|
||||
```cmd
|
||||
:: VSCs auflisten
vssadmin list shadows

:: Persistenten Snapshot anlegen (vshadow stammt aus dem Windows SDK)
vshadow -p -script=shadow.cmd C:

:: Vorhandenen Snapshot als Verzeichnis einbinden (Device-Pfad aus "vssadmin list shadows" übernehmen)
mklink /D C:\vsc_mount \\?\GLOBALROOT\Device\HarddiskVolumeShadowCopy1\
|
||||
```
|
||||
|
||||
## ext4-Forensik: Linux-Dateisystem-Analyse
|
||||
|
||||
### Ext4-Journal-Analyse
|
||||
|
||||
Das ext4-Journal (`/journal`) protokolliert Transaktionen und bietet wertvolle forensische Artefakte.
|
||||
|
||||
**Journal-Struktur:**
|
||||
- **Descriptor Blocks**: Beschreiben bevorstehende Transaktionen
|
||||
- **Data Blocks**: Enthalten die eigentlichen Datenänderungen
|
||||
- **Commit Blocks**: Markieren abgeschlossene Transaktionen
|
||||
- **Revoke Blocks**: Listen widerrufene Blöcke auf
|
||||
|
||||
**Praktische Analyse:**
|
||||
```bash
|
||||
# Journal-Informationen anzeigen
|
||||
tune2fs -l /dev/sda1 | grep -i journal
|
||||
|
||||
# Mit debugfs Journal untersuchen
|
||||
debugfs /dev/sda1
|
||||
debugfs: logdump -a journal_file
|
||||
|
||||
# Ext4-Metadaten extrahieren
|
||||
icat /dev/sda1 8 > journal.raw # Inode 8 ist typisch das Journal
|
||||
```
|
||||
|
||||
### Inode-Struktur und Deleted-File-Recovery
|
||||
|
||||
**Ext4-Inode-Aufbau:**
|
||||
```
|
||||
struct ext4_inode {
|
||||
__le16 i_mode; # Dateityp und Berechtigungen
|
||||
__le16 i_uid; # Benutzer-ID
|
||||
__le32 i_size; # Dateigröße
|
||||
__le32 i_atime; # Letzter Zugriff
|
||||
__le32 i_ctime; # Inode-Änderung
|
||||
__le32 i_mtime; # Letzte Modifikation
|
||||
__le32 i_dtime; # Löschzeitpunkt
|
||||
...
|
||||
__le32 i_block[EXT4_N_BLOCKS]; # Block-Pointer
|
||||
};
|
||||
```
|
||||
|
||||
**Recovery-Techniken:**
|
||||
1. **Inode-Scanning**: Suche nach Inodes mit gesetztem dtime aber erhaltenen Blöcken
|
||||
2. **Journal-Recovery**: Replay von Journal-Einträgen vor Löschzeitpunkt
|
||||
3. **Directory-Entry-Recovery**: Undelfs-Techniken für kürzlich gelöschte Dateien
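
Als grobe Skizze zwei gängige Ansätze; die Gerätenamen sind Annahmen, gearbeitet wird grundsätzlich auf einer Arbeitskopie, und `lsdel` liefert bei ext4 oft nur begrenzte Treffer:

```bash
# Gelöschte Dateien mit extundelete wiederherstellen (nur auf einer Kopie des Datenträgers!)
extundelete /dev/sdb1 --restore-all -o /recovery/extundelete/

# Alternativ: kürzlich gelöschte Inodes mit debugfs auflisten
debugfs -R "lsdel" /dev/sdb1
```
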
|
||||
|
||||
### Extended Attributes (xattr) Forensik
|
||||
|
||||
Extended Attributes speichern zusätzliche Metadaten und Sicherheitskontext.
|
||||
|
||||
**Forensisch relevante xattrs:**
|
||||
- `security.selinux`: SELinux-Kontext
|
||||
- `user.*`: Benutzerdefinierte Attribute
|
||||
- `system.posix_acl_*`: ACL-Informationen
|
||||
- `security.capability`: File-Capabilities
|
||||
|
||||
```bash
|
||||
# Alle xattrs einer Datei anzeigen
|
||||
getfattr -d /path/to/file
|
||||
|
||||
# Spezifisches Attribut extrahieren
|
||||
getfattr -n user.comment /path/to/file
|
||||
```
|
||||
|
||||
## APFS und HFS+ Forensik: macOS-Dateisysteme
|
||||
|
||||
### APFS-Snapshots für Point-in-Time-Analysis
|
||||
|
||||
APFS erstellt automatisch Snapshots, die forensische Goldgruben darstellen.
|
||||
|
||||
**Snapshot-Management:**
|
||||
```bash
|
||||
# Snapshots auflisten
|
||||
tmutil listlocalsnapshots /
|
||||
|
||||
# Snapshot mounten
|
||||
diskutil apfs mount -snapshot snapshot_name
|
||||
|
||||
# Snapshot-Metadaten analysieren
|
||||
diskutil apfs list
|
||||
```
|
||||
|
||||
**Forensische Anwendung:**
|
||||
- Vergleich von Dateisystem-Zuständen über Zeit
|
||||
- Recovery von gelöschten/modifizierten Dateien
|
||||
- Malware-Persistenz-Analyse
|
||||
|
||||
### HFS+-Katalog-Datei-Forensik
|
||||
|
||||
Die Katalog-Datei ist das Äquivalent zur NTFS-MFT in HFS+.
|
||||
|
||||
**Struktur:**
|
||||
- **Header Node**: Baum-Metadaten
|
||||
- **Index Nodes**: Verweise auf Leaf Nodes
|
||||
- **Leaf Nodes**: Eigentliche Datei-/Ordner-Records
|
||||
- **Map Nodes**: Freie/belegte Nodes
|
||||
|
||||
**Forensische Techniken:**
|
||||
```bash
|
||||
# Mit hfsdump Katalog analysieren
|
||||
hfsdump -c /dev/disk1s1
|
||||
|
||||
# Gelöschte Dateien suchen
|
||||
fls -r -f hfs /dev/disk1s1   # TSK verwendet den Dateisystemtyp "hfs" für HFS+
|
||||
```
|
||||
|
||||
## Cloud Storage Forensics
|
||||
|
||||
### OneDrive-Artefakt-Analyse
|
||||
|
||||
**Lokale Artefakte:**
|
||||
- `%USERPROFILE%\OneDrive\*`: Synchronisierte Dateien
|
||||
- Registry: `HKCU\Software\Microsoft\OneDrive`
|
||||
- Event Logs: OneDrive-spezifische Ereignisse
|
||||
|
||||
**Forensische Analyse-Punkte:**
|
||||
1. **Sync-Status**: Welche Dateien wurden synchronisiert?
|
||||
2. **Conflict-Resolution**: Wie wurden Konflikte gelöst?
|
||||
3. **Version-History**: Zugriff auf vorherige Datei-Versionen
|
||||
4. **Sharing-Activities**: Geteilte Dateien und Berechtigungen
|
||||
|
||||
```powershell
|
||||
# OneDrive-Status abfragen
|
||||
Get-ItemProperty -Path "HKCU:\Software\Microsoft\OneDrive\Accounts\*"
|
||||
|
||||
# Sync-Engine-Logs analysieren
|
||||
Get-WinEvent -LogName "Microsoft-Windows-OneDrive/Operational"
|
||||
```
|
||||
|
||||
### Google Drive Forensik
|
||||
|
||||
**Client-seitige Artefakte:**
|
||||
- `%LOCALAPPDATA%\Google\Drive\*`: Lokaler Cache
|
||||
- SQLite-Datenbanken: Sync-Metadaten
|
||||
- Temporary Files: Unvollständige Downloads
|
||||
|
||||
**Wichtige Datenbanken:**
|
||||
- `sync_config.db`: Sync-Konfiguration
|
||||
- `cloud_graph.db`: Cloud-Dateienstruktur
|
||||
- `metadata_database`: Datei-Metadaten
|
||||
|
||||
```bash
|
||||
# SQLite-Datenbank analysieren
|
||||
sqlite3 sync_config.db
|
||||
.tables
|
||||
SELECT * FROM data WHERE key LIKE '%sync%';
|
||||
```
|
||||
|
||||
### Dropbox-Forensik
|
||||
|
||||
**Forensische Artefakte:**
|
||||
- `%APPDATA%\Dropbox\*`: Konfiguration und Logs
|
||||
- `.dropbox.cache\*`: Lokaler Cache
|
||||
- Database-Dateien: Sync-Historie
|
||||
|
||||
**Wichtige Dateien:**
|
||||
- `config.dbx`: Verschlüsselte Konfiguration
|
||||
- `filecache.dbx`: Datei-Cache-Informationen
|
||||
- `deleted.dbx`: Gelöschte Dateien-Tracking
|
||||
|
||||
## File Carving und Datenrekonstruktion
|
||||
|
||||
### Header/Footer-basiertes Carving
|
||||
|
||||
**Klassische Ansätze:**
|
||||
```bash
|
||||
# Mit foremost File-Carving durchführen
|
||||
foremost -t jpg,pdf,doc -i /dev/sda1 -o /recovery/
|
||||
|
||||
# Mit scalpel erweiterte Pattern verwenden
|
||||
scalpel -b -o /recovery/ /dev/sda1
|
||||
|
||||
# Mit photorec interaktives Recovery
|
||||
photorec /dev/sda1
|
||||
```
|
||||
|
||||
**Custom Carving-Patterns:**
|
||||
```
|
||||
# scalpel.conf Beispiel
|
||||
jpg y 200000000 \xff\xd8\xff\xe0\x00\x10 \xff\xd9
|
||||
pdf y 200000000 %PDF- %%EOF\x0d
|
||||
zip y 100000000 PK\x03\x04 PK\x05\x06
|
||||
```
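
Zur Veranschaulichung des Header/Footer-Prinzips eine bewusst vereinfachte Python-Skizze; sie liest das Image komplett in den Speicher und ist daher nur für kleine Test-Images gedacht, die Dateinamen sind Annahmen:

```python
def carve_jpegs(image_path, out_prefix="carved", max_size=20_000_000):
    """Sehr einfache Header/Footer-Suche nach JPEG-Daten (FF D8 FF ... FF D9) in einem Raw-Image."""
    with open(image_path, "rb") as f:
        data = f.read()

    count, pos = 0, 0
    while True:
        start = data.find(b"\xff\xd8\xff", pos)
        if start == -1:
            break
        end = data.find(b"\xff\xd9", start)
        if end == -1 or end - start > max_size:
            pos = start + 3
            continue
        with open(f"{out_prefix}_{count:04d}.jpg", "wb") as out:
            out.write(data[start:end + 2])
        count, pos = count + 1, end + 2
    return count

print(carve_jpegs("evidence.dd"), "JPEG-Kandidaten extrahiert")
```
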
|
||||
|
||||
### Fragmentierte Datei-Rekonstruktion
|
||||
|
||||
**Bifragment-Gap-Carving:**
|
||||
1. Identifikation von Header-Fragmenten
|
||||
2. Berechnung wahrscheinlicher Fragment-Größen
|
||||
3. Gap-Analyse zwischen Fragmenten
|
||||
4. Reassembly mit Plausibilitätsprüfung
|
||||
|
||||
**Smart-Carving-Techniken:**
|
||||
- Semantic-aware Carving für Office-Dokumente
|
||||
- JPEG-Quantization-Table-Matching
|
||||
- Video-Keyframe-basierte Rekonstruktion
|
||||
|
||||
## Timestamp-Manipulation und -Analyse
|
||||
|
||||
### MACB-Timeline-Erstellung
|
||||
|
||||
**Timestamp-Kategorien:**
|
||||
- **M** (Modified): Letzter Schreibzugriff auf Dateiinhalt
|
||||
- **A** (Accessed): Letzter Lesezugriff (oft deaktiviert)
|
||||
- **C** (Changed): Metadaten-Änderung (Inode/MFT)
|
||||
- **B** (Born): Erstellungszeitpunkt
|
||||
|
||||
```bash
|
||||
# Mit fls Timeline erstellen
|
||||
fls -r -m C: image.dd > timeline.bodyfile
|
||||
mactime -d -b timeline.bodyfile > timeline.csv
|
||||
|
||||
# Mit log2timeline umfassende Timeline
|
||||
log2timeline.py --storage-file timeline.plaso image.dd
|
||||
psort.py -o l2tcsv -w timeline_full.csv timeline.plaso
|
||||
```
|
||||
|
||||
### Timestamp-Manipulation-Detection
|
||||
|
||||
**Erkennungsstrategien:**
|
||||
1. **Chronologie-Anomalien**: Created > Modified Timestamps
|
||||
2. **Präzisions-Analyse**: Unnatürliche Rundung auf Sekunden/Minuten
|
||||
3. **Filesystem-Vergleich**: Inkonsistenzen zwischen verschiedenen Timestamp-Quellen
|
||||
4. **Batch-Manipulation**: Verdächtige Muster bei mehreren Dateien
|
||||
|
||||
**Registry-basierte Evidenz:**
|
||||
```
|
||||
HKLM\SYSTEM\CurrentControlSet\Control\FileSystem\NtfsDisableLastAccessUpdate
|
||||
```
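
Ob Last-Access-Updates auf dem untersuchten System aktiv waren, lässt sich z.B. so abfragen; die Wert-Interpretation ist als Annahme zu verstehen, neuere Windows-Builds nutzen zusätzlich "System Managed"-Werte:

```cmd
:: 0x1 = Last-Access-Updates deaktiviert, A-Timestamps sind dann wenig aussagekräftig
reg query "HKLM\SYSTEM\CurrentControlSet\Control\FileSystem" /v NtfsDisableLastAccessUpdate
```
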
|
||||
|
||||
## Häufige Herausforderungen und Lösungsansätze
|
||||
|
||||
### Performance-Optimierung bei großen Images
|
||||
|
||||
**Problem**: Analyse von Multi-TB-Images dauert Tage
|
||||
**Lösungen**:
|
||||
1. **Selective Processing**: Nur relevante Partitionen analysieren
|
||||
2. **Parallel Processing**: Multi-threaded Tools verwenden
|
||||
3. **Hardware-Optimierung**: NVMe-SSDs für temporäre Dateien
|
||||
4. **Cloud-Processing**: Verteilte Analyse in der Cloud
|
||||
|
||||
### Verschlüsselte Container und Volumes
|
||||
|
||||
**BitLocker-Forensik**:
|
||||
```bash
|
||||
# Mit dislocker BitLocker-Volume mounten
|
||||
dislocker -r -V /dev/sda1 -p password -- /tmp/bitlocker
|
||||
|
||||
# Recovery-Key-basierter Zugriff
|
||||
dislocker -r -V /dev/sda1 -k recovery.key -- /tmp/bitlocker
|
||||
```
|
||||
|
||||
**VeraCrypt-Analyse**:
|
||||
- Header-Backup-Analyse für mögliche Passwort-Recovery
|
||||
- Hidden-Volume-Detection durch Entropie-Analyse
|
||||
- Keyfile-basierte Entschlüsselung
|
||||
|
||||
### Anti-Forensik-Techniken erkennen
|
||||
|
||||
**Wiping-Detection**:
|
||||
- Pattern-Analyse für DoD 5220.22-M Wiping
|
||||
- Random-Data vs. Encrypted-Data Unterscheidung
|
||||
- Unvollständige Wiping-Artefakte
|
||||
|
||||
**Timestomp-Detection**:
|
||||
```bash
|
||||
# Mit analyzeMFT.py Timestamp-Anomalien finden
|
||||
analyzeMFT.py -f '$MFT' -o analysis.csv   # '$MFT' quoten, damit die Shell den Namen nicht als Variable auflöst
|
||||
# Analyse der $SI vs. $FN Timestamp-Diskrepanzen
|
||||
```
|
||||
|
||||
## Tool-Integration und Workflows
|
||||
|
||||
### Autopsy-Integration
|
||||
|
||||
**Workflow-Setup**:
|
||||
1. **Image-Import**: E01/DD-Images mit Hash-Verifikation
|
||||
2. **Ingest-Module**: File-Type-Detection, Hash-Lookup, Timeline-Creation
|
||||
3. **Analysis**: Keyword-Search, Timeline-Analysis, File-Category-Review
|
||||
4. **Reporting**: Automatisierte Report-Generierung
|
||||
|
||||
### TSK-Kommandozeilen-Pipeline
|
||||
|
||||
```bash
#!/bin/bash
# Vollständiger Dateisystem-Analyse-Workflow

IMAGE="/cases/evidence.dd"
OUTPUT="/analysis/case001"
mkdir -p "$OUTPUT"

# 1. Partitionstabelle analysieren
mmls "$IMAGE" > "$OUTPUT/partitions.txt"

# 2. Dateisystem-Info extrahieren (bei Voll-Images ggf. mit -o <Sektor-Offset> auf die Partition zeigen)
fsstat "$IMAGE" > "$OUTPUT/filesystem_info.txt"

# 3. Timeline erstellen
fls -r -m / "$IMAGE" > "$OUTPUT/timeline.bodyfile"
mactime -d -b "$OUTPUT/timeline.bodyfile" > "$OUTPUT/timeline.csv"

# 4. Gelöschte Dateien auflisten
fls -r -d "$IMAGE" > "$OUTPUT/deleted_files.txt"

# 5. File-Carving durchführen
foremost -t all -i "$IMAGE" -o "$OUTPUT/carved/"

# 6. Hash-Analyse: Dateien extrahieren und hashen (Abgleich anschließend z.B. gegen eine indizierte NSRL-Datenbank)
tsk_recover -a "$IMAGE" "$OUTPUT/extracted/"
hashdeep -r "$OUTPUT/extracted/" > "$OUTPUT/file_hashes.txt"
```
|
||||
|
||||
## Best Practices und Methodologie
|
||||
|
||||
### Dokumentation und Chain of Custody
|
||||
|
||||
**Kritische Dokumentationspunkte**:
|
||||
1. **Acquisition-Details**: Tool, Version, Hash-Werte, Zeitstempel
|
||||
2. **Analysis-Methodik**: Verwendete Tools und Parameter
|
||||
3. **Findings-Dokumentation**: Screenshots, Befund-Zusammenfassung
|
||||
4. **Timeline-Rekonstruktion**: Chronologische Ereignis-Dokumentation
|
||||
|
||||
### Qualitätssicherung
|
||||
|
||||
**Verifikations-Checkliste**:
|
||||
- [ ] Hash-Integrität von Original-Images
|
||||
- [ ] Tool-Version-Dokumentation
|
||||
- [ ] Kreuz-Validierung mit verschiedenen Tools
|
||||
- [ ] Timeline-Plausibilitätsprüfung
|
||||
- [ ] Anti-Forensik-Artefakt-Suche
|
||||
|
||||
### Rechtliche Aspekte
|
||||
|
||||
**Admissibility-Faktoren**:
|
||||
1. **Tool-Reliability**: Verwendung etablierter, validierter Tools
|
||||
2. **Methodology-Documentation**: Nachvollziehbare Analyse-Schritte
|
||||
3. **Error-Rate-Analysis**: Bekannte Limitationen dokumentieren
|
||||
4. **Expert-Qualification**: Forensiker-Qualifikation nachweisen
|
||||
|
||||
## Weiterführende Ressourcen
|
||||
|
||||
### Spezialisierte Tools
|
||||
- **X-Ways Forensics**: Kommerzielle All-in-One-Lösung
|
||||
- **EnCase**: Enterprise-Forensik-Platform
|
||||
- **AXIOM**: Mobile und Computer-Forensik
|
||||
- **Oxygen Detective**: Mobile-Spezialist
|
||||
- **BlackBag**: macOS-Forensik-Spezialist
|
||||
|
||||
### Fortgeschrittene Techniken
|
||||
- **Memory-Forensics**: Volatility für RAM-Analyse
|
||||
- **Network-Forensics**: Wireshark für Netzwerk-Traffic
|
||||
- **Mobile-Forensics**: Cellebrite/Oxygen für Smartphone-Analyse
|
||||
- **Cloud-Forensics**: KAPE für Cloud-Artefakt-Collection
|
||||
|
||||
### Continuous Learning
|
||||
- **SANS FOR508**: Advanced Digital Forensics
|
||||
- **Volatility Training**: Memory-Forensics-Spezialisierung
|
||||
- **FIRST Conference**: Internationale Forensik-Community
|
||||
- **DFRWS**: Digital Forensics Research Workshop
|
||||
|
||||
Die moderne Dateisystem-Forensik erfordert ein tiefes Verständnis verschiedener Speichertechnologien und deren forensischer Artefakte. Durch systematische Anwendung der beschriebenen Techniken und kontinuierliche Weiterbildung können Forensiker auch komplexeste Fälle erfolgreich bearbeiten und gerichtsfeste Beweise sicherstellen.
|
||||
@@ -1,377 +0,0 @@
|
||||
---
|
||||
title: "Hash-Funktionen und digitale Signaturen: Grundlagen der digitalen Beweissicherung"
|
||||
description: "Umfassender Leitfaden zu kryptographischen Hash-Funktionen, digitalen Signaturen und deren praktischer Anwendung in der digitalen Forensik für Integritätsprüfung und Beweissicherung"
|
||||
author: "Claude 4 Sonnett (Prompt: Mario Stöckl)"
|
||||
last_updated: 2025-08-10
|
||||
difficulty: advanced
|
||||
categories: ["analysis", "configuration", "case-study"]
|
||||
tags: ["hashing", "integrity-check", "chain-of-custody", "standards-compliant", "deduplication", "known-bad-detection", "fuzzy-hashing", "digital-signatures", "timestamping", "blockchain-evidence", "md5", "sha256", "ssdeep"]
|
||||
tool_name: "Hash Functions & Digital Signatures"
|
||||
published: true
|
||||
---
|
||||
|
||||
# Hash-Funktionen und digitale Signaturen: Grundlagen der digitalen Beweissicherung
|
||||
|
||||
Hash-Funktionen und digitale Signaturen bilden das fundamentale Rückgrat der digitalen Forensik. Sie gewährleisten die Integrität von Beweismitteln, ermöglichen die Authentifizierung von Daten und sind essentiell für die rechtssichere Dokumentation forensischer Untersuchungen.
|
||||
|
||||
## Was sind kryptographische Hash-Funktionen?
|
||||
|
||||
Eine kryptographische Hash-Funktion ist ein mathematisches Verfahren, das aus beliebig großen Eingabedaten einen festen, eindeutigen "Fingerabdruck" (Hash-Wert) erzeugt. Dieser Wert verändert sich drastisch, wenn auch nur ein einzelnes Bit der Eingabe modifiziert wird.
|
||||
|
||||
### Eigenschaften einer kryptographischen Hash-Funktion
|
||||
|
||||
**Einwegfunktion (One-Way Function)**
|
||||
- Aus dem Hash-Wert kann nicht auf die ursprünglichen Daten geschlossen werden
|
||||
- Mathematisch praktisch irreversibel
|
||||
|
||||
**Determinismus**
|
||||
- Identische Eingabe erzeugt immer identischen Hash-Wert
|
||||
- Reproduzierbare Ergebnisse für forensische Dokumentation
|
||||
|
||||
**Kollisionsresistenz**
|
||||
- Extrem schwierig, zwei verschiedene Eingaben zu finden, die denselben Hash erzeugen
|
||||
- Gewährleistet Eindeutigkeit in forensischen Anwendungen
|
||||
|
||||
**Lawineneffekt**
|
||||
- Minimale Änderung der Eingabe führt zu völlig anderem Hash-Wert
|
||||
- Erkennung von Manipulationen
|
||||
|
||||
## Wichtige Hash-Algorithmen in der Forensik
|
||||
|
||||
### MD5 (Message Digest Algorithm 5)
|
||||
```bash
|
||||
# MD5-Hash berechnen
|
||||
md5sum evidence.dd
|
||||
# Output: 5d41402abc4b2a76b9719d911017c592 evidence.dd
|
||||
```
|
||||
|
||||
**Eigenschaften:**
|
||||
- 128-Bit Hash-Wert (32 Hexadezimal-Zeichen)
|
||||
- Entwickelt 1991, kryptographisch gebrochen seit 2004
|
||||
- **Nicht mehr sicher**, aber weit verbreitet in Legacy-Systemen
|
||||
- Kollisionen sind praktisch erzeugbar
|
||||
|
||||
**Forensische Relevanz:**
|
||||
- Noch in vielen bestehenden Systemen verwendet
|
||||
- Für forensische Zwecke nur bei bereits vorhandenen MD5-Hashes
|
||||
- Niemals für neue forensische Implementierungen verwenden
|
||||
|
||||
### SHA-1 (Secure Hash Algorithm 1)
|
||||
```bash
|
||||
# SHA-1-Hash berechnen
|
||||
sha1sum evidence.dd
|
||||
# Output: aaf4c61ddcc5e8a2dabede0f3b482cd9aea9434d evidence.dd
|
||||
```
|
||||
|
||||
**Eigenschaften:**
|
||||
- 160-Bit Hash-Wert (40 Hexadezimal-Zeichen)
|
||||
- Entwickelt von NSA, standardisiert 1995
|
||||
- **Deprecated seit 2017** aufgrund praktischer Kollisionsangriffe
|
||||
- SHAttered-Angriff bewies Schwachstellen 2017
|
||||
|
||||
### SHA-2-Familie (SHA-256, SHA-512)
|
||||
```bash
|
||||
# SHA-256-Hash berechnen
|
||||
sha256sum evidence.dd
|
||||
# Output: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 evidence.dd
|
||||
|
||||
# SHA-512-Hash berechnen
|
||||
sha512sum evidence.dd
|
||||
```
|
||||
|
||||
**SHA-256 Eigenschaften:**
|
||||
- 256-Bit Hash-Wert (64 Hexadezimal-Zeichen)
|
||||
- Aktueller Standard für forensische Anwendungen
|
||||
- NIST-approved, FIPS 180-4 konform
|
||||
- Keine bekannten praktischen Angriffe
|
||||
|
||||
**SHA-512 Eigenschaften:**
|
||||
- 512-Bit Hash-Wert (128 Hexadezimal-Zeichen)
|
||||
- Höhere Sicherheit, aber größerer Hash-Wert
|
||||
- Optimal für hochsensible Ermittlungen
|
||||
|
||||
### SHA-3 (Keccak)
|
||||
- Neuester Standard (seit 2015)
|
||||
- Andere mathematische Grundlage als SHA-2
|
||||
- Zukünftiger Standard bei SHA-2-Kompromittierung
|
||||
|
||||
## Forensische Anwendungen von Hash-Funktionen
|
||||
|
||||
### 1. Datenträger-Imaging und Verifikation
|
||||
|
||||
**Vor dem Imaging:**
|
||||
```bash
|
||||
# Original-Datenträger hashen
|
||||
sha256sum /dev/sdb > original_hash.txt
|
||||
```
|
||||
|
||||
**Nach dem Imaging:**
|
||||
```bash
|
||||
# Image-Datei hashen
sha256sum evidence.dd > image_hash.txt

# Nur die Hash-Werte vergleichen (die Dateinamen in beiden Dateien unterscheiden sich zwangsläufig)
diff <(cut -d' ' -f1 original_hash.txt) <(cut -d' ' -f1 image_hash.txt) && echo "Hashes identisch"
|
||||
```
|
||||
|
||||
**Best Practice:**
|
||||
- Immer mehrere Hash-Algorithmen verwenden (SHA-256 + SHA-512)
|
||||
- Hash-Berechnung vor, während und nach dem Imaging
|
||||
- Dokumentation in Chain-of-Custody-Protokoll
|
||||
|
||||
### 2. Deduplizierung mit Hash-Sets
|
||||
|
||||
Hash-Sets ermöglichen die Identifikation bekannter Dateien zur Effizienzsteigerung:
|
||||
|
||||
**NSRL (National Software Reference Library)**
|
||||
```bash
|
||||
# NSRL-Hash-Set für The Sleuth Kit indizieren (in Autopsy über das "Hash Lookup"-Ingest-Modul einbinden)
hfind -i nsrl-md5 /path/to/nsrl/NSRLFile.txt

# Bekannte Dateien im Audit-Modus gegen ein vorbereitetes Hash-Set abgleichen
hashdeep -r -a -k nsrl_hashes.txt /evidence/mount/
|
||||
```
|
||||
|
||||
**Eigene Hash-Sets erstellen:**
|
||||
```bash
|
||||
# Hash-Set von bekannten guten Dateien
|
||||
hashdeep -r /clean_system/ > clean_system_hashes.txt
|
||||
|
||||
# Vergleich mit verdächtigem System
|
||||
hashdeep -r -a -k clean_system_hashes.txt /suspect_system/   # Audit-Modus meldet Abweichungen von der Known-Good-Basis
|
||||
```
|
||||
|
||||
### 3. Known-Bad-Erkennung
|
||||
|
||||
**Malware-Hash-Datenbanken:**
|
||||
- VirusTotal API-Integration
|
||||
- Threat Intelligence Feeds
|
||||
- Custom IoC-Listen
|
||||
|
||||
```python
|
||||
# Beispiel: Datei-Hash gegen Known-Bad-Liste prüfen
|
||||
import hashlib
|
||||
|
||||
def check_malware_hash(filepath, malware_hashes):
|
||||
with open(filepath, 'rb') as f:
|
||||
file_hash = hashlib.sha256(f.read()).hexdigest()
|
||||
|
||||
if file_hash in malware_hashes:
|
||||
return True, file_hash
|
||||
return False, file_hash
|
||||
```
|
||||
|
||||
### 4. Fuzzy Hashing mit ssdeep
|
||||
|
||||
Fuzzy Hashing erkennt ähnliche, aber nicht identische Dateien:
|
||||
|
||||
```bash
|
||||
# ssdeep-Hash berechnen
|
||||
ssdeep malware.exe
|
||||
# Output: 768:gQA1M2Ua3QqQm8+1QV7Q8+1QG8+1Q:gQ1Ma3qmP1QV7P1QGP1Q
|
||||
|
||||
# Ähnlichkeit zwischen Dateien prüfen
|
||||
ssdeep -d malware_v1.exe malware_v2.exe
|
||||
# Output: 85 (85% Ähnlichkeit)
|
||||
```
|
||||
|
||||
**Anwendungsfälle:**
|
||||
- Erkennung von Malware-Varianten
|
||||
- Identifikation modifizierter Dokumente
|
||||
- Versionsverfolgung von Dateien
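
Für die Automatisierung bietet sich das Python-Binding an; eine mögliche Skizze, die das PyPI-Paket `ssdeep` voraussetzt (Dateinamen sind Beispiele):

```python
import ssdeep  # Python-Binding zur libfuzzy (pip install ssdeep)

h1 = ssdeep.hash_from_file("malware_v1.exe")
h2 = ssdeep.hash_from_file("malware_v2.exe")
print(ssdeep.compare(h1, h2))  # Ähnlichkeits-Score von 0 bis 100
```
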
|
||||
|
||||
### 5. Timeline-Analyse und Integritätsprüfung
|
||||
|
||||
```bash
|
||||
# Erweiterte Metadaten mit Hashes
|
||||
find /evidence/mount -type f -exec stat -c "%Y %n" {} \; | while read timestamp file; do
|
||||
hash=$(sha256sum "$file" | cut -d' ' -f1)
|
||||
echo "$timestamp $hash $file"
|
||||
done > timeline_with_hashes.txt
|
||||
```
|
||||
|
||||
## Digitale Signaturen in der Forensik
|
||||
|
||||
Digitale Signaturen verwenden asymmetrische Kryptographie zur Authentifizierung und Integritätssicherung.
|
||||
|
||||
### Funktionsweise digitaler Signaturen
|
||||
|
||||
1. **Erstellung:**
|
||||
- Hash des Dokuments wird mit privatem Schlüssel verschlüsselt
|
||||
- Verschlüsselter Hash = digitale Signatur
|
||||
|
||||
2. **Verifikation:**
|
||||
- Signatur wird mit öffentlichem Schlüssel entschlüsselt
|
||||
- Entschlüsselter Hash wird mit neuem Hash des Dokuments verglichen
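
Die beiden Schritte lassen sich z.B. mit OpenSSL nachvollziehen; Schlüssel- und Dateinamen sind hier nur Annahmen:

```bash
# 1. Erstellung: Hash bilden und mit dem privaten Schlüssel signieren
openssl dgst -sha256 -sign investigator_key.pem -out report.pdf.sig report.pdf

# 2. Verifikation: Signatur mit dem öffentlichen Schlüssel prüfen (Ausgabe bei Erfolg: "Verified OK")
openssl dgst -sha256 -verify investigator_pub.pem -signature report.pdf.sig report.pdf
```
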
|
||||
|
||||
### Certificate Chain Analysis
|
||||
|
||||
**X.509-Zertifikate untersuchen:**
|
||||
```bash
|
||||
# Zertifikat-Details anzeigen
|
||||
openssl x509 -in certificate.crt -text -noout
|
||||
|
||||
# Zertifikatskette verfolgen
|
||||
openssl verify -CAfile ca-bundle.crt -untrusted intermediate.crt certificate.crt
|
||||
```
|
||||
|
||||
**Forensische Relevanz:**
|
||||
- Authentizität von Software-Downloads
|
||||
- Erkennung gefälschter Zertifikate
|
||||
- APT-Gruppenattribution durch Code-Signing-Zertifikate
|
||||
|
||||
### Timestamping für Chain-of-Custody
|
||||
|
||||
**RFC 3161-Zeitstempel:**
|
||||
```bash
|
||||
# Zeitstempel für Beweisdatei erstellen
|
||||
openssl ts -query -data evidence.dd -no_nonce -sha256 -out request.tsq
|
||||
openssl ts -verify -in response.tsr -data evidence.dd -CAfile tsa-ca.crt
|
||||
```
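
Die im Beispiel vorausgesetzte response.tsr muss zunächst bei einer TSA angefragt werden; eine mögliche Skizze mit einer öffentlichen Test-TSA (URL nur beispielhaft gewählt):

```bash
# Zeitstempel-Anfrage an die TSA senden
curl -s -H "Content-Type: application/timestamp-query" \
     --data-binary @request.tsq https://freetsa.org/tsr -o response.tsr

# Inhalt der Antwort menschenlesbar anzeigen
openssl ts -reply -in response.tsr -text
```
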
|
||||
|
||||
**Blockchain-basierte Zeitstempel:**
|
||||
- Unveränderliche Zeitstempel in öffentlichen Blockchains
|
||||
- OriginStamp, OpenTimestamps für forensische Anwendungen
|
||||
|
||||
## Praktische Tools und Integration
|
||||
|
||||
### Autopsy Integration
|
||||
```xml
|
||||
<!-- Autopsy Hash Database Configuration -->
|
||||
<hashDb>
|
||||
<dbType>NSRL</dbType>
|
||||
<dbPath>/usr/share/autopsy/nsrl/NSRLFile.txt</dbPath>
|
||||
<searchDuringIngest>true</searchDuringIngest>
|
||||
</hashDb>
|
||||
```
|
||||
|
||||
### YARA-Integration mit Hash-Regeln
|
||||
```yara
|
||||
import "hash"

rule Malware_Hash_Detection {
    condition:
        hash.sha256(0, filesize) == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
}
|
||||
```
|
||||
|
||||
### FTK Imager Hash-Verifikation
|
||||
- Automatische Hash-Berechnung während Imaging
|
||||
- MD5, SHA-1, SHA-256 parallel
|
||||
- Verify-Funktion für Image-Integrität
|
||||
|
||||
## Advanced Topics
|
||||
|
||||
### Rainbow Table Attacks
|
||||
**Funktionsweise:**
|
||||
- Vorberechnete Hash-Tabellen für Passwort-Cracking
|
||||
- Trade-off zwischen Speicher und Rechenzeit
|
||||
- Effektiv gegen unsalted Hashes
|
||||
|
||||
**Forensische Anwendung:**
|
||||
```bash
|
||||
# Wörterbuch-Angriff mit Hashcat (in der Praxis heute meist anstelle klassischer Rainbow Tables)
hashcat -m 0 -a 0 hashes.txt wordlist.txt

# John the Ripper mit Wordlist
john --format=NT --wordlist=rockyou.txt ntlm_hashes.txt
|
||||
```
|
||||
|
||||
### Blockchain Evidence Management
|
||||
**Konzept:**
|
||||
- Unveränderliche Speicherung von Hash-Werten
|
||||
- Distributed Ledger für Chain-of-Custody
|
||||
- Smart Contracts für automatisierte Verifikation
|
||||
|
||||
**Implementierung:**
|
||||
```solidity
|
||||
// Ethereum Smart Contract für Evidence Hashes
|
||||
contract EvidenceRegistry {
|
||||
mapping(bytes32 => bool) public evidenceHashes;
|
||||
|
||||
function registerEvidence(bytes32 _hash) public {
|
||||
evidenceHashes[_hash] = true;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Häufige Probleme und Lösungsansätze
|
||||
|
||||
### Hash-Kollisionen
|
||||
**Problem:** Zwei verschiedene Dateien mit identischem Hash
|
||||
**Lösung:**
|
||||
- Verwendung mehrerer Hash-Algorithmen
|
||||
- Sichere Algorithmen (SHA-256+) verwenden
|
||||
- Bei Verdacht: Bitweise Vergleich der Originaldateien
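
Der bitweise Vergleich lässt sich mit Bordmitteln erledigen, etwa so (Dateinamen sind Platzhalter):

```bash
cmp --silent datei_a.bin datei_b.bin && echo "byte-identisch" || echo "Inhalte unterscheiden sich"
```
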
|
||||
|
||||
### Performance bei großen Datenmengen
|
||||
**Problem:** Langsame Hash-Berechnung bei TB-großen Images
|
||||
**Optimierung:**
|
||||
```bash
|
||||
# Parallele Hash-Berechnung
|
||||
hashdeep -r -j 8 /large_dataset/ # 8 Threads
|
||||
|
||||
# Schnelles Quellmedium (NVMe) für bessere I/O nutzen; --tag erzeugt lediglich BSD-formatierte Ausgabe
sha256sum --tag /dev/nvme0n1
|
||||
```
|
||||
|
||||
### Rechtliche Anforderungen
|
||||
**Problem:** Verschiedene Standards in verschiedenen Jurisdiktionen
|
||||
**Lösung:**
|
||||
- NIST-konforme Algorithmen verwenden
|
||||
- Dokumentation aller verwendeten Verfahren
|
||||
- Regelmäßige Algorithmus-Updates
|
||||
|
||||
## Best Practices
|
||||
|
||||
### 1. Algorithmus-Auswahl
|
||||
- **Neu:** SHA-256 oder SHA-3 verwenden
|
||||
- **Legacy:** MD5/SHA-1 nur bei vorhandenen Systemen
|
||||
- **High-Security:** SHA-512 oder SHA-3-512
|
||||
|
||||
### 2. Dokumentation
|
||||
```text
|
||||
Evidence Hash Verification Report
|
||||
=================================
|
||||
Evidence ID: CASE-2024-001-HDD
|
||||
Original Hash (SHA-256): a1b2c3d4...
|
||||
Image Hash (SHA-256): a1b2c3d4...
|
||||
Verification Status: VERIFIED
|
||||
Timestamp: 2024-01-15 14:30:00 UTC
|
||||
Investigator: John Doe
|
||||
```
|
||||
|
||||
### 3. Redundanz
|
||||
- Mindestens zwei verschiedene Hash-Algorithmen
|
||||
- Mehrfache Verifikation zu verschiedenen Zeitpunkten
|
||||
- Verschiedene Tools für Cross-Validation
|
||||
|
||||
### 4. Automation
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Automatisiertes Hash-Verification-Script
|
||||
EVIDENCE_FILE="$1"
|
||||
LOG_FILE="hash_verification.log"
|
||||
|
||||
echo "Starting hash verification for $EVIDENCE_FILE" >> $LOG_FILE
|
||||
MD5_HASH=$(md5sum "$EVIDENCE_FILE" | cut -d' ' -f1)
|
||||
SHA256_HASH=$(sha256sum "$EVIDENCE_FILE" | cut -d' ' -f1)
|
||||
SHA512_HASH=$(sha512sum "$EVIDENCE_FILE" | cut -d' ' -f1)
|
||||
|
||||
echo "MD5: $MD5_HASH" >> $LOG_FILE
|
||||
echo "SHA-256: $SHA256_HASH" >> $LOG_FILE
|
||||
echo "SHA-512: $SHA512_HASH" >> $LOG_FILE
|
||||
echo "Verification completed at $(date)" >> $LOG_FILE
|
||||
```
|
||||
|
||||
## Zukunftsperspektiven
|
||||
|
||||
### Quantum-Resistant Hashing
|
||||
- Vorbereitung auf Quantum Computing
|
||||
- NIST Post-Quantum Cryptography Standards
|
||||
- Migration bestehender Systeme
|
||||
|
||||
### AI/ML-Integration
|
||||
- Anomalie-Erkennung in Hash-Mustern
|
||||
- Automated Similarity Analysis
|
||||
- Intelligent Deduplizierung
|
||||
|
||||
Hash-Funktionen und digitale Signaturen sind und bleiben das Fundament der digitalen Forensik. Das Verständnis ihrer mathematischen Grundlagen, praktischen Anwendungen und rechtlichen Implikationen unterscheidet professionelle Forensiker von Amateuren. Mit der kontinuierlichen Weiterentwicklung der Technologie müssen auch forensische Praktiken angepasst werden, um die Integrität und Authentizität digitaler Beweise zu gewährleisten.
|
||||
@@ -1,666 +0,0 @@
|
||||
---
|
||||
title: "Memory Forensics und Process Analysis: Advanced Malware Detection in Volatile Memory"
|
||||
description: "Umfassender Leitfaden zur forensischen Analyse von Arbeitsspeicher-Strukturen, Process-Injection-Techniken und Advanced-Malware-Detection. Von Kernel-Analysis bis Cross-Platform-Memory-Forensik."
|
||||
author: "Claude 4 Sonnett (Prompt: Mario Stöckl)"
|
||||
last_updated: 2025-08-10
|
||||
difficulty: advanced
|
||||
categories: ["analysis", "advanced-techniques", "malware-investigation"]
|
||||
tags: ["memory-structures", "process-injection", "rootkit-detection", "kernel-analysis", "address-space", "live-analysis", "malware-hiding", "system-internals", "volatility", "dll-hollowing", "process-ghosting"]
|
||||
related_tools: ["Volatility 3", "Rekall", "WinDbg", "GDB"]
|
||||
published: true
|
||||
---
|
||||
|
||||
# Memory Forensics und Process Analysis: Advanced Malware Detection in Volatile Memory
|
||||
|
||||
Memory Forensics stellt eine der komplexesten und gleichzeitig aufschlussreichsten Disziplinen der digitalen Forensik dar. Während traditionelle Festplatten-Forensik auf persistente Daten zugreift, ermöglicht die Analyse des Arbeitsspeichers Einblicke in aktive Prozesse, verschleierte Malware und Angriffstechniken, die keine Spuren auf der Festplatte hinterlassen.
|
||||
|
||||
## Einführung in Memory Forensics
|
||||
|
||||
### Was ist Memory Forensics?
|
||||
|
||||
Memory Forensics ist die Wissenschaft der Analyse von Computer-Arbeitsspeicher (RAM) zur Aufdeckung digitaler Artefakte. Im Gegensatz zur traditionellen Festplatten-Forensik konzentriert sich Memory Forensics auf volatile Daten, die nur temporär im Speicher existieren.
|
||||
|
||||
**Zentrale Vorteile:**
|
||||
- Erkennung von Malware, die nur im Speicher residiert
|
||||
- Aufdeckung von Process-Injection und Code-Hiding-Techniken
|
||||
- Analyse von verschlüsselten oder obfuscierten Prozessen
|
||||
- Rekonstruktion von Netzwerkverbindungen und Benutzeraktivitäten
|
||||
- Untersuchung von Kernel-Level-Rootkits
|
||||
|
||||
### Virtual Memory Layout verstehen
|
||||
|
||||
Das Virtual Memory System moderner Betriebssysteme bildet die Grundlage für Memory Forensics. Jeder Prozess erhält einen eigenen virtuellen Adressraum, der in verschiedene Segmente unterteilt ist:
|
||||
|
||||
**Windows Virtual Memory Layout:**
|
||||
```
|
||||
0x00000000 - 0x7FFFFFFF: User Space (2GB)
|
||||
0x80000000 - 0xFFFFFFFF: Kernel Space (2GB)
|
||||
|
||||
User Space Segmente:
|
||||
- 0x00000000 - 0x0000FFFF: NULL Pointer Region
|
||||
- 0x00010000 - 0x7FFEFFFF: User Code und Data
|
||||
- 0x7FFF0000 - 0x7FFFFFFF: System DLLs (ntdll.dll)
|
||||
```
|
||||
|
||||
**Linux Virtual Memory Layout:**
|
||||
```
|
||||
0x00000000 - 0xBFFFFFFF: User Space (3GB)
|
||||
0xC0000000 - 0xFFFFFFFF: Kernel Space (1GB)
|
||||
|
||||
User Space Segmente:
|
||||
- Text Segment: Executable Code
|
||||
- Data Segment: Initialized Variables
|
||||
- BSS Segment: Uninitialized Variables
|
||||
- Heap: Dynamic Memory Allocation
|
||||
- Stack: Function Calls und Local Variables
|
||||
```
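
Die genannten Segmente lassen sich unter Linux direkt an einem laufenden Prozess nachvollziehen, z.B. an der eigenen Shell:

```bash
# Adressbereiche, Rechte (rwxp) und Zuordnung (Pfad, [heap], [stack]) eines Prozesses anzeigen
cat /proc/$$/maps | head -n 15
```
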
|
||||
|
||||
## Process Internals und Strukturen
|
||||
|
||||
### Process Control Blocks (PCB)
|
||||
|
||||
Jeder Prozess wird durch eine zentrale Datenstruktur repräsentiert, die alle relevanten Informationen enthält:
|
||||
|
||||
**Windows EPROCESS Structure:**
|
||||
```c
|
||||
typedef struct _EPROCESS {
|
||||
KPROCESS Pcb; // Process Control Block
|
||||
EX_PUSH_LOCK ProcessLock; // Process Lock
|
||||
LARGE_INTEGER CreateTime; // Creation Timestamp
|
||||
LARGE_INTEGER ExitTime; // Exit Timestamp
|
||||
EX_RUNDOWN_REF RundownProtect; // Rundown Protection
|
||||
HANDLE UniqueProcessId; // Process ID (PID)
|
||||
LIST_ENTRY ActiveProcessLinks; // Double Linked List
|
||||
RTL_AVL_TREE VadRoot; // Virtual Address Descriptors
|
||||
// ... weitere Felder
|
||||
} EPROCESS, *PEPROCESS;
|
||||
```
|
||||
|
||||
**Wichtige Felder für Forensik:**
|
||||
- `ImageFileName`: Name der ausführbaren Datei
|
||||
- `Peb`: Process Environment Block Pointer
|
||||
- `VadRoot`: Virtual Address Descriptor Tree
|
||||
- `Token`: Security Token des Prozesses
|
||||
- `HandleTable`: Tabelle geöffneter Handles
|
||||
|
||||
### Thread Control Blocks (TCB)
|
||||
|
||||
Threads sind die ausführbaren Einheiten innerhalb eines Prozesses:
|
||||
|
||||
**Windows ETHREAD Structure:**
|
||||
```c
|
||||
typedef struct _ETHREAD {
|
||||
KTHREAD Tcb; // Thread Control Block
|
||||
LARGE_INTEGER CreateTime; // Thread Creation Time
|
||||
LIST_ENTRY ThreadListEntry; // Process Thread List
|
||||
EX_RUNDOWN_REF RundownProtect; // Rundown Protection
|
||||
PEPROCESS ThreadsProcess; // Parent Process Pointer
|
||||
PVOID StartAddress; // Thread Start Address
|
||||
// ... weitere Felder
|
||||
} ETHREAD, *PETHREAD;
|
||||
```
|
||||
|
||||
## Advanced Malware Detection Techniken
|
||||
|
||||
### Process Injection Erkennung
|
||||
|
||||
Process Injection ist eine häufig verwendete Technik zur Umgehung von Security-Lösungen. Verschiedene Injection-Methoden erfordern spezifische Erkennungsansätze:
|
||||
|
||||
#### DLL Injection Detection
|
||||
|
||||
**Erkennungsmerkmale:**
|
||||
```bash
|
||||
# Volatility 3 Command
|
||||
python vol.py -f memory.dmp windows.dlllist.DllList --pid 1234
|
||||
|
||||
# Verdächtige Indikatoren:
|
||||
# - Ungewöhnliche DLL-Pfade
|
||||
# - DLLs ohne digitale Signatur
|
||||
# - Temporäre oder versteckte Pfade
|
||||
# - Diskrepanzen zwischen Image und Memory
|
||||
```
|
||||
|
||||
**Manuelle Verifikation:**
|
||||
```python
|
||||
# Pseudocode für DLL-Validierung
|
||||
def validate_dll_integrity(dll_base, dll_path):
|
||||
memory_hash = calculate_memory_hash(dll_base)
|
||||
disk_hash = calculate_file_hash(dll_path)
|
||||
|
||||
if memory_hash != disk_hash:
|
||||
return "POTENTIAL_INJECTION_DETECTED"
|
||||
return "CLEAN"
|
||||
```
|
||||
|
||||
#### Process Hollowing Detection
|
||||
|
||||
Process Hollowing ersetzt den ursprünglichen Code eines legitimen Prozesses:
|
||||
|
||||
**Erkennungsmerkmale:**
|
||||
- Diskrepanz zwischen ImageFileName und tatsächlichem Code
|
||||
- Ungewöhnliche Memory Protection Flags
|
||||
- Fehlende oder modifizierte PE Header
|
||||
- Unerwartete Entry Points
|
||||
|
||||
**Volatility Detection:**
|
||||
```bash
|
||||
# Process Hollowing Indicators
|
||||
python vol.py -f memory.dmp windows.malfind.Malfind
|
||||
python vol.py -f memory.dmp windows.vadinfo.VadInfo --pid 1234
|
||||
```
|
||||
|
||||
#### Process Ghosting Detection
|
||||
|
||||
Eine der neuesten Evasion-Techniken, die Prozesse ohne korrespondierende Dateien auf der Festplatte erstellt:
|
||||
|
||||
**Erkennungsmerkmale:**
|
||||
```bash
|
||||
# File Object Analysis
|
||||
python vol.py -f memory.dmp windows.handles.Handles --pid 1234
|
||||
|
||||
# Suche nach:
|
||||
# - Deleted File Objects
|
||||
# - Processes ohne korrespondierende Image Files
|
||||
# - Ungewöhnliche Creation Patterns
|
||||
```
|
||||
|
||||
### DLL Hollowing und Memory Manipulation
|
||||
|
||||
DLL Hollowing überschreibt legitimierte DLL-Sektionen mit malicious Code:
|
||||
|
||||
**Detection Workflow:**
|
||||
1. **Section Analysis:**
|
||||
```bash
|
||||
python vol.py -f memory.dmp windows.vadinfo.VadInfo --pid 1234
|
||||
```
|
||||
|
||||
2. **Memory Permission Analysis:**
|
||||
```bash
|
||||
# Suche nach ungewöhnlichen Permissions
|
||||
# RWX (Read-Write-Execute) Bereiche sind verdächtig
|
||||
```
|
||||
|
||||
3. **Entropy Analysis:**
|
||||
```python
|
||||
import math

def calculate_section_entropy(memory_region: bytes) -> float:
    """Shannon-Entropie eines Speicherbereichs (0 bis 8 Bit pro Byte)."""
    entropy = 0.0
    for byte_value in range(256):
        probability = memory_region.count(byte_value) / len(memory_region)
        if probability > 0:
            entropy += probability * math.log2(probability)
    return -entropy
|
||||
```
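
Eine mögliche Verwendung der Funktion auf einem zuvor (z.B. mit Volatility) gedumpten Speicherbereich; der Dateiname ist eine Annahme:

```python
with open("pid1234_region.bin", "rb") as f:
    score = calculate_section_entropy(f.read())

# Werte nahe 8 deuten auf komprimierten, gepackten oder verschlüsselten Code hin
print(f"Entropie: {score:.2f} Bit/Byte")
```
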
|
||||
|
||||
## Kernel-Level Analysis
|
||||
|
||||
### System Call Hooking Detection
|
||||
|
||||
Rootkits manipulieren häufig System Call Tables (SSDT):
|
||||
|
||||
**Windows SSDT Analysis:**
|
||||
```bash
|
||||
# System Service Descriptor Table
|
||||
python vol.py -f memory.dmp windows.ssdt.SSDT
|
||||
|
||||
# Verdächtige Indikatoren:
|
||||
# - Hooks außerhalb bekannter Module
|
||||
# - Ungewöhnliche Sprungadressen
|
||||
# - Modifizierte System Call Nummern
|
||||
```
|
||||
|
||||
**Linux System Call Table:**
|
||||
```bash
|
||||
# System Call Table Analysis für Linux
|
||||
python vol.py -f linux.dmp linux.check_syscall.Check_syscall
|
||||
```
|
||||
|
||||
### Driver Analysis
|
||||
|
||||
Kernel-Mode-Rootkits nutzen Device Driver für persistente Angriffe:
|
||||
|
||||
**Windows Driver Enumeration:**
|
||||
```bash
|
||||
# Loaded Modules Analysis
|
||||
python vol.py -f memory.dmp windows.modules.Modules
|
||||
|
||||
# Driver IRP Analysis
|
||||
python vol.py -f memory.dmp windows.driverscan.DriverScan
|
||||
```
|
||||
|
||||
**Verdächtige Driver-Eigenschaften:**
|
||||
- Fehlende Code-Signierung
|
||||
- Ungewöhnliche Load-Adressen
|
||||
- Versteckte oder gelöschte Driver-Files
|
||||
- Modifizierte IRP (I/O Request Packet) Handler
|
||||
|
||||
### Rootkit Detection Methoden
|
||||
|
||||
#### Direct Kernel Object Manipulation (DKOM)
|
||||
|
||||
DKOM-Rootkits manipulieren Kernel-Datenstrukturen direkt:
|
||||
|
||||
**Process Hiding Detection:**
|
||||
```bash
|
||||
# Process Scan vs. Process List Comparison
|
||||
python vol.py -f memory.dmp windows.psscan.PsScan > psscan.txt
|
||||
python vol.py -f memory.dmp windows.pslist.PsList > pslist.txt
|
||||
|
||||
# Vergleich zeigt versteckte Prozesse
|
||||
diff psscan.txt pslist.txt
|
||||
```
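
Ein zeilenweiser `diff` reagiert empfindlich auf abweichende Spaltenbreiten und Header. Robuster ist der Vergleich der reinen PID-Mengen beider Ausgaben; die folgende Skizze nimmt an, dass die PID in der ersten Whitespace-getrennten Spalte steht (Standard bei den Volatility-3-Textausgaben):

```python
def extract_pids(path):
    # PIDs aus einer Volatility-Textausgabe einsammeln (Header-Zeilen werden übersprungen)
    pids = set()
    with open(path, encoding="utf-8", errors="ignore") as f:
        for line in f:
            fields = line.split()
            if fields and fields[0].isdigit():
                pids.add(int(fields[0]))
    return pids

# Nur in psscan sichtbare PIDs: Kandidaten für DKOM-Hiding
# (beendete Prozesse tauchen ebenfalls auf und müssen manuell geprüft werden)
hidden = extract_pids("psscan.txt") - extract_pids("pslist.txt")
print(sorted(hidden))
```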
|
||||
|
||||
#### EPROCESS Link Manipulation
|
||||
|
||||
```python
|
||||
# Pseudocode für EPROCESS Validation
|
||||
def validate_process_links(eprocess_list):
|
||||
for process in eprocess_list:
|
||||
flink = process.ActiveProcessLinks.Flink
|
||||
blink = process.ActiveProcessLinks.Blink
|
||||
|
||||
# Validate bidirectional links
|
||||
if flink.Blink != process or blink.Flink != process:
|
||||
return "LINK_MANIPULATION_DETECTED"
|
||||
```
|
||||
|
||||
## Memory Dump Acquisition Strategien
|
||||
|
||||
### Live Memory Acquisition
|
||||
|
||||
**Windows Memory Acquisition:**
|
||||
```bash
|
||||
# DumpIt (Comae)
|
||||
DumpIt.exe /output C:\memory.dmp
|
||||
|
||||
# WinPmem
|
||||
winpmem-2.1.post4.exe C:\memory.raw
|
||||
|
||||
# Magnet RAM Capture
|
||||
MRCv1.20.exe /go /output C:\memory.dmp
|
||||
```
|
||||
|
||||
**Linux Memory Acquisition:**
|
||||
```bash
|
||||
# LiME (Linux Memory Extractor)
|
||||
insmod lime.ko "path=/tmp/memory.lime format=lime"
|
||||
|
||||
# AVML (Azure Virtual Machine Memory Extractor)
|
||||
./avml memory.dmp
|
||||
|
||||
# dd (für /dev/mem falls verfügbar)
|
||||
dd if=/dev/mem of=memory.dd bs=1M
|
||||
```
|
||||
|
||||
### Memory Acquisition Challenges
|
||||
|
||||
**Volatility Considerations:**
|
||||
- Memory-Inhalte ändern sich kontinuierlich
|
||||
- Acquisition-Tools können Memory-Layout beeinflussen
|
||||
- Anti-Forensic-Techniken können Acquisition verhindern
|
||||
- Verschlüsselte Memory-Bereiche
|
||||
|
||||
**Best Practices:**
|
||||
- Multiple Acquisition-Methoden verwenden
|
||||
- Acquisition-Logs dokumentieren
|
||||
- Hash-Werte für Integrität generieren
|
||||
- Timestamp-Synchronisation
|
||||
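
Die beiden letzten Punkte lassen sich mit einem kleinen Hilfsskript kombinieren, das den Hash blockweise berechnet und zusammen mit einem UTC-Zeitstempel protokolliert (Skizze; Datei- und Log-Namen sind frei gewählt):

```python
import hashlib
from datetime import datetime, timezone

def hash_and_log(dump_path, log_path="acquisition_log.txt", chunk_size=1024 * 1024):
    # SHA-256 blockweise berechnen, damit auch sehr große Dumps verarbeitet werden können
    sha256 = hashlib.sha256()
    with open(dump_path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            sha256.update(chunk)
    timestamp = datetime.now(timezone.utc).isoformat()
    with open(log_path, "a", encoding="utf-8") as log:
        log.write(f"{timestamp} SHA256({dump_path}) = {sha256.hexdigest()}\n")
    return sha256.hexdigest()

# Beispiel: hash_and_log("memory.dmp")
```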
|
||||
## Address Space Reconstruction
|
||||
|
||||
### Virtual Address Translation
|
||||
|
||||
Das Verständnis der Address Translation ist essentiell für Memory Forensics:
|
||||
|
||||
**Windows Page Table Walkthrough:**
|
||||
```
|
||||
Virtual Address (32-bit):
|
||||
┌─────────────┬─────────────┬──────────────┐
|
||||
│ PDE (10bit) │ PTE (10bit) │ Offset(12bit)│
|
||||
└─────────────┴─────────────┴──────────────┘
|
||||
|
||||
1. Page Directory Entry → Page Table Base
|
||||
2. Page Table Entry → Physical Page Frame
|
||||
3. Offset → Byte within Physical Page
|
||||
```
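
Die Aufteilung lässt sich direkt nachrechnen; die folgende Skizze zerlegt eine beliebige 32-Bit-Adresse (x86 ohne PAE) in die drei Komponenten:

```python
def split_x86_virtual_address(va: int):
    # 10 Bit PDE-Index, 10 Bit PTE-Index, 12 Bit Offset
    pde_index = (va >> 22) & 0x3FF
    pte_index = (va >> 12) & 0x3FF
    offset = va & 0xFFF
    return pde_index, pte_index, offset

# Beispiel: 0x7FFDF004 -> (511, 991, 4)
print(split_x86_virtual_address(0x7FFDF004))
```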
|
||||
|
||||
**Linux Page Table Structure:**
|
||||
```
|
||||
Virtual Address (64-bit):
|
||||
┌───┬───┬───┬───┬──────────┐
|
||||
│PGD│PUD│PMD│PTE│ Offset │
|
||||
└───┴───┴───┴───┴──────────┘
|
||||
|
||||
4-Level Page Table (x86_64):
|
||||
- PGD: Page Global Directory
|
||||
- PUD: Page Upper Directory
|
||||
- PMD: Page Middle Directory
|
||||
- PTE: Page Table Entry
|
||||
```
|
||||
|
||||
### Memory Mapping Analysis
|
||||
|
||||
**Windows VAD (Virtual Address Descriptor) Trees:**
|
||||
```bash
|
||||
# VAD Tree Analysis
|
||||
python vol.py -f memory.dmp windows.vadinfo.VadInfo --pid 1234
|
||||
|
||||
# Memory Mapping Details
|
||||
python vol.py -f memory.dmp windows.memmap.Memmap --pid 1234
|
||||
```
|
||||
|
||||
**Linux Memory Maps:**
|
||||
```bash
|
||||
# Process Memory Maps
|
||||
python vol.py -f linux.dmp linux.proc_maps.Maps --pid 1234
|
||||
```
|
||||
|
||||
## Cross-Platform Memory Forensics
|
||||
|
||||
### Windows-Specific Artefakte
|
||||
|
||||
**Registry in Memory:**
|
||||
```bash
|
||||
# Registry Hives
|
||||
python vol.py -f memory.dmp windows.registry.hivelist.HiveList
|
||||
|
||||
# Registry Keys
|
||||
python vol.py -f memory.dmp windows.registry.printkey.PrintKey --key "Software\Microsoft\Windows\CurrentVersion\Run"
|
||||
```
|
||||
|
||||
**Windows Event Logs:**
|
||||
```bash
|
||||
# Event Log Analysis: EVTX-Dateien im Speicher lokalisieren und anschließend extrahieren
python vol.py -f memory.dmp windows.filescan | grep -i evtx
python vol.py -f memory.dmp windows.dumpfiles --virtaddr <offset>
|
||||
```
|
||||
|
||||
### Linux-Specific Artefakte
|
||||
|
||||
**Process Environment:**
|
||||
```bash
|
||||
# Environment Variables
|
||||
python vol.py -f linux.dmp linux.envars.Envars
|
||||
|
||||
# Process Arguments
|
||||
python vol.py -f linux.dmp linux.psaux.PsAux
|
||||
```
|
||||
|
||||
**Network Connections:**
|
||||
```bash
|
||||
# Network Sockets
|
||||
python vol.py -f linux.dmp linux.netstat.Netstat
|
||||
```
|
||||
|
||||
### macOS Memory Forensics
|
||||
|
||||
**Darwin Kernel Structures:**
|
||||
```bash
|
||||
# Process List (macOS)
|
||||
python vol.py -f macos.dmp mac.pslist.PsList
|
||||
|
||||
# Network Connections
|
||||
python vol.py -f macos.dmp mac.netstat.Netstat
|
||||
```
|
||||
|
||||
## Live Analysis vs. Dead Analysis
|
||||
|
||||
### Live Memory Analysis
|
||||
|
||||
**Vorteile:**
|
||||
- Vollständige System-Sicht
|
||||
- Kontinuierliche Überwachung möglich
|
||||
- Interaktive Analysis-Möglichkeiten
|
||||
- Integration mit Incident Response
|
||||
|
||||
**Tools für Live Analysis:**
|
||||
- Rekall (Live Mode)
|
||||
- WinDbg (Live Debugging)
|
||||
- GDB (Linux Live Debugging)
|
||||
- Volatility mit Live Memory Plugins
|
||||
|
||||
**Live Analysis Workflow:**
|
||||
```bash
|
||||
# Rekall Live Analysis
|
||||
rekall --live Memory
|
||||
|
||||
# Memory-basierte Malware Detection
|
||||
rekall> pslist
|
||||
rekall> malfind
|
||||
rekall> hollowfind
|
||||
```
|
||||
|
||||
### Dead Memory Analysis
|
||||
|
||||
**Vorteile:**
|
||||
- Stabile Analysis-Umgebung
|
||||
- Reproduzierbare Ergebnisse
|
||||
- Tiefere forensische Untersuchung
|
||||
- Legal-konforme Beweisführung
|
||||
|
||||
**Typical Workflow:**
|
||||
```bash
|
||||
# 1. Memory Dump Analysis
|
||||
python vol.py -f memory.dmp windows.info.Info
|
||||
|
||||
# 2. Process Analysis
|
||||
python vol.py -f memory.dmp windows.pslist.PsList
|
||||
python vol.py -f memory.dmp windows.pstree.PsTree
|
||||
|
||||
# 3. Malware Detection
|
||||
python vol.py -f memory.dmp windows.malfind.Malfind
|
||||
|
||||
# 4. Network Analysis
|
||||
python vol.py -f memory.dmp windows.netstat.NetStat
|
||||
|
||||
# 5. Registry Analysis
|
||||
python vol.py -f memory.dmp windows.registry.hivelist.HiveList
|
||||
```
|
||||
|
||||
## Encrypted Memory Handling
|
||||
|
||||
### Windows BitLocker Memory
|
||||
|
||||
BitLocker-verschlüsselte Systeme stellen besondere Herausforderungen dar:
|
||||
|
||||
**Memory Encryption Bypass:**
|
||||
- Cold Boot Attacks auf Encryption Keys
|
||||
- DMA (Direct Memory Access) Attacks
|
||||
- Hibernation File Analysis
|
||||
|
||||
### Full Memory Encryption (TME)
|
||||
|
||||
Intel Total Memory Encryption (TME) verschlüsselt den gesamten Arbeitsspeicher:
|
||||
|
||||
**Forensic Implications:**
|
||||
- Hardware-basierte Key-Extraktion erforderlich
|
||||
- Firmware-Level-Access notwendig
|
||||
- Acquisition vor Memory-Locking
|
||||
|
||||
## Advanced Analysis Techniken
|
||||
|
||||
### Machine Learning in Memory Forensics
|
||||
|
||||
**Anomaly Detection:**
|
||||
```python
|
||||
# Pseudocode für ML-basierte Process Analysis
|
||||
def detect_process_anomalies(memory_dump):
|
||||
features = extract_process_features(memory_dump)
|
||||
# Features: Memory Permissions, API Calls, Network Connections
|
||||
|
||||
model = load_trained_model('process_anomaly_detection.pkl')
|
||||
anomalies = model.predict(features)
|
||||
|
||||
return anomalies
|
||||
```
|
||||
|
||||
### Timeline Reconstruction
|
||||
|
||||
**Memory-basierte Timeline:**
|
||||
```bash
|
||||
# Memory-Timeline (u. a. Prozess-Erstellungszeiten) über das Timeliner-Plugin
python vol.py -f memory.dmp timeliner.Timeliner

# File-Handles als zusätzliche Timeline-Quelle (nach Objekttyp "File" filtern)
python vol.py -f memory.dmp windows.handles.Handles | grep File
|
||||
```
|
||||
|
||||
### Memory Forensics Automation
|
||||
|
||||
**Automated Analysis Framework:**
|
||||
```python
|
||||
#!/usr/bin/env python3
import subprocess

class MemoryForensicsAutomation:
    def __init__(self, memory_dump):
        self.dump = memory_dump
        self.results = {}

    def run_volatility_plugin(self, plugin_name):
        # Skizze: Volatility 3 als Subprozess aufrufen und die Textausgabe zurückgeben
        cmd = ['python', 'vol.py', '-f', self.dump, plugin_name]
        return subprocess.run(cmd, capture_output=True, text=True).stdout

    def run_baseline_analysis(self):
        # Basic System Information
        self.results['info'] = self.run_volatility_plugin('windows.info.Info')

        # Process Analysis
        self.results['processes'] = self.run_volatility_plugin('windows.pslist.PsList')

        # Malware Detection
        self.results['malware'] = self.run_volatility_plugin('windows.malfind.Malfind')

        # Network Analysis
        self.results['network'] = self.run_volatility_plugin('windows.netstat.NetStat')

        return self.results

    def detect_anomalies(self):
        # Implementierung für automatisierte Anomaly Detection
        pass
|
||||
```
|
||||
|
||||
## Häufige Herausforderungen und Lösungsansätze
|
||||
|
||||
### Anti-Forensic Techniken
|
||||
|
||||
**Memory Wiping:**
|
||||
- Erkennung durch Memory Allocation Patterns
|
||||
- Analyse von Memory Page Timestamps
|
||||
- Reconstruction durch Memory Slack
|
||||
|
||||
**Process Masquerading:**
|
||||
- PE Header Validation
|
||||
- Import Address Table (IAT) Analysis
|
||||
- Code Signing Verification
|
||||
|
||||
**Timing Attacks:**
|
||||
- Memory Acquisition Race Conditions
|
||||
- Process Termination während Acquisition
|
||||
- Kontinuierliche Monitoring-Strategien
|
||||
|
||||
### Performance Optimierung
|
||||
|
||||
**Large Memory Dumps:**
|
||||
```bash
|
||||
# Targeted Analysis: nur relevante Prozesse untersuchen
python vol.py -f memory.dmp windows.pslist.PsList --pid 1234 5678

# "Parallelisierung" durch mehrere gleichzeitige Volatility-Läufe
python vol.py -f memory.dmp windows.pslist.PsList > pslist.txt &
python vol.py -f memory.dmp windows.malfind.Malfind > malfind.txt &
wait
|
||||
```
|
||||
|
||||
**Memory Usage Optimization:**
|
||||
- Streaming Analysis für große Dumps
|
||||
- Indexed Memory Access
|
||||
- Selective Plugin Execution
|
||||
|
||||
## Tools und Framework Integration
|
||||
|
||||
### Volatility 3 Framework
|
||||
|
||||
**Plugin Development:**
|
||||
```python
|
||||
class CustomMalwareDetector(interfaces.plugins.PluginInterface):
|
||||
"""Custom Plugin für Advanced Malware Detection"""
|
||||
|
||||
@classmethod
|
||||
def get_requirements(cls):
|
||||
return [requirements.TranslationLayerRequirement(name='primary'),
|
||||
requirements.SymbolTableRequirement(name="nt_symbols")]
|
||||
|
||||
def run(self):
|
||||
# Implementation der Detection-Logik
|
||||
pass
|
||||
```
|
||||
|
||||
### Integration mit SIEM-Systemen
|
||||
|
||||
**ElasticSearch Integration:**
|
||||
```python
|
||||
from elasticsearch import Elasticsearch

def export_to_elasticsearch(memory_analysis_results):
    es = Elasticsearch(['localhost:9200'])

    for artifact in memory_analysis_results:
        doc = {
            'timestamp': artifact.timestamp,
            'process_name': artifact.process_name,
            'suspicious_score': artifact.score,
            'detection_method': artifact.method
        }
        es.index(index='memory-forensics', body=doc)
|
||||
```
|
||||
|
||||
## Best Practices und Empfehlungen
|
||||
|
||||
### Forensic Methodology
|
||||
|
||||
1. **Preservation First**: Memory Dump Acquisition vor anderen Aktionen
|
||||
2. **Documentation**: Vollständige Dokumentation aller Analysis-Schritte
|
||||
3. **Validation**: Cross-Referencing verschiedener Evidence Sources
|
||||
4. **Chain of Custody**: Lückenlose Beweiskette
|
||||
5. **Reproducibility**: Wiederholbare Analysis-Prozesse
|
||||
|
||||
### Quality Assurance
|
||||
|
||||
**Hash Verification:**
|
||||
```bash
|
||||
# MD5/SHA256 Hashes für Memory Dumps
|
||||
md5sum memory.dmp > memory.dmp.md5
|
||||
sha256sum memory.dmp > memory.dmp.sha256
|
||||
```
|
||||
|
||||
**Analysis Documentation:**
|
||||
```markdown
|
||||
# Memory Forensics Analysis Report
|
||||
|
||||
## System Information
|
||||
- OS Version: Windows 10 Pro 1909
|
||||
- Architecture: x64
|
||||
- Memory Size: 16GB
|
||||
- Acquisition Time: 2024-01-15 14:30:00 UTC
|
||||
|
||||
## Tools Used
|
||||
- Volatility 3.2.0
|
||||
- Rekall 1.7.2
|
||||
- Custom Scripts: malware_detector.py
|
||||
|
||||
## Key Findings
|
||||
1. Process Injection detected in explorer.exe (PID 1234)
|
||||
2. Unknown driver loaded: malicious.sys
|
||||
3. Network connections to suspicious IPs
|
||||
```
|
||||
|
||||
## Fazit
|
||||
|
||||
Memory Forensics stellt ein mächtiges Werkzeug für die Aufdeckung komplexer Angriffe dar, die traditionelle Festplatten-Forensik umgehen. Die kontinuierliche Weiterentwicklung von Angriffstechniken erfordert eine entsprechende Evolution der forensischen Methoden.
|
||||
|
||||
**Zukünftige Entwicklungen:**
|
||||
- Hardware-basierte Memory Protection Bypass
|
||||
- Machine Learning für Automated Threat Detection
|
||||
- Cloud Memory Forensics
|
||||
- Containerized Environment Analysis
|
||||
- Real-time Memory Threat Hunting
|
||||
|
||||
Die Beherrschung von Memory Forensics erfordert ein tiefes Verständnis von Betriebssystem-Internals, Malware-Techniken und forensischen Methoden. Kontinuierliche Weiterbildung und praktische Erfahrung sind essentiell für erfolgreiche Memory-basierte Investigations.
|
||||
|
||||
## Weiterführende Ressourcen
|
||||
|
||||
- **Volatility Labs Blog**: Aktuelle Research zu Memory Forensics
|
||||
- **SANS FOR508**: Advanced Incident Response und Digital Forensics
|
||||
- **Black Hat/DEF CON**: Security Conference Presentations
|
||||
- **Academic Papers**: IEEE Security & Privacy, USENIX Security
|
||||
- **Open Source Tools**: GitHub Repositories für Custom Plugins
|
||||
@@ -1,517 +0,0 @@
|
||||
---
|
||||
title: "Netzwerkprotokoll-Analyse für forensische Untersuchungen"
|
||||
description: "Umfassender Leitfaden zur forensischen Analyse von Netzwerkprotokollen Layer 2-7, Session-Rekonstruktion aus PCAP-Dateien, C2-Kommunikations-Pattern-Erkennung und APT-Hunting-Techniken für Incident Response."
|
||||
author: "Claude 4 Sonnett (Prompt: Mario Stöckl)"
|
||||
last_updated: 2025-08-10
|
||||
difficulty: intermediate
|
||||
categories: ["analysis", "troubleshooting", "case-study"]
|
||||
tags: ["protocol-analysis", "packet-inspection", "session-reconstruction", "c2-analysis", "traffic-patterns", "network-baseline", "payload-extraction", "anomaly-detection", "incident-response", "apt-hunting"]
|
||||
tool_name: "Network Protocols & Packet Analysis"
|
||||
related_tools: ["Wireshark", "NetworkMiner", "tcpdump"]
|
||||
published: true
|
||||
---
|
||||
|
||||
# Netzwerkprotokoll-Analyse für forensische Untersuchungen
|
||||
|
||||
Die forensische Analyse von Netzwerkprotokollen ist ein fundamentaler Baustein moderner Incident Response und APT-Hunting-Aktivitäten. Dieser Leitfaden vermittelt systematische Methoden zur Untersuchung von Netzwerkverkehr von Layer 2 bis Layer 7 des OSI-Modells.
|
||||
|
||||
## Warum Netzwerkprotokoll-Forensik?
|
||||
|
||||
In komplexen Cyberangriffen hinterlassen Angreifer Spuren in der Netzwerkkommunikation, die oft die einzigen verfügbaren Beweise darstellen. Command & Control (C2) Kommunikation, Datenexfiltration und laterale Bewegungen manifestieren sich als charakteristische Netzwerkmuster, die durch systematische Protokoll-Analyse erkennbar werden.
|
||||
|
||||
## Voraussetzungen
|
||||
|
||||
### Technische Kenntnisse
|
||||
- Grundverständnis des OSI-7-Schichten-Modells
|
||||
- TCP/IP-Stack-Funktionsweise
|
||||
- HTTP/HTTPS-Request/Response-Struktur
|
||||
- DNS-Query-Mechanismen
|
||||
- Grundlagen der Kryptographie (TLS/SSL)
|
||||
|
||||
### Systemanforderungen
|
||||
- Wireshark 4.0+ oder vergleichbare Packet-Analyzer
|
||||
- Leistungsfähiges System für große PCAP-Analysen (16GB+ RAM)
|
||||
- NetworkMiner oder ähnliche Session-Rekonstruktions-Tools
|
||||
- Python 3.8+ für Automatisierungsskripte
|
||||
|
||||
### Rechtliche Überlegungen
|
||||
- Erforderliche Genehmigungen für Netzwerk-Monitoring
|
||||
- Datenschutzbestimmungen bei der Payload-Analyse
|
||||
- Chain-of-Custody-Anforderungen für Netzwerk-Evidence
|
||||
|
||||
## Fundamentale Protokoll-Analyse-Methodik
|
||||
|
||||
### Layer 2 - Data Link Layer Forensik
|
||||
|
||||
**Ethernet-Frame-Analyse für Asset-Discovery:**
|
||||
|
||||
```bash
|
||||
# MAC-Adressen-Inventarisierung aus PCAP
|
||||
tshark -r capture.pcap -T fields -e eth.src -e eth.dst | sort -u
|
||||
```
|
||||
|
||||
**Switch-Infrastruktur-Mapping:**
|
||||
- Spanning Tree Protocol (STP) Topologie-Rekonstruktion
|
||||
- VLAN-Segmentierung-Analyse
|
||||
- ARP-Spoofing-Detection durch MAC-IP-Binding-Inkonsistenzen
|
||||
|
||||
**Kritische Anomalien:**
|
||||
- Unerwartete MAC-Präfixe (OUI-Analysis)
|
||||
- ARP-Reply ohne vorhergehende ARP-Request
|
||||
- Broadcast-Storm-Patterns bei DDoS-Aktivitäten
|
||||
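
Der erste Punkt, die OUI-Analyse, braucht nur wenige Zeilen: Das MAC-Präfix (die ersten drei Oktette) wird gegen eine Herstellerliste geprüft. Die folgende Skizze verwendet bewusst nur einen winzigen, selbst gepflegten Auszug; produktiv würde man die offizielle IEEE-OUI-Liste einlesen:

```python
# Beispiel-Zuordnung (Annahme; vor Einsatz gegen die IEEE-Liste verifizieren)
KNOWN_OUIS = {
    "00:50:56": "VMware",
}

def check_mac_oui(mac_address: str) -> str:
    oui = mac_address.upper()[:8]
    return KNOWN_OUIS.get(oui, "UNBEKANNTER_HERSTELLER")

for mac in ("00:50:56:ab:cd:ef", "de:ad:be:ef:00:01"):
    print(mac, "->", check_mac_oui(mac))
```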
|
||||
### Layer 3 - Network Layer Investigation
|
||||
|
||||
**IP-Header-Forensik für Geolocation und Routing:**
|
||||
|
||||
```python
|
||||
# IP-Geolocation-Mapping mit Python
|
||||
import ipaddress
|
||||
from geolite2 import geolite2
|
||||
|
||||
def analyze_ip_origins(pcap_ips):
|
||||
reader = geolite2.reader()
|
||||
for ip in pcap_ips:
|
||||
if not ipaddress.ip_address(ip).is_private:
|
||||
location = reader.get(ip)
|
||||
print(f"{ip}: {location['country']['names']['en']}")
|
||||
```
|
||||
|
||||
**TTL-Fingerprinting für OS-Detection:**
|
||||
- Windows: TTL 128 (ältere Versionen wie Windows 95 teils 32)
|
||||
- Linux/Unix: TTL 64
|
||||
- Cisco/Network-Equipment: TTL 255
|
||||
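
Da jeder Router-Hop den TTL um 1 verringert, schätzt man das Ursprungs-Betriebssystem über den nächstgrößeren typischen Initialwert. Minimal-Skizze:

```python
def guess_os_from_ttl(observed_ttl: int) -> str:
    # nächstgrößerer typischer Initial-TTL: 64 (Linux/Unix), 128 (Windows), 255 (Netzwerkgeräte)
    for initial, guess in ((64, "Linux/Unix"), (128, "Windows"), (255, "Netzwerkgerät")):
        if observed_ttl <= initial:
            return f"{guess} (ca. {initial - observed_ttl} Hops entfernt)"
    return "unbekannt"

print(guess_os_from_ttl(117))  # -> Windows (ca. 11 Hops entfernt)
```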
|
||||
**Fragmentierungs-Analyse:**
|
||||
- Evil Fragmentation für IDS-Evasion
|
||||
- Teardrop-Attack-Patterns
|
||||
- Fragment-Overlap-Anomalien
|
||||
|
||||
### Layer 4 - Transport Layer Forensik
|
||||
|
||||
**TCP-Session-Rekonstruktion:**
|
||||
|
||||
```bash
|
||||
# TCP-Streams extrahieren und analysieren
|
||||
tshark -r capture.pcap -q -z follow,tcp,ascii,0
|
||||
```
|
||||
|
||||
**TCP-Fingerprinting-Techniken:**
|
||||
- Initial Window Size (IWS) Analysis
|
||||
- TCP-Options-Sequenz-Patterns
|
||||
- Maximum Segment Size (MSS) Charakteristika
|
||||
|
||||
**UDP-Traffic-Anomalien:**
|
||||
- DNS-Tunneling über ungewöhnliche Record-Types
|
||||
- VoIP-Protokoll-Missbrauch für Datenexfiltration
|
||||
- TFTP-basierte Malware-Distribution
|
||||
|
||||
## HTTP/HTTPS-Forensik für Web-basierte Angriffe
|
||||
|
||||
### HTTP-Header-Deep-Dive
|
||||
|
||||
**User-Agent-String-Forensik:**
|
||||
```python
|
||||
# Verdächtige User-Agent-Patterns
|
||||
suspicious_agents = [
|
||||
"curl/", # Command-line tools
|
||||
"python-requests", # Scripted access
|
||||
"Nikto", # Vulnerability scanners
|
||||
"sqlmap" # SQL injection tools
|
||||
]
|
||||
```
|
||||
|
||||
**HTTP-Method-Anomalien:**
|
||||
- PUT/DELETE-Requests auf produktiven Servern
|
||||
- TRACE-Method für XSS-Exploitation
|
||||
- Nicht-standard Methods (PATCH, OPTIONS) Analysis
|
||||
|
||||
**Content-Type-Diskrepanzen:**
|
||||
- Executable-Content mit image/jpeg MIME-Type
|
||||
- JavaScript-Code in PDF-Dateien
|
||||
- Suspicious Content-Length vs. Actual-Payload-Size
|
||||
|
||||
### HTTPS-Traffic-Analysis ohne Decryption
|
||||
|
||||
**TLS-Handshake-Fingerprinting:**
|
||||
```bash
|
||||
# TLS-Version und Cipher-Suite-Analyse
|
||||
tshark -r capture.pcap -Y "tls.handshake.type == 1" \
|
||||
-T fields -e tls.handshake.version -e tls.handshake.ciphersuites
|
||||
```
|
||||
|
||||
**Certificate-Chain-Investigation:**
|
||||
- Self-signed Certificate-Anomalien
|
||||
- Certificate-Transparency-Log-Validation
|
||||
- Subject Alternative Name (SAN) Missbrauch
|
||||
|
||||
**Encrypted-Traffic-Patterns:**
|
||||
- Packet-Size-Distribution-Analysis
|
||||
- Inter-arrival-Time-Patterns
|
||||
- Burst-Communication vs. Steady-State-Traffic
|
||||
|
||||
## DNS-Forensik und Tunneling-Detection
|
||||
|
||||
### DNS-Query-Pattern-Analysis
|
||||
|
||||
**DNS-Tunneling-Indicators:**
|
||||
```python
|
||||
# DNS-Query-Length-Distribution-Analysis
|
||||
def analyze_dns_queries(pcap_file):
|
||||
queries = extract_dns_queries(pcap_file)
|
||||
avg_length = sum(len(q) for q in queries) / len(queries)
|
||||
|
||||
# Normal DNS: 15-30 chars, Tunneling: 50+ chars
|
||||
if avg_length > 50:
|
||||
return "POTENTIAL_TUNNELING"
|
||||
```
|
||||
|
||||
**Subdomain-Enumeration-Detection:**
|
||||
- Excessive NXDOMAIN-Responses
|
||||
- Sequential-Subdomain-Queries
|
||||
- High-Entropy-Subdomain-Names
|
||||
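
Der letzte Indikator lässt sich quantifizieren: Zufällig erzeugte Labels haben eine deutlich höhere Shannon-Entropie als natürlich gewachsene Hostnamen. Skizze (der Schwellwert 3,5 ist eine Daumenregel und muss gegen eigene Baseline-Daten kalibriert werden):

```python
import math
from collections import Counter

def label_entropy(label: str) -> float:
    counts = Counter(label)
    total = len(label)
    return -sum((c / total) * math.log2(c / total) for c in counts.values())

def is_high_entropy_subdomain(fqdn: str, threshold: float = 3.5) -> bool:
    subdomain = fqdn.split(".")[0]
    return len(subdomain) >= 10 and label_entropy(subdomain) > threshold

print(is_high_entropy_subdomain("x7f9q2kd8z3v1.example.com"))  # True
print(is_high_entropy_subdomain("mail.example.com"))           # False
```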
|
||||
**DNS-over-HTTPS (DoH) Investigation:**
|
||||
- DoH-Provider-Identification (Cloudflare, Google, Quad9)
|
||||
- Encrypted-DNS-vs-Clear-DNS-Ratio-Analysis
|
||||
- Bootstrap-DNS-Query-Patterns
|
||||
|
||||
## Command & Control (C2) Communication-Patterns
|
||||
|
||||
### C2-Channel-Identification
|
||||
|
||||
**HTTP-basierte C2-Kommunikation:**
|
||||
```bash
|
||||
# Beaconing-Pattern-Detection
|
||||
tshark -r capture.pcap -T fields -e frame.time_epoch -e ip.dst -Y "http" | \
  awk '{ if (t[$2]) printf "%s %.1f\n", $2, $1 - t[$2]; t[$2] = $1 }'
# Ausgabe: Ziel-IP und Abstand zum vorherigen Request in Sekunden;
# annähernd konstante Intervalle deuten auf Beaconing hin
|
||||
```
|
||||
|
||||
**Timing-Analysis für Beaconing:**
|
||||
- Jitter-Analyse bei Sleep-Intervallen
|
||||
- Callback-Frequency-Patterns
|
||||
- Network-Outage-Response-Behavior
|
||||
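
Beaconing äußert sich in nahezu konstanten Abständen zwischen Verbindungen zum selben Ziel. Die folgende Skizze bewertet deshalb pro Ziel-IP den Variationskoeffizienten der Inter-Arrival-Zeiten; Eingabe ist eine Liste von (Epoch-Zeitstempel, Ziel-IP)-Tupeln, wie sie sich etwa aus der obigen tshark-Ausgabe gewinnen lässt:

```python
import statistics
from collections import defaultdict

def score_beaconing(events, min_connections=5):
    # events: Liste von (epoch_seconds, dst_ip)
    by_dst = defaultdict(list)
    for ts, dst in sorted(events):
        by_dst[dst].append(ts)

    scores = {}
    for dst, times in by_dst.items():
        if len(times) < min_connections:
            continue
        intervals = [b - a for a, b in zip(times, times[1:])]
        mean = statistics.mean(intervals)
        if mean == 0:
            continue
        # Variationskoeffizient: kleine Werte = sehr regelmäßige Intervalle = verdächtig
        scores[dst] = statistics.pstdev(intervals) / mean
    return dict(sorted(scores.items(), key=lambda kv: kv[1]))
```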
|
||||
**Payload-Obfuscation-Techniques:**
|
||||
- Base64-encoded Commands in HTTP-Bodies
|
||||
- Steganographie in Bilddateien
|
||||
- JSON/XML-Structure-Abuse für Command-Transport
|
||||
|
||||
### Advanced Persistent Threat (APT) Network-Signatures
|
||||
|
||||
**Long-Duration-Connection-Analysis:**
|
||||
```python
# Langzeit-Verbindungs-Identifikation
from datetime import timedelta

def find_persistent_connections(tcp_sessions):
    for session in tcp_sessions:
        duration = session.end_time - session.start_time
        if duration > timedelta(hours=24):
            analyze_session_behavior(session)
```
|
||||
|
||||
**Multi-Stage-Payload-Delivery:**
|
||||
- Initial-Compromise-Vector-Analysis
|
||||
- Secondary-Payload-Download-Patterns
|
||||
- Lateral-Movement-Network-Signatures
|
||||
|
||||
## Protokoll-Anomalie-Detection-Algorithmen
|
||||
|
||||
### Statistical-Baseline-Establishment
|
||||
|
||||
**Traffic-Volume-Baselines:**
|
||||
```python
|
||||
# Netzwerk-Baseline-Erstellung
|
||||
def establish_baseline(historical_data):
|
||||
baseline = {
|
||||
'avg_bandwidth': calculate_average_bps(historical_data),
|
||||
'peak_hours': identify_peak_traffic_windows(historical_data),
|
||||
'protocol_distribution': analyze_protocol_ratios(historical_data)
|
||||
}
|
||||
return baseline
|
||||
```
|
||||
|
||||
**Port-Usage-Pattern-Analysis:**
|
||||
- Unexpected-Port-Combinations
|
||||
- High-Port-Range-Communication (> 32768)
|
||||
- Service-Port-Mismatches (HTTP on Port 443 without TLS)
|
||||
|
||||
### Machine-Learning-Enhanced-Detection
|
||||
|
||||
**Traffic-Classification-Models:**
|
||||
- Protocol-Identification via Payload-Analysis
|
||||
- Encrypted-Traffic-Classification
|
||||
- Anomaly-Score-Calculation für Unknown-Traffic
|
||||
|
||||
## Session-Rekonstruktion und Payload-Extraktion
|
||||
|
||||
### TCP-Stream-Reassembly
|
||||
|
||||
**Bidirectional-Communication-Timeline:**
|
||||
```bash
|
||||
# Vollständige Session-Rekonstruktion
|
||||
mkdir session_analysis
|
||||
cd session_analysis
|
||||
|
||||
# TCP-Streams einzeln extrahieren
|
||||
for stream in $(tshark -r ../capture.pcap -T fields -e tcp.stream | sort -u); do
|
||||
tshark -r ../capture.pcap -q -z follow,tcp,raw,$stream > stream_$stream.raw
|
||||
done
|
||||
```
|
||||
|
||||
**File-Carving aus Network-Streams:**
|
||||
- HTTP-File-Download-Reconstruction
|
||||
- Email-Attachment-Extraction via SMTP/POP3
|
||||
- FTP-Data-Channel-File-Recovery
|
||||
|
||||
### Application-Layer-Protocol-Parsing
|
||||
|
||||
**Custom-Protocol-Analysis:**
|
||||
```python
|
||||
# Proprietary-Protocol-Reverse-Engineering
import struct

def analyze_custom_protocol(payload):
|
||||
# Header-Structure-Identification
|
||||
if len(payload) > 8:
|
||||
magic_bytes = payload[:4]
|
||||
length_field = struct.unpack('>I', payload[4:8])[0]
|
||||
|
||||
if validate_structure(magic_bytes, length_field, payload):
|
||||
return parse_protocol_fields(payload)
|
||||
```
|
||||
|
||||
## Verschlüsselte Protokoll-Forensik
|
||||
|
||||
### TLS/SSL-Traffic-Analysis
|
||||
|
||||
**Certificate-Chain-Validation:**
|
||||
```bash
|
||||
# Certificate-Extraktion aus PCAP
|
||||
tshark -r capture.pcap -Y "tls.handshake.certificate" \
|
||||
-T fields -e tls.handshake.certificate > certificates.hex
|
||||
|
||||
# Certificate-Parsing
|
||||
xxd -r -p certificates.hex | openssl x509 -inform DER -text
|
||||
```
|
||||
|
||||
**TLS-Version-Downgrade-Attacks:**
|
||||
- Forced-SSLv3-Negotiation-Detection
|
||||
- Weak-Cipher-Suite-Selection-Patterns
|
||||
- Certificate-Pinning-Bypass-Indicators
|
||||
|
||||
### VPN-Traffic-Characterization
|
||||
|
||||
**VPN-Protocol-Identification:**
|
||||
- OpenVPN: UDP Port 1194, specific packet-patterns
|
||||
- IPSec: ESP (Protocol 50), IKE (UDP 500)
|
||||
- WireGuard: UDP mit characteristic handshake-patterns
|
||||
|
||||
**VPN-Tunnel-Analysis:**
|
||||
```python
|
||||
# VPN-Endpoint-Discovery
|
||||
def identify_vpn_endpoints(pcap_data):
|
||||
potential_endpoints = []
|
||||
for packet in pcap_data:
|
||||
if detect_vpn_signature(packet):
|
||||
potential_endpoints.append(packet.src_ip)
|
||||
return analyze_endpoint_patterns(potential_endpoints)
|
||||
```
|
||||
|
||||
## Häufige Herausforderungen und Troubleshooting
|
||||
|
||||
### Performance-Optimierung bei großen PCAP-Dateien
|
||||
|
||||
**Memory-Management:**
|
||||
```bash
|
||||
# Große PCAP-Dateien in kleinere Segmente aufteilen
|
||||
editcap -c 100000 large_capture.pcap segment.pcap
|
||||
|
||||
# Zeitbasierte Segmentierung
|
||||
editcap -A "2024-01-01 00:00:00" -B "2024-01-01 01:00:00" \
|
||||
large_capture.pcap hour_segment.pcap
|
||||
```
|
||||
|
||||
**Selective-Filtering:**
|
||||
```bash
|
||||
# Nur relevanten Traffic extrahieren
|
||||
tshark -r large_capture.pcap -w filtered.pcap \
|
||||
-Y "ip.addr == 192.168.1.100 or dns or http"
|
||||
```
|
||||
|
||||
### False-Positive-Reduction
|
||||
|
||||
**Legitimate-Traffic-Whitelisting:**
|
||||
- Corporate-Application-Signatures
|
||||
- Known-Good-Certificate-Authorities
|
||||
- Approved-Remote-Access-Solutions
|
||||
|
||||
**Context-Aware-Analysis:**
|
||||
```python
|
||||
# Business-Context-Integration
|
||||
def validate_alert(network_event, business_context):
|
||||
if is_maintenance_window(network_event.timestamp):
|
||||
return False
|
||||
if is_authorized_admin(network_event.source_ip):
|
||||
return validate_admin_action(network_event)
|
||||
return True
|
||||
```
|
||||
|
||||
## Praktische Anwendungsszenarien
|
||||
|
||||
### Szenario 1: Data Exfiltration Detection
|
||||
|
||||
**Ausgangslage:** Verdacht auf Datendiebstahl aus dem Unternehmensnetzwerk
|
||||
|
||||
**Analyse-Workflow:**
|
||||
1. **Baseline-Establishment:** Normale ausgehende Datenvolumen ermitteln
|
||||
2. **Spike-Detection:** Ungewöhnlich hohe Upload-Aktivitäten identifizieren
|
||||
3. **Destination-Analysis:** Externe Ziele der Datenübertragungen
|
||||
4. **Content-Classification:** Art der übertragenen Daten (soweit möglich)
|
||||
|
||||
```bash
|
||||
# Ausgehende Datenvolumen-Analyse
|
||||
tshark -r capture.pcap -q -z io,stat,300 \
|
||||
-Y "ip.src == 192.168.0.0/16 and ip.dst != 192.168.0.0/16"
|
||||
```
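
Die Intervall-Statistik aus dem obigen Kommando lässt sich anschließend automatisiert gegen eine Baseline prüfen. Skizze (Annahme: die ausgehenden Byte-Summen pro 5-Minuten-Intervall liegen bereits als Liste vor):

```python
import statistics

def find_upload_spikes(bytes_per_interval, z_threshold=2.5):
    # markiert Intervalle, die mehr als z_threshold Standardabweichungen
    # über dem Mittelwert der Beobachtungsperiode liegen
    mean = statistics.mean(bytes_per_interval)
    stdev = statistics.pstdev(bytes_per_interval)
    if stdev == 0:
        return []
    return [(i, vol) for i, vol in enumerate(bytes_per_interval)
            if (vol - mean) / stdev > z_threshold]

# Beispiel: das Intervall mit Index 6 wird gemeldet
print(find_upload_spikes([120, 90, 150, 110, 95, 130, 9000, 105]))
```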
|
||||
|
||||
### Szenario 2: APT-Lateral-Movement-Investigation
|
||||
|
||||
**Ausgangslage:** Kompromittierter Host, Verdacht auf laterale Bewegung
|
||||
|
||||
**Detection-Methoden:**
|
||||
- SMB-Authentication-Patterns (Pass-the-Hash-Attacks)
|
||||
- RDP-Session-Establishment-Chains
|
||||
- WMI/PowerShell-Remote-Execution-Signatures
|
||||
|
||||
```python
|
||||
# Lateral-Movement-Timeline-Construction
|
||||
def construct_movement_timeline(network_data):
|
||||
timeline = []
|
||||
for connection in extract_internal_connections(network_data):
|
||||
if detect_admin_protocols(connection):
|
||||
timeline.append({
|
||||
'timestamp': connection.start_time,
|
||||
'source': connection.src_ip,
|
||||
'target': connection.dst_ip,
|
||||
'protocol': connection.protocol,
|
||||
'confidence': calculate_suspicion_score(connection)
|
||||
})
|
||||
return sort_by_timestamp(timeline)
|
||||
```
|
||||
|
||||
### Szenario 3: Malware C2 Communication Analysis
|
||||
|
||||
**Ausgangslage:** Identifizierte Malware-Infection, C2-Channel-Mapping erforderlich
|
||||
|
||||
**Systematic C2-Analysis:**
|
||||
1. **Beaconing-Pattern-Identification**
|
||||
2. **C2-Server-Geolocation**
|
||||
3. **Command-Structure-Reverse-Engineering**
|
||||
4. **Kill-Chain-Reconstruction**
|
||||
|
||||
```bash
|
||||
# C2-Communication-Timeline
|
||||
tshark -r malware_capture.pcap -T fields \
|
||||
-e frame.time -e ip.src -e ip.dst -e tcp.dstport \
|
||||
-Y "ip.src == <infected_host>" | \
|
||||
awk '{print $1, $4}' | sort | uniq -c
|
||||
```
|
||||
|
||||
## Erweiterte Analyse-Techniken
|
||||
|
||||
### Protocol-State-Machine-Analysis
|
||||
|
||||
**TCP-State-Tracking:**
|
||||
```python
|
||||
class TCPStateAnalyzer:
|
||||
def __init__(self):
|
||||
self.connections = {}
|
||||
|
||||
def process_packet(self, packet):
|
||||
key = (packet.src_ip, packet.src_port, packet.dst_ip, packet.dst_port)
|
||||
|
||||
if key not in self.connections:
|
||||
self.connections[key] = TCPConnection()
|
||||
|
||||
conn = self.connections[key]
|
||||
conn.update_state(packet.tcp_flags)
|
||||
|
||||
if conn.is_anomalous():
|
||||
self.flag_suspicious_connection(key, conn)
|
||||
```
|
||||
|
||||
**Application-Protocol-State-Validation:**
|
||||
- HTTP-Request/Response-Pairing-Validation
|
||||
- DNS-Query/Response-Correlation
|
||||
- SMTP-Session-Command-Sequence-Analysis
|
||||
|
||||
### Geospatial-Network-Analysis
|
||||
|
||||
**IP-Geolocation-Correlation:**
|
||||
```python
|
||||
# Geographische Anomalie-Detection
|
||||
def detect_geographic_anomalies(connections):
|
||||
for conn in connections:
|
||||
src_country = geolocate_ip(conn.src_ip)
|
||||
dst_country = geolocate_ip(conn.dst_ip)
|
||||
|
||||
if calculate_distance(src_country, dst_country) > 10000: # km
|
||||
if not is_known_global_service(conn.dst_ip):
|
||||
flag_suspicious_connection(conn)
|
||||
```
|
||||
|
||||
## Automatisierung und Tool-Integration
|
||||
|
||||
### SIEM-Integration
|
||||
|
||||
**Log-Format-Standardization:**
|
||||
```python
|
||||
# Network-Events zu SIEM-Format
|
||||
def convert_to_siem_format(network_event):
|
||||
return {
|
||||
'timestamp': network_event.time_iso,
|
||||
'event_type': 'network_connection',
|
||||
'source_ip': network_event.src_ip,
|
||||
'destination_ip': network_event.dst_ip,
|
||||
'protocol': network_event.protocol,
|
||||
'risk_score': calculate_risk_score(network_event),
|
||||
'indicators': extract_iocs(network_event)
|
||||
}
|
||||
```
|
||||
|
||||
### Threat-Intelligence-Integration
|
||||
|
||||
**IOC-Matching:**
|
||||
```bash
|
||||
# Threat-Feed-Integration
|
||||
curl -s "https://threatfeed.example.com/api/ips" | \
|
||||
tee threat_ips.txt
|
||||
|
||||
tshark -r capture.pcap -T fields -e ip.dst | \
|
||||
sort -u | \
|
||||
grep -f threat_ips.txt
|
||||
```
|
||||
|
||||
## Nächste Schritte und Vertiefung
|
||||
|
||||
### Weiterführende Analyse-Techniken
|
||||
- **Behavioral-Analysis:** Machine-Learning-basierte Anomalie-Detection
|
||||
- **Graph-Analysis:** Netzwerk-Relationship-Mapping
|
||||
- **Temporal-Analysis:** Time-Series-basierte Pattern-Recognition
|
||||
|
||||
### Spezialisierung-Richtungen
|
||||
- **Cloud-Network-Forensics:** AWS VPC Flow Logs, Azure NSG Analysis
|
||||
- **IoT-Network-Analysis:** Constrained-Device-Communication-Patterns
|
||||
- **Industrial-Network-Security:** SCADA/Modbus-Protocol-Forensics
|
||||
|
||||
### Tool-Ecosystem-Erweiterung
|
||||
- **Zeek (Bro):** Scriptable Network Security Monitor
|
||||
- **Suricata:** IDS/IPS mit Network-Forensik-Capabilities
|
||||
- **Arkime (früher Moloch):** Full-Packet-Capture und Search-Platform
|
||||
|
||||
Die systematische Netzwerkprotokoll-Analyse bildet das Fundament moderner Cyber-Forensik. Durch die Kombination von Deep-Protocol-Knowledge, statistischer Analyse und Threat-Intelligence entsteht ein mächtiges Arsenal für die Aufdeckung und Untersuchung von Cyberangriffen.
|
||||
|
||||
**Empfohlene Übungen:**
|
||||
1. Analysieren Sie einen selbst erzeugten Netzwerk-Capture mit bekanntem "böswilligem" Traffic
|
||||
2. Implementieren Sie ein automatisiertes C2-Detection-Script
|
||||
3. Führen Sie eine komplette APT-Simulation durch und dokumentieren Sie die Netzwerk-Artefakte
|
||||
|
||||
Die kontinuierliche Weiterentwicklung von Angriffstechniken erfordert permanente Aktualisierung der Analyse-Methoden. Bleiben Sie über aktuelle Threat-Research und neue Protocol-Exploitation-Techniques informiert.
|
||||
@@ -1,556 +0,0 @@
|
||||
---
|
||||
title: "Regular Expressions in der Digitalen Forensik: Vom Grundmuster zur Beweisextraktion"
|
||||
description: "Umfassender Leitfaden für Regex-Anwendungen in der forensischen Analyse: IP-Adressen, E-Mails, Hashes und komplexe Logparser-Patterns für effiziente Beweissammlung"
|
||||
author: "Claude 4 Sonnett (Prompt: Mario Stöckl)"
|
||||
last_updated: 2025-08-10
|
||||
difficulty: intermediate
|
||||
categories: ["analysis", "automation", "log-analysis"]
|
||||
tags: ["regex", "pattern-matching", "log-analysis", "data-extraction", "text-processing", "automation", "yara-rules", "grep", "powershell", "python"]
|
||||
tool_name: "Regular Expressions (Regex)"
|
||||
related_tools: ["YARA", "Grep", "PowerShell", "Python"]
|
||||
published: true
|
||||
---
|
||||
|
||||
# Regular Expressions in der Digitalen Forensik: Vom Grundmuster zur Beweisextraktion
|
||||
|
||||
Regular Expressions (Regex) sind das Schweizer Taschenmesser der digitalen Forensik. Diese universelle Mustererkennungssprache ermöglicht es Forensikern, komplexe Textsuchen durchzuführen, relevante Daten aus Terabytes von Logs zu extrahieren und Beweise systematisch zu identifizieren. Von der einfachen IP-Adressen-Suche bis zur komplexen Malware-Signaturerstellung - Regex-Kenntnisse unterscheiden oft einen guten von einem großartigen Forensiker.
|
||||
|
||||
## Warum Regex in der Forensik unverzichtbar ist
|
||||
|
||||
In modernen Untersuchungen konfrontieren uns massive Datenmengen: Gigabytes von Logfiles, Speicherabbilder, Netzwerkverkehr und Dateisysteme mit Millionen von Einträgen. Manuelle Durchsuchung ist unmöglich - hier kommt Regex ins Spiel:
|
||||
|
||||
- **Präzise Mustersuche**: Findet spezifische Datenformate (IP-Adressen, E-Mails, Hashes) in unstrukturierten Texten
|
||||
- **Automatisierung**: Ermöglicht Skripterstellung für wiederkehrende Analysemuster
|
||||
- **Tool-Integration**: Kernfunktionalität in allen Major-Forensik-Tools
|
||||
- **Effizienzsteigerung**: Reduziert Analysezeit von Stunden auf Minuten
|
||||
|
||||
## Forensik-relevante Regex-Grundlagen
|
||||
|
||||
### Grundlegende Metacharakter
|
||||
|
||||
```regex
|
||||
. # Beliebiges Zeichen (außer Newline)
|
||||
* # 0 oder mehr Wiederholungen des vorherigen Elements
|
||||
+ # 1 oder mehr Wiederholungen
|
||||
? # 0 oder 1 Wiederholung (optional)
|
||||
^ # Zeilenanfang
|
||||
$ # Zeilenende
|
||||
[] # Zeichenklasse
|
||||
() # Gruppierung
|
||||
| # ODER-Verknüpfung
|
||||
\ # Escape-Zeichen
|
||||
```
|
||||
|
||||
### Quantifizierer für präzise Treffer
|
||||
|
||||
```regex
|
||||
{n} # Exakt n Wiederholungen
|
||||
{n,} # Mindestens n Wiederholungen
|
||||
{n,m} # Zwischen n und m Wiederholungen
|
||||
{,m} # Maximal m Wiederholungen
|
||||
```
|
||||
|
||||
### Zeichenklassen für strukturierte Daten
|
||||
|
||||
```regex
|
||||
\d # Ziffer (0-9)
|
||||
\w # Wort-Zeichen (a-z, A-Z, 0-9, _)
|
||||
\s # Whitespace (Leerzeichen, Tab, Newline)
|
||||
\D # Nicht-Ziffer
|
||||
\W # Nicht-Wort-Zeichen
|
||||
\S # Nicht-Whitespace
|
||||
[a-z] # Kleinbuchstaben
|
||||
[A-Z] # Großbuchstaben
|
||||
[0-9] # Ziffern
|
||||
[^abc] # Alles außer a, b, c
|
||||
```
|
||||
|
||||
## Forensische Standardmuster
|
||||
|
||||
### IP-Adressen (IPv4)
|
||||
|
||||
```regex
|
||||
# Basis-Pattern (weniger präzise)
|
||||
\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}
|
||||
|
||||
# Präzise IPv4-Validierung
|
||||
^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$
|
||||
|
||||
# Praktisches Pattern für Log-Analyse
|
||||
(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)
|
||||
```
|
||||
|
||||
**Anwendungsbeispiel**: Extraktion aller IP-Adressen aus IIS-Logs:
|
||||
```bash
|
||||
# -P (PCRE) ist nötig: -E (ERE) unterstützt weder \d noch (?: )
grep -oP '(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)' access.log | sort | uniq -c | sort -nr
|
||||
```
|
||||
|
||||
### E-Mail-Adressen
|
||||
|
||||
```regex
|
||||
# Einfaches Pattern für schnelle Suche
|
||||
[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}
|
||||
|
||||
# RFC-konforme E-Mail (vereinfacht)
|
||||
^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$
|
||||
|
||||
# Für Forensik optimiert (weniger strikt)
|
||||
\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b
|
||||
```
|
||||
|
||||
### Hash-Werte
|
||||
|
||||
```regex
|
||||
# MD5 (32 Hexadezimalzeichen)
|
||||
\b[a-fA-F0-9]{32}\b
|
||||
|
||||
# SHA-1 (40 Hexadezimalzeichen)
|
||||
\b[a-fA-F0-9]{40}\b
|
||||
|
||||
# SHA-256 (64 Hexadezimalzeichen)
|
||||
\b[a-fA-F0-9]{64}\b
|
||||
|
||||
# Universelles Hash-Pattern
|
||||
\b[a-fA-F0-9]{32,64}\b
|
||||
```
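
In der Praxis will man gefundene Hash-Strings gleich nach Typ einsortieren; das universelle Pattern plus eine Längen-Zuordnung genügt dafür:

```python
import re

HASH_PATTERN = re.compile(r'\b[a-fA-F0-9]{32,64}\b')
HASH_TYPES = {32: "MD5", 40: "SHA-1", 64: "SHA-256"}

def classify_hashes(text):
    results = {}
    for match in HASH_PATTERN.findall(text):
        hash_type = HASH_TYPES.get(len(match))
        if hash_type:  # andere Längen ignorieren
            results.setdefault(hash_type, set()).add(match.lower())
    return results

sample = "d41d8cd98f00b204e9800998ecf8427e und da39a3ee5e6b4b0d3255bfef95601890afd80709"
print(classify_hashes(sample))
```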
|
||||
|
||||
### Bitcoin-Adressen
|
||||
|
||||
```regex
|
||||
# Legacy Bitcoin-Adressen (P2PKH und P2SH)
|
||||
\b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b
|
||||
|
||||
# Bech32 (SegWit) Adressen
|
||||
\bbc1[a-z0-9]{39,59}\b
|
||||
|
||||
# Kombiniert
|
||||
\b(?:[13][a-km-zA-HJ-NP-Z1-9]{25,34}|bc1[a-z0-9]{39,59})\b
|
||||
```
|
||||
|
||||
### Windows-Dateipfade
|
||||
|
||||
```regex
|
||||
# Vollständiger Windows-Pfad
|
||||
^[a-zA-Z]:\\(?:[^\\/:*?"<>|\r\n]+\\)*[^\\/:*?"<>|\r\n]*$
|
||||
|
||||
# UNC-Pfade
|
||||
^\\\\[^\\]+\\[^\\]+(?:\\[^\\]*)*$
|
||||
|
||||
# Für Log-Parsing (flexibler)
|
||||
[a-zA-Z]:\\[^"\s<>|]*
|
||||
```
|
||||
|
||||
### Kreditkartennummern
|
||||
|
||||
```regex
|
||||
# Visa (13-19 Ziffern, beginnt mit 4)
|
||||
4[0-9]{12,18}
|
||||
|
||||
# MasterCard (16 Ziffern, beginnt mit 51-55)
|
||||
5[1-5][0-9]{14}
|
||||
|
||||
# American Express (15 Ziffern, beginnt mit 34 oder 37)
|
||||
3[47][0-9]{13}
|
||||
|
||||
# Universell (mit optionalen Trennzeichen)
|
||||
(?:\d{4}[-\s]?){3,4}\d{4}
|
||||
```
|
||||
|
||||
## Tool-spezifische Regex-Implementierungen
|
||||
|
||||
### PowerShell-Integration
|
||||
|
||||
```powershell
|
||||
# Suche nach IP-Adressen in Eventlogs
|
||||
Get-WinEvent -LogName Security | Where-Object {
|
||||
$_.Message -match '\b(?:\d{1,3}\.){3}\d{1,3}\b'
|
||||
} | Select-Object TimeCreated, Id, Message
|
||||
|
||||
# E-Mail-Extraktion aus Speicherabbild
|
||||
Select-String -Path "memdump.raw" -Pattern '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}' -AllMatches
|
||||
|
||||
# Hash-Werte aus Malware-Samples
|
||||
Get-ChildItem -Recurse | Get-FileHash | Where-Object {
|
||||
$_.Hash -match '^[a-fA-F0-9]{64}$'
|
||||
}
|
||||
```
|
||||
|
||||
### Grep-Anwendungen
|
||||
|
||||
```bash
|
||||
# Verdächtige ausführbare Dateien
|
||||
grep -r -E '\.(exe|dll|scr|bat|cmd)$' /mnt/evidence/
|
||||
|
||||
# Zeitstempel-Extraktion (ISO 8601)
|
||||
grep -oP '\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}' application.log
|
||||
|
||||
# Base64-kodierte Daten
|
||||
grep -oE '[A-Za-z0-9+/]{20,}={0,2}' suspicious.txt
|
||||
|
||||
# Windows-Ereignis-IDs
|
||||
grep -E 'Event ID: (4624|4625|4648|4656)' security.log
|
||||
```
|
||||
|
||||
### Python-Implementierung
|
||||
|
||||
```python
|
||||
import re
|
||||
import hashlib
|
||||
|
||||
# IP-Adressen mit Kontext extrahieren
|
||||
def extract_ips_with_context(text, context_chars=50):
|
||||
ip_pattern = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
|
||||
matches = []
|
||||
|
||||
for match in re.finditer(ip_pattern, text):
|
||||
start = max(0, match.start() - context_chars)
|
||||
end = min(len(text), match.end() + context_chars)
|
||||
context = text[start:end]
|
||||
matches.append({
|
||||
'ip': match.group(),
|
||||
'position': match.start(),
|
||||
'context': context
|
||||
})
|
||||
|
||||
return matches
|
||||
|
||||
# Malware-Signaturen generieren
|
||||
def generate_yara_strings(binary_data, min_length=10):
|
||||
# Suche nach druckbaren ASCII-Strings
|
||||
ascii_pattern = rb'[ -~]{' + str(min_length).encode() + rb',}'
|
||||
strings = re.findall(ascii_pattern, binary_data)
|
||||
|
||||
yara_strings = []
|
||||
for i, string in enumerate(strings[:20]): # Erste 20 Strings
|
||||
# Escape problematische Zeichen
|
||||
escaped = string.decode('ascii').replace('\\', '\\\\').replace('"', '\\"')
|
||||
yara_strings.append(f'$s{i} = "{escaped}"')
|
||||
|
||||
return yara_strings
|
||||
```
|
||||
|
||||
## YARA-Rules mit Regex
|
||||
|
||||
```yara
|
||||
rule SuspiciousEmailPattern {
|
||||
strings:
|
||||
$email = /[a-zA-Z0-9._%+-]+@(tempmail|guerrillamail|10minutemail)\.(com|net|org)/ nocase
|
||||
$bitcoin = /\b[13][a-km-zA-HJ-NP-Z1-9]{25,34}\b/
|
||||
$ransom_msg = /your files have been encrypted/i
|
||||
|
||||
condition:
|
||||
$email and ($bitcoin or $ransom_msg)
|
||||
}
|
||||
|
||||
rule LogAnalysisPattern {
|
||||
strings:
|
||||
$failed_login = /Failed login.*from\s+(\d{1,3}\.){3}\d{1,3}/
|
||||
$brute_force = /authentication failure.*rhost=(\d{1,3}\.){3}\d{1,3}/
|
||||
$suspicious_ua = /User-Agent:.*(?:sqlmap|nikto|nmap|masscan)/i
|
||||
|
||||
condition:
|
||||
any of them
|
||||
}
|
||||
```
|
||||
|
||||
## Performance-Optimierung und Fallstricke
|
||||
|
||||
### Catastrophic Backtracking vermeiden
|
||||
|
||||
**Problematisch**:
|
||||
```regex
|
||||
(a+)+b # Exponentieller Zeitverbrauch bei "aaaa...c"
|
||||
(.*)* # Verschachtelte Quantifizierer
|
||||
```
|
||||
|
||||
**Optimiert**:
|
||||
```regex
|
||||
a+b              # Verschachtelung aufgelöst; keine exponentiellen Backtracking-Pfade mehr
|
||||
[^b]*b # Negierte Zeichenklasse statt .*
|
||||
```
|
||||
|
||||
### Anker für Effizienz nutzen
|
||||
|
||||
```regex
|
||||
# Langsam
|
||||
\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}
|
||||
|
||||
# Schneller mit Wortgrenzen
|
||||
\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b
|
||||
|
||||
# Am schnellsten für Zeilensuche
|
||||
^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$
|
||||
```
|
||||
|
||||
### Compiled Patterns verwenden
|
||||
|
||||
```python
|
||||
import re
|
||||
|
||||
# Einmal kompilieren, oft verwenden
|
||||
ip_pattern = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b')
|
||||
email_pattern = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
|
||||
|
||||
def analyze_log_file(filepath):
|
||||
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||
content = f.read()
|
||||
|
||||
ips = ip_pattern.findall(content)
|
||||
emails = email_pattern.findall(content)
|
||||
|
||||
return ips, emails
|
||||
```
|
||||
|
||||
## Praktische Forensik-Szenarien
|
||||
|
||||
### Incident Response: Lateral Movement Detection
|
||||
|
||||
```bash
|
||||
# Suche nach PsExec-Aktivitäten
|
||||
grep -E 'PSEXESVC.*started|PsExec.*\\\\[^\\]+\\' security.log
|
||||
|
||||
# Pass-the-Hash Angriffe
|
||||
grep -E 'Logon Type:\s+9.*NTLM.*[0-9a-fA-F]{32}' security.log
|
||||
|
||||
# WMI-basierte Ausführung
|
||||
grep -E 'WmiPrvSE.*ExecuteShellCommand|wmic.*process.*call.*create' system.log
|
||||
```
|
||||
|
||||
### Malware-Analyse: C2-Kommunikation
|
||||
|
||||
```python
|
||||
# Domain Generation Algorithm (DGA) Detection
import re

dga_pattern = re.compile(r'\b[a-z]{8,20}\.(com|net|org|info)\b')
|
||||
|
||||
def detect_suspicious_domains(pcap_text):
|
||||
# Extrahiere DNS-Queries
|
||||
dns_pattern = r'DNS.*query.*?([a-zA-Z0-9.-]+\.[a-zA-Z]{2,})'
|
||||
domains = re.findall(dns_pattern, pcap_text)
|
||||
|
||||
suspicious = []
|
||||
for domain in domains:
|
||||
# Prüfe auf DGA-Charakteristika
|
||||
if dga_pattern.match(domain.lower()):
|
||||
# Zusätzliche Heuristiken
|
||||
vowel_ratio = len(re.findall(r'[aeiou]', domain.lower())) / len(domain)
|
||||
if vowel_ratio < 0.2: # Wenige Vokale = verdächtig
|
||||
suspicious.append(domain)
|
||||
|
||||
return suspicious
|
||||
```
|
||||
|
||||
### Data Exfiltration: Ungewöhnliche Datenübertragungen
|
||||
|
||||
```regex
|
||||
# Base64-kodierte Daten in URLs
|
||||
[?&]data=([A-Za-z0-9+/]{4})*([A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?
|
||||
|
||||
# DNS-Tunneling (ungewöhnlich lange Subdomains)
|
||||
\b[a-z0-9]{20,}\.[a-z0-9.-]+\.[a-z]{2,}\b
|
||||
|
||||
# Hex-kodierte Dateninhalte
|
||||
[?&]payload=[0-9a-fA-F]{40,}
|
||||
```
|
||||
|
||||
## Debugging und Testing
|
||||
|
||||
### Online-Tools für Regex-Entwicklung
|
||||
|
||||
1. **regex101.com**: Interaktive Regex-Entwicklung mit Erklärungen
|
||||
2. **regexr.com**: Visuelle Regex-Darstellung
|
||||
3. **regexpal.com**: Schnelle Tests ohne Anmeldung
|
||||
|
||||
### Regex-Validierung in der Praxis
|
||||
|
||||
```python
|
||||
import re
|
||||
|
||||
def validate_regex_pattern(pattern, test_cases):
|
||||
"""
|
||||
Validiert Regex-Pattern gegen bekannte Test-Cases
|
||||
"""
|
||||
try:
|
||||
compiled = re.compile(pattern)
|
||||
except re.error as e:
|
||||
return False, f"Regex-Syntax-Fehler: {e}"
|
||||
|
||||
results = []
|
||||
for test_input, expected in test_cases:
|
||||
match = compiled.search(test_input)
|
||||
found = match.group() if match else None
|
||||
results.append({
|
||||
'input': test_input,
|
||||
'expected': expected,
|
||||
'found': found,
|
||||
'correct': found == expected
|
||||
})
|
||||
|
||||
return True, results
|
||||
|
||||
# Test-Cases für IP-Pattern
|
||||
ip_tests = [
|
||||
('192.168.1.1', '192.168.1.1'),
|
||||
('999.999.999.999', None), # Ungültige IP
|
||||
('text 10.0.0.1 more text', '10.0.0.1'),
|
||||
('no.ip.here', None)
|
||||
]
|
||||
|
||||
pattern = r'\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b'
|
||||
valid, results = validate_regex_pattern(pattern, ip_tests)
|
||||
```
|
||||
|
||||
## Häufige Fehler und Lösungen
|
||||
|
||||
### Problem: Gierige vs. nicht-gierige Quantifizierer
|
||||
|
||||
```regex
|
||||
# Problematisch: Gierig
|
||||
<.*> # Matched "<tag>content</tag>" komplett
|
||||
|
||||
# Lösung: Nicht-gierig
|
||||
<.*?> # Matched nur "<tag>"
|
||||
|
||||
# Alternative: Spezifisch
|
||||
<[^>]*> # Matched keine ">" innerhalb
|
||||
```
|
||||
|
||||
### Problem: Unbeabsichtigte Metacharakter
|
||||
|
||||
```regex
|
||||
# Falsch: . als Literalzeichen gemeint
|
||||
192.168.1.1 # Matched auch "192x168x1x1"
|
||||
|
||||
# Richtig: Escape von Metacharaktern
|
||||
192\.168\.1\.1 # Matched nur echte IP
|
||||
```
|
||||
|
||||
### Problem: Fehlende Wortgrenzen
|
||||
|
||||
```regex
|
||||
# Problematisch: Matcht Teilstrings
|
||||
\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3} # Matched "1192.168.1.10"
|
||||
|
||||
# Lösung: Wortgrenzen verwenden
|
||||
\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b # Nur vollständige IPs
|
||||
```
|
||||
|
||||
## Integration in Forensik-Workflows
|
||||
|
||||
### Automatisierte Triage-Scripts
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# forensic_triage.sh - Automatisierte erste Analyse
|
||||
|
||||
LOG_DIR="/evidence/logs"
|
||||
OUTPUT_DIR="/analysis/regex_results"
|
||||
|
||||
# IP-Adressen extrahieren und häufigste finden
|
||||
echo "=== IP-Analyse ===" > $OUTPUT_DIR/summary.txt
|
||||
find $LOG_DIR -name "*.log" -exec grep -h -oP '\b(?:\d{1,3}\.){3}\d{1,3}\b' {} \; | \
|
||||
sort | uniq -c | sort -nr | head -20 >> $OUTPUT_DIR/summary.txt
|
||||
|
||||
# E-Mail-Adressen sammeln
|
||||
echo -e "\n=== E-Mail-Adressen ===" >> $OUTPUT_DIR/summary.txt
|
||||
find $LOG_DIR -name "*.log" -exec grep -h -oE '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}' {} \; | \
|
||||
sort | uniq >> $OUTPUT_DIR/summary.txt
|
||||
|
||||
# Verdächtige Prozessnamen
|
||||
echo -e "\n=== Verdächtige Prozesse ===" >> $OUTPUT_DIR/summary.txt
|
||||
find $LOG_DIR -name "*.log" -exec grep -h -iE '(powershell|cmd|wmic|psexec|mimikatz)' {} \; | \
|
||||
head -50 >> $OUTPUT_DIR/summary.txt
|
||||
```
|
||||
|
||||
### PowerShell-Module für wiederkehrende Aufgaben
|
||||
|
||||
```powershell
|
||||
function Get-ForensicPatterns {
|
||||
param(
|
||||
[string]$Path,
|
||||
[string[]]$Patterns = @(
|
||||
'\b(?:\d{1,3}\.){3}\d{1,3}\b', # IP-Adressen
|
||||
'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', # E-Mails
|
||||
'\b[a-fA-F0-9]{32,64}\b' # Hash-Werte
|
||||
)
|
||||
)
|
||||
|
||||
$results = @{}
|
||||
|
||||
foreach ($pattern in $Patterns) {
|
||||
$matches = Select-String -Path $Path -Pattern $pattern -AllMatches
|
||||
$results[$pattern] = $matches | ForEach-Object {
|
||||
[PSCustomObject]@{
|
||||
File = $_.Filename
|
||||
Line = $_.LineNumber
|
||||
Match = $_.Matches.Value
|
||||
Context = $_.Line
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $results
|
||||
}
|
||||
```
|
||||
|
||||
## Weiterführende Techniken
|
||||
|
||||
### Lookahead und Lookbehind
|
||||
|
||||
```regex
|
||||
# Positive Lookahead: Password gefolgt von Ziffer
|
||||
password(?=.*\d)
|
||||
|
||||
# Negative Lookahead: IP nicht in private ranges
|
||||
(?!(?:10\.|192\.168\.|172\.(?:1[6-9]|2[0-9]|3[01])\.))(?:\d{1,3}\.){3}\d{1,3}
|
||||
|
||||
# Positive Lookbehind: Zahl nach "Port:"
|
||||
(?<=Port:)\d+
|
||||
|
||||
# Negative Lookbehind: Nicht nach "Comment:"
|
||||
(?<!Comment:).+@.+\..+
|
||||
```
|
||||
|
||||
### Named Capture Groups
|
||||
|
||||
```python
|
||||
import re
|
||||
|
||||
# Strukturierte Log-Parsing
|
||||
log_pattern = re.compile(
|
||||
r'(?P<timestamp>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) '
|
||||
r'\[(?P<level>\w+)\] '
|
||||
r'(?P<source>\w+): '
|
||||
r'(?P<message>.*)'
|
||||
)
|
||||
|
||||
def parse_log_entry(line):
|
||||
match = log_pattern.match(line)
|
||||
if match:
|
||||
return match.groupdict()
|
||||
return None
|
||||
|
||||
# Verwendung
|
||||
log_line = "2024-01-15 14:30:25 [ERROR] auth: Failed login from 192.168.1.100"
|
||||
parsed = parse_log_entry(log_line)
|
||||
# Result: {'timestamp': '2024-01-15 14:30:25', 'level': 'ERROR',
|
||||
# 'source': 'auth', 'message': 'Failed login from 192.168.1.100'}
|
||||
```
|
||||
|
||||
## Nächste Schritte
|
||||
|
||||
Nach diesem umfassenden Überblick können Sie:
|
||||
|
||||
1. **Praktische Übung**: Implementieren Sie die vorgestellten Patterns in Ihren aktuellen Untersuchungen
|
||||
2. **Tool-Integration**: Integrieren Sie Regex in Ihre bevorzugten Forensik-Tools
|
||||
3. **Automatisierung**: Entwickeln Sie Scripts für wiederkehrende Analysemuster
|
||||
4. **Spezialisierung**: Vertiefen Sie sich in tool-spezifische Regex-Implementierungen
|
||||
5. **Community**: Teilen Sie Ihre Patterns und lernen Sie von anderen Forensikern
|
||||
|
||||
### Weiterführende Ressourcen
|
||||
|
||||
- **SANS Regex Cheat Sheet**: Kompakte Referenz für Forensiker
|
||||
- **RegexBuddy**: Professionelle Regex-Entwicklungsumgebung
|
||||
- **Python re-Modul Dokumentation**: Detaillierte Syntax-Referenz
|
||||
- **YARA-Rules Repository**: Sammlung forensik-relevanter Regex-Patterns
|
||||
|
||||
Regular Expressions sind ein mächtiges Werkzeug, das Zeit spart und die Präzision forensischer Analysen erhöht. Die Investition in solide Regex-Kenntnisse zahlt sich in jeder Untersuchung aus und ermöglicht es, komplexe Muster zu erkennen, die manuell übersehen werden würden.
|
||||
@@ -1,770 +0,0 @@
|
||||
---
|
||||
title: "SQL in der digitalen Forensik: Von SQLite-Datenbanken zur Timeline-Analyse"
|
||||
description: "Umfassender Leitfaden für SQL-basierte Forensik-Analysen: SQLite-Datenbanken untersuchen, Timeline-Rekonstruktion durchführen, mobile App-Daten analysieren und komplexe Korrelationen aufdecken."
|
||||
author: "Claude 4 Sonnett (Prompt: Mario Stöckl)"
|
||||
last_updated: 2025-08-10
|
||||
difficulty: intermediate
|
||||
categories: ["analysis", "configuration", "case-study"]
|
||||
tags: ["sqlite-viewer", "correlation-engine", "mobile-app-data", "browser-history", "data-extraction", "timeline-queries", "join-operations", "aggregate-analysis", "wal-analysis", "python-integration"]
|
||||
tool_name: "SQL"
|
||||
related_tools: ["DB Browser for SQLite", "Autopsy", "Cellebrite UFED"]
|
||||
published: true
|
||||
---
|
||||
|
||||
# SQL in der digitalen Forensik: Von SQLite-Datenbanken zur Timeline-Analyse
|
||||
|
||||
SQL (Structured Query Language) ist eine der mächtigsten und unterschätztesten Fähigkeiten in der modernen digitalen Forensik. Während viele Ermittler auf GUI-basierte Tools setzen, ermöglicht SQL direkten Zugriff auf Rohdaten und komplexe Analysen, die mit herkömmlichen Tools unmöglich wären.
|
||||
|
||||
## Warum SQL in der Forensik unverzichtbar ist
|
||||
|
||||
### SQLite dominiert die mobile Forensik
|
||||
- **WhatsApp-Chats**: Nachrichten, Metadaten, gelöschte Inhalte
|
||||
- **Browser-History**: Zeitstempel, Besuchshäufigkeit, Suchverläufe
|
||||
- **App-Daten**: Standortdaten, Nutzerverhalten, Cache-Inhalte
|
||||
- **System-Logs**: Verbindungsprotokoll, Fehleraufzeichnungen
|
||||
|
||||
### Vorteile gegenüber GUI-Tools
|
||||
- **Flexibilität**: Komplexe Abfragen jenseits vordefinierter Filter
|
||||
- **Performance**: Direkte Datenbankzugriffe ohne Interface-Overhead
|
||||
- **Automatisierung**: Skript-basierte Analysen für wiederkehrende Aufgaben
|
||||
- **Tiefe**: Zugriff auf Metadaten und versteckte Tabellenstrukturen
|
||||
|
||||
## Grundlagen: SQLite-Struktur verstehen
|
||||
|
||||
### Datenbank-Anatomie in der Forensik
|
||||
|
||||
```sql
|
||||
-- Tabellen einer WhatsApp-Datenbank analysieren
|
||||
.tables
|
||||
|
||||
-- Tabellenstruktur untersuchen
|
||||
.schema messages
|
||||
|
||||
-- Beispiel-Output:
|
||||
CREATE TABLE messages (
|
||||
_id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
key_remote_jid TEXT,
|
||||
key_from_me INTEGER,
|
||||
key_id TEXT,
|
||||
status INTEGER,
|
||||
needs_push INTEGER,
|
||||
data TEXT,
|
||||
timestamp INTEGER,
|
||||
media_url TEXT,
|
||||
media_mime_type TEXT,
|
||||
media_wa_type INTEGER,
|
||||
media_size INTEGER,
|
||||
latitude REAL,
|
||||
longitude REAL
|
||||
);
|
||||
```
|
||||
|
||||
### SQLite-spezifische Forensik-Herausforderungen
|
||||
|
||||
**WAL-Mode (Write-Ahead Logging)**:
|
||||
```sql
|
||||
-- WAL-Datei auf nicht-committete Transaktionen prüfen
|
||||
PRAGMA journal_mode;
|
||||
|
||||
-- Temporäre Daten in WAL-Datei finden
|
||||
-- (Erfordert spezielle Tools wie sqlitewalreader)
|
||||
```
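
Wichtig für die Beweissicherung: Die Arbeitskopie sollte strikt schreibgeschützt geöffnet werden, sonst kann SQLite ausstehende WAL-Inhalte per Checkpoint in die Hauptdatei einspielen. Skizze für einen read-only-Zugriff samt Dokumentation der Sidecar-Dateien (der Dateiname `msgstore.db` ist nur ein Beispiel):

```python
import os
import sqlite3

def open_forensic_db(db_path):
    # Sidecar-Dateien dokumentieren, bevor die Datenbank geöffnet wird
    for suffix in ("-wal", "-shm", "-journal"):
        sidecar = db_path + suffix
        if os.path.exists(sidecar):
            print(f"Gefunden: {sidecar} ({os.path.getsize(sidecar)} Bytes)")

    # mode=ro verhindert Schreibzugriffe; gearbeitet wird ohnehin nur auf einer Kopie
    return sqlite3.connect(f"file:{db_path}?mode=ro", uri=True)

conn = open_forensic_db("msgstore.db")
print(conn.execute("PRAGMA journal_mode;").fetchone())
```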
|
||||
|
||||
**Gelöschte Records**:
|
||||
```sql
|
||||
-- Freespace-Analyse für gelöschte Daten
|
||||
-- Hinweis: Erfordert spezialisierte Recovery-Tools
|
||||
```
|
||||
|
||||
## Timeline-Rekonstruktion: Der Forensik-Klassiker
|
||||
|
||||
### Grundlegende Timeline-Abfrage
|
||||
|
||||
```sql
|
||||
-- Chronologische Ereignisübersicht erstellen
|
||||
SELECT
|
||||
datetime(timestamp/1000, 'unixepoch', 'localtime') as ereignis_zeit,
|
||||
CASE
|
||||
WHEN key_from_me = 1 THEN 'Ausgehend'
|
||||
ELSE 'Eingehend'
|
||||
END as richtung,
|
||||
key_remote_jid as kontakt,
|
||||
substr(data, 1, 50) || '...' as nachricht_preview
|
||||
FROM messages
|
||||
WHERE timestamp > 0
|
||||
ORDER BY timestamp DESC
|
||||
LIMIT 100;
|
||||
```
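
Dieselbe Abfrage lässt sich aus Python heraus ausführen und direkt als CSV für den Bericht exportieren (Skizze; der Dateiname `msgstore.db` und das Spaltenlayout entsprechen dem oben gezeigten WhatsApp-Schema):

```python
import csv
import sqlite3

QUERY = """
SELECT datetime(timestamp/1000, 'unixepoch', 'localtime') AS ereignis_zeit,
       CASE WHEN key_from_me = 1 THEN 'Ausgehend' ELSE 'Eingehend' END AS richtung,
       key_remote_jid AS kontakt,
       substr(data, 1, 50) AS nachricht_preview
FROM messages
WHERE timestamp > 0
ORDER BY timestamp DESC
"""

conn = sqlite3.connect("file:msgstore.db?mode=ro", uri=True)
with open("timeline.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["ereignis_zeit", "richtung", "kontakt", "nachricht_preview"])
    writer.writerows(conn.execute(QUERY))
```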
|
||||
|
||||
### Erweiterte Timeline mit Kontextinformationen
|
||||
|
||||
```sql
|
||||
-- Timeline mit Geolocation und Media-Daten
|
||||
SELECT
|
||||
datetime(m.timestamp/1000, 'unixepoch', 'localtime') as zeitstempel,
|
||||
c.display_name as kontakt_name,
|
||||
CASE
|
||||
WHEN m.key_from_me = 1 THEN '→ Gesendet'
|
||||
ELSE '← Empfangen'
|
||||
END as richtung,
|
||||
CASE
|
||||
WHEN m.media_wa_type IS NOT NULL THEN 'Media: ' || m.media_mime_type
|
||||
ELSE 'Text'
|
||||
END as nachricht_typ,
|
||||
CASE
|
||||
WHEN m.latitude IS NOT NULL THEN
|
||||
'Standort: ' || ROUND(m.latitude, 6) || ', ' || ROUND(m.longitude, 6)
|
||||
ELSE substr(m.data, 1, 100)
|
||||
END as inhalt
|
||||
FROM messages m
|
||||
LEFT JOIN wa_contacts c ON m.key_remote_jid = c.jid
|
||||
WHERE m.timestamp BETWEEN
|
||||
strftime('%s', '2024-01-01') * 1000 AND
|
||||
strftime('%s', '2024-01-31') * 1000
|
||||
ORDER BY m.timestamp;
|
||||
```
|
||||
|
||||
## Kommunikations-Analyse: Soziale Netzwerke aufdecken
|
||||
|
||||
### Häufigste Kontakte identifizieren
|
||||
|
||||
```sql
|
||||
-- Top-Kommunikationspartner nach Nachrichtenvolumen
|
||||
SELECT
|
||||
c.display_name,
|
||||
m.key_remote_jid,
|
||||
COUNT(*) as nachrichten_gesamt,
|
||||
SUM(CASE WHEN m.key_from_me = 1 THEN 1 ELSE 0 END) as gesendet,
|
||||
SUM(CASE WHEN m.key_from_me = 0 THEN 1 ELSE 0 END) as empfangen,
|
||||
MIN(datetime(m.timestamp/1000, 'unixepoch', 'localtime')) as erster_kontakt,
|
||||
MAX(datetime(m.timestamp/1000, 'unixepoch', 'localtime')) as letzter_kontakt
|
||||
FROM messages m
|
||||
LEFT JOIN wa_contacts c ON m.key_remote_jid = c.jid
|
||||
GROUP BY m.key_remote_jid
|
||||
HAVING nachrichten_gesamt > 10
|
||||
ORDER BY nachrichten_gesamt DESC;
|
||||
```
|
||||
|
||||
### Kommunikationsmuster-Analyse
|
||||
|
||||
```sql
|
||||
-- Tägliche Aktivitätsmuster
|
||||
SELECT
|
||||
strftime('%H', timestamp/1000, 'unixepoch', 'localtime') as stunde,
|
||||
COUNT(*) as nachrichten_anzahl,
|
||||
AVG(length(data)) as durchschnittliche_laenge
|
||||
FROM messages
|
||||
WHERE timestamp > 0 AND data IS NOT NULL
|
||||
GROUP BY stunde
|
||||
ORDER BY stunde;
|
||||
```
|
||||
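For the report it is often easier to read this hourly pattern as a chart. A sketch that runs the same aggregation through pandas and saves a bar chart (the database path and the `messages` schema are assumed to match the examples above):

```python
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt

conn = sqlite3.connect("/evidence/msgstore_copy.db")  # always a working copy, never the original
hourly = pd.read_sql_query(
    """
    SELECT strftime('%H', timestamp/1000, 'unixepoch', 'localtime') AS stunde,
           COUNT(*) AS nachrichten_anzahl
    FROM messages
    WHERE timestamp > 0
    GROUP BY stunde
    ORDER BY stunde
    """,
    conn,
)
conn.close()

hourly.plot(kind="bar", x="stunde", y="nachrichten_anzahl", legend=False)
plt.xlabel("Hour of day")
plt.ylabel("Messages")
plt.tight_layout()
plt.savefig("hourly_activity.png")  # attach to the report
```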
|
||||
```sql
|
||||
-- Verdächtige Aktivitätsspitzen identifizieren
|
||||
WITH hourly_stats AS (
|
||||
SELECT
|
||||
date(timestamp/1000, 'unixepoch', 'localtime') as tag,
|
||||
strftime('%H', timestamp/1000, 'unixepoch', 'localtime') as stunde,
|
||||
COUNT(*) as nachrichten_pro_stunde
|
||||
FROM messages
|
||||
WHERE timestamp > 0
|
||||
GROUP BY tag, stunde
|
||||
),
|
||||
avg_per_hour AS (
|
||||
SELECT stunde, AVG(nachrichten_pro_stunde) as durchschnitt
|
||||
FROM hourly_stats
|
||||
GROUP BY stunde
|
||||
)
|
||||
SELECT
|
||||
h.tag,
|
||||
h.stunde,
|
||||
h.nachrichten_pro_stunde,
|
||||
a.durchschnitt,
|
||||
ROUND((h.nachrichten_pro_stunde - a.durchschnitt) / a.durchschnitt * 100, 2) as abweichung_prozent
|
||||
FROM hourly_stats h
|
||||
JOIN avg_per_hour a ON h.stunde = a.stunde
|
||||
WHERE h.nachrichten_pro_stunde > a.durchschnitt * 2
|
||||
ORDER BY abweichung_prozent DESC;
|
||||
```
|
||||
|
||||
## Browser-Forensik: Digitale Spuren verfolgen
|
||||
|
||||
### Chrome/Chromium History-Analyse
|
||||
|
||||
```sql
|
||||
-- Browser-History mit Besuchshäufigkeit
|
||||
SELECT
|
||||
url,
|
||||
title,
|
||||
visit_count,
|
||||
datetime(last_visit_time/1000000-11644473600, 'unixepoch', 'localtime') as letzter_besuch,
|
||||
CASE
|
||||
WHEN typed_count > 0 THEN 'Direkt eingegeben'
|
||||
ELSE 'Über Link/Verlauf'
|
||||
END as zugriff_art
|
||||
FROM urls
|
||||
WHERE last_visit_time > 0
|
||||
ORDER BY last_visit_time DESC
|
||||
LIMIT 100;
|
||||
```
|
||||
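The constant 11644473600 in the conversion above is the number of seconds between the WebKit/Windows epoch (1601-01-01) and the Unix epoch; Chrome stores `last_visit_time` as microseconds since 1601. A small helper for spot-checking individual values during analysis (the sample value is only an illustration):

```python
from datetime import datetime, timedelta, timezone

def webkit_to_utc(webkit_us):
    """Convert a Chrome/WebKit timestamp (microseconds since 1601-01-01) to an aware UTC datetime."""
    return datetime(1601, 1, 1, tzinfo=timezone.utc) + timedelta(microseconds=webkit_us)

# Example value only: roughly corresponds to early 2024
print(webkit_to_utc(13_350_000_000_000_000).isoformat())
```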
|
||||
### Such-Verlauf analysieren
|
||||
|
||||
```sql
|
||||
-- Google-Suchen aus Browser-History extrahieren
|
||||
SELECT
|
||||
datetime(last_visit_time/1000000-11644473600, 'unixepoch', 'localtime') as suchzeit,
|
||||
CASE
|
||||
WHEN url LIKE '%google.com/search%' THEN
|
||||
replace(substr(url, instr(url, 'q=') + 2,
|
||||
case when instr(substr(url, instr(url, 'q=') + 2), '&') > 0
|
||||
then instr(substr(url, instr(url, 'q=') + 2), '&') - 1
|
||||
else length(url) end), '+', ' ')
|
||||
ELSE 'Andere Suchmaschine'
|
||||
END as suchbegriff,
|
||||
url
|
||||
FROM urls
|
||||
WHERE url LIKE '%search%' OR url LIKE '%q=%'
|
||||
ORDER BY last_visit_time DESC;
|
||||
```
|
||||
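The nested `substr`/`instr` logic quickly becomes fragile once search terms are URL-encoded. Where Python is available, the same extraction is clearer with `urllib.parse`; a sketch:

```python
from urllib.parse import urlparse, parse_qs

def search_terms(url):
    """Return the q= parameter of a search URL, or None if there is none."""
    params = parse_qs(urlparse(url).query)
    values = params.get("q")
    return values[0] if values else None

print(search_terms("https://www.google.com/search?q=sqlite+wal+forensik&hl=de"))  # 'sqlite wal forensik'
```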
|
||||
## Anomalie-Erkennung mit SQL
|
||||
|
||||
### Ungewöhnliche Datei-Zugriffe identifizieren
|
||||
|
||||
```sql
|
||||
-- Dateizugriffe außerhalb der Arbeitszeiten
|
||||
WITH file_access AS (
|
||||
SELECT
|
||||
datetime(timestamp, 'unixepoch', 'localtime') as zugriffszeit,
|
||||
strftime('%H', timestamp, 'unixepoch', 'localtime') as stunde,
|
||||
strftime('%w', timestamp, 'unixepoch', 'localtime') as wochentag,
|
||||
file_path,
|
||||
action_type
|
||||
FROM file_access_logs
|
||||
)
|
||||
SELECT *
|
||||
FROM file_access
|
||||
WHERE (
|
||||
stunde < '08' OR stunde > '18' OR -- Außerhalb 8-18 Uhr
|
||||
wochentag IN ('0', '6') -- Wochenende
|
||||
) AND action_type IN ('read', 'write', 'delete')
|
||||
ORDER BY zugriffszeit DESC;
|
||||
```
|
||||
|
||||
### Datenexfiltration-Indikatoren
|
||||
|
||||
```sql
|
||||
-- Große Dateiübertragungen in kurzen Zeiträumen
|
||||
SELECT
|
||||
datetime(transfer_start, 'unixepoch', 'localtime') as start_zeit,
|
||||
SUM(file_size) as gesamt_bytes,
|
||||
COUNT(*) as anzahl_dateien,
|
||||
destination_ip,
|
||||
GROUP_CONCAT(DISTINCT file_extension) as dateitypen
|
||||
FROM network_transfers
|
||||
WHERE transfer_start BETWEEN
|
||||
strftime('%s', 'now', '-7 days') AND strftime('%s', 'now')
|
||||
GROUP BY
|
||||
date(transfer_start, 'unixepoch', 'localtime'),
|
||||
strftime('%H', transfer_start, 'unixepoch', 'localtime'),
|
||||
destination_ip
|
||||
HAVING gesamt_bytes > 100000000 -- > 100MB
|
||||
ORDER BY gesamt_bytes DESC;
|
||||
```
|
||||
|
||||
## Erweiterte Techniken: Window Functions und CTEs
|
||||
|
||||
### Sliding Window-Analyse für Ereigniskorrelation
|
||||
|
||||
```sql
|
||||
-- Ereignisse in 5-Minuten-Fenstern korrelieren
|
||||
WITH event_windows AS (
|
||||
SELECT
|
||||
        datetime(timestamp, 'unixepoch', 'localtime') as ereigniszeit,
        timestamp,  -- keep the raw value so the outer query can compute time differences
|
||||
event_type,
|
||||
user_id,
|
||||
LAG(timestamp, 1) OVER (PARTITION BY user_id ORDER BY timestamp) as prev_timestamp,
|
||||
LEAD(timestamp, 1) OVER (PARTITION BY user_id ORDER BY timestamp) as next_timestamp
|
||||
FROM security_events
|
||||
ORDER BY timestamp
|
||||
)
|
||||
SELECT
|
||||
ereigniszeit,
|
||||
event_type,
|
||||
user_id,
|
||||
CASE
|
||||
WHEN (timestamp - prev_timestamp) < 300 THEN 'Schnelle Aufeinanderfolge'
|
||||
WHEN (next_timestamp - timestamp) < 300 THEN 'Vor schnellem Event'
|
||||
ELSE 'Isoliert'
|
||||
END as ereignis_kontext
|
||||
FROM event_windows;
|
||||
```
|
||||
|
||||
### Temporäre Anomalie-Scores
|
||||
|
||||
```sql
|
||||
-- Anomalie-Score basierend auf Abweichung vom Normalverhalten
|
||||
WITH user_baseline AS (
|
||||
SELECT
|
||||
user_id,
|
||||
AVG(daily_logins) as avg_logins,
|
||||
        -- STDEV is not built into stock SQLite; load a stats extension (e.g. sqlean) or compute it outside SQL
        STDEV(daily_logins) as stddev_logins
|
||||
FROM (
|
||||
SELECT
|
||||
user_id,
|
||||
date(login_time, 'unixepoch', 'localtime') as login_date,
|
||||
COUNT(*) as daily_logins
|
||||
FROM user_logins
|
||||
WHERE login_time > strftime('%s', 'now', '-30 days')
|
||||
GROUP BY user_id, login_date
|
||||
)
|
||||
GROUP BY user_id
|
||||
HAVING COUNT(*) > 7 -- Mindestens 7 Tage Daten
|
||||
),
|
||||
current_behavior AS (
|
||||
SELECT
|
||||
user_id,
|
||||
date(login_time, 'unixepoch', 'localtime') as login_date,
|
||||
COUNT(*) as daily_logins
|
||||
FROM user_logins
|
||||
WHERE login_time > strftime('%s', 'now', '-7 days')
|
||||
GROUP BY user_id, login_date
|
||||
)
|
||||
SELECT
|
||||
c.user_id,
|
||||
c.login_date,
|
||||
c.daily_logins,
|
||||
b.avg_logins,
|
||||
ROUND(ABS(c.daily_logins - b.avg_logins) / b.stddev_logins, 2) as anomalie_score
|
||||
FROM current_behavior c
|
||||
JOIN user_baseline b ON c.user_id = b.user_id
|
||||
WHERE anomalie_score > 2.0 -- Mehr als 2 Standardabweichungen
|
||||
ORDER BY anomalie_score DESC;
|
||||
```
|
||||
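Because stock SQLite lacks `STDEV()`, the same score can also be computed by pulling the daily counts into pandas and doing the statistics there. A simplified sketch without the 30/7-day windowing of the query above (path and table names as in the example):

```python
import sqlite3
import pandas as pd

conn = sqlite3.connect("/evidence/working_copy.db")  # illustrative path
daily = pd.read_sql_query(
    """
    SELECT user_id,
           date(login_time, 'unixepoch', 'localtime') AS login_date,
           COUNT(*) AS daily_logins
    FROM user_logins
    GROUP BY user_id, login_date
    """,
    conn,
)
conn.close()

baseline = daily.groupby("user_id")["daily_logins"].agg(["mean", "std"])
scored = daily.join(baseline, on="user_id")
scored["anomalie_score"] = (scored["daily_logins"] - scored["mean"]).abs() / scored["std"]
print(scored[scored["anomalie_score"] > 2.0].sort_values("anomalie_score", ascending=False))
```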
|
||||
## Python-Integration für Automatisierung
|
||||
|
||||
### SQLite-Forensik mit Python
|
||||
|
||||
```python
|
||||
import sqlite3
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
class ForensicSQLAnalyzer:
|
||||
def __init__(self, db_path):
|
||||
        # Always point this at a forensic working copy, never at the original evidence file
        self.conn = sqlite3.connect(db_path)
|
||||
self.conn.row_factory = sqlite3.Row
|
||||
|
||||
def extract_timeline(self, start_date=None, end_date=None):
|
||||
"""Timeline-Extraktion mit Datumsfilterung"""
|
||||
query = """
|
||||
SELECT
|
||||
datetime(timestamp/1000, 'unixepoch', 'localtime') as timestamp,
|
||||
event_type,
|
||||
details,
|
||||
user_context
|
||||
FROM events
|
||||
WHERE 1=1
|
||||
"""
|
||||
|
||||
params = []
|
||||
if start_date:
|
||||
query += " AND timestamp >= ?"
|
||||
params.append(int(start_date.timestamp() * 1000))
|
||||
if end_date:
|
||||
query += " AND timestamp <= ?"
|
||||
params.append(int(end_date.timestamp() * 1000))
|
||||
|
||||
query += " ORDER BY timestamp"
|
||||
|
||||
return pd.read_sql_query(query, self.conn, params=params)
|
||||
|
||||
def communication_analysis(self):
|
||||
"""Kommunikationsmuster analysieren"""
|
||||
query = """
|
||||
SELECT
|
||||
contact_id,
|
||||
COUNT(*) as message_count,
|
||||
AVG(message_length) as avg_length,
|
||||
MIN(timestamp) as first_contact,
|
||||
MAX(timestamp) as last_contact
|
||||
FROM messages
|
||||
GROUP BY contact_id
|
||||
HAVING message_count > 5
|
||||
ORDER BY message_count DESC
|
||||
"""
|
||||
|
||||
return pd.read_sql_query(query, self.conn)
|
||||
|
||||
def detect_anomalies(self, threshold=2.0):
|
||||
"""Statistische Anomalie-Erkennung"""
|
||||
query = """
|
||||
WITH daily_stats AS (
|
||||
SELECT
|
||||
date(timestamp, 'unixepoch', 'localtime') as day,
|
||||
COUNT(*) as daily_events
|
||||
FROM events
|
||||
GROUP BY day
|
||||
),
|
||||
stats AS (
|
||||
SELECT
|
||||
AVG(daily_events) as mean_events,
|
||||
STDEV(daily_events) as stddev_events
|
||||
FROM daily_stats
|
||||
)
|
||||
SELECT
|
||||
d.day,
|
||||
d.daily_events,
|
||||
s.mean_events,
|
||||
ABS(d.daily_events - s.mean_events) / s.stddev_events as z_score
|
||||
FROM daily_stats d, stats s
|
||||
WHERE z_score > ?
|
||||
ORDER BY z_score DESC
|
||||
"""
|
||||
|
||||
return pd.read_sql_query(query, self.conn, params=[threshold])
|
||||
|
||||
def export_findings(self, filename):
|
||||
"""Ermittlungsergebnisse exportieren"""
|
||||
timeline = self.extract_timeline()
|
||||
comms = self.communication_analysis()
|
||||
anomalies = self.detect_anomalies()
|
||||
|
||||
with pd.ExcelWriter(filename) as writer:
|
||||
timeline.to_excel(writer, sheet_name='Timeline', index=False)
|
||||
comms.to_excel(writer, sheet_name='Communications', index=False)
|
||||
anomalies.to_excel(writer, sheet_name='Anomalies', index=False)
|
||||
|
||||
# Verwendung
|
||||
analyzer = ForensicSQLAnalyzer('/path/to/evidence.db')
|
||||
analyzer.export_findings('investigation_findings.xlsx')  # writes the Excel report; no return value
|
||||
```
|
||||
|
||||
## Häufige Fallstricke und Best Practices
|
||||
|
||||
### Datenintegrität sicherstellen
|
||||
|
||||
```sql
|
||||
-- Konsistenz-Checks vor Analyse
|
||||
SELECT
|
||||
'Null Timestamps' as issue_type,
|
||||
COUNT(*) as count
|
||||
FROM messages
|
||||
WHERE timestamp IS NULL OR timestamp = 0
|
||||
|
||||
UNION ALL
|
||||
|
||||
SELECT
|
||||
'Missing Contact Info' as issue_type,
|
||||
COUNT(*) as count
|
||||
FROM messages m
|
||||
LEFT JOIN wa_contacts c ON m.key_remote_jid = c.jid
|
||||
WHERE c.jid IS NULL;
|
||||
```
|
||||
|
||||
### Performance-Optimierung
|
||||
|
||||
```sql
|
||||
-- Create indexes for frequently used queries (only on a working copy: CREATE INDEX modifies the database file)
|
||||
CREATE INDEX IF NOT EXISTS idx_messages_timestamp
|
||||
ON messages(timestamp);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_messages_contact_timestamp
|
||||
ON messages(key_remote_jid, timestamp);
|
||||
|
||||
-- Query-Performance analysieren
|
||||
EXPLAIN QUERY PLAN
|
||||
SELECT * FROM messages
|
||||
WHERE timestamp BETWEEN ? AND ?
|
||||
ORDER BY timestamp;
|
||||
```
|
||||
|
||||
### Forensische Dokumentation
|
||||
|
||||
```sql
|
||||
-- Metadaten für Gerichtsverwertbarkeit dokumentieren
|
||||
SELECT
|
||||
'Database Schema Version' as info_type,
|
||||
user_version as value
|
||||
FROM pragma_user_version
|
||||
|
||||
UNION ALL

SELECT
    'Page Count' as info_type,
    page_count as value
FROM pragma_page_count;

-- Note: the file's last-modified time is not exposed through SQL.
-- Record it at the operating-system level (e.g. stat) before the database is opened.
|
||||
```
|
||||
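File-level metadata such as the last-modified time has to be captured at the operating-system level before the database is opened. A sketch that records size, mtime and SHA-256 for the evidence log (the path is illustrative):

```python
import hashlib
import json
import os
from datetime import datetime, timezone

def file_metadata(path):
    """Capture size, modification time and SHA-256 of an evidence file at the OS level."""
    stat = os.stat(path)
    sha256 = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(8192), b""):
            sha256.update(chunk)
    return {
        "path": path,
        "size_bytes": stat.st_size,
        "modified_utc": datetime.fromtimestamp(stat.st_mtime, tz=timezone.utc).isoformat(),
        "sha256": sha256.hexdigest(),
        "recorded_at": datetime.now(timezone.utc).isoformat(),
    }

print(json.dumps(file_metadata("/evidence/msgstore_copy.db"), indent=2))
```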
|
||||
## Spezialisierte Forensik-Szenarien
|
||||
|
||||
### Mobile App-Forensik: Instagram-Datenbank
|
||||
|
||||
```sql
|
||||
-- Instagram-Nachrichten mit Medien-Metadaten
|
||||
SELECT
|
||||
datetime(m.timestamp/1000, 'unixepoch', 'localtime') as nachricht_zeit,
|
||||
u.username as absender,
|
||||
CASE
|
||||
WHEN m.item_type = 1 THEN 'Text: ' || m.text
|
||||
WHEN m.item_type = 2 THEN 'Bild: ' || mi.media_url
|
||||
WHEN m.item_type = 3 THEN 'Video: ' || mi.media_url
|
||||
ELSE 'Anderer Typ: ' || m.item_type
|
||||
END as inhalt,
|
||||
m.thread_key as chat_id
|
||||
FROM direct_messages m
|
||||
LEFT JOIN users u ON m.user_id = u.pk
|
||||
LEFT JOIN media_items mi ON m.media_id = mi.id
|
||||
WHERE m.timestamp > 0
|
||||
ORDER BY m.timestamp DESC;
|
||||
```
|
||||
|
||||
### Incident Response: Systemprotokoll-Korrelation
|
||||
|
||||
```sql
|
||||
-- Korrelation zwischen Login-Events und Netzwerk-Aktivität
|
||||
WITH suspicious_logins AS (
|
||||
SELECT
|
||||
login_time,
|
||||
user_id,
|
||||
source_ip,
|
||||
login_time + 3600 as investigation_window -- 1 Stunde nach Login
|
||||
FROM login_events
|
||||
WHERE source_ip NOT LIKE '192.168.%' -- Externe IPs
|
||||
AND login_time > strftime('%s', 'now', '-7 days')
|
||||
),
|
||||
network_activity AS (
|
||||
SELECT
|
||||
connection_time,
|
||||
source_ip,
|
||||
destination_ip,
|
||||
bytes_transferred,
|
||||
protocol
|
||||
FROM network_connections
|
||||
)
|
||||
SELECT
|
||||
datetime(sl.login_time, 'unixepoch', 'localtime') as verdaechtiger_login,
|
||||
sl.user_id,
|
||||
sl.source_ip as login_ip,
|
||||
COUNT(na.connection_time) as netzwerk_aktivitaeten,
|
||||
SUM(na.bytes_transferred) as gesamt_daten_bytes,
|
||||
GROUP_CONCAT(DISTINCT na.destination_ip) as ziel_ips
|
||||
FROM suspicious_logins sl
|
||||
LEFT JOIN network_activity na ON
|
||||
na.connection_time BETWEEN sl.login_time AND sl.investigation_window
|
||||
AND na.source_ip = sl.source_ip
|
||||
GROUP BY sl.login_time, sl.user_id, sl.source_ip
|
||||
HAVING netzwerk_aktivitaeten > 0
|
||||
ORDER BY gesamt_daten_bytes DESC;
|
||||
```
|
||||
|
||||
## Erweiterte WAL-Analyse und Recovery
|
||||
|
||||
### WAL-Datei Untersuchung
|
||||
|
||||
```sql
|
||||
-- WAL-Mode Status prüfen
|
||||
PRAGMA journal_mode;
|
||||
PRAGMA wal_checkpoint;
|
||||
|
||||
-- Uncommitted transactions in WAL identifizieren
|
||||
-- Hinweis: Erfordert spezielle Tools oder Hex-Editor
|
||||
-- Zeigt Konzept für manuelle Analyse
|
||||
|
||||
SELECT
|
||||
name,
|
||||
rootpage,
|
||||
sql
|
||||
FROM sqlite_master
|
||||
WHERE type = 'table'
|
||||
ORDER BY name;
|
||||
```
|
||||
|
||||
### Gelöschte Daten-Recovery
|
||||
|
||||
```python
|
||||
# Python-Script für erweiterte SQLite-Recovery
|
||||
import sqlite3
|
||||
import struct
|
||||
import os
|
||||
|
||||
class SQLiteForensics:
|
||||
def __init__(self, db_path):
|
||||
self.db_path = db_path
|
||||
self.page_size = self.get_page_size()
|
||||
|
||||
def get_page_size(self):
|
||||
"""SQLite Page-Size ermitteln"""
|
||||
with open(self.db_path, 'rb') as f:
|
||||
f.seek(16) # Page size offset
|
||||
return struct.unpack('>H', f.read(2))[0]
|
||||
|
||||
def analyze_freespace(self):
|
||||
"""Freespace auf gelöschte Records analysieren"""
|
||||
conn = sqlite3.connect(self.db_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
# Freespace-Informationen sammeln
|
||||
cursor.execute("PRAGMA freelist_count;")
|
||||
free_pages = cursor.fetchone()[0]
|
||||
|
||||
cursor.execute("PRAGMA page_count;")
|
||||
total_pages = cursor.fetchone()[0]
|
||||
|
||||
recovery_potential = {
|
||||
'total_pages': total_pages,
|
||||
'free_pages': free_pages,
|
||||
'recovery_potential': f"{(free_pages/total_pages)*100:.2f}%"
|
||||
}
|
||||
|
||||
conn.close()
|
||||
return recovery_potential
|
||||
|
||||
def extract_unallocated(self):
|
||||
"""Unallocated Space für Recovery extrahieren"""
|
||||
# Vereinfachtes Beispiel - echte Implementation erfordert
|
||||
# detaillierte SQLite-Interna-Kenntnisse
|
||||
unallocated_data = []
|
||||
|
||||
with open(self.db_path, 'rb') as f:
|
||||
file_size = os.path.getsize(self.db_path)
|
||||
pages = file_size // self.page_size
|
||||
|
||||
for page_num in range(1, pages + 1):
|
||||
f.seek((page_num - 1) * self.page_size)
|
||||
page_data = f.read(self.page_size)
|
||||
|
||||
# Suche nach Text-Patterns in Freespace
|
||||
# (Vereinfacht - echte Recovery ist komplexer)
|
||||
if b'WhatsApp' in page_data or b'@' in page_data:
|
||||
unallocated_data.append({
|
||||
'page': page_num,
|
||||
'potential_data': page_data[:100] # Erste 100 Bytes
|
||||
})
|
||||
|
||||
return unallocated_data
|
||||
|
||||
# Verwendung für Recovery-Assessment
|
||||
forensics = SQLiteForensics('/path/to/damaged.db')
|
||||
recovery_info = forensics.analyze_freespace()
|
||||
print(f"Recovery-Potenzial: {recovery_info['recovery_potential']}")
|
||||
```
|
||||
|
||||
## Compliance und Rechtssicherheit
|
||||
|
||||
### Audit-Trail erstellen
|
||||
|
||||
```sql
|
||||
-- Forensische Dokumentation aller durchgeführten Abfragen
|
||||
CREATE TABLE IF NOT EXISTS forensic_audit_log (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
|
||||
investigator TEXT,
|
||||
query_type TEXT,
|
||||
sql_query TEXT,
|
||||
affected_rows INTEGER,
|
||||
case_number TEXT,
|
||||
notes TEXT
|
||||
);
|
||||
|
||||
-- Beispiel-Eintrag
|
||||
INSERT INTO forensic_audit_log
|
||||
(investigator, query_type, sql_query, affected_rows, case_number, notes)
|
||||
VALUES
|
||||
('Max Mustermann', 'TIMELINE_EXTRACTION',
|
||||
'SELECT * FROM messages WHERE timestamp BETWEEN ? AND ?',
|
||||
1247, 'CASE-2024-001',
|
||||
'Timeline-Extraktion für Zeitraum 01.01.2024 - 31.01.2024');
|
||||
```
|
||||
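In practice the audit entry is easiest to create from the same script that runs the analysis query. A sketch of a helper that writes into the `forensic_audit_log` table shown above, kept in a separate case database rather than in the evidence copy (all paths are illustrative):

```python
import sqlite3

def run_logged_query(evidence_conn, audit_conn, investigator, case_number, query_type, sql_query, params=()):
    """Run an analysis query against the evidence copy and document it in a separate audit database."""
    rows = evidence_conn.execute(sql_query, params).fetchall()
    audit_conn.execute(
        """
        INSERT INTO forensic_audit_log
            (investigator, query_type, sql_query, affected_rows, case_number, notes)
        VALUES (?, ?, ?, ?, ?, ?)
        """,
        (investigator, query_type, sql_query, len(rows), case_number, "automated log entry"),
    )
    audit_conn.commit()
    return rows

# Illustrative usage: the audit database already contains the forensic_audit_log table shown above
evidence_conn = sqlite3.connect("/evidence/msgstore_copy.db")
audit_conn = sqlite3.connect("/cases/CASE-2024-001/audit.db")
messages = run_logged_query(
    evidence_conn, audit_conn, "Max Mustermann", "CASE-2024-001", "TIMELINE_EXTRACTION",
    "SELECT * FROM messages WHERE timestamp BETWEEN ? AND ?", (1704067200000, 1706745600000),
)
```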
|
||||
### Hash-Verifikation implementieren
|
||||
|
||||
```python
|
||||
import hashlib
|
||||
import json
import sqlite3
from datetime import datetime
|
||||
|
||||
def verify_database_integrity(db_path, expected_hash=None):
|
||||
"""Datenbank-Integrität durch Hash-Verifikation prüfen"""
|
||||
|
||||
# SHA-256 Hash der Datenbankdatei
|
||||
sha256_hash = hashlib.sha256()
|
||||
with open(db_path, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(4096), b""):
|
||||
sha256_hash.update(chunk)
|
||||
|
||||
current_hash = sha256_hash.hexdigest()
|
||||
|
||||
# Zusätzlich: Struktureller Integritäts-Check
|
||||
conn = sqlite3.connect(db_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
try:
|
||||
cursor.execute("PRAGMA integrity_check;")
|
||||
integrity_result = cursor.fetchall()
|
||||
is_structurally_intact = integrity_result == [('ok',)]
|
||||
except Exception as e:
|
||||
is_structurally_intact = False
|
||||
integrity_result = [f"Error: {str(e)}"]
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
return {
|
||||
'file_hash': current_hash,
|
||||
'hash_matches': current_hash == expected_hash if expected_hash else None,
|
||||
'structurally_intact': is_structurally_intact,
|
||||
'integrity_details': integrity_result,
|
||||
'verified_at': datetime.now().isoformat()
|
||||
}
|
||||
|
||||
# Chain of Custody dokumentieren
|
||||
def log_database_access(db_path, investigator, purpose):
|
||||
"""Datenbankzugriff für Chain of Custody protokollieren"""
|
||||
verification = verify_database_integrity(db_path)
|
||||
|
||||
log_entry = {
|
||||
'timestamp': datetime.now().isoformat(),
|
||||
'investigator': investigator,
|
||||
'database_path': db_path,
|
||||
'access_purpose': purpose,
|
||||
'pre_access_hash': verification['file_hash'],
|
||||
'database_integrity': verification['structurally_intact']
|
||||
}
|
||||
|
||||
# Log in separater Audit-Datei speichern
|
||||
with open('forensic_access_log.json', 'a') as log_file:
|
||||
json.dump(log_entry, log_file)
|
||||
log_file.write('\n')
|
||||
|
||||
return log_entry
|
||||
```
|
||||
|
||||
## Fazit und Weiterführende Ressourcen
|
||||
|
||||
SQL in der digitalen Forensik ist mehr als nur Datenbankabfragen - es ist ein mächtiges Werkzeug für:
|
||||
|
||||
- **Timeline-Rekonstruktion** mit präziser zeitlicher Korrelation
|
||||
- **Kommunikationsanalyse** für soziale Netzwerk-Aufklärung
|
||||
- **Anomalie-Erkennung** durch statistische Analyse
|
||||
- **Automatisierung** wiederkehrender Untersuchungsschritte
|
||||
- **Tiefe Datenextraktion** jenseits GUI-Limitationen
|
||||
|
||||
### Nächste Schritte
|
||||
|
||||
1. **Praktische Übung**: Beginnen Sie mit einfachen WhatsApp-Datenbank-Analysen
|
||||
2. **Tool-Integration**: Kombinieren Sie SQL mit Python für erweiterte Analysen
|
||||
3. **Spezialisierung**: Vertiefen Sie mobile-spezifische oder Browser-Forensik
|
||||
4. **Automation**: Entwickeln Sie wiederverwendbare SQL-Scripts für häufige Szenarien
|
||||
5. **Rechtssicherheit**: Implementieren Sie Audit-Trails und Hash-Verifikation
|
||||
|
||||
### Empfohlene Tools
|
||||
|
||||
- **DB Browser for SQLite**: GUI für interaktive Exploration
|
||||
- **SQLiteStudio**: Erweiterte SQLite-Verwaltung
|
||||
- **Python sqlite3**: Programmbasierte Automatisierung
|
||||
- **Autopsy**: Integration in forensische Workflows
|
||||
- **Cellebrite UFED**: Mobile Forensik mit SQL-Export
|
||||
|
||||
Die Kombination aus SQL-Kenntnissen und forensischem Verständnis macht moderne Ermittler zu hocheffizienten Datenanalytikern. In einer Welt zunehmender Datenmengen wird diese Fähigkeit zum entscheidenden Wettbewerbsvorteil.
|
||||
@@ -1,601 +0,0 @@
|
||||
---
|
||||
title: "Timeline-Analyse & Event-Korrelation: Methodische Rekonstruktion forensischer Ereignisse"
|
||||
description: "Umfassende Anleitung zur systematischen Timeline-Erstellung aus heterogenen Datenquellen, Super-Timeline-Processing und Advanced-Correlation-Techniken für komplexe Incident-Response-Szenarien."
|
||||
author: "Claude 4 Sonnett (Prompt: Mario Stöckl)"
|
||||
last_updated: 2025-08-10
|
||||
difficulty: advanced
|
||||
categories: ["analysis", "methodology", "incident-response"]
|
||||
tags: ["timeline-correlation", "event-sequencing", "temporal-analysis", "super-timeline", "pivot-points", "behavioral-patterns", "anomaly-detection", "anti-forensics-detection", "incident-response", "log2timeline", "plaso"]
|
||||
tool_name: "Timeline Analysis & Event Correlation"
|
||||
related_tools: ["Autopsy", "Volatility", "Wireshark", "SIFT Workstation"]
|
||||
published: true
|
||||
---
|
||||
|
||||
# Timeline-Analyse & Event-Korrelation: Methodische Rekonstruktion forensischer Ereignisse
|
||||
|
||||
Timeline-Analyse bildet das Rückgrat moderner forensischer Untersuchungen und ermöglicht die chronologische Rekonstruktion von Ereignissen aus heterogenen digitalen Artefakten. Diese methodische Herangehensweise korreliert zeitbasierte Evidenz für präzise Incident-Response und belastbare Beweisführung.
|
||||
|
||||
## Grundlagen der forensischen Timeline-Analyse
|
||||
|
||||
### Was ist Timeline-Analyse?
|
||||
|
||||
Timeline-Analyse ist die systematische Korrelation zeitbasierter Artefakte aus verschiedenen digitalen Quellen zur Rekonstruktion von Ereignissequenzen. Sie ermöglicht Forensikern, das "Was", "Wann", "Wo" und "Wie" von Sicherheitsvorfällen zu verstehen.
|
||||
|
||||
**Kernprinzipien:**
|
||||
- **Chronologische Ordnung**: Alle Ereignisse werden in temporaler Reihenfolge arrangiert
|
||||
- **Multi-Source-Integration**: Daten aus verschiedenen Systemen werden vereint
|
||||
- **Zeitstempel-Normalisierung**: UTC-Konvertierung für einheitliche Referenz
|
||||
- **Korrelationsbasierte Analyse**: Zusammenhänge zwischen scheinbar unabhängigen Events
|
||||
|
||||
### Typologie forensischer Zeitstempel
|
||||
|
||||
**MAC-Times (Modified, Accessed, Created)**
|
||||
```
|
||||
Filesystem-Timestamps:
|
||||
- $STANDARD_INFORMATION (SI) - NTFS-Metadaten
|
||||
- $FILE_NAME (FN) - Directory-Entry-Timestamps
|
||||
- Born Date - Erste Erstellung im Filesystem
|
||||
- $USNJrnl - Change Journal Entries
|
||||
```
|
||||
|
||||
**Registry-Timestamps**
|
||||
```
|
||||
Windows Registry:
|
||||
- Key Last Write Time - Letzte Modifikation
|
||||
- Value Creation Time - Wert-Erstellung
|
||||
- Hive Load Time - Registry-Hive-Mounting
|
||||
```
|
||||
|
||||
**Event-Log-Timestamps**
|
||||
```
|
||||
Windows Event Logs:
|
||||
- TimeCreated - Event-Generierung
|
||||
- TimeWritten - Log-Persistierung
|
||||
- CorrelationActivityID - Cross-System-Tracking
|
||||
```
|
||||
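Registry last-write times and event-log fields are typically stored as Windows FILETIME values, i.e. 100-nanosecond ticks since 1601-01-01. A small helper for the normalization step (the sample value is only an illustration):

```python
from datetime import datetime, timedelta, timezone

def filetime_to_utc(filetime):
    """Convert a Windows FILETIME (100-nanosecond ticks since 1601-01-01) to an aware UTC datetime."""
    return datetime(1601, 1, 1, tzinfo=timezone.utc) + timedelta(microseconds=filetime / 10)

# Example value only: roughly corresponds to January 2024
print(filetime_to_utc(133_497_696_000_000_000).isoformat())
```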
|
||||
## Super-Timeline-Erstellung: Methodisches Vorgehen
|
||||
|
||||
### Phase 1: Artefakt-Akquisition und Preprocessing
|
||||
|
||||
**Datenquellen-Inventar erstellen:**
|
||||
|
||||
```bash
|
||||
# Filesystem-Timeline mit fls
|
||||
fls -r -m / /mnt/evidence/image.dd > filesystem_timeline.body
|
||||
|
||||
# Registry-Timeline mit regtime
|
||||
regtime.py -r /mnt/evidence/registry/ > registry_timeline.csv
|
||||
|
||||
# Event-Log-Extraktion mit python-evtx
|
||||
evtx_dump.py Security.evtx > security_events.xml
|
||||
```
|
||||
|
||||
**Memory-Artefakte integrieren:**
|
||||
```bash
|
||||
# Volatility Timeline-Generierung
|
||||
vol.py -f memory.vmem --profile=Win10x64 timeliner > memory_timeline.csv
|
||||
|
||||
# Process-Timeline mit detailed Metadata
|
||||
vol.py -f memory.vmem --profile=Win10x64 pslist -v > process_details.txt
|
||||
```
|
||||
|
||||
### Phase 2: Zeitstempel-Normalisierung und UTC-Konvertierung
|
||||
|
||||
**Timezone-Handling:**
|
||||
```python
|
||||
# Python-Script für Timezone-Normalisierung
|
||||
import datetime
|
||||
import pytz
|
||||
|
||||
def normalize_timestamp(timestamp_str, source_timezone):
|
||||
"""
|
||||
Konvertiert lokale Timestamps zu UTC für einheitliche Timeline
|
||||
"""
|
||||
local_tz = pytz.timezone(source_timezone)
|
||||
dt = datetime.datetime.strptime(timestamp_str, '%Y-%m-%d %H:%M:%S')
|
||||
localized_dt = local_tz.localize(dt)
|
||||
utc_dt = localized_dt.astimezone(pytz.utc)
|
||||
return utc_dt.strftime('%Y-%m-%d %H:%M:%S UTC')
|
||||
```
|
||||
|
||||
**Anti-Timestomp-Detection:**
|
||||
```bash
|
||||
# Timestomp-Anomalien identifizieren
|
||||
analyzeMFT.py -f '$MFT' -o mft_analysis.csv  # quote so the shell does not expand $MFT
|
||||
# Suche nach: SI-Time < FN-Time (Timestomp-Indikator)
|
||||
```
|
||||
|
||||
### Phase 3: Log2timeline/PLASO Super-Timeline-Processing
|
||||
|
||||
**PLASO-basierte Timeline-Generierung:**
|
||||
```bash
|
||||
# Multi-Source-Timeline mit log2timeline
|
||||
log2timeline.py --storage-file evidence.plaso \
|
||||
--parsers "win7,chrome,firefox,skype" \
|
||||
--timezone "Europe/Berlin" \
|
||||
/mnt/evidence/
|
||||
|
||||
# CSV-Export für Analysis
|
||||
psort.py -w timeline_super.csv evidence.plaso
|
||||
```
|
||||
|
||||
**Advanced PLASO-Filtering:**
|
||||
```bash
|
||||
# Zeitfenster-spezifische Extraktion
|
||||
psort.py -w incident_window.csv evidence.plaso \
    "date > '2024-01-10 00:00:00' AND date < '2024-01-12 23:59:59'"
|
||||
|
||||
# Ereignis-spezifisches Filtering
|
||||
psort.py -w web_activity.csv evidence.plaso \
    "parser contains 'chrome'"
|
||||
```
|
||||
|
||||
## Advanced Correlation-Techniken
|
||||
|
||||
### Pivot-Point-Identifikation
|
||||
|
||||
**Initial Compromise Detection:**
|
||||
```sql
|
||||
-- SQL-basierte Timeline-Analyse (bei CSV-Import in DB)
|
||||
SELECT timestamp, source, event_type, description
|
||||
FROM timeline
|
||||
WHERE description LIKE '%powershell%'
|
||||
OR description LIKE '%cmd.exe%'
|
||||
OR description LIKE '%rundll32%'
|
||||
ORDER BY timestamp;
|
||||
```
|
||||
|
||||
**Lateral Movement Patterns:**
|
||||
```python
|
||||
# Python-Script für Lateral-Movement-Detection
|
||||
def detect_lateral_movement(timeline_data):
|
||||
"""
|
||||
Identifiziert suspicious Login-Patterns über Zeitfenster
|
||||
"""
|
||||
login_events = timeline_data[
|
||||
timeline_data['event_type'].str.contains('4624|4625', na=False)
|
||||
]
|
||||
|
||||
# Gruppierung nach Source-IP und Zeitfenster-Analyse
|
||||
    # 'timestamp' is assumed to already be datetime64; sort so diff() measures gaps between consecutive logins
    login_events = login_events.sort_values('timestamp')
    suspicious_logins = login_events.groupby(['source_ip']).apply(
        lambda x: len(x[x['timestamp'].diff().dt.total_seconds() < 300]) > 5
    )
|
||||
|
||||
return suspicious_logins[suspicious_logins == True]
|
||||
```
|
||||
|
||||
### Behavioral Pattern Recognition
|
||||
|
||||
**User Activity Profiling:**
|
||||
```bash
|
||||
# Regelmäßige Aktivitätsmuster extrahieren
|
||||
grep -E "(explorer\.exe|chrome\.exe|outlook\.exe)" timeline.csv | \
|
||||
awk -F',' '{print substr($1,1,10), $3}' | \
|
||||
sort | uniq -c | sort -nr
|
||||
```
|
||||
|
||||
**Anomalie-Detection durch Statistical Analysis:**
|
||||
```python
|
||||
import pandas as pd
|
||||
from scipy import stats
|
||||
|
||||
def detect_activity_anomalies(timeline_df):
|
||||
"""
|
||||
Identifiziert ungewöhnliche Aktivitätsmuster via Z-Score
|
||||
"""
|
||||
# Aktivität pro Stunde aggregieren
|
||||
timeline_df['hour'] = pd.to_datetime(timeline_df['timestamp']).dt.hour
|
||||
hourly_activity = timeline_df.groupby('hour').size()
|
||||
|
||||
# Z-Score Berechnung für Anomalie-Detection
|
||||
z_scores = stats.zscore(hourly_activity)
|
||||
anomalous_hours = hourly_activity[abs(z_scores) > 2]
|
||||
|
||||
return anomalous_hours
|
||||
```
|
||||
|
||||
## Network-Event-Korrelation
|
||||
|
||||
### Cross-System Timeline Correlation
|
||||
|
||||
**SIEM-Integration für Multi-Host-Korrelation:**
|
||||
```bash
|
||||
# Splunk-Query für korrelierte Events
|
||||
index=windows EventCode=4624 OR EventCode=4625 OR EventCode=4648
|
||||
| eval login_time=strftime(_time, "%Y-%m-%d %H:%M:%S")
|
||||
| stats values(EventCode) as event_codes by src_ip, login_time
|
||||
| where mvcount(event_codes) > 1
|
||||
```
|
||||
|
||||
**Network Flow Timeline Integration:**
|
||||
```python
|
||||
# Zeek/Bro-Logs mit Filesystem-Timeline korrelieren
|
||||
def correlate_network_filesystem(conn_logs, file_timeline):
|
||||
"""
|
||||
Korreliert Netzwerk-Connections mit File-Access-Patterns
|
||||
"""
|
||||
# Zeitfenster-basierte Korrelation (±30 Sekunden)
|
||||
correlations = []
|
||||
|
||||
for _, conn in conn_logs.iterrows():
|
||||
conn_time = pd.to_datetime(conn['ts'])
|
||||
time_window = pd.Timedelta(seconds=30)
|
||||
|
||||
related_files = file_timeline[
|
||||
(pd.to_datetime(file_timeline['timestamp']) >= conn_time - time_window) &
|
||||
(pd.to_datetime(file_timeline['timestamp']) <= conn_time + time_window)
|
||||
]
|
||||
|
||||
if not related_files.empty:
|
||||
correlations.append({
|
||||
'connection': conn,
|
||||
'related_files': related_files,
|
||||
'correlation_strength': len(related_files)
|
||||
})
|
||||
|
||||
return correlations
|
||||
```
|
||||
|
||||
## Anti-Forensik-Detection durch Timeline-Inkonsistenzen
|
||||
|
||||
### Timestamp Manipulation Detection
|
||||
|
||||
**Timestomp-Pattern-Analyse:**
|
||||
```bash
|
||||
# MFT-Analyse für Timestomp-Detection
|
||||
analyzeMFT.py -f \$MFT -o mft_full.csv
|
||||
|
||||
# Suspekte Timestamp-Patterns identifizieren
|
||||
python3 << EOF
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
mft_data = pd.read_csv('mft_full.csv')
|
||||
|
||||
# Pattern 1: SI-Time vor FN-Time (klassischer Timestomp)
|
||||
timestomp_candidates = mft_data[
|
||||
pd.to_datetime(mft_data['SI_Modified']) < pd.to_datetime(mft_data['FN_Modified'])
|
||||
]
|
||||
|
||||
# Pattern 2: Unrealistische Timestamps (z.B. 1980-01-01)
|
||||
epoch_anomalies = mft_data[
|
||||
pd.to_datetime(mft_data['SI_Created']).dt.year < 1990
|
||||
]
|
||||
|
||||
print(f"Potential Timestomp: {len(timestomp_candidates)} files")
|
||||
print(f"Epoch Anomalies: {len(epoch_anomalies)} files")
|
||||
EOF
|
||||
```
|
||||
|
||||
### Event Log Manipulation Detection
|
||||
|
||||
**Windows Event Log Gap Analysis:**
|
||||
```python
|
||||
def detect_log_gaps(event_log_df):
|
||||
"""
|
||||
Identifiziert verdächtige Lücken in Event-Log-Sequenzen
|
||||
"""
|
||||
# Event-Record-IDs sollten sequenziell sein
|
||||
event_log_df['RecordNumber'] = pd.to_numeric(event_log_df['RecordNumber'])
|
||||
event_log_df = event_log_df.sort_values('RecordNumber')
|
||||
|
||||
# Gaps in Record-Sequenz finden
|
||||
record_diffs = event_log_df['RecordNumber'].diff()
|
||||
large_gaps = record_diffs[record_diffs > 100] # Threshold anpassbar
|
||||
|
||||
return large_gaps
|
||||
```
|
||||
|
||||
## Automated Timeline Processing & ML-basierte Anomalie-Erkennung
|
||||
|
||||
### Machine Learning für Pattern Recognition
|
||||
|
||||
**Unsupervised Clustering für Event-Gruppierung:**
|
||||
```python
|
||||
from sklearn.cluster import DBSCAN
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
import pandas as pd
|
||||
|
||||
def cluster_timeline_events(timeline_df):
|
||||
"""
|
||||
Gruppiert ähnliche Events via DBSCAN-Clustering
|
||||
"""
|
||||
# TF-IDF für Event-Descriptions
|
||||
vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
|
||||
event_vectors = vectorizer.fit_transform(timeline_df['description'])
|
||||
|
||||
# DBSCAN-Clustering
|
||||
clustering = DBSCAN(eps=0.5, min_samples=5).fit(event_vectors.toarray())
|
||||
timeline_df['cluster'] = clustering.labels_
|
||||
|
||||
# Anomalie-Events (Cluster -1)
|
||||
anomalous_events = timeline_df[timeline_df['cluster'] == -1]
|
||||
|
||||
return timeline_df, anomalous_events
|
||||
```
|
||||
|
||||
**Time-Series-Anomalie-Detection:**
|
||||
```python
|
||||
from sklearn.ensemble import IsolationForest
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
def detect_temporal_anomalies(timeline_df):
|
||||
"""
|
||||
Isolation Forest für zeitbasierte Anomalie-Detection
|
||||
"""
|
||||
# Stündliche Aktivität aggregieren
|
||||
timeline_df['timestamp'] = pd.to_datetime(timeline_df['timestamp'])
|
||||
hourly_activity = timeline_df.groupby(
|
||||
timeline_df['timestamp'].dt.floor('H')
|
||||
).size().reset_index(name='event_count')
|
||||
|
||||
# Isolation Forest Training
|
||||
iso_forest = IsolationForest(contamination=0.1)
|
||||
anomaly_labels = iso_forest.fit_predict(
|
||||
hourly_activity[['event_count']]
|
||||
)
|
||||
|
||||
# Anomale Zeitfenster identifizieren
|
||||
hourly_activity['anomaly'] = anomaly_labels
|
||||
anomalous_periods = hourly_activity[hourly_activity['anomaly'] == -1]
|
||||
|
||||
return anomalous_periods
|
||||
```
|
||||
|
||||
## Enterprise-Scale Timeline Processing
|
||||
|
||||
### Distributed Processing für große Datasets
|
||||
|
||||
**Apache Spark für Big-Data-Timeline-Analyse:**
|
||||
```python
|
||||
from pyspark.sql import SparkSession
|
||||
from pyspark.sql.functions import *
|
||||
|
||||
def process_enterprise_timeline(spark_session, timeline_path):
|
||||
"""
|
||||
Spark-basierte Verarbeitung für TB-große Timeline-Daten
|
||||
"""
|
||||
# Timeline-Daten laden
|
||||
timeline_df = spark_session.read.csv(
|
||||
timeline_path,
|
||||
header=True,
|
||||
inferSchema=True
|
||||
)
|
||||
|
||||
# Zeitfenster-basierte Aggregation
|
||||
windowed_activity = timeline_df \
|
||||
.withColumn("timestamp", to_timestamp("timestamp")) \
|
||||
.withColumn("hour_window", window("timestamp", "1 hour")) \
|
||||
.groupBy("hour_window", "source_system") \
|
||||
.agg(
|
||||
count("*").alias("event_count"),
|
||||
countDistinct("user").alias("unique_users"),
|
||||
collect_set("event_type").alias("event_types")
|
||||
)
|
||||
|
||||
return windowed_activity
|
||||
```
|
||||
|
||||
### Cloud-Forensics Timeline Integration
|
||||
|
||||
**AWS CloudTrail Timeline Correlation:**
|
||||
```bash
|
||||
# CloudTrail-Events mit lokaler Timeline korrelieren
|
||||
aws logs filter-log-events \
|
||||
--log-group-name CloudTrail \
|
||||
--start-time 1642636800000 \
|
||||
--end-time 1642723200000 \
|
||||
--filter-pattern "{ $.eventName = \"AssumeRole\" }" \
|
||||
--output json > cloudtrail_events.json
|
||||
|
||||
# JSON zu CSV für Timeline-Integration
|
||||
jq -r '.events[] | [.eventTime, .sourceIPAddress, .eventName, .userIdentity.type] | @csv' \
|
||||
cloudtrail_events.json > cloudtrail_timeline.csv
|
||||
```
|
||||
|
||||
## Praktische Anwendungsszenarien
|
||||
|
||||
### Szenario 1: Advanced Persistent Threat (APT) Investigation
|
||||
|
||||
**Mehrstufige Timeline-Analyse:**
|
||||
|
||||
1. **Initial Compromise Detection:**
|
||||
```bash
|
||||
# Web-Browser-Downloads mit Malware-Signaturen korrelieren
|
||||
grep -E "(\.exe|\.zip|\.pdf)" browser_downloads.csv | \
|
||||
while read line; do
|
||||
timestamp=$(echo $line | cut -d',' -f1)
|
||||
filename=$(echo $line | cut -d',' -f3)
|
||||
|
||||
# Hash-Verification gegen IOC-Liste
|
||||
sha256=$(sha256sum "/mnt/evidence/$filename" 2>/dev/null | cut -d' ' -f1)
|
||||
grep -q "$sha256" ioc_hashes.txt && echo "IOC Match: $timestamp - $filename"
|
||||
done
|
||||
```
|
||||
|
||||
2. **Lateral Movement Tracking:**
|
||||
```sql
|
||||
-- Cross-System-Bewegung via RDP/SMB
|
||||
SELECT t1.timestamp, t1.source_ip, t2.timestamp, t2.dest_ip
|
||||
FROM network_timeline t1
|
||||
JOIN filesystem_timeline t2 ON
|
||||
t2.timestamp BETWEEN t1.timestamp AND t1.timestamp + INTERVAL 5 MINUTE
|
||||
WHERE t1.protocol = 'RDP' AND t2.activity_type = 'file_creation'
|
||||
ORDER BY t1.timestamp;
|
||||
```
|
||||
|
||||
### Szenario 2: Insider-Threat-Analyse
|
||||
|
||||
**Behavioral Baseline vs. Anomalie-Detection:**
|
||||
```python
|
||||
def analyze_insider_threat(user_timeline, baseline_days=30):
|
||||
"""
|
||||
Vergleicht User-Aktivität mit historischer Baseline
|
||||
"""
|
||||
# Baseline-Zeitraum definieren
|
||||
baseline_end = pd.to_datetime('2024-01-01')
|
||||
baseline_start = baseline_end - pd.Timedelta(days=baseline_days)
|
||||
|
||||
baseline_activity = user_timeline[
|
||||
(user_timeline['timestamp'] >= baseline_start) &
|
||||
(user_timeline['timestamp'] <= baseline_end)
|
||||
]
|
||||
|
||||
# Anomale Aktivitätsmuster
|
||||
analysis_period = user_timeline[
|
||||
user_timeline['timestamp'] > baseline_end
|
||||
]
|
||||
|
||||
# Metriken: Off-Hours-Activity, Data-Volume, Access-Patterns
|
||||
baseline_metrics = calculate_user_metrics(baseline_activity)
|
||||
current_metrics = calculate_user_metrics(analysis_period)
|
||||
|
||||
anomaly_score = compare_metrics(baseline_metrics, current_metrics)
|
||||
|
||||
return anomaly_score
|
||||
```
|
||||
|
||||
## Herausforderungen und Lösungsansätze
|
||||
|
||||
### Challenge 1: Timezone-Komplexität in Multi-Domain-Umgebungen
|
||||
|
||||
**Problem:** Inkonsistente Timezones zwischen Systemen führen zu falschen Korrelationen.
|
||||
|
||||
**Lösung:**
|
||||
```python
|
||||
def unified_timezone_conversion(timeline_entries):
|
||||
"""
|
||||
Intelligente Timezone-Detection und UTC-Normalisierung
|
||||
"""
|
||||
timezone_mapping = {
|
||||
'windows_local': 'Europe/Berlin',
|
||||
'unix_utc': 'UTC',
|
||||
'web_browser': 'client_timezone' # Aus Browser-Metadaten
|
||||
}
|
||||
|
||||
for entry in timeline_entries:
|
||||
source_tz = detect_timezone_from_source(entry['source'])
|
||||
entry['timestamp_utc'] = convert_to_utc(
|
||||
entry['timestamp'],
|
||||
timezone_mapping.get(source_tz, 'UTC')
|
||||
)
|
||||
|
||||
return timeline_entries
|
||||
```
|
||||
|
||||
### Challenge 2: Volume-Skalierung bei Enterprise-Investigations
|
||||
|
||||
**Problem:** TB-große Timeline-Daten überschreiten Memory-Kapazitäten.
|
||||
|
||||
**Lösung - Streaming-basierte Verarbeitung:**
|
||||
```python
|
||||
def stream_process_timeline(file_path, chunk_size=10000):
|
||||
"""
|
||||
Memory-effiziente Timeline-Processing via Chunks
|
||||
"""
|
||||
for chunk in pd.read_csv(file_path, chunksize=chunk_size):
|
||||
# Chunk-weise Verarbeitung
|
||||
processed_chunk = apply_timeline_analysis(chunk)
|
||||
|
||||
# Streaming-Output zu aggregated Results
|
||||
yield processed_chunk
|
||||
```
|
||||
|
||||
### Challenge 3: Anti-Forensik und Timeline-Manipulation
|
||||
|
||||
**Problem:** Adversaries manipulieren Timestamps zur Evidence-Destruction.
|
||||
|
||||
**Lösung - Multi-Source-Validation:**
|
||||
```bash
|
||||
# Cross-Reference-Validation zwischen verschiedenen Timestamp-Quellen
|
||||
python3 << EOF
|
||||
# $MFT vs. $UsnJrnl vs. Event-Logs vs. Registry
|
||||
def validate_timestamp_integrity(file_path):
|
||||
sources = {
|
||||
'mft_si': get_mft_si_time(file_path),
|
||||
'mft_fn': get_mft_fn_time(file_path),
|
||||
'usnjrnl': get_usnjrnl_time(file_path),
|
||||
'prefetch': get_prefetch_time(file_path),
|
||||
'eventlog': get_eventlog_time(file_path)
|
||||
}
|
||||
|
||||
# Timestamp-Inkonsistenzen identifizieren
|
||||
inconsistencies = detect_timestamp_discrepancies(sources)
|
||||
confidence_score = calculate_integrity_confidence(sources)
|
||||
|
||||
return inconsistencies, confidence_score
|
||||
EOF
|
||||
```
|
||||
|
||||
## Tool-Integration und Workflow-Optimierung
|
||||
|
||||
### Timeline-Tool-Ecosystem
|
||||
|
||||
**Core-Tools-Integration:**
|
||||
```bash
|
||||
#!/bin/bash
|
||||
# Comprehensive Timeline-Workflow-Automation
|
||||
|
||||
# 1. Multi-Source-Acquisition
|
||||
log2timeline.py --storage-file case.plaso \
|
||||
--parsers "win7,chrome,firefox,apache,nginx" \
|
||||
--hashers "sha256" \
|
||||
/mnt/evidence/
|
||||
|
||||
# 2. Memory-Timeline-Integration
|
||||
volatility -f memory.vmem --profile=Win10x64 timeliner \
|
||||
--output=csv --output-file=memory_timeline.csv
|
||||
|
||||
# 3. Network-Timeline-Addition
|
||||
zeek -r network.pcap Log::default_path=/tmp/zeek_logs/
|
||||
python3 zeek_to_timeline.py /tmp/zeek_logs/ > network_timeline.csv
|
||||
|
||||
# 4. Timeline-Merge und Analysis
|
||||
psort.py -w comprehensive_timeline.csv case.plaso
|
||||
python3 merge_timelines.py comprehensive_timeline.csv \
|
||||
memory_timeline.csv network_timeline.csv > unified_timeline.csv
|
||||
|
||||
# 5. Advanced-Analysis-Pipeline
|
||||
python3 timeline_analyzer.py unified_timeline.csv \
|
||||
--detect-anomalies --pivot-analysis --correlation-strength=0.7
|
||||
```
|
||||
|
||||
### Autopsy Timeline-Viewer Integration
|
||||
|
||||
**Autopsy-Import für Visual Timeline Analysis:**
|
||||
```python
|
||||
def export_autopsy_timeline(timeline_df, case_name):
|
||||
"""
|
||||
Konvertiert Timeline zu Autopsy-kompatiblem Format
|
||||
"""
|
||||
autopsy_format = timeline_df[['timestamp', 'source', 'event_type', 'description']].copy()
|
||||
autopsy_format['timestamp'] = pd.to_datetime(autopsy_format['timestamp']).astype(int) // 10**9
|
||||
|
||||
# Autopsy-CSV-Format
|
||||
autopsy_format.to_csv(f"{case_name}_autopsy_timeline.csv",
|
||||
columns=['timestamp', 'source', 'event_type', 'description'],
|
||||
index=False)
|
||||
```
|
||||
|
||||
## Fazit und Best Practices
|
||||
|
||||
Timeline-Analyse repräsentiert eine fundamentale Investigationstechnik, die bei korrekter Anwendung präzise Incident-Rekonstruktion ermöglicht. Die Kombination aus methodischer Multi-Source-Integration, Advanced-Correlation-Techniken und ML-basierter Anomalie-Detection bildet die Basis für moderne forensische Untersuchungen.
|
||||
|
||||
**Key Success Factors:**
|
||||
|
||||
1. **Systematic Approach**: Strukturierte Herangehensweise von Akquisition bis Analysis
|
||||
2. **Multi-Source-Validation**: Cross-Reference zwischen verschiedenen Artefakt-Typen
|
||||
3. **Timezone-Awareness**: Konsistente UTC-Normalisierung für akkurate Korrelation
|
||||
4. **Anti-Forensik-Resistenz**: Detection von Timestamp-Manipulation und Evidence-Destruction
|
||||
5. **Scalability-Design**: Enterprise-fähige Processing-Pipelines für Big-Data-Szenarien
|
||||
|
||||
Die kontinuierliche Weiterentwicklung von Adversary-Techniken erfordert adaptive Timeline-Methoden, die sowohl traditionelle Artefakte als auch moderne Cloud- und Container-Umgebungen erfassen. Die Integration von Machine Learning in Timeline-Workflows eröffnet neue Möglichkeiten für automatisierte Anomalie-Detection und Pattern-Recognition bei gleichzeitiger Reduktion des manuellen Aufwands.
|
||||
|
||||
**Nächste Schritte:**
|
||||
- Vertiefung spezifischer Tool-Implementierungen (Autopsy, SIFT, etc.)
|
||||
- Cloud-native Timeline-Techniken für AWS/Azure-Umgebungen
|
||||
- Advanced Correlation-Algorithmen für Zero-Day-Detection
|
||||
- Integration von Threat-Intelligence in Timeline-Workflows
|
||||
@@ -1,490 +0,0 @@
|
||||
---
|
||||
title: "Extraktion logischer Dateisysteme alter Android-Smartphones - eine KI-Recherche"
|
||||
tool_name: "Android Logical Imaging"
|
||||
description: "Wie man alte Android-Handys aufbekommen könnte - eine Recherche von Claude"
|
||||
last_updated: 2025-07-21
|
||||
author: "Claude 4 Sonnett (Prompt: Mario Stöckl)"
|
||||
difficulty: "advanced"
|
||||
categories: ["data-collection"]
|
||||
tags: ["imaging", "filesystem", "hardware-interface"]
|
||||
sections:
|
||||
overview: true
|
||||
installation: true
|
||||
configuration: true
|
||||
usage_examples: true
|
||||
best_practices: true
|
||||
troubleshooting: true
|
||||
advanced_topics: true
|
||||
review_status: "published"
|
||||
---
|
||||
|
||||
|
||||
# Übersicht
|
||||
|
||||
Open-Source Android Forensik bietet robuste Alternativen zu kommerziellen Lösungen wie Cellebrite UFED und Magnet AXIOM. Besonders für ältere Android-Geräte (5+ Jahre) existieren bewährte Methoden zur Datenextraktion und -analyse.
|
||||
|
||||
## Kernkomponenten des Open-Source Forensik-Stacks
|
||||
|
||||
**Autopsy Digital Forensics Platform** bildet das Fundament mit GUI-basierter Analyse und integrierten Android-Parsing-Fähigkeiten. Die Plattform unterstützt **ALEAPP (Android Logs Events And Protobuf Parser)**, das über 100 Artefakt-Kategorien aus Android-Extraktionen parst.
|
||||
|
||||
**Mobile Verification Toolkit (MVT)** von Amnesty International bietet spezialisierte Command-Line-Tools für Android-Analyse mit Fokus auf Kompromittierungserkennung.
|
||||
|
||||
**SIFT Workstation** stellt eine komplette Ubuntu-basierte forensische Umgebung mit 125+ vorinstallierten Tools bereit.
|
||||
|
||||
## Erfolgsraten nach Gerätealter
|
||||
|
||||
- **Pre-2017 Geräte**: 85-98% logische Extraktion, 30-70% physische Extraktion
|
||||
- **2017-2019 Geräte**: 80-95% logische Extraktion, 15-35% physische Extraktion
|
||||
- **2020+ Geräte**: 70-85% logische Extraktion, 5-15% physische Extraktion
|
||||
|
||||
# Installation
|
||||
|
||||
## SIFT Workstation Setup
|
||||
|
||||
### Systemanforderungen
|
||||
- Quad-Core CPU 2.5GHz+
|
||||
- 16GB+ RAM
|
||||
- 500GB+ SSD Speicher
|
||||
- USB 3.0+ Anschlüsse
|
||||
|
||||
### Installation
|
||||
1. Download von [SANS SIFT Workstation](https://www.sans.org/tools/sift-workstation/)
|
||||
2. VMware/VirtualBox Import der OVA-Datei
|
||||
3. VM-Konfiguration: 8GB+ RAM, 4+ CPU-Kerne
|
||||
|
||||
```bash
|
||||
# Update nach Installation
|
||||
sudo apt update && sudo apt upgrade -y
|
||||
sudo sift update
|
||||
```
|
||||
|
||||
## Autopsy Installation
|
||||
|
||||
### Windows Installation
|
||||
1. Download von [autopsy.com](https://www.autopsy.com/)
|
||||
2. Java 8+ Installation erforderlich
|
||||
3. Installation mit Administratorrechten
|
||||
|
||||
### Linux Installation
|
||||
```bash
|
||||
# Ubuntu/Debian
|
||||
sudo apt install autopsy sleuthkit
|
||||
# Oder manueller Download und Installation
|
||||
wget https://github.com/sleuthkit/autopsy/releases/latest
|
||||
```
|
||||
|
||||
## Essential Tools Installation
|
||||
|
||||
### Android Debug Bridge (ADB)
|
||||
```bash
|
||||
# Ubuntu/Debian
|
||||
sudo apt install android-tools-adb android-tools-fastboot
|
||||
|
||||
# Windows - Download Android Platform Tools
|
||||
# https://developer.android.com/studio/releases/platform-tools
|
||||
```
|
||||
|
||||
### ALEAPP Installation
|
||||
```bash
|
||||
git clone https://github.com/abrignoni/ALEAPP.git
|
||||
cd ALEAPP
|
||||
pip3 install -r requirements.txt
|
||||
```
|
||||
|
||||
### Mobile Verification Toolkit (MVT)
|
||||
```bash
|
||||
pip3 install mvt
|
||||
# Oder via GitHub für neueste Version
|
||||
git clone https://github.com/mvt-project/mvt.git
|
||||
cd mvt && pip3 install .
|
||||
```
|
||||
|
||||
### Andriller Installation
|
||||
```bash
|
||||
git clone https://github.com/den4uk/andriller.git
|
||||
cd andriller
|
||||
pip3 install -r requirements.txt
|
||||
```
|
||||
|
||||
# Konfiguration
|
||||
|
||||
## ADB Setup und Gerätevorbereitung
|
||||
|
||||
### USB-Debugging aktivieren
|
||||
1. Entwickleroptionen freischalten (7x Build-Nummer antippen)
|
||||
2. USB-Debugging aktivieren
|
||||
3. Gerät via USB verbinden
|
||||
4. RSA-Fingerprint akzeptieren
|
||||
|
||||
### ADB Verbindung testen
|
||||
```bash
|
||||
adb devices
|
||||
# Sollte Gerät mit "device" Status zeigen
|
||||
adb shell getprop ro.build.version.release # Android Version
|
||||
adb shell getprop ro.product.model # Gerätemodell
|
||||
```
|
||||
|
||||
## Autopsy Projektkonfiguration
|
||||
|
||||
### Case-Setup
|
||||
1. Neuen Fall erstellen
|
||||
2. Ermittler-Informationen eingeben
|
||||
3. Case-Verzeichnis festlegen (ausreichend Speicherplatz)
|
||||
|
||||
### Android Analyzer Module aktivieren
|
||||
- Tools → Options → Modules
|
||||
- Android Analyzer aktivieren
|
||||
- ALEAPP Integration konfigurieren
|
||||
|
||||
### Hash-Algorithmen konfigurieren
|
||||
- MD5, SHA-1, SHA-256 für Integritätsprüfung
|
||||
- Automatische Hash-Berechnung bei Import aktivieren
|
||||
|
||||
## MVT Konfiguration
|
||||
|
||||
### Konfigurationsdatei erstellen
|
||||
```yaml
|
||||
# ~/.mvt/config.yaml
|
||||
adb_path: "/usr/bin/adb"
|
||||
output_folder: "/home/user/mvt_output"
|
||||
```
|
||||
|
||||
# Verwendungsbeispiele
|
||||
|
||||
## Fall 1: Logische Datenextraktion mit ADB
|
||||
|
||||
### Geräteinformationen sammeln
|
||||
```bash
|
||||
# Systeminfo
|
||||
adb shell getprop > device_properties.txt
|
||||
adb shell cat /proc/version > kernel_info.txt
|
||||
adb shell mount > mount_info.txt
|
||||
|
||||
# Installierte Apps
|
||||
adb shell pm list packages -f > installed_packages.txt
|
||||
```
|
||||
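When several devices have to be triaged, wrapping these adb calls in a small script keeps the collected properties uniform and machine-readable. A sketch using `subprocess` (the output file name is an assumption):

```python
import json
import subprocess

def adb(*args):
    """Run an adb command and return its stdout as text."""
    return subprocess.run(["adb", *args], capture_output=True, text=True, check=True).stdout

device_info = {
    "android_version": adb("shell", "getprop", "ro.build.version.release").strip(),
    "model": adb("shell", "getprop", "ro.product.model").strip(),
    "serial": adb("get-serialno").strip(),
    "packages": adb("shell", "pm", "list", "packages", "-f").splitlines(),
}

with open("device_info.json", "w") as f:
    json.dump(device_info, f, indent=2)
```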
|
||||
### Datenbank-Extraktion
|
||||
```bash
|
||||
# SMS/MMS Datenbank
|
||||
adb pull /data/data/com.android.providers.telephony/databases/mmssms.db
|
||||
|
||||
# Kontakte
|
||||
adb pull /data/data/com.android.providers.contacts/databases/contacts2.db
|
||||
|
||||
# Anrufliste
|
||||
adb pull /data/data/com.android.providers.contacts/databases/calllog.db
|
||||
```
|
||||
|
||||
### WhatsApp Datenextraktion
|
||||
```bash
|
||||
# WhatsApp Datenbanken (Root erforderlich)
|
||||
adb shell su -c "cp -r /data/data/com.whatsapp/ /sdcard/whatsapp_backup/"
|
||||
adb pull /sdcard/whatsapp_backup/
|
||||
```
|
||||
|
||||
## Fall 2: Android Backup-Analyse
|
||||
|
||||
### Vollständiges Backup erstellen
|
||||
```bash
|
||||
# Umfassendes Backup (ohne Root)
|
||||
adb backup -all -system -apk -shared -f backup.ab
|
||||
|
||||
# Backup entschlüsseln (falls verschlüsselt)
|
||||
java -jar abe.jar unpack backup.ab backup.tar
|
||||
tar -xf backup.tar
|
||||
```
|
||||
|
||||
### Backup mit ALEAPP analysieren
|
||||
```bash
|
||||
python3 aleappGUI.py
|
||||
# Oder Command-Line
|
||||
python3 aleapp.py -t tar -i backup.tar -o output_folder
|
||||
```
|
||||
|
||||
## Fall 3: MVT Kompromittierungsanalyse
|
||||
|
||||
### Live-Geräteanalyse
|
||||
```bash
|
||||
# ADB-basierte Analyse
|
||||
mvt-android check-adb --output /path/to/output/
|
||||
|
||||
# Backup-Analyse
|
||||
mvt-android check-backup --output /path/to/output/ backup.ab
|
||||
```
|
||||
|
||||
### IOC-Suche mit Pegasus-Indikatoren
|
||||
```bash
|
||||
# Mit vorgefertigten IOCs
|
||||
mvt-android check-adb --iocs /path/to/pegasus.stix2 --output results/
|
||||
```
|
||||
|
||||
## Fall 4: Physische Extraktion (Root erforderlich)
|
||||
|
||||
### Device Rooting - MediaTek Geräte
|
||||
```bash
|
||||
# MTKClient für MediaTek-Chipsets
|
||||
git clone https://github.com/bkerler/mtkclient.git
|
||||
cd mtkclient
|
||||
python3 mtk payload
|
||||
|
||||
# Nach erfolgreichem Root
|
||||
adb shell su
|
||||
```
|
||||
|
||||
### Vollständiges Memory Dump
|
||||
```bash
|
||||
# Partitionslayout ermitteln
|
||||
adb shell su -c "cat /proc/partitions"
|
||||
adb shell su -c "ls -la /dev/block/"
|
||||
|
||||
# Vollständiges Device Image (Root erforderlich)
|
||||
adb shell su -c "dd if=/dev/block/mmcblk0 of=/sdcard/full_device.img bs=4096"
|
||||
adb pull /sdcard/full_device.img
|
||||
```
|
||||
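Writing the image to /sdcard consumes space on the evidence device and changes its flash; where feasible it is preferable to stream the block device straight to the examiner workstation and hash it on the fly. A sketch assuming a rooted device where `su` works over adb, using `adb exec-out` (the block device name varies by model):

```python
import hashlib
import subprocess

def pull_block_device(device_node="/dev/block/mmcblk0", out_path="full_device.img"):
    """Stream a block device from a rooted Android phone via adb exec-out and hash it on the fly,
    so the image never has to be staged on the evidence device itself."""
    sha256 = hashlib.sha256()
    cmd = ["adb", "exec-out", f"su -c 'dd if={device_node} bs=4096'"]
    with open(out_path, "wb") as out, subprocess.Popen(cmd, stdout=subprocess.PIPE) as proc:
        for chunk in iter(lambda: proc.stdout.read(1 << 20), b""):
            sha256.update(chunk)
            out.write(chunk)
    return sha256.hexdigest()

print("SHA-256:", pull_block_device())
```

The returned hash can be written straight into the chain-of-custody log.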
|
||||
# Best Practices
|
||||
|
||||
## Rechtliche Compliance
|
||||
|
||||
### Dokumentation und Chain of Custody
|
||||
- **Vollständige Dokumentation**: Wer, Was, Wann, Wo, Warum
|
||||
- **Hash-Verifikation**: MD5/SHA-256 für alle extrahierten Daten
|
||||
- **Nur forensische Kopien analysieren**, niemals Originaldaten
|
||||
- **Schriftliche Genehmigung** für Geräteanalyse einholen
|
||||
|
||||
### Familiengeräte und Nachlässe
|
||||
- Genehmigung durch Nachlassverwalter erforderlich
|
||||
- Gerichtsbeschlüsse für Cloud-Zugang eventuell nötig
|
||||
- Drittpartei-Kommunikation kann weiterhin geschützt sein
|
||||
|
||||
## Technische Best Practices
|
||||
|
||||
### Hash-Integrität sicherstellen
|
||||
```bash
|
||||
# Hash vor und nach Transfer prüfen
|
||||
md5sum original_file.db
|
||||
sha256sum original_file.db
|
||||
|
||||
# Hash-Verifikation dokumentieren
|
||||
echo "$(date): MD5: $(md5sum file.db)" >> chain_of_custody.log
|
||||
```
|
||||
|
||||
### Sichere Arbeitsumgebung
|
||||
- Isolierte VM für Forensik-Arbeit
|
||||
- Netzwerk-Isolation während Analyse
|
||||
- Verschlüsselte Speicherung aller Evidenz
|
||||
- Regelmäßige Backups der Case-Datenbanken
|
||||
|
||||
### Qualitätssicherung
|
||||
- Peer-Review kritischer Analysen
|
||||
- Standardisierte Arbeitsabläufe (SOPs)
|
||||
- Regelmäßige Tool-Validierung
|
||||
- Kontinuierliche Weiterbildung
|
||||
|
||||
## Erfolgsmaximierung nach Gerätehersteller
|
||||
|
||||
### MediaTek-Geräte (Höchste Erfolgsrate)
|
||||
- BootROM-Exploits für MT6735, MT6737, MT6750, MT6753, MT6797
|
||||
- MTKClient für Hardware-Level-Zugang
|
||||
- Erfolgsrate: 80%+ für Geräte 2015-2019
|
||||
|
||||
### Samsung-Geräte
|
||||
- Ältere Knox-Implementierungen umgehbar
|
||||
- Emergency Dialer Exploits für Android 4.x
|
||||
- Erfolgsrate: 40-70% je nach Knox-Version
|
||||
|
||||
### Pixel/Nexus-Geräte
|
||||
- Bootloader-Unlocking oft möglich
|
||||
- Fastboot-basierte Recovery-Installation
|
||||
- Erfolgsrate: 60-80% bei freigeschaltetem Bootloader
|
||||
|
||||
# Troubleshooting
|
||||
|
||||
## Problem: ADB erkennt Gerät nicht
|
||||
|
||||
### Lösung: USB-Treiber und Berechtigungen
|
||||
```bash
|
||||
# Linux: USB-Berechtigungen prüfen
|
||||
lsusb | grep -i android
|
||||
sudo chmod 666 /dev/bus/usb/XXX/XXX
|
||||
|
||||
# udev-Regeln erstellen
|
||||
echo 'SUBSYSTEM=="usb", ATTR{idVendor}=="18d1", MODE="0666", GROUP="plugdev"' | sudo tee /etc/udev/rules.d/51-android.rules
|
||||
sudo udevadm control --reload-rules
|
||||
```
|
||||
|
||||
### Windows: Treiber-Installation
|
||||
1. Geräte-Manager öffnen
|
||||
2. Android-Gerät mit Warnsymbol finden
|
||||
3. Treiber manuell installieren (Android USB Driver)
|
||||
|
||||
## Problem: Verschlüsselte Android Backups
|
||||
|
||||
### Lösung: Android Backup Extractor
|
||||
```bash
|
||||
# ADB Backup Extractor installieren
|
||||
git clone https://github.com/nelenkov/android-backup-extractor.git
|
||||
cd android-backup-extractor
|
||||
gradle build
|
||||
|
||||
# Backup entschlüsseln
|
||||
java -jar abe.jar unpack backup.ab backup.tar [password]
|
||||
```
|
||||
|
||||
## Problem: Unzureichende Berechtigungen für Datenextraktion
|
||||
|
||||
### Lösung: Alternative Extraktionsmethoden
|
||||
```bash
|
||||
# AFLogical OSE für begrenzte Extraktion ohne Root
|
||||
# WhatsApp Key/DB Extractor für spezifische Apps
|
||||
# Backup-basierte Extraktion als Fallback
|
||||
|
||||
# Custom Recovery für erweiterten Zugang
|
||||
fastboot flash recovery twrp-device.img
|
||||
```
|
||||
|
||||
## Problem: ALEAPP Parsing-Fehler
|
||||
|
||||
### Lösung: Datenformat-Probleme beheben
|
||||
```bash
|
||||
# Log-Dateien prüfen
|
||||
python3 aleapp.py -t dir -i /path/to/data -o output --debug
|
||||
|
||||
# Spezifische Parser deaktivieren
|
||||
# Manuelle SQLite-Analyse bei Parser-Fehlern
|
||||
sqlite3 database.db ".tables"
|
||||
sqlite3 database.db ".schema table_name"
|
||||
```
|
||||
|
||||
# Erweiterte Techniken
|
||||
|
||||
## Memory Forensics mit LiME
|
||||
|
||||
### LiME für ARM-Devices kompilieren
|
||||
```bash
|
||||
# Cross-Compilation Setup
|
||||
export ARCH=arm
|
||||
export CROSS_COMPILE=arm-linux-gnueabi-
|
||||
export KERNEL_DIR=/path/to/kernel/source
|
||||
|
||||
# LiME Module kompilieren
|
||||
git clone https://github.com/504ensicsLabs/LiME.git
|
||||
cd LiME/src
|
||||
make
|
||||
|
||||
# Memory Dump erstellen (Root erforderlich)
|
||||
adb push lime.ko /data/local/tmp/
|
||||
adb shell su -c "insmod /data/local/tmp/lime.ko 'path=/sdcard/memory.lime format=lime'"
|
||||
```
|
||||
|
||||
### Volatility-Analyse von Android Memory
|
||||
```bash
|
||||
# Memory Dump analysieren
|
||||
python3 vol.py -f memory.lime linux.pslist.PsList
python3 vol.py -f memory.lime linux.bash.Bash
python3 vol.py -f memory.lime linux.sockstat.Sockstat
|
||||
```
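
Volatility 3 benötigt für Android-Kernel in der Regel eine eigene Symboltabelle (ISF). Eine mögliche Skizze des üblichen Wegs, vorausgesetzt ein `vmlinux` mit Debug-Symbolen des Gerätekernels liegt vor (Pfade sind Platzhalter):

```bash
# dwarf2json erzeugt die ISF-Symboldatei aus einem Kernel mit Debug-Symbolen
git clone https://github.com/volatilityfoundation/dwarf2json.git
cd dwarf2json && go build
./dwarf2json linux --elf /path/to/vmlinux > android-kernel.json

# Symboldatei in das Volatility-3-Symbolverzeichnis legen
cp android-kernel.json /path/to/volatility3/volatility3/symbols/linux/
```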
|
||||
|
||||
## FRIDA-basierte Runtime-Analyse
|
||||
|
||||
### FRIDA für Kryptographie-Hooks
|
||||
```javascript
|
||||
// crypto_hooks.js - SSL/TLS Traffic abfangen
|
||||
Java.perform(function() {
|
||||
var SSLContext = Java.use("javax.net.ssl.SSLContext");
|
||||
SSLContext.init.overload('[Ljavax.net.ssl.KeyManager;', '[Ljavax.net.ssl.TrustManager;', 'java.security.SecureRandom').implementation = function(keyManagers, trustManagers, secureRandom) {
|
||||
console.log("[+] SSLContext.init() called");
|
||||
this.init(keyManagers, trustManagers, secureRandom);
|
||||
};
|
||||
});
|
||||
```
|
||||
|
||||
### FRIDA Installation und Verwendung
|
||||
```bash
|
||||
# FRIDA Server auf Android-Gerät installieren
|
||||
adb push frida-server /data/local/tmp/
|
||||
adb shell su -c "chmod 755 /data/local/tmp/frida-server"
|
||||
adb shell su -c "/data/local/tmp/frida-server &"
|
||||
|
||||
# Script ausführen
|
||||
frida -U -l crypto_hooks.js com.target.package
|
||||
```
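
Den genauen Paketnamen und laufende Prozesse liefert FRIDA selbst; `frida-trace` eignet sich für einen schnellen Überblick über aufgerufene Funktionen (Paketname hier nur beispielhaft):

```bash
# Installierte Apps und laufende Prozesse auflisten
frida-ps -Uai

# Schneller Überblick: alle open()-Aufrufe einer Ziel-App tracen
frida-trace -U -i "open" -f com.target.package
```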
|
||||
|
||||
## Custom Recovery und Fastboot-Exploits
|
||||
|
||||
### TWRP Installation für forensischen Zugang
|
||||
```bash
|
||||
# Bootloader entsperren (Herstellerabhängig)
|
||||
fastboot oem unlock
|
||||
# Oder
|
||||
fastboot flashing unlock
|
||||
|
||||
# TWRP flashen
|
||||
fastboot flash recovery twrp-device.img
|
||||
fastboot boot twrp-device.img # Temporäre Installation
|
||||
|
||||
# In TWRP: ADB-Zugang mit Root-Berechtigungen
|
||||
adb shell mount /system
|
||||
adb shell mount /data
|
||||
```
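
Im TWRP-Recovery stehen ADB-Root-Rechte zur Verfügung, sodass App-Daten nach dem Mounten direkt gesichert werden können (Paketname beispielhaft; das Streamen per tar setzt voraus, dass tar im Recovery verfügbar ist):

```bash
# App-Daten aus dem Recovery heraus sichern
adb pull /data/data/com.target.package ./evidence/com.target.package/

# Alternativ: komplettes /data als Archiv über adb exec-out streamen
adb exec-out "tar -c -C /data ." > data.tar
```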
|
||||
|
||||
### Partitions-Imaging mit dd
|
||||
```bash
|
||||
# Vollständige Partition-Liste
|
||||
adb shell cat /proc/partitions
|
||||
|
||||
# Kritische Partitionen extrahieren
|
||||
adb shell dd if=/dev/block/bootdevice/by-name/system of=/external_sd/system.img
|
||||
adb shell dd if=/dev/block/bootdevice/by-name/userdata of=/external_sd/userdata.img
|
||||
adb shell dd if=/dev/block/bootdevice/by-name/boot of=/external_sd/boot.img
|
||||
```
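
Statt auf eine SD-Karte zu schreiben, können Partitionen auch direkt zum Analyse-Rechner gestreamt werden; anschließend sollten Hashes für die Beweismittelkette dokumentiert werden (Blockgerätepfade sind geräteabhängig):

```bash
# Partition ohne Zwischenspeicherung auf dem Gerät direkt streamen (Root erforderlich)
adb exec-out "su -c 'dd if=/dev/block/bootdevice/by-name/userdata bs=4M'" > userdata.img

# Integrität dokumentieren
sha256sum userdata.img system.img boot.img > hashes.sha256
```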
|
||||
|
||||
## SQLite Forensics und gelöschte Daten
|
||||
|
||||
### Erweiterte SQLite-Analyse
|
||||
```bash
|
||||
# Freelist-Analyse für gelöschte Einträge
|
||||
sqlite3 database.db "PRAGMA freelist_count;"
|
||||
sqlite3 database.db "PRAGMA page_size;"
|
||||
|
||||
# WAL-Datei Analyse
|
||||
sqlite3 database.db "PRAGMA wal_checkpoint;"
|
||||
strings database.db-wal | grep -i "search_term"
|
||||
|
||||
# Undark für Deleted Record Recovery
|
||||
undark database.db --freelist --export-csv
|
||||
```
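
Neuere sqlite3-Versionen (ab ca. 3.29) bringen zudem den `.recover`-Befehl mit, der auch Inhalte aus Freelist-Seiten in eine neue Datenbank übernimmt:

```bash
# Gelöschte/verwaiste Inhalte in eine neue Datenbank überführen
sqlite3 database.db ".recover" | sqlite3 recovered.db

# Ergebnis sichten
sqlite3 recovered.db ".tables"
```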
|
||||
|
||||
### Timeline-Rekonstruktion
|
||||
```bash
|
||||
# Autopsy Timeline-Generierung
|
||||
# Tools → Generate Timeline
|
||||
# Analyse von MAC-Times (Modified, Accessed, Created)
|
||||
|
||||
# Plaso Timeline-Tools
|
||||
log2timeline.py timeline.plaso /path/to/android/data/
|
||||
psort.py -o dynamic timeline.plaso
|
||||
```
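
Für die Weiterverarbeitung lässt sich die Super-Timeline auch als CSV exportieren und zeitlich eingrenzen; die Filter-Syntax kann je nach Plaso-Version abweichen, Datumswerte sind beispielhaft:

```bash
# Timeline als CSV exportieren
psort.py -o l2tcsv -w timeline.csv timeline.plaso

# Nur einen Zeitraum betrachten (Filter-Syntax versionsabhängig)
psort.py -o dynamic timeline.plaso "date > '2024-01-01 00:00:00' AND date < '2024-02-01 00:00:00'"
```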
|
||||
|
||||
## Weiterführende Ressourcen
|
||||
|
||||
### Dokumentation und Standards
|
||||
- [NIST SP 800-101 Rev. 1 - Mobile Device Forensics Guidelines](https://csrc.nist.gov/pubs/sp/800/101/r1/final)
|
||||
- [SANS FOR585 - Smartphone Forensics](https://www.sans.org/cyber-security-courses/advanced-smartphone-mobile-device-forensics/)
|
||||
- [ALEAPP GitHub Repository](https://github.com/abrignoni/ALEAPP)
|
||||
- [MVT Documentation](https://docs.mvt.re/en/latest/)
|
||||
|
||||
### Community und Weiterbildung
|
||||
- [Autopsy User Documentation](https://sleuthkit.org/autopsy/docs/)
|
||||
- [Android Forensics References](https://github.com/impillar/AndroidReferences/blob/master/AndroidTools.md)
|
||||
- [Digital Forensics Framework Collection](https://github.com/mesquidar/ForensicsTools)
|
||||
|
||||
### Spezialisierte Tools
|
||||
- [MTKClient für MediaTek Exploits](https://github.com/bkerler/mtkclient)
|
||||
- [Android Forensics Framework](https://github.com/nowsecure/android-forensics)
|
||||
- [Santoku Linux Mobile Forensics Distribution](https://santoku-linux.com/)
|
||||
|
||||
---
|
||||
|
||||
**Wichtiger Hinweis**: Diese Anleitung dient ausschließlich autorisierten forensischen Untersuchungen. Stellen Sie sicher, dass Sie über entsprechende rechtliche Befugnisse verfügen, bevor Sie diese Techniken anwenden. Konsultieren Sie im Zweifel eine Rechtsberatung.
|
||||
@@ -1,141 +0,0 @@
|
||||
---
|
||||
title: "Kali Linux - Die Hacker-Distribution für Forensik & Penetration Testing"
|
||||
tool_name: "Kali Linux"
|
||||
description: "Leitfaden zur Installation, Nutzung und Best Practices für Kali Linux – die All-in-One-Plattform für Security-Profis."
|
||||
last_updated: 2025-08-10
|
||||
author: "Claude 4 Sonnett (Prompt: Mario Stöckl)"
|
||||
difficulty: "intermediate"
|
||||
categories: ["incident-response", "forensics", "penetration-testing"]
|
||||
tags: ["live-boot", "tool-collection", "penetration-testing", "forensics-suite", "virtualization", "arm-support"]
|
||||
sections:
|
||||
overview: true
|
||||
installation: true
|
||||
configuration: true
|
||||
usage_examples: true
|
||||
best_practices: true
|
||||
troubleshooting: true
|
||||
advanced_topics: true
|
||||
review_status: "published"
|
||||
---
|
||||
|
||||
> **⚠️ Hinweis**: Dies ist ein vorläufiger, KI-generierter Knowledgebase-Eintrag. Wir freuen uns über Verbesserungen und Ergänzungen durch die Community!
|
||||
|
||||
|
||||
# Übersicht
|
||||
|
||||
Kali Linux ist eine auf Debian basierende Linux-Distribution, die speziell für Penetration Testing, digitale Forensik, Reverse Engineering und Incident Response entwickelt wurde. Mit über 600 vorinstallierten Tools ist sie ein unverzichtbares Werkzeug für Security-Experten, Ermittler und forensische Analysten. Die Live-Boot-Funktion erlaubt es, Systeme zu analysieren, ohne Spuren zu hinterlassen – ideal für forensische Untersuchungen.
|
||||
|
||||
## Installation
|
||||
|
||||
### Option 1: Live-System (USB/DVD)
|
||||
|
||||
1. ISO-Image von [kali.org](https://www.kali.org/get-kali/) herunterladen.
|
||||
2. Mit **Rufus** oder **balenaEtcher** auf einen USB-Stick schreiben.
|
||||
3. Vom USB-Stick booten (ggf. Boot-Reihenfolge im BIOS anpassen).
|
||||
4. Kali kann direkt ohne Installation im Live-Modus verwendet werden.
|
||||
|
||||
### Option 2: Installation auf Festplatte
|
||||
|
||||
1. ISO-Image booten und **Graphical Install** wählen.
|
||||
2. Schritt-für-Schritt durch den Installationsassistenten navigieren:
|
||||
- Sprache, Zeitzone und Tastaturlayout auswählen
|
||||
- Partitionierung konfigurieren (automatisch oder manuell)
|
||||
- Benutzerkonten erstellen
|
||||
3. Nach Installation Neustart durchführen.
|
||||
|
||||
### Option 3: Virtuelle Maschine (VM)
|
||||
|
||||
- Offizielle VM-Images für VirtualBox und VMware von der [Kali-Website](https://www.kali.org/get-kali/#kali-virtual-machines)
|
||||
- Importieren, ggf. Netzwerkbrücke und Shared Folders aktivieren
|
||||
|
||||
## Konfiguration
|
||||
|
||||
### Netzwerkeinstellungen
|
||||
|
||||
- Konfiguration über `nmtui` oder `/etc/network/interfaces`
|
||||
- VPN und Proxy-Integration über GUI oder Terminal
|
||||
|
||||
### Updates & Paketquellen
|
||||
|
||||
```bash
|
||||
sudo apt update && sudo apt full-upgrade
|
||||
```
|
||||
|
||||
> Hinweis: `kali-rolling` ist die Standard-Distribution für kontinuierliche Updates.
|
||||
|
||||
### Sprache & Lokalisierung
|
||||
|
||||
```bash
|
||||
sudo dpkg-reconfigure locales
|
||||
sudo dpkg-reconfigure keyboard-configuration
|
||||
```
|
||||
|
||||
## Verwendungsbeispiele
|
||||
|
||||
### 1. Netzwerkscan mit Nmap
|
||||
|
||||
```bash
|
||||
nmap -sS -T4 -A 192.168.1.0/24
|
||||
```
|
||||
|
||||
### 2. Passwort-Cracking mit John the Ripper
|
||||
|
||||
```bash
|
||||
john --wordlist=/usr/share/wordlists/rockyou.txt hashes.txt
|
||||
```
|
||||
|
||||
### 3. Forensik mit Autopsy
|
||||
|
||||
```bash
|
||||
autopsy &
|
||||
```
|
||||
|
||||
### 4. Android-Analyse mit MobSF (in Docker)
|
||||
|
||||
```bash
|
||||
docker pull opensecurity/mobile-security-framework-mobsf
|
||||
docker run -it --rm -p 8000:8000 opensecurity/mobile-security-framework-mobsf:latest
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
* Nutze immer **aktuelle Snapshots** oder VM-Clones vor gefährlichen Tests
|
||||
* Verwende separate Netzwerke (z. B. Host-only oder NAT) für Tests
|
||||
* Deaktiviere automatisches WLAN bei forensischen Analysen
|
||||
* Prüfe und aktualisiere regelmäßig Toolsets (`apt`, `git`, `pip`)
|
||||
* Halte deine ISO-Images versioniert für forensische Reproduzierbarkeit
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Problem: Keine Internetverbindung nach Installation
|
||||
|
||||
**Lösung:** Netzwerkadapter prüfen (z. B. mit `ip a` oder `ifconfig`), ggf. DHCP aktivieren.
|
||||
|
||||
### Problem: Tools fehlen nach Update
|
||||
|
||||
**Lösung:** Tool-Gruppen wie `kali-linux-default` manuell nachinstallieren:
|
||||
|
||||
```bash
|
||||
sudo apt install kali-linux-default
|
||||
```
|
||||
|
||||
### Problem: „Permission Denied“ bei Tools
|
||||
|
||||
**Lösung:** Root-Rechte nutzen oder mit `sudo` ausführen.
|
||||
|
||||
## Weiterführende Themen
|
||||
|
||||
* **Kustomisierung von Kali ISOs** mit `live-build`
|
||||
* **NetHunter**: Kali für mobile Geräte (Android)
|
||||
* **Kali Purple**: Defensive Security Suite
|
||||
* Integration mit **Cloud-Infrastrukturen** via WSL oder Azure
|
||||
|
||||
---
|
||||
|
||||
**Links & Ressourcen:**
|
||||
|
||||
* Offizielle Website: [https://kali.org](https://kali.org/)
|
||||
* Dokumentation: [https://docs.kali.org/](https://docs.kali.org/)
|
||||
* GitLab Repo: [https://gitlab.com/kalilinux](https://gitlab.com/kalilinux)
|
||||
* Discord-Community: [https://discord.com/invite/kali-linux](https://discord.com/invite/kali-linux)
|
||||
|
||||
@@ -1,133 +0,0 @@
|
||||
---
|
||||
title: "MISP - Plattform für Threat Intelligence Sharing"
|
||||
tool_name: "MISP"
|
||||
description: "Das Rückgrat des modernen Threat-Intelligence-Sharings mit über 40.000 aktiven Instanzen weltweit."
|
||||
last_updated: 2025-07-20
|
||||
author: "Claude 4 Sonnett (Prompt: Mario Stöckl)"
|
||||
difficulty: "intermediate"
|
||||
categories: ["incident-response", "static-investigations", "malware-analysis", "network-forensics", "cloud-forensics"]
|
||||
tags: ["web-based", "threat-intelligence", "api", "correlation", "ioc-sharing", "automation"]
|
||||
sections:
|
||||
overview: true
|
||||
installation: true
|
||||
configuration: true
|
||||
usage_examples: true
|
||||
best_practices: true
|
||||
troubleshooting: true
|
||||
advanced_topics: false
|
||||
review_status: "published"
|
||||
---
|
||||
|
||||
> **⚠️ Hinweis**: Dies ist ein vorläufiger, KI-generierter Knowledgebase-Eintrag. Wir freuen uns über Verbesserungen und Ergänzungen durch die Community!
|
||||
|
||||
|
||||
# Übersicht
|
||||
|
||||
**MISP (Malware Information Sharing Platform & Threat Sharing)** ist eine freie Open-Source-Plattform zur strukturierten Erfassung, Speicherung, Analyse und gemeinsamen Nutzung von Cyber-Bedrohungsdaten. Mit über 40.000 Instanzen weltweit ist MISP der De-facto-Standard für den Austausch von Indicators of Compromise (IoCs) und Threat Intelligence zwischen CERTs, SOCs, Strafverfolgungsbehörden und anderen sicherheitsrelevanten Organisationen.
|
||||
|
||||
Die föderierte Architektur ermöglicht einen kontrollierten, dezentralen Austausch von Informationen über vertrauenswürdige Partner hinweg. Durch Taxonomien, Tags und integrierte APIs ist eine automatische Anreicherung, Korrelation und Verarbeitung von Informationen in SIEMs, Firewalls oder Endpoint-Lösungen möglich.
|
||||
|
||||
## Installation
|
||||
|
||||
### Voraussetzungen
|
||||
|
||||
- **Server-Betriebssystem:** Linux (empfohlen: Debian/Ubuntu)
|
||||
- **Abhängigkeiten:** MariaDB/MySQL, PHP, Apache/Nginx, Redis
|
||||
- **Ressourcen:** Mindestens 4 GB RAM, SSD empfohlen
|
||||
|
||||
### Installationsschritte
|
||||
|
||||
```bash
|
||||
# Beispiel für Debian/Ubuntu:
|
||||
sudo apt update && sudo apt install -y curl gnupg git python3 python3-pip redis-server mariadb-server apache2 php libapache2-mod-php
|
||||
|
||||
# MISP klonen
|
||||
git clone https://github.com/MISP/MISP.git /var/www/MISP
|
||||
|
||||
# Setup-Skript nutzen
|
||||
cd /var/www/MISP && bash INSTALL/INSTALL.debian.sh
|
||||
```
|
||||
|
||||
Weitere Details: [Offizielle Installationsanleitung](https://misp.github.io/MISP/INSTALL.debian/)
|
||||
|
||||
## Konfiguration
|
||||
|
||||
### Webserver
|
||||
|
||||
* HTTPS aktivieren (Let's Encrypt oder Reverse Proxy)
|
||||
* PHP-Konfiguration anpassen (`upload_max_filesize`, `memory_limit`, `post_max_size`)
|
||||
|
||||
### Benutzerrollen
|
||||
|
||||
* Administrator, Org-Admin, Analyst etc.
|
||||
* Zugriffsbeschränkungen nach Organisation/Feed definierbar
|
||||
|
||||
### Feeds und Galaxies
|
||||
|
||||
* Aktivierung von Feeds (z. B. CIRCL, Abuse.ch, OpenCTI)
|
||||
* Nutzung von Galaxies zur Klassifizierung (APT-Gruppen, Malware-Familien)
|
||||
|
||||
## Verwendungsbeispiele
|
||||
|
||||
### Beispiel 1: Import von IoCs aus externem Feed
|
||||
|
||||
1. Feed aktivieren unter **Administration → List Feeds**
|
||||
2. Feed synchronisieren
|
||||
3. Ereignisse durchsuchen, analysieren, ggf. mit eigenen Daten korrelieren
|
||||
|
||||
### Beispiel 2: Automatisierte Anbindung an SIEM
|
||||
|
||||
* REST-API-Token erstellen
|
||||
* API-Calls zur Abfrage neuer Events (z. B. mit Python, Logstash oder MISP Workbench)
|
||||
* Integration in Security-Systeme über JSON/STIX export
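
Eine minimale Skizze eines solchen API-Aufrufs; URL und Auth-Key sind Platzhalter, der Endpunkt `/attributes/restSearch` ist Teil der MISP-REST-API:

```bash
# Alle IP-Destination-Attribute der letzten 24 Stunden als JSON abfragen
curl -s -X POST "https://misp.example.org/attributes/restSearch" \
  -H "Authorization: <API-KEY>" \
  -H "Accept: application/json" \
  -H "Content-Type: application/json" \
  -d '{"type": "ip-dst", "last": "1d"}'
```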
|
||||
|
||||
## Best Practices
|
||||
|
||||
* Regelmäßige Backups der Datenbank
|
||||
* Taxonomien konsistent verwenden
|
||||
* Nutzung der Sighting-Funktion zur Validierung von IoCs
|
||||
* Vertrauensstufen (TLP, PAP) korrekt setzen
|
||||
* Nicht nur konsumieren – auch teilen!
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Problem: MISP-Feeds laden nicht
|
||||
|
||||
**Lösung:**
|
||||
|
||||
* Internetverbindung prüfen
|
||||
* Cronjobs aktiv?
|
||||
* Logs prüfen: `/var/www/MISP/app/tmp/logs/error.log`
|
||||
|
||||
### Problem: API gibt 403 zurück
|
||||
|
||||
**Lösung:**
|
||||
|
||||
* Ist der API-Key korrekt und aktiv?
|
||||
* Rechte des Benutzers überprüfen
|
||||
* IP-Filter im MISP-Backend beachten
|
||||
|
||||
### Problem: Hohe Datenbanklast
|
||||
|
||||
**Lösung:**
|
||||
|
||||
* Indizes optimieren
|
||||
* Redis aktivieren
|
||||
* Alte Events regelmäßig archivieren oder löschen
|
||||
|
||||
## Weiterführende Themen
|
||||
|
||||
* STIX2-Import/Export
|
||||
* Erweiterungen mit MISP Modules (z. B. für Virustotal, YARA)
|
||||
* Föderierte Netzwerke und Community-Portale
|
||||
* Integration mit OpenCTI oder TheHive
|
||||
|
||||
---
|
||||
|
||||
**Links:**
|
||||
|
||||
* 🌐 [Offizielle Projektseite](https://misp-project.org/)
|
||||
* 📦 [CC24-MISP-Instanz](https://misp.cc24.dev)
|
||||
* 📊 [Status-Monitoring](https://status.mikoshi.de/api/badge/34/status)
|
||||
|
||||
Lizenz: **AGPL-3.0**
|
||||
@@ -1,124 +0,0 @@
|
||||
---
|
||||
title: "Nextcloud - Sichere Kollaborationsplattform"
|
||||
tool_name: "Nextcloud"
|
||||
description: "Detaillierte Anleitung und Best Practices für Nextcloud in forensischen Einsatzszenarien"
|
||||
last_updated: 2025-07-20
|
||||
author: "Claude 4 Sonnett (Prompt: Mario Stöckl)"
|
||||
difficulty: "novice"
|
||||
categories: ["collaboration-general"]
|
||||
tags: ["web-based", "collaboration", "file-sharing", "api", "encryption", "document-management"]
|
||||
sections:
|
||||
overview: true
|
||||
installation: true
|
||||
configuration: true
|
||||
usage_examples: true
|
||||
best_practices: true
|
||||
troubleshooting: true
|
||||
advanced_topics: false
|
||||
review_status: "published"
|
||||
---
|
||||
|
||||
> **⚠️ Hinweis**: Dies ist ein vorläufiger, KI-generierter Knowledgebase-Eintrag. Wir freuen uns über Verbesserungen und Ergänzungen durch die Community!
|
||||
|
||||
|
||||
# Übersicht
|
||||
|
||||
Nextcloud ist eine Open-Source-Cloud-Suite, die speziell für die sichere Zusammenarbeit entwickelt wurde. Sie eignet sich ideal für forensische Teams, da sie eine DSGVO-konforme Umgebung mit verschlüsselter Dateiablage, Office-Integration und Videokonferenzen bereitstellt. Zusätzlich bietet Nextcloud einen integrierten SSO-Provider, der das Identitätsmanagement für andere forensische Tools stark vereinfacht.
|
||||
|
||||
Skalierbar von kleinen Raspberry-Pi-Installationen bis hin zu hochverfügbaren Multi-Node-Setups.
|
||||
|
||||
- **Website:** [nextcloud.com](https://nextcloud.com/)
|
||||
- **Demo/Projektinstanz:** [cloud.cc24.dev](https://cloud.cc24.dev)
|
||||
- **Statusseite:** [Mikoshi Status](https://status.mikoshi.de/api/badge/11/status)
|
||||
- **Lizenz:** AGPL-3.0
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
### Voraussetzungen
|
||||
|
||||
- Linux-Server oder Raspberry Pi
|
||||
- PHP 8.1 oder höher
|
||||
- MariaDB/PostgreSQL
|
||||
- Webserver (Apache/Nginx)
|
||||
- SSL-Zertifikat (empfohlen: Let's Encrypt)
|
||||
|
||||
### Installationsschritte (Ubuntu Beispiel)
|
||||
|
||||
```bash
|
||||
sudo apt update && sudo apt upgrade
|
||||
sudo apt install apache2 mariadb-server libapache2-mod-php php php-mysql \
|
||||
php-gd php-xml php-mbstring php-curl php-zip php-intl php-bcmath unzip
|
||||
|
||||
wget https://download.nextcloud.com/server/releases/latest.zip
|
||||
unzip latest.zip -d /var/www/
|
||||
chown -R www-data:www-data /var/www/nextcloud
|
||||
```
|
||||
|
||||
Danach den Web-Installer im Browser aufrufen (`https://<your-domain>/nextcloud`) und Setup abschließen.
|
||||
|
||||
## Konfiguration
|
||||
|
||||
* **Trusted Domains** in `config.php` definieren
|
||||
* SSO mit OpenID Connect aktivieren
|
||||
* Dateiverschlüsselung aktivieren (`Settings → Security`)
|
||||
* Benutzer und Gruppen über LDAP oder SAML integrieren
|
||||
|
||||
## Verwendungsbeispiele
|
||||
|
||||
### Gemeinsame Fallbearbeitung
|
||||
|
||||
1. Ermittlungsordner als geteiltes Gruppenverzeichnis anlegen
|
||||
2. Versionierung und Kommentare zu forensischen Berichten aktivieren
|
||||
3. Vorschau für Office-Dateien, PDFs und Bilder direkt im Browser nutzen
|
||||
|
||||
### Videokonferenzen mit "Nextcloud Talk"
|
||||
|
||||
* Sichere Kommunikation zwischen Ermittlern und Sachverständigen
|
||||
* Ende-zu-Ende-verschlüsselt
|
||||
* Bildschirmfreigabe möglich
|
||||
|
||||
### Automatischer Dateiimport per API
|
||||
|
||||
* REST-Schnittstelle nutzen, um z. B. automatisch Logdateien oder Exportdaten hochzuladen
|
||||
* Ideal für Anbindung an SIEM, DLP oder Analyse-Pipelines
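
Eine einfache Variante des automatischen Uploads nutzt die WebDAV-Schnittstelle; Benutzername, App-Passwort und Zielpfad sind hier Platzhalter:

```bash
# Logdatei per WebDAV in einen Fallordner hochladen
curl -u ermittler:app-passwort -T evidence.log \
  "https://cloud.cc24.dev/remote.php/dav/files/ermittler/Cases/Fall-2024-042/evidence.log"
```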
|
||||
|
||||
## Best Practices
|
||||
|
||||
* Zwei-Faktor-Authentifizierung aktivieren
|
||||
* Tägliche Backups der Datenbank und Datenstruktur
|
||||
* Nutzung von OnlyOffice oder Collabora für revisionssichere Dokumentenbearbeitung
|
||||
* Zugriff regelmäßig überprüfen, insbesondere bei externen Partnern
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Problem: Langsame Performance
|
||||
|
||||
**Lösung:** APCu aktivieren und Caching optimieren (`config.php → 'memcache.local'`).
|
||||
|
||||
### Problem: Dateien erscheinen nicht im Sync
|
||||
|
||||
**Lösung:** Cronjob für `files:scan` konfigurieren oder manuell ausführen:
|
||||
|
||||
```bash
|
||||
sudo -u www-data php /var/www/nextcloud/occ files:scan --all
|
||||
```
|
||||
|
||||
### Problem: Fehlermeldung "Trusted domain not set"
|
||||
|
||||
**Lösung:** In `config/config.php` Eintrag `trusted_domains` korrekt konfigurieren:
|
||||
|
||||
```php
|
||||
'trusted_domains' =>
|
||||
array (
|
||||
0 => 'yourdomain.tld',
|
||||
1 => 'cloud.cc24.dev',
|
||||
),
|
||||
```
|
||||
|
||||
## Weiterführende Themen
|
||||
|
||||
* **Integration mit Forensik-Plattformen** (über WebDAV, API oder SSO)
|
||||
* **Custom Apps entwickeln** für spezielle Ermittlungs-Workflows
|
||||
* **Auditing aktivieren**: Nutzung und Änderungen nachvollziehen mit Protokollierungsfunktionen
|
||||
@@ -1,162 +0,0 @@
|
||||
---
|
||||
title: "Velociraptor – Skalierbare Endpoint-Forensik mit VQL"
|
||||
tool_name: "Velociraptor"
|
||||
description: "Detaillierte Anleitung und Best Practices für Velociraptor – Remote-Forensik der nächsten Generation"
|
||||
last_updated: 2025-07-20
|
||||
author: "Claude 4 Sonnett (Prompt: Mario Stöckl)"
|
||||
difficulty: "advanced"
|
||||
categories: ["incident-response", "malware-analysis", "network-forensics"]
|
||||
gated_content: true
|
||||
tags: ["web-based", "endpoint-monitoring", "artifact-extraction", "scripting", "live-forensics", "hunting"]
|
||||
sections:
|
||||
overview: true
|
||||
installation: true
|
||||
configuration: true
|
||||
usage_examples: true
|
||||
best_practices: true
|
||||
troubleshooting: true
|
||||
advanced_topics: true
|
||||
review_status: "published"
|
||||
---
|
||||
|
||||
> **⚠️ Hinweis**: Dies ist ein vorläufiger, KI-generierter Knowledgebase-Eintrag. Wir freuen uns über Verbesserungen und Ergänzungen durch die Community!
|
||||
|
||||
|
||||
# Übersicht
|
||||
|
||||
Velociraptor ist ein Open-Source-Tool zur Endpoint-Forensik mit Fokus auf Skalierbarkeit, Präzision und Geschwindigkeit. Es ermöglicht die zielgerichtete Erfassung und Analyse digitaler Artefakte über eine eigene Query Language – VQL (Velociraptor Query Language). Die Architektur erlaubt remote Zugriff auf tausende Endpoints gleichzeitig, ohne dass vollständige Disk-Images erforderlich sind.
|
||||
|
||||
## Hauptmerkmale
|
||||
|
||||
- 🌐 Web-basierte Benutzeroberfläche
|
||||
- 💡 VQL – mächtige, SQL-ähnliche Abfragesprache
|
||||
- 🚀 Hochskalierbare Hunt-Funktionalität
|
||||
- 🔍 Artefaktbasierte Sammlung (ohne Full-Image)
|
||||
- 🖥️ Plattformunterstützung für Windows, macOS, Linux
|
||||
- 📦 Apache 2.0 Lizenz – Open Source
|
||||
|
||||
Weitere Infos: [velociraptor.app](https://www.velociraptor.app/)
|
||||
Projektspiegel: [raptor.cc24.dev](https://raptor.cc24.dev)
|
||||
Status: 
|
||||
|
||||
---
|
||||
|
||||
## Installation
|
||||
|
||||
### Voraussetzungen
|
||||
|
||||
- Python ≥ 3.9
|
||||
- Adminrechte auf dem System
|
||||
- Firewall-Freigaben für Webport (Standard: 8000)
|
||||
|
||||
### Installation unter Linux/macOS
|
||||
|
||||
```bash
|
||||
wget https://github.com/Velocidex/velociraptor/releases/latest/download/velociraptor
|
||||
chmod +x velociraptor
|
||||
sudo mv velociraptor /usr/local/bin/
|
||||
```
|
||||
|
||||
### Installation unter Windows
|
||||
|
||||
1. Download der `.exe` von der [Release-Seite](https://github.com/Velocidex/velociraptor/releases)
|
||||
2. Ausführung in PowerShell mit Adminrechten:
|
||||
|
||||
```powershell
|
||||
.\velociraptor.exe config generate > server.config.yaml
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Konfiguration
|
||||
|
||||
### Server Setup
|
||||
|
||||
1. Generiere die Konfigurationsdatei:
|
||||
|
||||
```bash
|
||||
velociraptor config generate > server.config.yaml
|
||||
```
|
||||
2. Starte den Server:
|
||||
|
||||
```bash
|
||||
velociraptor --config server.config.yaml frontend
|
||||
```
|
||||
3. Zugriff über Browser via `https://<hostname>:8000`
|
||||
|
||||
### Client Deployment
|
||||
|
||||
* MSI/EXE für Windows, oder `deb/rpm` für Linux
|
||||
* Unterstützt automatische Registrierung am Server
|
||||
* Deployment über GPO, Puppet, Ansible etc. möglich
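
Eine mögliche Minimal-Variante für Test-Deployments (die Befehle können je nach Velociraptor-Version leicht abweichen): Client-Konfiguration aus der Server-Konfiguration ableiten und den Client im Vordergrund starten:

```bash
# Client-Konfiguration aus der Server-Konfiguration erzeugen
velociraptor --config server.config.yaml config client > client.config.yaml

# Client starten (für produktive Deployments als Dienst bzw. MSI/Paket installieren)
velociraptor --config client.config.yaml client -v
```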
|
||||
|
||||
---
|
||||
|
||||
## Verwendungsbeispiele
|
||||
|
||||
### 1. Live-Memory-Artefakte sammeln
|
||||
|
||||
```vql
|
||||
SELECT * FROM Artifact.MemoryInfo()
|
||||
```
|
||||
|
||||
### 2. Hunt starten auf verdächtige Prozesse
|
||||
|
||||
```vql
|
||||
SELECT * FROM pslist()
|
||||
WHERE Name =~ "mimikatz|cobaltstrike"
|
||||
```
|
||||
|
||||
### 3. Dateiinhalt extrahieren
|
||||
|
||||
```vql
|
||||
SELECT * FROM glob(globs="C:\\Users\\*\\AppData\\*.dat")
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Best Practices
|
||||
|
||||
* Erstelle eigene Artefakte für unternehmensspezifische Bedrohungsmodelle
|
||||
* Verwende "Notebook"-Funktion für strukturierte Analysen
|
||||
* Nutze "Labels", um Endpoints zu organisieren (z. B. `location:Berlin`)
|
||||
* Kombiniere Velociraptor mit SIEM/EDR-Systemen über REST API
|
||||
|
||||
---
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Problem: Keine Verbindung vom Client zum Server
|
||||
|
||||
**Lösung:**
|
||||
|
||||
* Ports freigegeben? (Default: 8000/tcp)
|
||||
* TLS-Zertifikate korrekt generiert?
|
||||
* `server.config.yaml` auf korrekte `public_ip` prüfen
|
||||
|
||||
### Problem: Hunt hängt in Warteschleife
|
||||
|
||||
**Lösung:**
|
||||
|
||||
* Genügend Worker-Prozesse aktiv?
|
||||
* Endpoint online?
|
||||
* `log_level` auf `debug` setzen und Log analysieren
|
||||
|
||||
---
|
||||
|
||||
## Weiterführende Themen
|
||||
|
||||
* Eigene Artefakte schreiben mit VQL
|
||||
* Integration mit ELK Stack
|
||||
* Automatisiertes Incident Response Playbook
|
||||
* Velociraptor als IR-as-a-Service einsetzen
|
||||
|
||||
---
|
||||
|
||||
🧠 **Tipp:** Die Lernkurve bei VQL ist steil – aber mit hohem ROI. Testumgebung aufsetzen und mit Community-Artefakten starten.
|
||||
|
||||
📚 Weitere Ressourcen:
|
||||
|
||||
* [Offizielle Doku](https://docs.velociraptor.app/)
|
||||
* [YouTube Channel](https://www.youtube.com/c/VelociraptorDFIR)
|
||||
* [Community auf Discord](https://www.velociraptor.app/community/)
|
||||
@@ -57,6 +57,44 @@ tools:
|
||||
accessType: download
|
||||
license: Apache-2.0
|
||||
knowledgebase: false
|
||||
- name: Thorium
|
||||
icon: ⚛️
|
||||
type: software
|
||||
description: >-
|
||||
CISAs portable Hybrid-Analyse-Tool für die schnelle Untersuchung von Windows-
|
||||
Systemen auf bösartige Aktivitäten. Scannt mit kuratierten YARA- und
|
||||
Sigma-Regeln Arbeitsspeicher, Prozesse, Dateisystem, Netzwerkverbindungen und
|
||||
Systemprotokolle. Ideal für schnelle Triage im Incident Response, sowohl live als auch
|
||||
auf gemounteten Images. Die Ausgabe erfolgt in strukturierten JSON-Reports.
|
||||
domains:
|
||||
- incident-response
|
||||
- malware-analysis
|
||||
phases:
|
||||
- examination
|
||||
- analysis
|
||||
platforms:
|
||||
- Linux
|
||||
related_software:
|
||||
- Loki
|
||||
- YARA
|
||||
- Velociraptor
|
||||
skillLevel: intermediate
|
||||
accessType: download
|
||||
url: https://github.com/cisagov/thorium
|
||||
license: MIT
|
||||
knowledgebase: false
|
||||
tags:
|
||||
- cli
|
||||
- triage
|
||||
- fast-scan
|
||||
- ioc-matching
|
||||
- yara-scan
|
||||
- sigma-rules
|
||||
- memory-analysis
|
||||
- process-analysis
|
||||
- filesystem-scanning
|
||||
- log-analysis
|
||||
- portable
|
||||
- name: Volatility 3
|
||||
type: software
|
||||
description: >-
|
||||
@@ -119,9 +157,8 @@ tools:
|
||||
Kill-Chain-Phasen. Föderierte Architektur ermöglicht selektives
|
||||
Intelligence-Sharing zwischen vertrauenswürdigen Partnern durch
|
||||
Tagging-System. Correlation-Engine findet automatisch Zusammenhänge
|
||||
zwischen scheinbar unabhängigen Incidents. ZeroMQ-Feed pusht IOCs in
|
||||
Echtzeit an Firewalls, SIEMs und Detection-Systeme für automatisierte
|
||||
Response.
|
||||
zwischen scheinbar unabhängigen Incidents. Integriert mit Firewalls und
SIEMs, die mit MISP-Anreicherungen gefüttert werden können.
|
||||
url: https://misp-project.org/
|
||||
skillLevel: intermediate
|
||||
domains:
|
||||
@@ -157,6 +194,7 @@ tools:
|
||||
- OpenCTI
|
||||
icon: 🌐
|
||||
projectUrl: https://misp.cc24.dev
|
||||
statusUrl: https://status.mikoshi.de/api/badge/34/status
|
||||
license: AGPL-3.0
|
||||
accessType: server-based
|
||||
knowledgebase: true
|
||||
@@ -221,18 +259,16 @@ tools:
|
||||
- name: Timesketch
|
||||
type: software
|
||||
description: >-
|
||||
Google's Collaborative Timeline-Analyse-Platform meistert Millionen von
|
||||
korrelierten Events durch hochperformante
|
||||
Elasticsearch-Backend-Architektur für Enterprise-Scale-Investigations.
|
||||
Plaso-Integration parst automatisch über 300 verschiedene Log-Formate in
|
||||
einheitliche Super-Timeline mit standardisierten Attributen. Interactive
|
||||
Timeline-Explorer mit dynamischen Heatmaps, Activity-Graphen und
|
||||
Statistical-Analysis für Advanced-Pattern-Recognition. Sigma-Rules werden
|
||||
direkt auf Timelines angewendet für Automated-Threat-Detection,
|
||||
Machine-Learning-Analyzers erkennen Login-Brute-Force, Lateral-Movement
|
||||
und Data-Exfiltration-Patterns. Collaborative-Features: Shared-Sketches,
|
||||
Analyst-Comments, Saved-Searches und narrative Stories für
|
||||
Management-Reporting.
|
||||
Google's Timeline-Analyse-Platform meistert Millionen von korrelierten
|
||||
Events durch skalierende Elasticsearch-Backend-Architektur für
|
||||
umfangreiche Zeitlinienanalysen. Plaso-Integration parst automatisch über
|
||||
300 verschiedene Log-Formate in einheitliche Timeline mit standardisierten
|
||||
Attributen. Statistische Analysen und Plugins zur Datenanreicherung wie
|
||||
maxming GeoIP und MISP sind verfügbar. Sigma-Rules werden direkt auf
|
||||
Timelines angewendet für automatisierte Detektion von Anomalien,
|
||||
Login-Brute-Force, Lateral-Movement und Data-Exfiltration-Patterns.
|
||||
Kollaborative Funktionen: Gemeinsames Bearbeiten, Analystenkommentare,
|
||||
"Stories" für Management-Berichterstattung.
|
||||
url: https://timesketch.org/
|
||||
skillLevel: intermediate
|
||||
domains:
|
||||
@@ -269,6 +305,7 @@ tools:
|
||||
- Kibana
|
||||
icon: ⏱️
|
||||
projectUrl: https://timesketch.cc24.dev
|
||||
statusUrl: https://status.mikoshi.de/api/badge/37/status
|
||||
license: Apache-2.0
|
||||
accessType: server-based
|
||||
- name: Wireshark
|
||||
@@ -922,18 +959,20 @@ tools:
|
||||
- name: Neo4j
|
||||
type: software
|
||||
description: >-
|
||||
Native Graph-Datenbank transformiert komplexe Relationship-Data in
|
||||
intuitive Visualisierungen durch Cypher-Query-Language für forensische
|
||||
Pattern-Detection. Graph-Algorithmen finden kürzeste Pfade zwischen
|
||||
Entities, Community-Detection identifiziert Fraud-Rings und
|
||||
Criminal-Networks automatisch. Visual-Graph-Explorer macht verborgene
|
||||
Multi-Hop-Connections sichtbar für Money-Laundering, Social-Engineering
|
||||
und Organized-Crime-Investigations. APOC-Bibliothek bietet 450+
|
||||
spezialisierte Procedures für Advanced-Analytics: Centrality-Measures,
|
||||
PageRank, Clustering-Coefficients. Bloom-Visualization-Tool für
|
||||
nicht-technische Stakeholder mit Point-and-Click-Exploration. Import aus
|
||||
CSV, JSON und relationalen Datenbanken, Elasticsearch-Integration für
|
||||
Hybrid-Search-Scenarios.
|
||||
Graph-Datenbank transformiert komplexe relationale Daten in intuitive
|
||||
Visualisierungen. Die SQL-ähnliche Cypher-Query-Language ist nach einer
|
||||
gewissen Lernkurve intuitiv und bietet viele Möglichkeiten.
|
||||
Cypher-Algorithmen finden kürzeste Pfade zwischen Entitäten, viele weitere
|
||||
Automatisierungen sind möglich. Die Anwendbarkeiten sind wegen der
|
||||
abstrakten Struktur von Neo4J daher unbegrenzt und in allen Domänen
|
||||
(hauptsächlich Netzwerkforensik, Finanztransaktionsanalysen,
|
||||
Kriminalermittlungen gegen organisiertes Verbrechen) zur Visualisierung
|
||||
und ggf. auch zur Analyse einsetzbar. Die APOC-Bibliothek bietet darüber
|
||||
hinaus noch zahlreiche weitere Plugins. Import aus CSV, JSON und
|
||||
relationalen Datenbanken.
|
||||
|
||||
Leider versteckt Neo4J einige seiner Funktionen mittlerweile hinter einem
|
||||
Premium-Modell und entfernt sich so vom Open-Source-Konzept.
|
||||
url: https://neo4j.com/
|
||||
skillLevel: intermediate
|
||||
domains:
|
||||
@@ -971,6 +1010,7 @@ tools:
|
||||
- Linkurious
|
||||
icon: 🕸️
|
||||
projectUrl: https://graph.cc24.dev
|
||||
statusUrl: https://status.mikoshi.de/api/badge/32/status
|
||||
license: GPL-3.0 / Commercial
|
||||
accessType: server-based
|
||||
- name: QGIS
|
||||
@@ -2141,23 +2181,25 @@ tools:
|
||||
related_concepts:
|
||||
- Digital Evidence Chain of Custody
|
||||
- name: Aftermath
|
||||
icon: 🎯
|
||||
type: software
|
||||
description: >-
|
||||
Jamfs Open-Source-Juwel für macOS-Forensik sammelt systematisch Artefakte
|
||||
ohne Full-System-Image. Optimiert für Incident-Response mit minimalem
|
||||
System-Impact. Extrahiert kritische Daten: laufende Prozesse, Netzwerk-
|
||||
verbindungen, installierte Software, Persistence-Mechanismen. Besonders
|
||||
wertvoll: Unified-Log-Parser für System-Events, Browser-Artefakte aller
|
||||
Major-Browser, Quick-Look-Thumbnails, FSEvents für Dateiaktivitäten. Die
|
||||
modulare Architektur erlaubt selektive Sammlung. Output in strukturierten
|
||||
JSON/CSV für einfache Analyse. Zeitstempel-Normalisierung für
|
||||
Timeline-Erstellung. Unterstützt moderne macOS-Security-Features:
|
||||
TCC-Permissions, Code-Signing-Status, XProtect-Matches. Die Remote-
|
||||
Collection via MDM/SSH skaliert auf Unternehmensflotten. Besonders clever:
|
||||
Sammlung von Cloud-Synchronisations-Artefakten (iCloud, Dropbox).
|
||||
Regelmäßige Updates für neue macOS-Versionen. Die Alternative zu teuren
|
||||
kommerziellen Mac-Forensik-Suiten.
|
||||
Jamfs Open-Source-Software für macOS-Forensik sammelt systematisch
|
||||
Artefakte, ohne zuvor ein Full-System-Image zu ziehen. Optimiert für
|
||||
Incident-Response mit minimalem Systemeingriff. Extrahiert kritische
|
||||
Daten: laufende Prozesse, Netzwerkverbindungen, installierte Software,
|
||||
Persistenzmechanismen. Besonders wertvoll: Unified-Log-Parser für
|
||||
System-Events, Browser-Artefakte aller größeren Browser,
|
||||
Quick-Look-Thumbnails, FSEvents für Dateiaktivitäten. Die modulare
|
||||
Architektur erlaubt selektive Sammlung. Output in strukturierten JSON/CSV
|
||||
für einfache Analyse. Zeitstempel-Normalisierung für Timeline-Erstellung.
|
||||
Unterstützt moderne macOS-Sicherheitsfeatures: TCC-Permissions,
|
||||
Code-Signing-Status, XProtect-Matches. Die Remote-Collection via MDM/SSH
|
||||
skaliert auf Unternehmensflotten. Besonders clever: Sammlung von
|
||||
Cloud-Synchronisations-Artefakten (iCloud, Dropbox). Regelmäßige Updates
|
||||
für neue macOS-Versionen. Die Alternative zu teuren kommerziellen
|
||||
Mac-Forensik-Suiten.
|
||||
url: https://github.com/jamf/aftermath/
|
||||
skillLevel: intermediate
|
||||
domains:
|
||||
- incident-response
|
||||
- static-investigations
|
||||
@@ -2167,14 +2209,6 @@ tools:
|
||||
- examination
|
||||
platforms:
|
||||
- macOS
|
||||
related_software:
|
||||
- osquery
|
||||
- KAPE
|
||||
skillLevel: intermediate
|
||||
accessType: download
|
||||
url: https://github.com/jamf/aftermath/
|
||||
license: Apache-2.0
|
||||
knowledgebase: false
|
||||
tags:
|
||||
- cli
|
||||
- triage
|
||||
@@ -2190,6 +2224,12 @@ tools:
|
||||
- json-export
|
||||
related_concepts:
|
||||
- Digital Evidence Chain of Custody
|
||||
related_software:
|
||||
- osquery
|
||||
- KAPE
|
||||
icon: 🎯
|
||||
license: Apache-2.0
|
||||
accessType: download
|
||||
- name: RegRipper
|
||||
type: software
|
||||
description: >-
|
||||
@@ -2280,17 +2320,15 @@ tools:
|
||||
- name: PhotoRec
|
||||
type: software
|
||||
description: >-
|
||||
Signature-Based File-Carving-Tool rekonstruiert gelöschte Files durch
|
||||
Signatur-basiertes File-Carving-Tool rekonstruiert gelöschte Daten durch
|
||||
Header/Footer-Pattern-Matching unabhängig vom Dateisystem-Zustand oder
|
||||
Partition-Table-Corruption. Unterstützt über 300 File-Formats: Images
|
||||
(JPEG, PNG, TIFF), Documents (PDF, DOC, XLS), Archives (ZIP, RAR), Videos
|
||||
(AVI, MP4) und Custom-Signatures. Read-Only-Operation gewährleistet
|
||||
forensische Evidence-Integrity, funktioniert bei beschädigten,
|
||||
formatierten oder korrupten Dateisystemen. Paranoid-Mode scannt jeden
|
||||
einzelnen Sektor für Maximum-Recovery-Rate bei fragmentierten Files.
|
||||
Konfigurierbare File-Extensions und Custom-Signature-Development für
|
||||
proprietäre Formats. Companion-Software TestDisk repariert
|
||||
Partition-Tables und Boot-Sectors für Filesystem-Recovery-Scenarios.
|
||||
Korruption des Dateisystems. Unterstützt über 300 Datei-Formate: Bilder
|
||||
(JPEG, PNG, TIFF), Dokumente (PDF, DOC, XLS), Archive (ZIP, RAR), Videos
|
||||
(AVI, MP4) und selbstdefinierte Dateisignaturen. Read-Only gewährleistet
|
||||
forensische Integrität, funktioniert bei beschädigten, formatierten oder
|
||||
korrupten Dateisystemen. Paranoid-Mode scannt jeden einzelnen Sektor für
|
||||
maximale Anzahl wiederhergestellter Daten. Integrierbar mit Software wie
|
||||
TestDisk.
|
||||
url: https://www.cgsecurity.org/wiki/PhotoRec
|
||||
skillLevel: beginner
|
||||
domains:
|
||||
@@ -2299,6 +2337,7 @@ tools:
|
||||
- fraud-investigation
|
||||
phases:
|
||||
- examination
|
||||
- data-collection
|
||||
platforms:
|
||||
- Windows
|
||||
- Linux
|
||||
@@ -2689,42 +2728,6 @@ tools:
|
||||
icon: 🔍
|
||||
license: Proprietary
|
||||
accessType: commercial
|
||||
- name: FRED
|
||||
type: software
|
||||
description: >-
|
||||
Hardware-Forensik-Workstation ermöglicht simultanes Imaging von 8
|
||||
Evidenzen durch Hot-Swap-UltraBay
|
||||
und integrierte Write-Blocker für SATA/IDE/USB/FireWire. Hardware-Hash-Acceleration beschleunigt
|
||||
MD5/SHA-Verifizierung, Touchscreen-Konsole steuert Parallel-Processing ohne Host-System-Belastung.
|
||||
Field-Kit-Version mit 4-Bay-Kapazität für Vor-Ort-Akquisition, modulares Design erlaubt
|
||||
RAID-Controller-Upgrades für NAS-Forensik.
|
||||
url: https://www.digitalintelligence.com/products/fred/
|
||||
skillLevel: intermediate
|
||||
domains:
|
||||
- static-investigations
|
||||
- incident-response
|
||||
phases:
|
||||
- data-collection
|
||||
platforms:
|
||||
- Hardware
|
||||
tags:
|
||||
- gui
|
||||
- commercial
|
||||
- write-blocker
|
||||
- physical-copy
|
||||
- scenario:disk_imaging
|
||||
- multithreaded
|
||||
- hardware-solution
|
||||
- hot-swap
|
||||
- raid-recovery
|
||||
- parallel-imaging
|
||||
- touch-control
|
||||
- lab-equipment
|
||||
related_concepts:
|
||||
- Digital Evidence Chain of Custody
|
||||
icon: 🖥️
|
||||
license: Proprietary
|
||||
accessType: commercial
|
||||
- name: GraphSense
|
||||
icon: 📊
|
||||
type: software
|
||||
@@ -3388,26 +3391,25 @@ tools:
|
||||
description: >-
|
||||
Die kommerzielle Blockchain-Analytics-Plattform konkurriert mit
|
||||
Chainalysis durch erweiterte Compliance-Features und RegTech-Integration.
|
||||
Clustering- Algorithmen identifizieren Services durch
|
||||
Transaction-Pattern-Analysis: Exchanges, Darknet-Markets, Mixers,
|
||||
Ransomware-Wallets. Die Compliance- Suite bietet Real-Time-Screening gegen
|
||||
OFAC/EU-Sanctions-Listen. Besonders stark: DeFi-Protocol-Analysis
|
||||
dekodiert Smart-Contract- Interactions, Cross-Chain-Tracking folgt Funds
|
||||
über Bridges, Investigation-Tools für Complex-Money-Laundering-Schemes.
|
||||
API-Integration ermöglicht Automated-AML-Workflows. Die Typology-Library
|
||||
kategorisiert Verdachtsmuster nach FATF-Standards. Court-Ready-Reports
|
||||
mit Blockchain- Evidence-Chain. Training-Programme zertifizieren
|
||||
Investigators. Unterstützt Bitcoin, Ethereum, und 15+ andere Blockchains.
|
||||
Enterprise- Deployment für Banken, Exchanges und Strafverfolgung. Der
|
||||
Clustering-Algorithmen identifizieren Dienstleister durch
|
||||
Transaktionsmusteranalyse: Exchanges, Darknet-Markets, Mixer,
|
||||
Ransomware-Wallets. Die Compliance-Suite bietet Echtzeitüberwachung von
|
||||
OFAC/EU-Sanktionslisten. Besonders stark: DeFi-Protokollanalyse dekodiert
|
||||
Smart-Contract-Interaktionen, Cross-Chain-Tracking folgt Geldern über
|
||||
verschiedene Blockchains hinweg, Ermittlungswerkzeuge für komplexe
|
||||
Geldwäsche-Schemata. API-Integration ermöglicht programmatische
|
||||
Integration. Unterstützt Bitcoin, Ethereum, und 15+ andere Blockchains.
|
||||
Enterprise-Deployment für Banken, Exchanges und Strafverfolgung. Der
|
||||
europäische Fokus macht es zur Alternative für EU-basierte
|
||||
Organisationen.
|
||||
skillLevel: intermediate
|
||||
url: https://www.elliptic.co
|
||||
icon: ₿
|
||||
skillLevel: intermediate
|
||||
domains:
|
||||
- fraud-investigation
|
||||
phases:
|
||||
- analysis
|
||||
platforms:
|
||||
- Web
|
||||
tags:
|
||||
- blockchain-analysis
|
||||
- compliance-screening
|
||||
@@ -3416,11 +3418,8 @@ tools:
|
||||
- cross-chain-tracking
|
||||
- aml-workflows
|
||||
- court-reporting
|
||||
platforms:
|
||||
- Web
|
||||
accessType: cloud
|
||||
license: Subscription
|
||||
knowledgebase: false
|
||||
icon: ₿
|
||||
license: Proprietary
|
||||
- name: FACT
|
||||
type: software
|
||||
description: >-
|
||||
@@ -4329,13 +4328,13 @@ tools:
|
||||
- name: ADB
|
||||
type: software
|
||||
description: >-
|
||||
Kommuniziert mit Android-Geräten für forensische Datenextraktion über USB
|
||||
oder Netzwerk ohne Root-Zugriff. Erstellt logische Backups von App-Daten,
|
||||
installiert forensische Analysewerkzeuge, erfasst Live-Logcats für
|
||||
Incident-Response. Port-Weiterleitung ermöglicht sichere Remote-Analyse.
|
||||
File-Transfer-Funktionen extrahieren Beweise direkt vom Gerät.
|
||||
Shell-Access für erweiterte Forensik-Kommandos. Unverzichtbar für
|
||||
Mobile-Incident-Response und App-Entwicklungs-Forensik.
|
||||
Die "Android Debug Bridge" ist grundsätzlich ein Werkzeug für
|
||||
Android-Entwickler, wird aber auch gern in der Mobile-Forensik genutzt.
|
||||
|
||||
Sie ermöglicht bei Android-Geräten forensische Datenextraktion über USB
|
||||
oder Netzwerk teilweise ohne Root-Zugriff, besonders einfach bei älteren
|
||||
Geräten. Erstellt logische Backups von App-Daten, installiert forensische
|
||||
Analysewerkzeuge.
|
||||
url: https://developer.android.com/tools/adb
|
||||
skillLevel: intermediate
|
||||
domains:
|
||||
@@ -4523,8 +4522,8 @@ tools:
|
||||
Deauth-Frames für Handshake-Erfassung. WEP-Schlüssel-Rekonstruktion in
|
||||
Minuten, WPA2-PSK-Recovery mit Dictionary-Angriffen.
|
||||
Rogue-Access-Point-Erkennung und Client-Probing-Analyse für
|
||||
Bewegungsprofile. GPU-Beschleunigung via hashcat für moderne
|
||||
Verschlüsselungsstandards.
|
||||
Bewegungsprofile. Ein etabliertes Tool, das vor allem im Pentesting
weiterhin hohe Relevanz besitzt.
|
||||
url: https://www.aircrack-ng.org/
|
||||
skillLevel: advanced
|
||||
domains:
|
||||
@@ -7160,6 +7159,97 @@ tools:
|
||||
- kernel-analysis
|
||||
related_concepts:
|
||||
- Memory Forensics & Process Analysis
|
||||
- name: ChipWhisperer
|
||||
type: software
|
||||
description: >-
|
||||
Hardware-Sicherheitsanalyse-Plattform für Firmware-Extraktion aus
|
||||
eingebetteten Systemen durch Stromverbrauchsanalysen. Automatisierte
|
||||
Differential-Power-Analysis (DPA) und Correlation-Power-Analysis (CPA)
|
||||
brechen AES-Implementierungen und extrahieren Verschlüsselungsschlüssel
|
||||
aus Mikrocontrollern. Fehlereinschleusung umgeht Bootloader-Überprüfung
|
||||
und Secure-Boot-Mechanismen. Besonders wertvoll für IoT-Geräte-Forensik:
|
||||
Umgehung von Hardware-Security-Modulen, Clock-Glitching für
|
||||
Code-Ausführungs-Übernahme, Spannungsfehler für
|
||||
Authentifizierungs-Umgehung. Python-API automatisiert Angriffsszenarien,
|
||||
CW-Lite/Pro-Hardware skaliert von Hobby bis professionelle
|
||||
Penetrationstests. Standardplattform für Hardware-Hacking und eingebettete
|
||||
Systemforensik.
|
||||
url: https://www.newae.com/chipwhisperer
|
||||
skillLevel: expert
|
||||
domains:
|
||||
- ics-forensics
|
||||
- static-investigations
|
||||
phases:
|
||||
- data-collection
|
||||
- analysis
|
||||
platforms:
|
||||
- Windows
|
||||
- Linux
|
||||
- macOS
|
||||
tags:
|
||||
- hardware-analysis
|
||||
- side-channel-attack
|
||||
- power-analysis
|
||||
- fault-injection
|
||||
- embedded-security
|
||||
- firmware-extraction
|
||||
- iot-forensics
|
||||
- hardware-hacking
|
||||
- encryption-bypass
|
||||
- python-api
|
||||
related_concepts:
|
||||
- Hash Functions & Digital Signatures
|
||||
related_software:
|
||||
- Binwalk
|
||||
- Ghidra
|
||||
- ICSpector
|
||||
icon: 🫓
|
||||
license: GPL-3.0
|
||||
accessType: download
|
||||
- name: JTAG-Analyse
|
||||
type: method
|
||||
description: >-
|
||||
Direkter Hardware-Schnittstellenzugriff auf eingebettete Systeme über
|
||||
Joint Test Action Group Debug-Schnittstelle für Firmware-Extraktion und
|
||||
Systemanalyse. Boundary-Scan-Verfahren identifiziert verfügbare JTAG-Pins
|
||||
auch bei undokumentierten Geräten durch systematische Pin-Tests.
|
||||
Flash-Speicher-Abzüge umgehen Software-Schutzmaßnahmen und extrahieren
|
||||
komplette Firmware-Abbilder inklusive verschlüsselter Bereiche.
|
||||
|
||||
Debug-Port-Ausnutzung ermöglicht Live-Speicherzugriff,
|
||||
Register-Manipulation und Code-Injection in laufende Systeme. Besonders
|
||||
kritisch für IoT-Forensik: Router-Hintertüren, intelligente
|
||||
Geräte-Manipulationen, Industriesteuerungsanlagen-Kompromittierungen.
|
||||
Kombiniert mit Chip-Off-Techniken für maximale Datenwiederherstellung bei
|
||||
sicherheitsgehärteten Geräten. Standard-Methodik für Hardware-Forensik.
|
||||
url: https://www.jtag.com/what-is-jtag-testing-of-electronics-tutorial/#
|
||||
skillLevel: expert
|
||||
domains:
|
||||
- ics-forensics
|
||||
- mobile-forensics
|
||||
- static-investigations
|
||||
phases:
|
||||
- data-collection
|
||||
- examination
|
||||
tags:
|
||||
- hardware-interface
|
||||
- firmware-extraction
|
||||
- debug-access
|
||||
- boundary-scan
|
||||
- embedded-analysis
|
||||
- iot-forensics
|
||||
- flash-memory
|
||||
- system-exploitation
|
||||
- hardware-forensics
|
||||
- pin-identification
|
||||
related_concepts:
|
||||
- Digital Evidence Chain of Custody
|
||||
related_software:
|
||||
- ChipWhisperer
|
||||
- Binwalk
|
||||
- OpenOCD
|
||||
icon: 💳
|
||||
knowledgebase: true
|
||||
domains:
|
||||
- id: incident-response
|
||||
name: Incident Response & Breach-Untersuchung
|
||||
@@ -7228,3 +7318,4 @@ scenarios:
|
||||
- id: scenario:windows-registry
|
||||
icon: 📜
|
||||
friendly_name: Windows Registry analysieren
|
||||
skill_levels: {}
|
||||
|
||||
@@ -37,7 +37,6 @@ const { title, description = 'ForensicPathways - A comprehensive directory of di
|
||||
} catch (error) {
|
||||
console.error('Failed to load utility functions:', error);
|
||||
|
||||
// Provide fallback implementations
|
||||
(window as any).createToolSlug = (toolName: string) => {
|
||||
if (!toolName || typeof toolName !== 'string') return '';
|
||||
return toolName.toLowerCase().replace(/[^a-z0-9\s-]/g, '').replace(/\s+/g, '-').replace(/-+/g, '-').replace(/^-|-$/g, '');
|
||||
@@ -119,7 +118,6 @@ const { title, description = 'ForensicPathways - A comprehensive directory of di
|
||||
(window as any).prioritizeSearchResults = prioritizeSearchResults;
|
||||
|
||||
document.addEventListener('DOMContentLoaded', async () => {
|
||||
// CRITICAL: Load utility functions FIRST before any URL handling
|
||||
await loadUtilityFunctions();
|
||||
|
||||
const THEME_KEY = 'dfir-theme';
|
||||
@@ -173,32 +171,31 @@ const { title, description = 'ForensicPathways - A comprehensive directory of di
|
||||
getStoredTheme
|
||||
};
|
||||
|
||||
(window as any).showToolDetails = function(toolName: string, modalType: string = 'primary') {
|
||||
|
||||
let attempts = 0;
|
||||
const maxAttempts = 50;
|
||||
|
||||
const tryDelegate = () => {
|
||||
const matrixShowToolDetails = (window as any).matrixShowToolDetails;
|
||||
(window as any).showToolDetails = function(toolName: string, modalType: string = 'primary') {
|
||||
let attempts = 0;
|
||||
const maxAttempts = 50;
|
||||
|
||||
if (matrixShowToolDetails && typeof matrixShowToolDetails === 'function') {
|
||||
return matrixShowToolDetails(toolName, modalType);
|
||||
}
|
||||
const tryDelegate = () => {
|
||||
const matrixShowToolDetails = (window as any).matrixShowToolDetails;
|
||||
|
||||
if (matrixShowToolDetails && typeof matrixShowToolDetails === 'function') {
|
||||
return matrixShowToolDetails(toolName, modalType);
|
||||
}
|
||||
|
||||
const directShowToolDetails = (window as any).directShowToolDetails;
|
||||
if (directShowToolDetails && typeof directShowToolDetails === 'function') {
|
||||
return directShowToolDetails(toolName, modalType);
|
||||
}
|
||||
|
||||
attempts++;
|
||||
if (attempts < maxAttempts) {
|
||||
setTimeout(tryDelegate, 100);
|
||||
} else {
|
||||
}
|
||||
};
|
||||
|
||||
const directShowToolDetails = (window as any).directShowToolDetails;
|
||||
if (directShowToolDetails && typeof directShowToolDetails === 'function') {
|
||||
return directShowToolDetails(toolName, modalType);
|
||||
}
|
||||
|
||||
attempts++;
|
||||
if (attempts < maxAttempts) {
|
||||
setTimeout(tryDelegate, 100);
|
||||
} else {
|
||||
}
|
||||
tryDelegate();
|
||||
};
|
||||
|
||||
tryDelegate();
|
||||
};
|
||||
|
||||
(window as any).hideToolDetails = function(modalType: string = 'both') {
|
||||
const matrixHideToolDetails = (window as any).matrixHideToolDetails;
|
||||
@@ -229,7 +226,7 @@ const { title, description = 'ForensicPathways - A comprehensive directory of di
|
||||
authRequired: data.aiAuthRequired,
|
||||
expires: data.expires
|
||||
};
|
||||
case 'gatedcontent': // ADD THIS CASE
|
||||
case 'gatedcontent':
|
||||
return {
|
||||
authenticated: data.gatedContentAuthenticated,
|
||||
authRequired: data.gatedContentAuthRequired,
|
||||
@@ -353,6 +350,29 @@ const { title, description = 'ForensicPathways - A comprehensive directory of di
|
||||
};
|
||||
initAIButton();
|
||||
});
|
||||
document.addEventListener('DOMContentLoaded', () => {
|
||||
const isFirefox = navigator.userAgent.toLowerCase().includes('firefox') ||
|
||||
navigator.userAgent.toLowerCase().includes('librewolf');
|
||||
|
||||
if (isFirefox) {
|
||||
console.log('[VIDEO] Firefox detected - setting up error recovery');
|
||||
|
||||
document.querySelectorAll('video').forEach(video => {
|
||||
let errorCount = 0;
|
||||
|
||||
video.addEventListener('error', () => {
|
||||
errorCount++;
|
||||
console.log(`[VIDEO] Error ${errorCount} in Firefox for: ${video.getAttribute('data-video-title')}`);
|
||||
|
||||
});
|
||||
|
||||
video.addEventListener('loadedmetadata', () => {
|
||||
const title = video.getAttribute('data-video-title') || 'Video';
|
||||
console.log(`[VIDEO] Successfully loaded: ${title}`);
|
||||
});
|
||||
});
|
||||
}
|
||||
});
|
||||
</script>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
@@ -184,7 +184,7 @@ import BaseLayout from '../layouts/BaseLayout.astro';
|
||||
|
||||
<div style="display: grid; gap: 1.25rem;">
|
||||
<div style="background-color: var(--color-bg-secondary); padding: 1.25rem; border-radius: 0.5rem;">
|
||||
<h4 style="margin: 0 0 0.5rem 0; color: var(--color-accent);">🔍 Vorschläge</h4>
|
||||
<h4 style="margin: 0 0 0.5rem 0; color: var(--color-accent);">📝 Vorschläge</h4>
|
||||
<p style="margin: 0;">
|
||||
Du hast eine Idee, wie wir den Hub erweitern können? Reiche deinen Vorschlag unkompliziert
|
||||
über unsere <a href="/contribute#vorschlaege">/contribute</a>-Seite ein.
|
||||
@@ -210,15 +210,54 @@ import BaseLayout from '../layouts/BaseLayout.astro';
|
||||
<svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
|
||||
<path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z" />
|
||||
</svg>
|
||||
Git‑Repository besuchen
|
||||
Git-Repository besuchen
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Lightning Support Section with simple-boost integration -->
|
||||
<div style="background-color: var(--color-bg-secondary); padding: 1.25rem; border-radius: 0.5rem;">
|
||||
<h4 style="margin: 0 0 0.5rem 0; color: var(--color-accent);">⚡ Unterstützung</h4>
|
||||
<p style="margin: 0;">
|
||||
Kleine Spenden zur Infrastruktur-Finanzierung nehme ich auch gerne an, wenn es sein muss.
|
||||
Fragt einfach nach der Lightning-Adresse oder BTC-Adresse!
|
||||
<h4 style="margin: 0 0 0.75rem 0; color: var(--color-accent); display: flex; align-items: center; gap: 0.5rem;">
|
||||
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
|
||||
<polygon points="13,2 3,14 12,14 11,22 21,10 12,10 13,2"/>
|
||||
</svg>
|
||||
⚡ Unterstützung
|
||||
</h4>
|
||||
<p style="margin: 0 0 1rem 0; font-size: 0.875rem; line-height: 1.5;">
|
||||
Kleine Spenden zur Server-Finanzierung sind willkommen.
|
||||
</p>
|
||||
|
||||
<div style="margin-bottom: 1rem;">
|
||||
<!-- Simple EUR Payment -->
|
||||
<div style="display: flex; gap: 0.75rem; align-items: center; justify-content: center; max-width: 300px; margin: 0 auto;">
|
||||
<input
|
||||
type="number"
|
||||
id="eur-amount"
|
||||
min="0.01"
|
||||
step="0.01"
|
||||
placeholder="0,50"
|
||||
value="0.5"
|
||||
style="width: 80px; padding: 0.5rem; border: 1px solid var(--color-border); border-radius: 0.375rem; font-size: 0.875rem; text-align: center;">
|
||||
<span style="font-size: 0.875rem; color: var(--color-text-secondary);">€</span>
|
||||
<simple-boost
|
||||
id="eur-boost"
|
||||
class="bootstrap"
|
||||
nwc="nostr+walletconnect://4fe05896e1faf09d1902ea24ef589f65a9606d1710420a9574ce331e3c7f486b?relay=wss://nostr.mikoshi.de&secret=bdfc861fe71e8d9e375b7a2484052e92def7caf4b317d8f6537b784d3cd6eb3b"
|
||||
amount="0.5"
|
||||
currency="eur"
|
||||
memo="ForensicPathways Unterstützung - Vielen Dank!"
|
||||
style="background-color: var(--color-accent); color: white; border: none; border-radius: 0.375rem; padding: 0.5rem 1rem; font-size: 0.875rem; cursor: pointer;">
|
||||
⚡ Senden
|
||||
</simple-boost>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div style="margin-top: 1rem; padding: 0.75rem; background-color: var(--color-bg); border-radius: 0.375rem; border-left: 3px solid var(--color-accent);">
|
||||
<p style="margin: 0; font-size: 0.75rem; color: var(--color-text-secondary); line-height: 1.4; text-align: center;">
|
||||
<strong>⚡ Lightning-Unterstützung:</strong> Betrag eingeben und senden.
|
||||
Benötigt eine Lightning-Wallet wie <a href="https://getalby.com" target="_blank" rel="noopener" style="color: var(--color-accent);">Alby</a> oder
|
||||
<a href="https://phoenix.acinq.co" target="_blank" rel="noopener" style="color: var(--color-accent);">Phoenix</a>.
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -231,4 +270,70 @@ import BaseLayout from '../layouts/BaseLayout.astro';
|
||||
</p>
|
||||
</div>
|
||||
</section>
|
||||
</BaseLayout>
|
||||
</BaseLayout>
|
||||
|
||||
<script>
|
||||
// TODO: cleanup
|
||||
import('simple-boost').then(() => {
|
||||
console.log('Simple-boost loaded successfully from local dependencies');
|
||||
|
||||
setupDynamicAmounts();
|
||||
}).catch(error => {
|
||||
console.error('Failed to load simple-boost:', error);
|
||||
const script = document.createElement('script');
|
||||
script.type = 'module';
|
||||
script.src = '/node_modules/simple-boost/dist/simple-boost.js';
|
||||
script.onload = () => {
|
||||
console.log('Simple-boost fallback loaded');
|
||||
setupDynamicAmounts();
|
||||
};
|
||||
script.onerror = () => console.error('Simple-boost fallback failed');
|
||||
document.head.appendChild(script);
|
||||
});
|
||||
|
||||
function setupDynamicAmounts() {
|
||||
const eurBoost = document.getElementById('eur-boost');
|
||||
const eurInput = document.getElementById('eur-amount') as HTMLInputElement;
|
||||
|
||||
if (eurBoost && eurInput) {
|
||||
eurBoost.addEventListener('click', (e) => {
|
||||
const amount = parseFloat(eurInput.value) || 0.5;
|
||||
eurBoost.setAttribute('amount', amount.toString());
|
||||
console.log('EUR amount set to:', amount);
|
||||
});
|
||||
|
||||
eurInput.addEventListener('input', () => {
|
||||
const amount = parseFloat(eurInput.value) || 0.5;
|
||||
eurBoost.setAttribute('amount', amount.toString());
|
||||
});
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<style>
|
||||
simple-boost {
|
||||
--simple-boost-primary: var(--color-warning);
|
||||
--simple-boost-primary-hover: var(--color-accent);
|
||||
--simple-boost-text: white;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
simple-boost:hover {
|
||||
transform: translateY(-1px);
|
||||
box-shadow: 0 4px 8px rgba(0,0,0,0.15) !important;
|
||||
}
|
||||
|
||||
simple-boost .simple-boost-button {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
font-family: inherit;
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
/* Loading state styling */
|
||||
simple-boost[loading] {
|
||||
opacity: 0.7;
|
||||
cursor: not-allowed;
|
||||
}
|
||||
</style>
|
||||
@@ -1,16 +1,18 @@
|
||||
// src/pages/api/ai/embeddings-status.ts
|
||||
import type { APIRoute } from 'astro';
|
||||
import { embeddingsService } from '../../../utils/embeddings.js';
|
||||
|
||||
export const prerender = false;
|
||||
|
||||
export const GET: APIRoute = async () => {
|
||||
try {
|
||||
const { embeddingsService } = await import('../../../utils/embeddings.js');
|
||||
await embeddingsService.waitForInitialization();
|
||||
|
||||
const stats = embeddingsService.getStats();
|
||||
const status = stats.enabled && stats.initialized ? 'ready' :
|
||||
stats.enabled && !stats.initialized ? 'initializing' : 'disabled';
|
||||
const status = stats.initialized ? 'ready' :
|
||||
!stats.initialized ? 'initializing' : 'disabled';
|
||||
|
||||
console.log(`[EMBEDDINGS-STATUS-API] Service status: ${status}, stats:`, stats);
|
||||
|
||||
return new Response(JSON.stringify({
|
||||
success: true,
|
||||
@@ -23,6 +25,8 @@ export const GET: APIRoute = async () => {
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
console.error('[EMBEDDINGS-STATUS-API] Error checking embeddings status:', error);
|
||||
|
||||
return new Response(JSON.stringify({
|
||||
success: false,
|
||||
embeddings: { enabled: false, initialized: false, count: 0 },
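
Editor's note: for context, a minimal sketch (not part of the commit) of how a client might poll this status endpoint until the index is ready. The /api/ai/embeddings-status path and the { success, embeddings } fields are taken from the hunk above; the polling interval, retry cap, and interface name are assumptions.

// Hypothetical poller for the embeddings status endpoint shown above.
interface EmbeddingsStatusResponse {
  success: boolean;
  embeddings: { enabled: boolean; initialized: boolean; count: number };
}

async function waitForEmbeddings(maxAttempts = 10, delayMs = 2000): Promise<boolean> {
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    const res = await fetch('/api/ai/embeddings-status');
    if (res.ok) {
      const data = (await res.json()) as EmbeddingsStatusResponse;
      // The handler above reports 'ready' once the service is initialized.
      if (data.success && data.embeddings.initialized) return true;
    }
    await new Promise(resolve => setTimeout(resolve, delayMs));
  }
  return false; // still initializing (or disabled) after all attempts
}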
|
||||
|
||||
@@ -1,28 +1,57 @@
|
||||
// src/pages/api/ai/enhance-input.ts - Enhanced AI service compatibility
|
||||
|
||||
// src/pages/api/ai/enhance-input.ts
|
||||
import type { APIRoute } from 'astro';
|
||||
import { withAPIAuth } from '../../../utils/auth.js';
|
||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
||||
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';
|
||||
import { aiService } from '../../../utils/aiService.js';
|
||||
import { JSONParser } from '../../../utils/jsonUtils.js';
|
||||
import { getPrompt } from '../../../config/prompts.js';
|
||||
|
||||
export const prerender = false;
|
||||
|
||||
function getEnv(key: string): string {
|
||||
const value = process.env[key];
|
||||
if (!value) {
|
||||
throw new Error(`Missing environment variable: ${key}`);
|
||||
}
|
||||
return value;
|
||||
}
|
||||
const RATE_LIMIT_WINDOW_MS =
|
||||
Number.isFinite(parseInt(process.env.RATE_LIMIT_WINDOW_MS ?? '', 10))
|
||||
? parseInt(process.env.RATE_LIMIT_WINDOW_MS!, 10)
|
||||
: 60_000;
|
||||
|
||||
const AI_ENDPOINT = getEnv('AI_ANALYZER_ENDPOINT');
|
||||
const AI_ANALYZER_API_KEY = getEnv('AI_ANALYZER_API_KEY');
|
||||
const AI_ANALYZER_MODEL = getEnv('AI_ANALYZER_MODEL');
|
||||
const RATE_LIMIT_MAX =
|
||||
Number.isFinite(parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS ?? '', 10))
|
||||
? parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS!, 10)
|
||||
: 5;
|
||||
|
||||
const INPUT_MIN_CHARS = 40;
|
||||
const INPUT_MAX_CHARS = 1000;
|
||||
const Q_MIN_LEN = 15;
|
||||
const Q_MAX_LEN = 160;
|
||||
const Q_MAX_COUNT = 3;
|
||||
const AI_TEMPERATURE = 0.3;
|
||||
const CLEANER_TEMPERATURE = 0.0;
|
||||
|
||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
|
||||
const RATE_LIMIT_WINDOW = 60 * 1000;
|
||||
const RATE_LIMIT_MAX = 5;
|
||||
|
||||
function checkRateLimit(userId: string): boolean {
|
||||
const now = Date.now();
|
||||
const entry = rateLimitStore.get(userId);
|
||||
if (!entry || now > entry.resetTime) {
|
||||
rateLimitStore.set(userId, { count: 1, resetTime: now + RATE_LIMIT_WINDOW_MS });
|
||||
return true;
|
||||
}
|
||||
if (entry.count >= RATE_LIMIT_MAX) return false;
|
||||
entry.count++;
|
||||
return true;
|
||||
}
|
||||
|
||||
function cleanupExpiredRateLimits(): void {
|
||||
const now = Date.now();
|
||||
for (const [userId, entry] of rateLimitStore.entries()) {
|
||||
if (now > entry.resetTime) rateLimitStore.delete(userId);
|
||||
}
|
||||
}
|
||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
||||
|
||||
/**
|
||||
* Helpers
|
||||
*/
|
||||
function sanitizeInput(input: string): string {
|
||||
return input
|
||||
.replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
|
||||
@@ -30,112 +59,24 @@ function sanitizeInput(input: string): string {
|
||||
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
|
||||
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
|
||||
.trim()
|
||||
.slice(0, 1000);
|
||||
.slice(0, INPUT_MAX_CHARS);
|
||||
}
|
||||
|
||||
function checkRateLimit(userId: string): boolean {
|
||||
const now = Date.now();
|
||||
const userLimit = rateLimitStore.get(userId);
|
||||
|
||||
if (!userLimit || now > userLimit.resetTime) {
|
||||
rateLimitStore.set(userId, { count: 1, resetTime: now + RATE_LIMIT_WINDOW });
|
||||
return true;
|
||||
}
|
||||
|
||||
if (userLimit.count >= RATE_LIMIT_MAX) {
|
||||
return false;
|
||||
}
|
||||
|
||||
userLimit.count++;
|
||||
return true;
|
||||
}
|
||||
|
||||
function cleanupExpiredRateLimits() {
|
||||
const now = Date.now();
|
||||
for (const [userId, limit] of rateLimitStore.entries()) {
|
||||
if (now > limit.resetTime) {
|
||||
rateLimitStore.delete(userId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
||||
|
||||
function createEnhancementPrompt(input: string): string {
|
||||
return `Sie sind ein DFIR-Experte mit Spezialisierung auf forensische Methodik. Ein Nutzer beschreibt ein forensisches Szenario oder Problem. Analysieren Sie die Eingabe auf Vollständigkeit für eine wissenschaftlich fundierte forensische Untersuchung.
|
||||
|
||||
ANALYSIEREN SIE DIESE FORENSISCHEN KATEGORIEN:
|
||||
1. **Incident Context**: Was ist passiert? Welche Angriffsvektoren oder technischen Probleme liegen vor?
|
||||
2. **Affected Systems**: Welche spezifischen Technologien/Plattformen sind betroffen? (Windows/Linux/ICS/SCADA/Mobile/Cloud/Network Infrastructure)
|
||||
3. **Available Evidence**: Welche forensischen Datenquellen stehen zur Verfügung? (RAM-Dumps, Disk-Images, Log-Files, Network-Captures, Registry-Hives)
|
||||
4. **Investigation Objectives**: Was soll erreicht werden? (IOC-Extraktion, Timeline-Rekonstruktion, Attribution, Impact-Assessment)
|
||||
5. **Timeline Constraints**: Wie zeitkritisch ist die Untersuchung?
|
||||
6. **Legal & Compliance**: Rechtliche Anforderungen, Chain of Custody, Compliance-Rahmen (DSGVO, sector-specific regulations)
|
||||
7. **Technical Constraints**: Verfügbare Ressourcen, Skills, Infrastrukturbeschränkungen
|
||||
|
||||
WENN die Beschreibung alle kritischen forensischen Aspekte abdeckt: Geben Sie eine leere Liste [] zurück.
|
||||
|
||||
WENN wichtige forensische Details fehlen: Formulieren Sie 2-3 präzise Fragen, die die kritischsten Lücken für eine wissenschaftlich fundierte forensische Analyse schließen.
|
||||
|
||||
QUALITÄTSKRITERIEN FÜR FRAGEN:
|
||||
- Forensisch spezifisch, nicht allgemein (NICHT: "Mehr Details?")
|
||||
- Methodisch relevant (NICHT: "Wann passierte das?")
|
||||
- Priorisiert nach Auswirkung auf die forensische Untersuchungsqualität
|
||||
- Die Frage soll maximal 20 Wörter umfassen
|
||||
|
||||
ANTWORTFORMAT (NUR JSON, KEIN ZUSÄTZLICHER TEXT):
|
||||
[
|
||||
"spezifische Frage 1?",
|
||||
"spezifische Frage 2?",
|
||||
"spezifische Frage 3?"
|
||||
]
|
||||
|
||||
NUTZER-EINGABE:
|
||||
${input}
|
||||
`.trim();
|
||||
}
|
||||
|
||||
async function callAIService(prompt: string): Promise<Response> {
|
||||
const endpoint = AI_ENDPOINT;
|
||||
const apiKey = AI_ANALYZER_API_KEY;
|
||||
const model = AI_ANALYZER_MODEL;
|
||||
|
||||
let headers: Record<string, string> = {
|
||||
'Content-Type': 'application/json'
|
||||
};
|
||||
|
||||
if (apiKey) {
|
||||
headers['Authorization'] = `Bearer ${apiKey}`;
|
||||
console.log('[ENHANCE API] Using API key authentication');
|
||||
} else {
|
||||
console.log('[ENHANCE API] No API key - making request without authentication');
|
||||
}
|
||||
|
||||
const requestBody = {
|
||||
model,
|
||||
messages: [{ role: 'user', content: prompt }],
|
||||
max_tokens: 300,
|
||||
temperature: 0.7,
|
||||
top_p: 0.9,
|
||||
frequency_penalty: 0.2,
|
||||
presence_penalty: 0.1
|
||||
};
|
||||
|
||||
return fetch(`${endpoint}/v1/chat/completions`, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify(requestBody)
|
||||
});
|
||||
function stripJsonFences(s: string): string {
|
||||
return s.replace(/^```json\s*/i, '')
|
||||
.replace(/^```\s*/i, '')
|
||||
.replace(/\s*```\s*$/, '')
|
||||
.trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Handler
|
||||
*/
|
||||
export const POST: APIRoute = async ({ request }) => {
|
||||
try {
|
||||
const authResult = await withAPIAuth(request, 'ai');
|
||||
if (!authResult.authenticated) {
|
||||
return createAuthErrorResponse();
|
||||
}
|
||||
|
||||
const userId = authResult.userId;
|
||||
const auth = await withAPIAuth(request, 'ai');
|
||||
if (!auth.authenticated) return createAuthErrorResponse();
|
||||
const userId = auth.userId;
|
||||
|
||||
if (!checkRateLimit(userId)) {
|
||||
return apiError.rateLimit('Enhancement rate limit exceeded');
|
||||
@@ -144,79 +85,53 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
const body = await request.json();
|
||||
const { input } = body;
|
||||
|
||||
if (!input || typeof input !== 'string' || input.length < 40) {
|
||||
return apiError.badRequest('Input too short for enhancement (minimum 40 characters)');
|
||||
if (!input || typeof input !== 'string' || input.length < INPUT_MIN_CHARS) {
|
||||
return apiError.badRequest(`Input too short for enhancement (minimum ${INPUT_MIN_CHARS} characters)`);
|
||||
}
|
||||
|
||||
const sanitizedInput = sanitizeInput(input);
|
||||
if (sanitizedInput.length < 40) {
|
||||
if (sanitizedInput.length < INPUT_MIN_CHARS) {
|
||||
return apiError.badRequest('Input too short after sanitization');
|
||||
}
|
||||
|
||||
const systemPrompt = createEnhancementPrompt(sanitizedInput);
|
||||
const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 4)}`;
|
||||
|
||||
const aiResponse = await enqueueApiCall(() => callAIService(systemPrompt), taskId);
|
||||
const taskId = `enhance_${userId}_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`;
|
||||
const questionsPrompt = getPrompt('enhancementQuestions', sanitizedInput);
|
||||
|
||||
if (!aiResponse.ok) {
|
||||
const errorText = await aiResponse.text();
|
||||
console.error('[ENHANCE API] AI enhancement error:', errorText, 'Status:', aiResponse.status);
|
||||
return apiServerError.unavailable('Enhancement service unavailable');
|
||||
}
|
||||
console.log(`[ENHANCE-API] Processing enhancement request for user: ${userId}`);
|
||||
|
||||
const aiData = await aiResponse.json();
|
||||
const aiContent = aiData.choices?.[0]?.message?.content;
|
||||
const aiResponse = await enqueueApiCall(
|
||||
() => aiService.callAI(questionsPrompt, { temperature: AI_TEMPERATURE }),
|
||||
taskId
|
||||
);
|
||||
|
||||
if (!aiContent) {
|
||||
if (!aiResponse?.content) {
|
||||
return apiServerError.unavailable('No enhancement response');
|
||||
}
|
||||
|
||||
let questions;
|
||||
try {
|
||||
const cleanedContent = aiContent
|
||||
.replace(/^```json\s*/i, '')
|
||||
.replace(/\s*```\s*$/, '')
|
||||
.trim();
|
||||
questions = JSON.parse(cleanedContent);
|
||||
|
||||
if (!Array.isArray(questions)) {
|
||||
throw new Error('Response is not an array');
|
||||
}
|
||||
|
||||
questions = questions
|
||||
.filter(q => typeof q === 'string' && q.length > 20 && q.length < 200)
|
||||
.filter(q => q.includes('?'))
|
||||
.filter(q => {
|
||||
const forensicsTerms = ['forensisch', 'log', 'dump', 'image', 'artefakt', 'evidence', 'incident', 'system', 'netzwerk', 'zeitraum', 'verfügbar'];
|
||||
const lowerQ = q.toLowerCase();
|
||||
return forensicsTerms.some(term => lowerQ.includes(term));
|
||||
})
|
||||
.map(q => q.trim())
|
||||
.slice(0, 3);
|
||||
|
||||
if (questions.length === 0) {
|
||||
questions = [];
|
||||
}
|
||||
let parsed: unknown = JSONParser.safeParseJSON(stripJsonFences(aiResponse.content), null);
|
||||
|
||||
} catch (error) {
|
||||
console.error('Failed to parse enhancement response:', aiContent);
|
||||
questions = [];
|
||||
}
|
||||
let questions: string[] = Array.isArray(parsed) ? parsed : [];
|
||||
questions = questions
|
||||
.filter(q => typeof q === 'string')
|
||||
.map(q => q.trim())
|
||||
.filter(q => q.endsWith('?'))
|
||||
.filter(q => q.length >= Q_MIN_LEN && q.length <= Q_MAX_LEN)
|
||||
.slice(0, Q_MAX_COUNT);
|
||||
|
||||
console.log(`[ENHANCE API] User: ${userId}, Forensics Questions: ${questions.length}, Input length: ${sanitizedInput.length}`);
|
||||
console.log(`[ENHANCE-API] User: ${userId}, Questions generated: ${questions.length}, Input length: ${sanitizedInput.length}`);
|
||||
|
||||
return new Response(JSON.stringify({
|
||||
success: true,
|
||||
questions,
|
||||
taskId,
|
||||
inputComplete: questions.length === 0
|
||||
inputComplete: questions.length === 0
|
||||
}), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' }
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
console.error('Enhancement error:', error);
|
||||
} catch (err) {
|
||||
console.error('[ENHANCE-API] Enhancement error:', err);
|
||||
return apiServerError.internal('Enhancement processing failed');
|
||||
}
|
||||
};
|
||||
};
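
Editor's note: a quick illustration (not part of the commit) of what the question post-processing above accepts. JSON.parse stands in for the commit's JSONParser.safeParseJSON; the sample AI response is made up.

// Mirrors the filter chain in the handler above with the constants it defines.
const Q_MIN_LEN = 15;
const Q_MAX_LEN = 160;
const Q_MAX_COUNT = 3;

function stripJsonFences(s: string): string {
  return s.replace(/^```json\s*/i, '').replace(/^```\s*/i, '').replace(/\s*```\s*$/, '').trim();
}

const raw = '```json\n["Welche Log-Quellen sind verfügbar?", "Mehr Details?", "not a question"]\n```';
const parsed: unknown = JSON.parse(stripJsonFences(raw));

const questions = (Array.isArray(parsed) ? parsed : [])
  .filter((q): q is string => typeof q === 'string')
  .map(q => q.trim())
  .filter(q => q.endsWith('?'))
  .filter(q => q.length >= Q_MIN_LEN && q.length <= Q_MAX_LEN)
  .slice(0, Q_MAX_COUNT);

console.log(questions); // ["Welche Log-Quellen sind verfügbar?"] — "Mehr Details?" is shorter than Q_MIN_LEN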
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
// src/pages/api/ai/query.ts
|
||||
|
||||
import type { APIRoute } from 'astro';
|
||||
import { withAPIAuth } from '../../../utils/auth.js';
|
||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
||||
@@ -21,15 +20,14 @@ const MAIN_RATE_LIMIT_MAX = parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS || '
|
||||
const MICRO_TASK_TOTAL_LIMIT = parseInt(process.env.AI_MICRO_TASK_TOTAL_LIMIT || '50', 10);
|
||||
|
||||
function sanitizeInput(input: string): string {
|
||||
let sanitized = input
|
||||
return input
|
||||
.replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
|
||||
.replace(/\<\/?[^>]+(>|$)/g, '')
|
||||
.replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
|
||||
.replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
|
||||
.trim();
|
||||
|
||||
sanitized = sanitized.slice(0, 2000).replace(/\s+/g, ' ');
|
||||
return sanitized;
|
||||
.trim()
|
||||
.slice(0, 2000)
|
||||
.replace(/\s+/g, ' ');
|
||||
}
|
||||
|
||||
function checkRateLimit(userId: string): { allowed: boolean; reason?: string; microTasksRemaining?: number } {
|
||||
@@ -78,7 +76,7 @@ function incrementMicroTaskCount(userId: string, aiCallsMade: number): void {
|
||||
}
|
||||
}
|
||||
|
||||
function cleanupExpiredRateLimits() {
|
||||
function cleanupExpiredRateLimits(): void {
|
||||
const now = Date.now();
|
||||
const maxStoreSize = 1000;
|
||||
|
||||
@@ -118,51 +116,52 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
const body = await request.json();
|
||||
const { query, mode = 'workflow', taskId: clientTaskId } = body;
|
||||
|
||||
console.log(`[MICRO-TASK API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
|
||||
console.log(`[MICRO-TASK API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);
|
||||
console.log(`[AI-API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
|
||||
console.log(`[AI-API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);
|
||||
|
||||
if (!query || typeof query !== 'string') {
|
||||
console.log(`[MICRO-TASK API] Invalid query for task ${clientTaskId}`);
|
||||
console.log(`[AI-API] Invalid query for task ${clientTaskId}`);
|
||||
return apiError.badRequest('Query required');
|
||||
}
|
||||
|
||||
if (!['workflow', 'tool'].includes(mode)) {
|
||||
console.log(`[MICRO-TASK API] Invalid mode for task ${clientTaskId}: ${mode}`);
|
||||
console.log(`[AI-API] Invalid mode for task ${clientTaskId}: ${mode}`);
|
||||
return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
|
||||
}
|
||||
|
||||
const sanitizedQuery = sanitizeInput(query);
|
||||
if (sanitizedQuery.includes('[FILTERED]')) {
|
||||
console.log(`[MICRO-TASK API] Filtered input detected for task ${clientTaskId}`);
|
||||
console.log(`[AI-API] Filtered input detected for task ${clientTaskId}`);
|
||||
return apiError.badRequest('Invalid input detected');
|
||||
}
|
||||
|
||||
const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
|
||||
|
||||
console.log(`[MICRO-TASK API] About to enqueue micro-task pipeline ${taskId}`);
|
||||
console.log(`[AI-API] Enqueueing pipeline task ${taskId}`);
|
||||
|
||||
const result = await enqueueApiCall(() =>
|
||||
aiPipeline.processQuery(sanitizedQuery, mode)
|
||||
, taskId);
|
||||
|
||||
if (!result || !result.recommendation) {
|
||||
return apiServerError.unavailable('No response from micro-task AI pipeline');
|
||||
return apiServerError.unavailable('No response from AI pipeline');
|
||||
}
|
||||
|
||||
const stats = result.processingStats;
|
||||
const estimatedAICallsMade = stats.microTasksCompleted + stats.microTasksFailed;
|
||||
incrementMicroTaskCount(userId, estimatedAICallsMade);
|
||||
|
||||
console.log(`[MICRO-TASK API] Pipeline completed for ${taskId}:`);
|
||||
console.log(` - Mode: ${mode}`);
|
||||
console.log(` - User: ${userId}`);
|
||||
console.log(` - Query length: ${sanitizedQuery.length}`);
|
||||
console.log(` - Processing time: ${stats.processingTimeMs}ms`);
|
||||
console.log(` - Micro-tasks completed: ${stats.microTasksCompleted}`);
|
||||
console.log(` - Micro-tasks failed: ${stats.microTasksFailed}`);
|
||||
console.log(` - Estimated AI calls: ${estimatedAICallsMade}`);
|
||||
console.log(` - Embeddings used: ${stats.embeddingsUsed}`);
|
||||
console.log(` - Final items: ${stats.finalSelectedItems}`);
|
||||
console.log(`[AI-API] Pipeline completed for ${taskId}:`, {
|
||||
mode,
|
||||
user: userId,
|
||||
queryLength: sanitizedQuery.length,
|
||||
processingTime: stats.processingTimeMs,
|
||||
microTasksCompleted: stats.microTasksCompleted,
|
||||
microTasksFailed: stats.microTasksFailed,
|
||||
estimatedAICalls: estimatedAICallsMade,
|
||||
embeddingsUsed: stats.embeddingsUsed,
|
||||
finalItems: stats.finalSelectedItems
|
||||
});
|
||||
|
||||
const currentLimit = rateLimitStore.get(userId);
|
||||
const remainingMicroTasks = currentLimit ?
|
||||
@@ -176,7 +175,7 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
query: sanitizedQuery,
|
||||
processingStats: {
|
||||
...result.processingStats,
|
||||
pipelineType: 'micro-task',
|
||||
pipelineType: 'refactored',
|
||||
microTasksSuccessRate: stats.microTasksCompleted / (stats.microTasksCompleted + stats.microTasksFailed),
|
||||
averageTaskTime: stats.processingTimeMs / (stats.microTasksCompleted + stats.microTasksFailed),
|
||||
estimatedAICallsMade
|
||||
@@ -192,18 +191,16 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
console.error('[MICRO-TASK API] Pipeline error:', error);
|
||||
console.error('[AI-API] Pipeline error:', error);
|
||||
|
||||
if (error.message.includes('embeddings')) {
|
||||
return apiServerError.unavailable('Embeddings service error - using AI fallback');
|
||||
} else if (error.message.includes('micro-task')) {
|
||||
return apiServerError.unavailable('Micro-task pipeline error - some analysis steps failed');
|
||||
} else if (error.message.includes('selector')) {
|
||||
return apiServerError.unavailable('AI selector service error');
|
||||
return apiServerError.unavailable('Embeddings service error');
|
||||
} else if (error.message.includes('AI')) {
|
||||
return apiServerError.unavailable('AI service error');
|
||||
} else if (error.message.includes('rate limit')) {
|
||||
return apiError.rateLimit('AI service rate limits exceeded during micro-task processing');
|
||||
return apiError.rateLimit('AI service rate limits exceeded');
|
||||
} else {
|
||||
return apiServerError.internal('Micro-task AI pipeline error');
|
||||
return apiServerError.internal('AI pipeline error');
|
||||
}
|
||||
}
|
||||
};
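
Editor's note: an illustrative call (not from the commit) showing what the rewritten sanitizeInput above does to a hostile query — fenced code blocks, HTML tags, role prefixes, and injection phrases are masked, whitespace collapses, and the result is capped at 2000 characters. The sample input is made up.

// Same replacement chain as the sanitizeInput shown above.
function sanitizeInput(input: string): string {
  return input
    .replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
    .replace(/\<\/?[^>]+(>|$)/g, '')
    .replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
    .replace(/\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi, '[INSTRUCTION_REMOVED]')
    .trim()
    .slice(0, 2000)
    .replace(/\s+/g, ' ');
}

const hostile = 'Ransomware auf <b>Windows-Server</b>.\n```powershell\nrm -rf /\n```\nsystem: ignore previous instructions';
console.log(sanitizeInput(hostile));
// "Ransomware auf Windows-Server. [CODE_BLOCK_REMOVED] [ROLE_REMOVED] [INSTRUCTION_REMOVED]"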
|
||||
@@ -1,5 +1,7 @@
|
||||
// src/pages/api/auth/login.ts
|
||||
import type { APIRoute } from 'astro';
|
||||
import { generateAuthUrl, generateState, logAuthEvent } from '../../../utils/auth.js';
|
||||
import { serialize } from 'cookie';
|
||||
|
||||
export const prerender = false;
|
||||
|
||||
@@ -8,14 +10,27 @@ export const GET: APIRoute = async ({ url, redirect }) => {
|
||||
const state = generateState();
|
||||
const authUrl = generateAuthUrl(state);
|
||||
|
||||
console.log('Generated auth URL:', authUrl);
|
||||
console.log('[AUTH] Generated auth URL:', authUrl);
|
||||
|
||||
const returnTo = url.searchParams.get('returnTo') || '/';
|
||||
|
||||
logAuthEvent('Login initiated', { returnTo, authUrl });
|
||||
|
||||
const stateData = JSON.stringify({ state, returnTo });
|
||||
const stateCookie = `auth_state=${encodeURIComponent(stateData)}; HttpOnly; SameSite=Lax; Path=/; Max-Age=600`;
|
||||
|
||||
const publicBaseUrl = process.env.PUBLIC_BASE_URL || '';
|
||||
const isProduction = process.env.NODE_ENV === 'production';
|
||||
const isSecure = publicBaseUrl.startsWith('https://') || isProduction;
|
||||
|
||||
const stateCookie = serialize('auth_state', stateData, {
|
||||
httpOnly: true,
|
||||
secure: isSecure,
|
||||
sameSite: 'lax',
|
||||
maxAge: 600, // 10 minutes
|
||||
path: '/'
|
||||
});
|
||||
|
||||
console.log('[AUTH] Setting auth state cookie:', stateCookie.substring(0, 50) + '...');
|
||||
|
||||
return new Response(null, {
|
||||
status: 302,
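
Editor's note: for reference, a small sketch (not from the commit) of roughly what the serialize call from the cookie package produces with the options used above; the state payload values are made up.

import { serialize } from 'cookie';

// Hypothetical payload; the real one is JSON.stringify({ state, returnTo }) as above.
const stateData = JSON.stringify({ state: 'abc123', returnTo: '/knowledgebase' });

const stateCookie = serialize('auth_state', stateData, {
  httpOnly: true,
  secure: true,    // true when PUBLIC_BASE_URL starts with https:// or NODE_ENV === 'production'
  sameSite: 'lax',
  maxAge: 600,     // 10 minutes
  path: '/'
});

console.log(stateCookie);
// auth_state=%7B%22state%22%3A%22abc123%22...; Max-Age=600; Path=/; HttpOnly; Secure; SameSite=Lax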
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// src/pages/api/auth/process.ts (FIXED - Proper cookie handling)
|
||||
// src/pages/api/auth/process.ts
|
||||
import type { APIRoute } from 'astro';
|
||||
import {
|
||||
verifyAuthState,
|
||||
@@ -7,7 +7,7 @@ import {
|
||||
createSessionWithCookie,
|
||||
logAuthEvent
|
||||
} from '../../../utils/auth.js';
|
||||
import { apiError, apiSpecial, apiWithHeaders, handleAPIRequest } from '../../../utils/api.js';
|
||||
import { apiError, apiSpecial, handleAPIRequest } from '../../../utils/api.js';
|
||||
|
||||
export const prerender = false;
|
||||
|
||||
@@ -30,9 +30,15 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
|
||||
const stateVerification = verifyAuthState(request, state);
|
||||
if (!stateVerification.isValid || !stateVerification.stateData) {
|
||||
logAuthEvent('State verification failed', {
|
||||
error: stateVerification.error,
|
||||
hasStateData: !!stateVerification.stateData
|
||||
});
|
||||
return apiError.badRequest(stateVerification.error || 'Invalid state parameter');
|
||||
}
|
||||
|
||||
console.log('[AUTH] State verification successful, exchanging code for tokens');
|
||||
|
||||
const tokens = await exchangeCodeForTokens(code);
|
||||
const userInfo = await getUserInfo(tokens.access_token);
|
||||
|
||||
@@ -43,6 +49,12 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
email: sessionResult.userEmail
|
||||
});
|
||||
|
||||
const returnUrl = new URL(stateVerification.stateData.returnTo, request.url);
|
||||
returnUrl.searchParams.set('auth', 'success');
|
||||
const redirectUrl = returnUrl.toString();
|
||||
|
||||
console.log('[AUTH] Redirecting to:', redirectUrl);
|
||||
|
||||
const responseHeaders = new Headers();
|
||||
responseHeaders.set('Content-Type', 'application/json');
|
||||
|
||||
@@ -51,7 +63,7 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
|
||||
return new Response(JSON.stringify({
|
||||
success: true,
|
||||
redirectTo: stateVerification.stateData.returnTo
|
||||
redirectTo: redirectUrl
|
||||
}), {
|
||||
status: 200,
|
||||
headers: responseHeaders
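
Editor's note: a minimal illustration (not part of the commit) of the redirect URL built above with the WHATWG URL API; the host and returnTo values are made up.

// Mirrors the returnUrl construction in the handler above with example values.
const returnTo = '/knowledgebase/ransomware-triage';                   // from the verified state cookie
const requestUrl = 'https://forensics.example.org/api/auth/process';  // Astro's request.url

const returnUrl = new URL(returnTo, requestUrl);  // resolves the relative path against the request origin
returnUrl.searchParams.set('auth', 'success');

console.log(returnUrl.toString());
// https://forensics.example.org/knowledgebase/ransomware-triage?auth=success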
|
||||
|
||||
@@ -9,16 +9,16 @@ export const GET: APIRoute = async ({ request }) => {
|
||||
return await handleAPIRequest(async () => {
|
||||
const contributionAuth = await withAPIAuth(request, 'contributions');
|
||||
const aiAuth = await withAPIAuth(request, 'ai');
|
||||
const gatedContentAuth = await withAPIAuth(request, 'gatedcontent'); // ADDED
|
||||
const gatedContentAuth = await withAPIAuth(request, 'gatedcontent');
|
||||
|
||||
return apiResponse.success({
|
||||
authenticated: contributionAuth.authenticated || aiAuth.authenticated || gatedContentAuth.authenticated,
|
||||
contributionAuthRequired: contributionAuth.authRequired,
|
||||
aiAuthRequired: aiAuth.authRequired,
|
||||
gatedContentAuthRequired: gatedContentAuth.authRequired, // ADDED
|
||||
gatedContentAuthRequired: gatedContentAuth.authRequired,
|
||||
contributionAuthenticated: contributionAuth.authenticated,
|
||||
aiAuthenticated: aiAuth.authenticated,
|
||||
gatedContentAuthenticated: gatedContentAuth.authenticated, // ADDED
|
||||
gatedContentAuthenticated: gatedContentAuth.authenticated,
|
||||
expires: contributionAuth.session?.exp ? new Date(contributionAuth.session.exp * 1000).toISOString() : null
|
||||
});
|
||||
}, 'Status check failed');
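
Editor's note: for downstream consumers (the gated-content script later in this diff fetches this route), a sketch of the response shape implied by the handler above. The interface name is an assumption, and any wrapper added by apiResponse.success is not shown here.

// Assumed shape of GET /api/auth/status, derived from the fields returned above.
interface AuthStatusResponse {
  authenticated: boolean;
  contributionAuthRequired: boolean;
  aiAuthRequired: boolean;
  gatedContentAuthRequired: boolean;
  contributionAuthenticated: boolean;
  aiAuthenticated: boolean;
  gatedContentAuthenticated: boolean;
  expires: string | null; // ISO timestamp of session expiry, or null
}

async function canViewGatedContent(): Promise<boolean> {
  const res = await fetch('/api/auth/status');
  if (!res.ok) return false;
  const status = (await res.json()) as AuthStatusResponse;
  // Same rule the gated-content script later in this diff applies: auth only blocks when required.
  return !status.gatedContentAuthRequired || status.gatedContentAuthenticated;
}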
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// src/pages/api/contribute/knowledgebase.ts - SIMPLIFIED: Issues only, minimal validation
|
||||
// src/pages/api/contribute/knowledgebase.ts
|
||||
import type { APIRoute } from 'astro';
|
||||
import { withAPIAuth } from '../../../utils/auth.js';
|
||||
import { apiResponse, apiError, apiServerError, handleAPIRequest } from '../../../utils/api.js';
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// src/pages/api/contribute/tool.ts (UPDATED - Using consolidated API responses + related_software)
|
||||
// src/pages/api/contribute/tool.ts
|
||||
import type { APIRoute } from 'astro';
|
||||
import { withAPIAuth } from '../../../utils/auth.js';
|
||||
import { apiResponse, apiError, apiServerError, apiSpecial, handleAPIRequest } from '../../../utils/api.js';
|
||||
@@ -82,31 +82,27 @@ function sanitizeInput(obj: any): any {
|
||||
}
|
||||
|
||||
function preprocessFormData(body: any): any {
|
||||
// Handle comma-separated strings from autocomplete inputs
|
||||
if (body.tool) {
|
||||
// Handle tags
|
||||
if (typeof body.tool.tags === 'string') {
|
||||
body.tool.tags = body.tool.tags.split(',').map((t: string) => t.trim()).filter(Boolean);
|
||||
}
|
||||
|
||||
// Handle related concepts
|
||||
if (body.tool.relatedConcepts) {
|
||||
if (typeof body.tool.relatedConcepts === 'string') {
|
||||
body.tool.related_concepts = body.tool.relatedConcepts.split(',').map((t: string) => t.trim()).filter(Boolean);
|
||||
} else {
|
||||
body.tool.related_concepts = body.tool.relatedConcepts;
|
||||
}
|
||||
delete body.tool.relatedConcepts; // Remove the original key
|
||||
delete body.tool.relatedConcepts;
|
||||
}
|
||||
|
||||
// Handle related software
|
||||
if (body.tool.relatedSoftware) {
|
||||
if (typeof body.tool.relatedSoftware === 'string') {
|
||||
body.tool.related_software = body.tool.relatedSoftware.split(',').map((t: string) => t.trim()).filter(Boolean);
|
||||
} else {
|
||||
body.tool.related_software = body.tool.relatedSoftware;
|
||||
}
|
||||
delete body.tool.relatedSoftware; // Remove the original key
|
||||
delete body.tool.relatedSoftware;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -142,14 +138,11 @@ async function validateToolData(tool: any, action: string): Promise<{ valid: boo
|
||||
}
|
||||
}
|
||||
|
||||
// Validate related items exist (optional validation - could be enhanced)
|
||||
if (tool.related_concepts && tool.related_concepts.length > 0) {
|
||||
// Could validate that referenced concepts actually exist
|
||||
console.log('[VALIDATION] Related concepts provided:', tool.related_concepts);
|
||||
}
|
||||
|
||||
if (tool.related_software && tool.related_software.length > 0) {
|
||||
// Could validate that referenced software actually exists
|
||||
console.log('[VALIDATION] Related software provided:', tool.related_software);
|
||||
}
|
||||
|
||||
@@ -187,7 +180,6 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
return apiSpecial.invalidJSON();
|
||||
}
|
||||
|
||||
// Preprocess form data to handle autocomplete inputs
|
||||
body = preprocessFormData(body);
|
||||
|
||||
const sanitizedBody = sanitizeInput(body);
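
Editor's note: an illustrative before/after (not part of the commit) for the preprocessFormData normalization above, which splits the comma-separated autocomplete strings into arrays and renames the camelCase keys. The example values are made up.

// Example input as the contribute form might post it.
const exampleBody = {
  tool: {
    name: 'Volatility 3',
    tags: 'memory, ram-analysis, windows',
    relatedConcepts: 'Memory Forensics',
    relatedSoftware: 'YARA, Velociraptor'
  }
};

// After preprocessFormData(exampleBody) as defined above, exampleBody.tool becomes:
// {
//   name: 'Volatility 3',
//   tags: ['memory', 'ram-analysis', 'windows'],
//   related_concepts: ['Memory Forensics'],
//   related_software: ['YARA', 'Velociraptor']
// }
// (relatedConcepts / relatedSoftware are deleted; string values are split on commas, trimmed, and empties dropped.)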
|
||||
|
||||
@@ -35,16 +35,8 @@ export const POST: APIRoute = async ({ request }) => {
|
||||
);
|
||||
}
|
||||
|
||||
/* --- (rest of the handler unchanged) -------------------------- */
|
||||
const { embeddingsService } = await import('../../../utils/embeddings.js');
|
||||
|
||||
if (!embeddingsService.isEnabled()) {
|
||||
return new Response(
|
||||
JSON.stringify({ success: false, error: 'Semantic search not available' }),
|
||||
{ status: 400, headers: { 'Content-Type': 'application/json' } }
|
||||
);
|
||||
}
|
||||
|
||||
await embeddingsService.waitForInitialization();
|
||||
|
||||
const similarItems = await embeddingsService.findSimilar(
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
// src/pages/contribute/index.astro - Consolidated Auth
|
||||
// src/pages/contribute/index.astro
|
||||
import BaseLayout from '../../layouts/BaseLayout.astro';
|
||||
import { withAuth } from '../../utils/auth.js';
|
||||
|
||||
|
||||
@@ -23,7 +23,6 @@ const editToolName = Astro.url.searchParams.get('edit');
|
||||
const editTool = editToolName ? existingTools.find(tool => tool.name === editToolName) : null;
|
||||
const isEdit = !!editTool;
|
||||
|
||||
// Extract data for autocomplete
|
||||
const allTags = [...new Set(existingTools.flatMap(tool => tool.tags || []))].sort();
|
||||
const allSoftwareAndMethods = existingTools
|
||||
.filter(tool => tool.type === 'software' || tool.type === 'method')
|
||||
@@ -300,7 +299,6 @@ const allConcepts = existingTools
|
||||
</BaseLayout>
|
||||
|
||||
<script define:vars={{ isEdit, editTool, domains, phases, domainAgnosticSoftware, allTags, allSoftwareAndMethods, allConcepts }}>
|
||||
// Consolidated Autocomplete Functionality - inlined to avoid module loading issues
|
||||
class AutocompleteManager {
|
||||
constructor(inputElement, dataSource, options = {}) {
|
||||
this.input = inputElement;
|
||||
@@ -337,7 +335,6 @@ class AutocompleteManager {
|
||||
this.dropdown = document.createElement('div');
|
||||
this.dropdown.className = 'autocomplete-dropdown';
|
||||
|
||||
// Insert dropdown after input
|
||||
this.input.parentNode.style.position = 'relative';
|
||||
this.input.parentNode.insertBefore(this.dropdown, this.input.nextSibling);
|
||||
}
|
||||
@@ -358,7 +355,6 @@ class AutocompleteManager {
|
||||
});
|
||||
|
||||
this.input.addEventListener('blur', (e) => {
|
||||
// Delay to allow click events on dropdown items
|
||||
setTimeout(() => {
|
||||
if (!this.dropdown.contains(document.activeElement)) {
|
||||
this.hideDropdown();
|
||||
@@ -450,7 +446,6 @@ class AutocompleteManager {
|
||||
})
|
||||
.join('');
|
||||
|
||||
// Bind click events
|
||||
this.dropdown.querySelectorAll('.autocomplete-option').forEach((option, index) => {
|
||||
option.addEventListener('click', () => {
|
||||
this.selectItem(this.filteredData[index]);
|
||||
@@ -484,7 +479,6 @@ class AutocompleteManager {
|
||||
this.hideDropdown();
|
||||
}
|
||||
|
||||
// Trigger change event
|
||||
this.input.dispatchEvent(new CustomEvent('autocomplete:select', {
|
||||
detail: { item, text, selectedItems: Array.from(this.selectedItems) }
|
||||
}));
|
||||
@@ -510,7 +504,6 @@ class AutocompleteManager {
|
||||
`)
|
||||
.join('');
|
||||
|
||||
// Bind remove events
|
||||
this.selectedContainer.querySelectorAll('.autocomplete-remove').forEach(btn => {
|
||||
btn.addEventListener('click', (e) => {
|
||||
e.preventDefault();
|
||||
@@ -636,7 +629,6 @@ class ContributionForm {
|
||||
}
|
||||
|
||||
setupAutocomplete() {
|
||||
// Tags autocomplete
|
||||
if (this.elements.tagsInput && this.elements.tagsHidden) {
|
||||
const tagsManager = new AutocompleteManager(this.elements.tagsInput, allTags, {
|
||||
allowMultiple: true,
|
||||
@@ -644,7 +636,6 @@ class ContributionForm {
|
||||
placeholder: 'Beginne zu tippen, um Tags hinzuzufügen...'
|
||||
});
|
||||
|
||||
// Set initial values if editing
|
||||
if (this.editTool?.tags) {
|
||||
tagsManager.setSelectedItems(this.editTool.tags);
|
||||
}
|
||||
@@ -652,7 +643,6 @@ class ContributionForm {
|
||||
this.autocompleteManagers.set('tags', tagsManager);
|
||||
}
|
||||
|
||||
// Related concepts autocomplete
|
||||
if (this.elements.relatedConceptsInput && this.elements.relatedConceptsHidden) {
|
||||
const conceptsManager = new AutocompleteManager(this.elements.relatedConceptsInput, allConcepts, {
|
||||
allowMultiple: true,
|
||||
@@ -660,7 +650,6 @@ class ContributionForm {
|
||||
placeholder: 'Beginne zu tippen, um Konzepte zu finden...'
|
||||
});
|
||||
|
||||
// Set initial values if editing
|
||||
if (this.editTool?.related_concepts) {
|
||||
conceptsManager.setSelectedItems(this.editTool.related_concepts);
|
||||
}
|
||||
@@ -668,7 +657,6 @@ class ContributionForm {
|
||||
this.autocompleteManagers.set('relatedConcepts', conceptsManager);
|
||||
}
|
||||
|
||||
// Related software autocomplete
|
||||
if (this.elements.relatedSoftwareInput && this.elements.relatedSoftwareHidden) {
|
||||
const softwareManager = new AutocompleteManager(this.elements.relatedSoftwareInput, allSoftwareAndMethods, {
|
||||
allowMultiple: true,
|
||||
@@ -676,7 +664,6 @@ class ContributionForm {
|
||||
placeholder: 'Beginne zu tippen, um Software/Methoden zu finden...'
|
||||
});
|
||||
|
||||
// Set initial values if editing
|
||||
if (this.editTool?.related_software) {
|
||||
softwareManager.setSelectedItems(this.editTool.related_software);
|
||||
}
|
||||
@@ -684,7 +671,6 @@ class ContributionForm {
|
||||
this.autocompleteManagers.set('relatedSoftware', softwareManager);
|
||||
}
|
||||
|
||||
// Listen for autocomplete changes to update YAML preview
|
||||
Object.values(this.autocompleteManagers).forEach(manager => {
|
||||
if (manager.input) {
|
||||
manager.input.addEventListener('autocomplete:select', () => {
|
||||
@@ -726,14 +712,10 @@ class ContributionForm {
|
||||
updateFieldVisibility() {
|
||||
const type = this.elements.typeSelect.value;
|
||||
|
||||
// Only hide/show software-specific fields (platforms, license)
|
||||
// Relations should always be visible since all tool types can have relationships
|
||||
this.elements.softwareFields.style.display = type === 'software' ? 'block' : 'none';
|
||||
|
||||
// Always show relations - all tool types can have relationships
|
||||
this.elements.relationsFields.style.display = 'block';
|
||||
|
||||
// Only mark platform/license as required for software
|
||||
if (this.elements.platformsRequired) {
|
||||
this.elements.platformsRequired.style.display = type === 'software' ? 'inline' : 'none';
|
||||
}
|
||||
@@ -741,7 +723,6 @@ class ContributionForm {
|
||||
this.elements.licenseRequired.style.display = type === 'software' ? 'inline' : 'none';
|
||||
}
|
||||
|
||||
// Always show both relation sections - let users decide what's relevant
|
||||
const conceptsSection = document.getElementById('related-concepts-section');
|
||||
const softwareSection = document.getElementById('related-software-section');
|
||||
if (conceptsSection) conceptsSection.style.display = 'block';
|
||||
@@ -806,19 +787,16 @@ class ContributionForm {
|
||||
tool.knowledgebase = true;
|
||||
}
|
||||
|
||||
// Handle tags from autocomplete
|
||||
const tagsValue = this.elements.tagsHidden?.value || '';
|
||||
if (tagsValue) {
|
||||
tool.tags = tagsValue.split(',').map(t => t.trim()).filter(Boolean);
|
||||
}
|
||||
|
||||
// Handle related concepts from autocomplete
|
||||
const relatedConceptsValue = this.elements.relatedConceptsHidden?.value || '';
|
||||
if (relatedConceptsValue) {
|
||||
tool.related_concepts = relatedConceptsValue.split(',').map(t => t.trim()).filter(Boolean);
|
||||
}
|
||||
|
||||
// Handle related software from autocomplete
|
||||
const relatedSoftwareValue = this.elements.relatedSoftwareHidden?.value || '';
|
||||
if (relatedSoftwareValue) {
|
||||
tool.related_software = relatedSoftwareValue.split(',').map(t => t.trim()).filter(Boolean);
|
||||
@@ -983,19 +961,16 @@ class ContributionForm {
|
||||
}
|
||||
};
|
||||
|
||||
// Handle tags from autocomplete
|
||||
const tagsValue = this.elements.tagsHidden?.value || '';
|
||||
if (tagsValue) {
|
||||
submission.tool.tags = tagsValue.split(',').map(t => t.trim()).filter(Boolean);
|
||||
}
|
||||
|
||||
// Handle related concepts from autocomplete
|
||||
const relatedConceptsValue = this.elements.relatedConceptsHidden?.value || '';
|
||||
if (relatedConceptsValue) {
|
||||
submission.tool.related_concepts = relatedConceptsValue.split(',').map(t => t.trim()).filter(Boolean);
|
||||
}
|
||||
|
||||
// Handle related software from autocomplete
|
||||
const relatedSoftwareValue = this.elements.relatedSoftwareHidden?.value || '';
|
||||
if (relatedSoftwareValue) {
|
||||
submission.tool.related_software = relatedSoftwareValue.split(',').map(t => t.trim()).filter(Boolean);
|
||||
@@ -1072,7 +1047,6 @@ class ContributionForm {
|
||||
}
|
||||
|
||||
destroy() {
|
||||
// Clean up autocomplete managers
|
||||
this.autocompleteManagers.forEach(manager => {
|
||||
manager.destroy();
|
||||
});
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
---
|
||||
//src/pages/index.astro
|
||||
import BaseLayout from '../layouts/BaseLayout.astro';
|
||||
import ToolCard from '../components/ToolCard.astro';
|
||||
import ToolFilters from '../components/ToolFilters.astro';
|
||||
@@ -509,9 +510,7 @@ if (aiAuthRequired) {
|
||||
}, 500);
|
||||
};
|
||||
|
||||
function handleSharedURL() {
|
||||
console.log('[SHARE] Handling shared URL:', window.location.search);
|
||||
|
||||
function handleSharedURL() {
|
||||
const urlParams = new URLSearchParams(window.location.search);
|
||||
const toolParam = urlParams.get('tool');
|
||||
const viewParam = urlParams.get('view');
|
||||
@@ -686,8 +685,6 @@ if (aiAuthRequired) {
|
||||
window.switchToAIView = () => switchToView('ai');
|
||||
window.switchToView = switchToView;
|
||||
|
||||
// CRITICAL: Handle shared URLs AFTER everything is set up
|
||||
// Increased timeout to ensure all components and utility functions are loaded
|
||||
setTimeout(() => {
|
||||
handleSharedURL();
|
||||
}, 1000);
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
---
|
||||
//src/pages/knowledgebase.astro
|
||||
import BaseLayout from '../layouts/BaseLayout.astro';
|
||||
import { getCollection } from 'astro:content';
|
||||
import { getToolsData } from '../utils/dataService.js';
|
||||
@@ -10,7 +11,6 @@ const allKnowledgebaseEntries = await getCollection('knowledgebase', (entry) =>
|
||||
return entry.data.published !== false;
|
||||
});
|
||||
|
||||
// Check if gated content authentication is enabled globally
|
||||
const gatedContentAuthEnabled = isGatedContentAuthRequired();
|
||||
|
||||
const knowledgebaseEntries = allKnowledgebaseEntries.map((entry) => {
|
||||
@@ -27,8 +27,7 @@ const knowledgebaseEntries = allKnowledgebaseEntries.map((entry) => {
|
||||
difficulty: entry.data.difficulty,
|
||||
categories: entry.data.categories || [],
|
||||
tags: entry.data.tags || [],
|
||||
gated_content: entry.data.gated_content || false, // NEW: Include gated content flag
|
||||
|
||||
gated_content: entry.data.gated_content || false,
|
||||
tool_name: entry.data.tool_name,
|
||||
related_tools: entry.data.related_tools || [],
|
||||
associatedTool,
|
||||
@@ -45,7 +44,6 @@ const knowledgebaseEntries = allKnowledgebaseEntries.map((entry) => {
|
||||
|
||||
knowledgebaseEntries.sort((a: any, b: any) => a.title.localeCompare(b.title));
|
||||
|
||||
// Count gated vs public articles for statistics
|
||||
const gatedCount = knowledgebaseEntries.filter(entry => entry.gated_content).length;
|
||||
const publicCount = knowledgebaseEntries.length - gatedCount;
|
||||
---
|
||||
|
||||
@@ -21,7 +21,6 @@ export async function getStaticPaths() {
|
||||
|
||||
const { entry }: { entry: any } = Astro.props;
|
||||
|
||||
// Check if this article is gated and if gated content auth is required globally
|
||||
const isGatedContent = entry.data.gated_content === true;
|
||||
const gatedContentAuthRequired = isGatedContentAuthRequired();
|
||||
const requiresAuth = isGatedContent && gatedContentAuthRequired;
|
||||
@@ -62,24 +61,28 @@ const currentUrl = Astro.url.href;
|
||||
<BaseLayout title={entry.data.title} description={entry.data.description}>
|
||||
{requiresAuth && (
|
||||
<script define:vars={{ requiresAuth, articleTitle: entry.data.title }}>
|
||||
// Client-side authentication check for gated content
|
||||
document.addEventListener('DOMContentLoaded', async () => {
|
||||
if (!requiresAuth) return;
|
||||
|
||||
console.log('[GATED CONTENT] Checking client-side auth for: ' + articleTitle);
|
||||
|
||||
// Hide content immediately while checking auth
|
||||
const urlParams = new URLSearchParams(window.location.search);
|
||||
const authSuccess = urlParams.get('auth') === 'success';
|
||||
|
||||
const contentArea = document.querySelector('.article-content');
|
||||
const sidebar = document.querySelector('.article-sidebar');
|
||||
|
||||
|
||||
if (contentArea) {
|
||||
contentArea.style.display = 'none';
|
||||
}
|
||||
// DON'T hide the sidebar container - just prevent TOC generation
|
||||
//if (sidebar) {
|
||||
//sidebar.innerHTML = ''; // Clear any content instead of hiding
|
||||
//}
|
||||
|
||||
if (authSuccess) {
|
||||
console.log('[GATED CONTENT] Auth success detected, waiting for session...');
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
|
||||
const cleanUrl = window.location.protocol + "//" + window.location.host + window.location.pathname;
|
||||
window.history.replaceState({}, document.title, cleanUrl);
|
||||
}
|
||||
|
||||
try {
|
||||
const response = await fetch('/api/auth/status');
|
||||
@@ -93,7 +96,6 @@ const currentUrl = Astro.url.href;
|
||||
if (authRequired && !isAuthenticated) {
|
||||
console.log('[GATED CONTENT] Access denied - showing auth required message: ' + articleTitle);
|
||||
|
||||
// Show authentication required message (no auto-redirect)
|
||||
if (contentArea) {
|
||||
const loginUrl = '/api/auth/login?returnTo=' + encodeURIComponent(window.location.href);
|
||||
contentArea.innerHTML = [
|
||||
@@ -121,11 +123,9 @@ const currentUrl = Astro.url.href;
|
||||
}
|
||||
} else {
|
||||
console.log('[GATED CONTENT] Access granted for: ' + articleTitle);
|
||||
// Show content for authenticated users
|
||||
if (contentArea) {
|
||||
contentArea.style.display = 'block';
|
||||
}
|
||||
// Let TOC generate normally for authenticated users
|
||||
setTimeout(() => {
|
||||
if (typeof generateTOCContent === 'function') {
|
||||
generateTOCContent();
|
||||
@@ -134,7 +134,6 @@ const currentUrl = Astro.url.href;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('[GATED CONTENT] Auth check failed:', error);
|
||||
// On error, show auth required message
|
||||
if (requiresAuth && contentArea) {
|
||||
const loginUrl = '/api/auth/login?returnTo=' + encodeURIComponent(window.location.href);
|
||||
contentArea.innerHTML = [
|
||||
@@ -402,29 +401,10 @@ const currentUrl = Astro.url.href;
|
||||
}
|
||||
|
||||
function generateSidebarTOC() {
|
||||
// NEW: Don't generate TOC for gated content that requires auth
|
||||
if (requiresAuth) {
|
||||
fetch('/api/auth/status')
|
||||
.then(response => response.json())
|
||||
.then(authStatus => {
|
||||
const isAuthenticated = authStatus.gatedContentAuthenticated || false;
|
||||
const authRequired = authStatus.gatedContentAuthRequired || false;
|
||||
|
||||
// Only generate TOC if user is authenticated for gated content
|
||||
if (authRequired && !isAuthenticated) {
|
||||
return; // Don't generate TOC
|
||||
} else {
|
||||
generateTOCContent(); // Generate TOC for authenticated users
|
||||
}
|
||||
})
|
||||
.catch(() => {
|
||||
// On error, don't generate TOC for gated content
|
||||
return;
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// For non-gated content, generate TOC normally
|
||||
generateTOCContent();
|
||||
}
|
||||
|
||||
@@ -530,17 +510,14 @@ const currentUrl = Astro.url.href;
|
||||
pre.dataset.copyEnhanced = 'true';
|
||||
pre.style.position ||= 'relative';
|
||||
|
||||
// Try to find an existing copy button we can reuse
|
||||
let btn =
|
||||
pre.querySelector('.copy-btn') || // our class
|
||||
pre.querySelector('.copy-btn') ||
|
||||
pre.querySelector('.btn-copy, .copy-button, .code-copy, .copy-code, button[aria-label*="copy" i]');
|
||||
|
||||
// If there is an "old" button that is NOT ours, prefer to reuse it by giving it our class.
|
||||
if (btn && !btn.classList.contains('copy-btn')) {
|
||||
btn.classList.add('copy-btn');
|
||||
}
|
||||
|
||||
// If no button at all, create one
|
||||
if (!btn) {
|
||||
btn = document.createElement('button');
|
||||
btn.type = 'button';
|
||||
@@ -555,7 +532,6 @@ const currentUrl = Astro.url.href;
|
||||
pre.appendChild(btn);
|
||||
}
|
||||
|
||||
// If there is a SECOND old button lingering (top-left in your case), hide it
|
||||
const possibleOldButtons = pre.querySelectorAll(
|
||||
'.btn-copy, .copy-button, .code-copy, .copy-code, button[aria-label*="copy" i]'
|
||||
);
|
||||
@@ -563,7 +539,6 @@ const currentUrl = Astro.url.href;
|
||||
if (b !== btn) b.style.display = 'none';
|
||||
});
|
||||
|
||||
// Success pill
|
||||
if (!pre.querySelector('.copied-pill')) {
|
||||
const pill = document.createElement('div');
|
||||
pill.className = 'copied-pill';
|
||||
@@ -571,7 +546,6 @@ const currentUrl = Astro.url.href;
|
||||
pre.appendChild(pill);
|
||||
}
|
||||
|
||||
// Screen reader live region
|
||||
if (!pre.querySelector('.sr-live')) {
|
||||
const live = document.createElement('div');
|
||||
live.className = 'sr-live';
|
||||
@@ -614,12 +588,11 @@ const currentUrl = Astro.url.href;
|
||||
});
|
||||
}
|
||||
|
||||
// keep your existing DOMContentLoaded; just ensure this is called
|
||||
window.generateTOCContent = generateTOCContent;
|
||||
|
||||
document.addEventListener('DOMContentLoaded', () => {
|
||||
// existing:
|
||||
calculateReadingTime();
|
||||
generateSidebarTOC();
|
||||
// new/updated:
|
||||
enhanceCodeCopy();
|
||||
});
|
||||
</script>
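
Editor's note: the click handler for the copy button is not included in this hunk; a minimal sketch of what it could look like, assuming the .copy-btn, .copied-pill, and .sr-live elements created in enhanceCodeCopy() above and the async Clipboard API. The class toggles and messages are assumptions.

// Hypothetical click handler wired to the .copy-btn created above.
async function handleCopyClick(pre: HTMLPreElement, btn: HTMLButtonElement): Promise<void> {
  const code = pre.querySelector('code')?.innerText ?? pre.innerText;
  try {
    await navigator.clipboard.writeText(code);
    pre.querySelector('.copied-pill')?.classList.add('visible');   // assumed CSS hook for the success pill
    const live = pre.querySelector('.sr-live');
    if (live) live.textContent = 'Code copied to clipboard';       // announce to screen readers
    setTimeout(() => pre.querySelector('.copied-pill')?.classList.remove('visible'), 2000);
  } catch {
    btn.textContent = 'Copy failed';                               // conservative fallback when clipboard access is denied
  }
}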
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
---
|
||||
//src/pages/status.astro
|
||||
import BaseLayout from '../layouts/BaseLayout.astro';
|
||||
import { getToolsData } from '../utils/dataService.js';
|
||||
|
||||
|
||||
@@ -675,6 +675,7 @@ input[type="checkbox"] {
|
||||
border-radius: 0.25rem;
|
||||
font-size: 0.75rem;
|
||||
margin: 0.125rem;
|
||||
max-height: 1.5rem;
|
||||
}
|
||||
|
||||
/* ===================================================================
|
||||
@@ -1806,11 +1807,44 @@ input[type="checkbox"] {
|
||||
.ai-textarea-section {
|
||||
flex: 1;
|
||||
min-width: 0;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.ai-textarea-section textarea {
|
||||
width: 100%;
|
||||
height: 180px;
|
||||
min-height: 180px;
|
||||
max-height: 300px;
|
||||
resize: vertical;
|
||||
font-size: 0.9375rem;
|
||||
line-height: 1.5;
|
||||
padding: 0.75rem;
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: 0.375rem;
|
||||
background-color: var(--color-bg);
|
||||
color: var(--color-text);
|
||||
transition: var(--transition-fast);
|
||||
flex: 1;
|
||||
}
|
||||
|
||||
.confidence-tooltip {
|
||||
background: var(--color-bg) !important;
|
||||
border: 2px solid var(--color-border) !important;
|
||||
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15) !important;
|
||||
z-index: 2000 !important;
|
||||
}
|
||||
|
||||
.ai-textarea-section textarea:focus {
|
||||
outline: none;
|
||||
border-color: var(--color-primary);
|
||||
box-shadow: 0 0 0 3px rgb(37 99 235 / 10%);
|
||||
}
|
||||
|
||||
.ai-suggestions-section {
|
||||
flex: 0 0 320px;
|
||||
min-height: 120px;
|
||||
min-height: 180px;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
.ai-input-container textarea {
|
||||
@@ -1887,7 +1921,6 @@ input[type="checkbox"] {
|
||||
box-shadow: 0 2px 4px 0 rgb(255 255 255 / 10%);
|
||||
}
|
||||
|
||||
/* Enhanced contextual analysis cards */
|
||||
.contextual-analysis-card {
|
||||
margin-bottom: 2rem;
|
||||
border-left: 4px solid;
|
||||
@@ -1984,7 +2017,6 @@ input[type="checkbox"] {
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
/* Enhanced queue status for micro-tasks */
|
||||
.queue-status-card.micro-task-mode {
|
||||
border-left: 4px solid var(--color-primary);
|
||||
}
|
||||
@@ -1997,7 +2029,6 @@ input[type="checkbox"] {
|
||||
border-radius: 0.5rem 0.5rem 0 0;
|
||||
}
|
||||
|
||||
/* Mobile responsive adjustments */
|
||||
@media (max-width: 768px) {
|
||||
.micro-task-steps {
|
||||
grid-template-columns: repeat(2, 1fr);
|
||||
@@ -2189,12 +2220,20 @@ input[type="checkbox"] {
|
||||
border-radius: 1rem;
|
||||
font-weight: 500;
|
||||
text-transform: uppercase;
|
||||
position: relative;
|
||||
z-index: 1;
|
||||
}
|
||||
|
||||
.tool-rec-priority.high { background-color: var(--color-error); color: white; }
|
||||
.tool-rec-priority.medium { background-color: var(--color-warning); color: white; }
|
||||
.tool-rec-priority.low { background-color: var(--color-accent); color: white; }
|
||||
|
||||
[data-theme="dark"] .confidence-tooltip {
|
||||
background: var(--color-bg-secondary) !important;
|
||||
border-color: var(--color-border) !important;
|
||||
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.4) !important;
|
||||
}
|
||||
|
||||
.tool-rec-justification {
|
||||
font-size: 0.875rem;
|
||||
line-height: 1.5;
|
||||
@@ -2613,7 +2652,8 @@ footer {
|
||||
================================================================= */
|
||||
|
||||
.smart-prompting-container {
|
||||
height: 100%;
|
||||
height: auto;
|
||||
min-height: 180px;
|
||||
animation: smartPromptSlideIn 0.4s cubic-bezier(0.4, 0, 0.2, 1);
|
||||
}
|
||||
|
||||
@@ -2622,8 +2662,10 @@ footer {
|
||||
border: 1px solid var(--color-border);
|
||||
border-radius: 0.5rem;
|
||||
padding: 1rem;
|
||||
height: 100%;
|
||||
min-height: 120px;
|
||||
height: auto;
|
||||
min-height: 180px;
|
||||
max-height: 400px;
|
||||
overflow-y: auto;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
opacity: 0.85;
|
||||
@@ -2663,8 +2705,8 @@ footer {
|
||||
|
||||
/* Smart Prompting Hint */
|
||||
.smart-prompting-hint {
|
||||
height: 100%;
|
||||
min-height: 120px;
|
||||
height: 180px;
|
||||
min-height: 180px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
animation: hintFadeIn 0.3s ease-in-out;
|
||||
@@ -3378,8 +3420,8 @@ footer {
|
||||
|
||||
.ai-suggestions-section {
|
||||
flex: 0 0 auto;
|
||||
width: 100%;
|
||||
max-width: none;
|
||||
height: auto;
|
||||
min-height: 120px;
|
||||
}
|
||||
|
||||
.ai-textarea-section {
|
||||
@@ -3389,6 +3431,11 @@ footer {
|
||||
min-height: 100px;
|
||||
}
|
||||
|
||||
.ai-textarea-section textarea {
|
||||
height: 150px;
|
||||
min-height: 150px;
|
||||
}
|
||||
|
||||
.ai-spotlight-content {
|
||||
flex-direction: column;
|
||||
gap: 0.75rem;
|
||||
|
||||
@@ -688,3 +688,107 @@
|
||||
/* Expand content */
|
||||
.article-main { max-width: 100% !important; }
|
||||
}
|
||||
|
||||
|
||||
/* ==========================================================================
|
||||
VIDEO EMBEDDING - ULTRA SIMPLE: Just full width, natural aspect ratios
|
||||
========================================================================== */
|
||||
|
||||
/* Video Container - just a styled wrapper */
|
||||
:where(.markdown-content) .video-container {
|
||||
width: 100%;
|
||||
margin: 2rem 0;
|
||||
border-radius: var(--radius-lg, 0.75rem);
|
||||
overflow: hidden;
|
||||
background-color: var(--color-bg-tertiary, #000);
|
||||
box-shadow: var(--shadow-lg, 0 12px 30px rgba(0,0,0,0.16));
|
||||
}
|
||||
|
||||
/* Video Element - full width, natural aspect ratio */
|
||||
:where(.markdown-content) .video-container video {
|
||||
width: 100%;
|
||||
height: auto;
|
||||
display: block;
|
||||
background-color: #000;
|
||||
border: none;
|
||||
outline: none;
|
||||
}
|
||||
|
||||
/* YouTube iframe - full width, preserve embedded dimensions ratio */
|
||||
:where(.markdown-content) .video-container iframe {
|
||||
width: 100%;
|
||||
height: auto;
|
||||
aspect-ratio: 16 / 9; /* Only for iframes since they don't have intrinsic ratio */
|
||||
display: block;
|
||||
border: none;
|
||||
outline: none;
|
||||
}
|
||||
|
||||
/* Focus states for accessibility */
|
||||
:where(.markdown-content) .video-container video:focus,
|
||||
:where(.markdown-content) .video-container iframe:focus {
|
||||
outline: 3px solid var(--color-primary);
|
||||
outline-offset: 3px;
|
||||
}
|
||||
|
||||
/* Video Metadata */
|
||||
:where(.markdown-content) .video-metadata {
|
||||
background-color: var(--color-bg-secondary);
|
||||
border: 1px solid var(--color-border);
|
||||
border-top: none;
|
||||
padding: 1rem 1.5rem;
|
||||
font-size: 0.875rem;
|
||||
color: var(--color-text-secondary);
|
||||
border-radius: 0 0 var(--radius-lg, 0.75rem) var(--radius-lg, 0.75rem);
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-metadata .video-title {
|
||||
font-weight: 600;
|
||||
color: var(--color-text);
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
/* Responsive Design */
|
||||
@media (max-width: 768px) {
|
||||
:where(.markdown-content) .video-container {
|
||||
margin: 1.5rem -0.5rem;
|
||||
border-radius: 0;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-metadata {
|
||||
padding: 0.75rem 1rem;
|
||||
font-size: 0.8125rem;
|
||||
border-radius: 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Dark Theme */
|
||||
[data-theme="dark"] :where(.markdown-content) .video-container {
|
||||
box-shadow: 0 12px 30px rgba(0,0,0,0.4);
|
||||
}
|
||||
|
||||
[data-theme="dark"] :where(.markdown-content) .video-metadata {
|
||||
background-color: var(--color-bg-tertiary);
|
||||
border-color: color-mix(in srgb, var(--color-border) 60%, transparent);
|
||||
}
|
||||
|
||||
/* Print Media */
|
||||
@media print {
|
||||
:where(.markdown-content) .video-container {
|
||||
border: 2px solid #ddd;
|
||||
background-color: #f5f5f5;
|
||||
padding: 2rem;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-container video,
|
||||
:where(.markdown-content) .video-container iframe {
|
||||
display: none !important;
|
||||
}
|
||||
|
||||
:where(.markdown-content) .video-container::before {
|
||||
content: "📹 Video: " attr(data-video-title, "Embedded Video");
|
||||
display: block;
|
||||
font-weight: 600;
|
||||
}
|
||||
}
|
||||
@@ -1,62 +1,72 @@
|
||||
/* PALETTE OPTION 1: BLUEPRINT & AMBER */
|
||||
:root {
|
||||
/* Light Theme Colors */
|
||||
--color-bg: #fff;
|
||||
--color-bg-secondary: #f8fafc;
|
||||
--color-bg-tertiary: #e2e8f0;
|
||||
--color-text: #1e293b;
|
||||
--color-text-secondary: #64748b;
|
||||
--color-border: #cbd5e1;
|
||||
--color-primary: #2563eb;
|
||||
--color-primary-hover: #1d4ed8;
|
||||
--color-accent: #059669;
|
||||
--color-accent-hover: #047857;
|
||||
/* Light Theme */
|
||||
--color-bg: #ffffff;
|
||||
--color-bg-secondary: #f1f5f9; /* Slate 100 */
|
||||
--color-bg-tertiary: #e2e8f0; /* Slate 200 */
|
||||
--color-text: #0f172a; /* Slate 900 */
|
||||
--color-text-secondary: #475569; /* Slate 600 */
|
||||
--color-border: #cbd5e1; /* Slate 300 */
|
||||
|
||||
--color-primary: #334155; /* Slate 700 - A strong, serious primary */
|
||||
--color-primary-hover: #1e293b; /* Slate 800 */
|
||||
|
||||
--color-accent: #b45309; /* A sharp, focused amber for highlights */
|
||||
--color-accent-hover: #92400e;
|
||||
|
||||
--color-warning: #d97706;
|
||||
--color-error: #dc2626;
|
||||
|
||||
/* Enhanced card type colors */
|
||||
--color-hosted: #7c3aed;
|
||||
--color-hosted-bg: #f3f0ff;
|
||||
--color-oss: #059669;
|
||||
--color-oss-bg: #ecfdf5;
|
||||
--color-method: #0891b2;
|
||||
--color-method-bg: #f0f9ff;
|
||||
--color-concept: #ea580c;
|
||||
--color-error: #be123c; /* A deeper, more serious red */
|
||||
|
||||
/* Card/Tag Category Colors */
|
||||
--color-hosted: #4f46e5; /* Indigo */
|
||||
--color-hosted-bg: #eef2ff;
|
||||
--color-oss: #0d9488; /* Teal */
|
||||
--color-oss-bg: #f0fdfa;
|
||||
--color-method: #0891b2; /* Cyan */
|
||||
--color-method-bg: #ecfeff;
|
||||
--color-concept: #c2410c; /* Orange */
|
||||
--color-concept-bg: #fff7ed;
|
||||
|
||||
/* Shadows */
|
||||
--shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 5%);
|
||||
--shadow-md: 0 4px 6px -1px rgb(0 0 0 / 10%);
|
||||
--shadow-lg: 0 10px 15px -3px rgb(0 0 0 / 10%);
|
||||
|
||||
|
||||
/* Shadows (Crisper) */
|
||||
--shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 6%);
|
||||
--shadow-md: 0 3px 5px -1px rgb(0 0 0 / 8%);
|
||||
--shadow-lg: 0 8px 12px -3px rgb(0 0 0 / 10%);
|
||||
|
||||
/* Transitions */
|
||||
--transition-fast: all 0.2s ease;
|
||||
--transition-medium: all 0.3s ease;
|
||||
}
|
||||
|
||||
[data-theme="dark"] {
|
||||
--color-bg: #0f172a;
|
||||
--color-bg-secondary: #1e293b;
|
||||
--color-bg-tertiary: #334155;
|
||||
--color-text: #f1f5f9;
|
||||
--color-text-secondary: #94a3b8;
|
||||
--color-border: #475569;
|
||||
--color-primary: #3b82f6;
|
||||
--color-primary-hover: #60a5fa;
|
||||
--color-accent: #10b981;
|
||||
--color-accent-hover: #34d399;
|
||||
/* Dark Theme */
|
||||
--color-bg: #0f172a; /* Slate 900 */
|
||||
--color-bg-secondary: #1e293b; /* Slate 800 */
|
||||
--color-bg-tertiary: #334155; /* Slate 700 */
|
||||
--color-text: #f1f5f9; /* Slate 100 */
|
||||
--color-text-secondary: #94a3b8; /* Slate 400 */
|
||||
--color-border: #475569; /* Slate 600 */
|
||||
|
||||
--color-primary: #64748b; /* Slate 500 */
|
||||
--color-primary-hover: #94a3b8; /* Slate 400 */
|
||||
|
||||
--color-accent: #f59e0b; /* A brighter amber for dark mode contrast */
|
||||
--color-accent-hover: #fbbf24;
|
||||
|
||||
--color-warning: #f59e0b;
|
||||
--color-error: #f87171;
|
||||
|
||||
--color-hosted: #a855f7;
|
||||
--color-hosted-bg: #2e1065;
|
||||
--color-oss: #10b981;
|
||||
--color-oss-bg: #064e3b;
|
||||
--color-method: #0891b2;
|
||||
--color-error: #f43f5e;
|
||||
|
||||
/* Card/Tag Category Colors */
|
||||
--color-hosted: #818cf8; /* Indigo */
|
||||
--color-hosted-bg: #3730a3;
|
||||
--color-oss: #2dd4bf; /* Teal */
|
||||
--color-oss-bg: #115e59;
|
||||
--color-method: #22d3ee; /* Cyan */
|
||||
--color-method-bg: #164e63;
|
||||
--color-concept: #f97316;
|
||||
--color-concept: #fb923c; /* Orange */
|
||||
--color-concept-bg: #7c2d12;
|
||||
|
||||
--shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 30%);
|
||||
--shadow-md: 0 4px 6px -1px rgb(0 0 0 / 40%);
|
||||
--shadow-lg: 0 10px 15px -3px rgb(0 0 0 / 50%);
|
||||
|
||||
/* Shadows (Subtler for dark mode) */
|
||||
--shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 20%);
|
||||
--shadow-md: 0 4px 6px -1px rgb(0 0 0 / 30%);
|
||||
--shadow-lg: 0 10px 15px -3px rgb(0 0 0 / 40%);
|
||||
}
|
||||
File diff suppressed because it is too large
137 src/utils/aiService.ts Normal file
@@ -0,0 +1,137 @@
// src/utils/aiService.ts
import 'dotenv/config';

export interface AIServiceConfig {
  endpoint: string;
  apiKey: string;
  model: string;
}

export interface AICallOptions {
  temperature?: number;
  timeout?: number;
}

export interface AIResponse {
  content: string;
  usage?: {
    promptTokens: number;
    completionTokens: number;
    totalTokens: number;
  };
}

class AIService {
  private config: AIServiceConfig;
  private defaultOptions: AICallOptions;

  constructor() {
    this.config = {
      endpoint: this.getRequiredEnv('AI_ANALYZER_ENDPOINT'),
      apiKey: this.getRequiredEnv('AI_ANALYZER_API_KEY'),
      model: this.getRequiredEnv('AI_ANALYZER_MODEL')
    };

    this.defaultOptions = {
      temperature: 0.3,
      timeout: 60000
    };

    console.log('[AI-SERVICE] Initialized with model:', this.config.model);
  }

  private getRequiredEnv(key: string): string {
    const value = process.env[key];
    if (!value) {
      throw new Error(`Missing required environment variable: ${key}`);
    }
    return value;
  }

  async callAI(prompt: string, options: AICallOptions = {}): Promise<AIResponse> {
    const mergedOptions = { ...this.defaultOptions, ...options };

    console.log('[AI-SERVICE] Making API call:', {
      promptLength: prompt.length,
      temperature: mergedOptions.temperature
    });

    const headers: Record<string, string> = {
      'Content-Type': 'application/json'
    };

    if (this.config.apiKey) {
      headers['Authorization'] = `Bearer ${this.config.apiKey}`;
    }

    const requestBody = {
      model: this.config.model,
      messages: [{ role: 'user', content: prompt }],
      temperature: mergedOptions.temperature
    };

    try {
      const controller = new AbortController();
      const timeoutId = setTimeout(() => controller.abort(), mergedOptions.timeout);

      const response = await fetch(`${this.config.endpoint}/v1/chat/completions`, {
        method: 'POST',
        headers,
        body: JSON.stringify(requestBody),
        signal: controller.signal
      });

      clearTimeout(timeoutId);

      if (!response.ok) {
        const errorText = await response.text();
        console.error('[AI-SERVICE] API Error:', response.status, errorText);
        throw new Error(`AI API error: ${response.status} - ${errorText}`);
      }

      const data = await response.json();
      const content = data.choices?.[0]?.message?.content;

      if (!content) {
        console.error('[AI-SERVICE] No response content from AI model');
        throw new Error('No response from AI model');
      }

      console.log('[AI-SERVICE] API call successful:', {
        responseLength: content.length,
        usage: data.usage
      });

      return {
        content: content.trim(),
        usage: data.usage
      };

    } catch (error) {
      if (error.name === 'AbortError') {
        console.error('[AI-SERVICE] Request timeout');
        throw new Error('AI request timeout');
      }

      console.error('[AI-SERVICE] API call failed:', error.message);
      throw error;
    }
  }

  async callMicroTaskAI(prompt: string): Promise<AIResponse> {
    return this.callAI(prompt, {
      temperature: 0.3,
      timeout: 30000
    });
  }

  estimateTokens(text: string): number {
    return Math.ceil(text.length / 4);
  }

  getConfig(): AIServiceConfig {
    return { ...this.config };
  }
}

export const aiService = new AIService();
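A minimal usage sketch of the new service (illustrative only, not part of this commit; the prompt string below is invented):

// hypothetical caller, e.g. inside an API route
import { aiService } from './aiService.js';

const reply = await aiService.callMicroTaskAI('Summarize the acquisition phase in one sentence.');
console.log(reply.content, 'approx. tokens:', aiService.estimateTokens(reply.content));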
@@ -83,26 +83,21 @@ export const apiServerError = {
};

export const apiSpecial = {
  // JSON parsing error
  invalidJSON: (): Response =>
    apiError.badRequest('Invalid JSON in request body'),

  // Missing required fields
  missingRequired: (fields: string[]): Response =>
    apiError.badRequest(`Missing required fields: ${fields.join(', ')}`),

  // Empty request body
  emptyBody: (): Response =>
    apiError.badRequest('Request body cannot be empty'),

  // File upload responses
  uploadSuccess: (data: { url: string; filename: string; size: number; storage: string }): Response =>
    apiResponse.created(data),

  uploadFailed: (error: string): Response =>
    apiServerError.internal(`Upload failed: ${error}`),

  // Contribution responses
  contributionSuccess: (data: { prUrl?: string; branchName?: string; message: string }): Response =>
    apiResponse.created({ success: true, ...data }),

@@ -114,7 +109,6 @@ export const apiWithHeaders = {
  successWithHeaders: (data: any, headers: Record<string, string>): Response =>
    createAPIResponse(data, 200, headers),

  // Redirect response
  redirect: (location: string, temporary: boolean = true): Response =>
    new Response(null, {
      status: temporary ? 302 : 301,
File diff suppressed because it is too large
@@ -1,4 +1,4 @@
// src/utils/auth.js (ENHANCED - Added gated content support)
// src/utils/auth.js
import type { AstroGlobal } from 'astro';
import crypto from 'crypto';
import { config } from 'dotenv';
@@ -52,22 +52,17 @@ function getEnv(key: string): string {

export function getSessionFromRequest(request: Request): string | null {
  const cookieHeader = request.headers.get('cookie');
  console.log('[DEBUG] Cookie header:', cookieHeader ? 'present' : 'missing');

  if (!cookieHeader) return null;

  const cookies = parseCookie(cookieHeader);
  console.log('[DEBUG] Parsed cookies:', Object.keys(cookies));
  console.log('[DEBUG] Session cookie found:', !!cookies.session);

  return cookies.session || null;
}

export async function verifySession(sessionToken: string): Promise<SessionData | null> {
  try {
    console.log('[DEBUG] Verifying session token, length:', sessionToken.length);
    const { payload } = await jwtVerify(sessionToken, SECRET_KEY);
    console.log('[DEBUG] JWT verification successful, payload keys:', Object.keys(payload));

    if (
      typeof payload.userId === 'string' &&
@@ -75,7 +70,6 @@ export async function verifySession(sessionToken: string): Promise<SessionData |
      typeof payload.authenticated === 'boolean' &&
      typeof payload.exp === 'number'
    ) {
      console.log('[DEBUG] Session validation successful for user:', payload.userId);
      return {
        userId: payload.userId,
        email: payload.email,
@@ -84,17 +78,14 @@ export async function verifySession(sessionToken: string): Promise<SessionData |
      };
    }

    console.log('[DEBUG] Session payload validation failed, payload:', payload);
    return null;
  } catch (error) {
    console.log('[DEBUG] Session verification failed:', error.message);
    return null;
  }
}

export async function createSession(userId: string, email: string): Promise<string> {
  const exp = Math.floor(Date.now() / 1000) + SESSION_DURATION;
  console.log('[DEBUG] Creating session for user:', userId, 'exp:', exp);

  const token = await new SignJWT({
    userId,
@@ -106,7 +97,6 @@ export async function createSession(userId: string, email: string): Promise<stri
    .setExpirationTime(exp)
    .sign(SECRET_KEY);

  console.log('[DEBUG] Session token created, length:', token.length);
  return token;
}

@@ -123,7 +113,6 @@ export function createSessionCookie(sessionToken: string): string {
    path: '/'
  });

  console.log('[DEBUG] Created session cookie:', cookie.substring(0, 100) + '...');
  return cookie;
}

@@ -292,8 +281,6 @@ export async function createSessionWithCookie(userInfo: UserInfo): Promise<{

export async function withAuth(Astro: AstroGlobal, context: AuthContextType = 'general'): Promise<AuthContext | Response> {
  const authRequired = getAuthRequirement(context);
  console.log(`[DEBUG PAGE] Auth required for ${context}:`, authRequired);
  console.log('[DEBUG PAGE] Request URL:', Astro.url.toString());

  if (!authRequired) {
    return {
@@ -305,10 +292,8 @@ export async function withAuth(Astro: AstroGlobal, context: AuthContextType = 'g
  }

  const sessionToken = getSessionFromRequest(Astro.request);
  console.log('[DEBUG PAGE] Session token found:', !!sessionToken);

  if (!sessionToken) {
    console.log('[DEBUG PAGE] No session token, redirecting to login');
    const loginUrl = `/api/auth/login?returnTo=${encodeURIComponent(Astro.url.toString())}`;
    return new Response(null, {
      status: 302,
@@ -317,10 +302,8 @@ export async function withAuth(Astro: AstroGlobal, context: AuthContextType = 'g
  }

  const session = await verifySession(sessionToken);
  console.log('[DEBUG PAGE] Session verification result:', !!session);

  if (!session) {
    console.log('[DEBUG PAGE] Session verification failed, redirecting to login');
    const loginUrl = `/api/auth/login?returnTo=${encodeURIComponent(Astro.url.toString())}`;
    return new Response(null, {
      status: 302,
@@ -328,7 +311,6 @@ export async function withAuth(Astro: AstroGlobal, context: AuthContextType = 'g
    });
  }

  console.log(`[DEBUG PAGE] Page authentication successful for ${context}:`, session.userId);
  return {
    authenticated: true,
    session,
@@ -354,10 +336,8 @@ export async function withAPIAuth(request: Request, context: AuthContextType = '
  }

  const sessionToken = getSessionFromRequest(request);
  console.log(`[DEBUG API] Session token found for ${context}:`, !!sessionToken);

  if (!sessionToken) {
    console.log(`[DEBUG API] No session token found for ${context}`);
    return {
      authenticated: false,
      userId: '',
@@ -366,10 +346,8 @@ export async function withAPIAuth(request: Request, context: AuthContextType = '
  }

  const session = await verifySession(sessionToken);
  console.log(`[DEBUG API] Session verification result for ${context}:`, !!session);

  if (!session) {
    console.log(`[DEBUG API] Session verification failed for ${context}`);
    return {
      authenticated: false,
      userId: '',
@@ -377,7 +355,6 @@ export async function withAPIAuth(request: Request, context: AuthContextType = '
    };
  }

  console.log(`[DEBUG API] Authentication successful for ${context}:`, session.userId);
  return {
    authenticated: true,
    userId: session.userId,
@@ -390,12 +367,10 @@ export function getAuthRequirementForContext(context: AuthContextType): boolean
  return getAuthRequirement(context);
}

// NEW: Helper function to check if gated content requires authentication
export function isGatedContentAuthRequired(): boolean {
  return getAuthRequirement('gatedcontent');
}

// NEW: Check if specific content should be gated
export function shouldGateContent(isGatedContent: boolean): boolean {
  return isGatedContent && isGatedContentAuthRequired();
}
@@ -1,17 +1,16 @@
// src/utils/clientUtils.ts
// Client-side utilities that mirror server-side toolHelpers.ts

export function createToolSlug(toolName: string): string {
  if (!toolName || typeof toolName !== 'string') {
    console.warn('[toolHelpers] Invalid toolName provided to createToolSlug:', toolName);
    console.warn('[CLIENT-UTILS] Invalid toolName provided to createToolSlug:', toolName);
    return '';
  }

  return toolName.toLowerCase()
    .replace(/[^a-z0-9\s-]/g, '') // Remove special characters
    .replace(/\s+/g, '-') // Replace spaces with hyphens
    .replace(/-+/g, '-') // Remove duplicate hyphens
    .replace(/^-|-$/g, ''); // Remove leading/trailing hyphens
    .replace(/[^a-z0-9\s-]/g, '')
    .replace(/\s+/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-|-$/g, '');
}

export function findToolByIdentifier(tools: any[], identifier: string): any | undefined {
@@ -30,7 +29,81 @@ export function isToolHosted(tool: any): boolean {
    tool.projectUrl.trim() !== "";
}

// Consolidated Autocomplete Functionality
export function sanitizeText(text: string): string {
  if (typeof text !== 'string') return '';

  return text
    .replace(/^#{1,6}\s+/gm, '')
    .replace(/^\s*[-*+]\s+/gm, '')
    .replace(/^\s*\d+\.\s+/gm, '')
    .replace(/\*\*(.+?)\*\*/g, '$1')
    .replace(/\*(.+?)\*/g, '$1')
    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
    .replace(/```[\s\S]*?```/g, '[CODE BLOCK]')
    .replace(/`([^`]+)`/g, '$1')
    .replace(/<[^>]+>/g, '')
    .replace(/\n\s*\n\s*\n/g, '\n\n')
    .trim();
}

export function escapeHtml(text: string): string {
  if (typeof text !== 'string') return String(text);
  const div = document.createElement('div');
  div.textContent = text;
  return div.innerHTML;
}

export function truncateText(text: string, maxLength: number): string {
  if (!text || text.length <= maxLength) return text;
  return text.slice(0, maxLength) + '...';
}

export function summarizeData(data: any): string {
  if (data === null || data === undefined) return 'null';
  if (typeof data === 'string') {
    return data.length > 100 ? data.slice(0, 100) + '...' : data;
  }
  if (typeof data === 'number' || typeof data === 'boolean') {
    return data.toString();
  }
  if (Array.isArray(data)) {
    if (data.length === 0) return '[]';
    if (data.length <= 3) return JSON.stringify(data);
    return `[${data.slice(0, 3).map(i => typeof i === 'string' ? i : JSON.stringify(i)).join(', ')}, ...+${data.length - 3}]`;
  }
  if (typeof data === 'object') {
    const keys = Object.keys(data);
    if (keys.length === 0) return '{}';
    if (keys.length <= 3) {
      return '{' + keys.map(k => `${k}: ${typeof data[k] === 'string' ? data[k].slice(0, 20) + (data[k].length > 20 ? '...' : '') : JSON.stringify(data[k])}`).join(', ') + '}';
    }
    return `{${keys.slice(0, 3).join(', ')}, ...+${keys.length - 3} keys}`;
  }
  return String(data);
}

export function formatDuration(ms: number): string {
  if (ms < 1000) return '< 1s';
  if (ms < 60000) return `${Math.ceil(ms / 1000)}s`;
  const minutes = Math.floor(ms / 60000);
  const seconds = Math.ceil((ms % 60000) / 1000);
  return seconds > 0 ? `${minutes}m ${seconds}s` : `${minutes}m`;
}

export function showElement(element: HTMLElement | null): void {
  if (element) {
    element.style.display = 'block';
    element.classList.remove('hidden');
  }
}

export function hideElement(element: HTMLElement | null): void {
  if (element) {
    element.style.display = 'none';
    element.classList.add('hidden');
  }
}

interface AutocompleteOptions {
  minLength?: number;
  maxResults?: number;
@@ -97,7 +170,6 @@ export class AutocompleteManager {
      display: none;
    `;

    // Insert dropdown after input
    const parentElement = this.input.parentNode as HTMLElement;
    parentElement.style.position = 'relative';
    parentElement.insertBefore(this.dropdown, this.input.nextSibling);
@@ -119,7 +191,6 @@ export class AutocompleteManager {
    });

    this.input.addEventListener('blur', () => {
      // Delay to allow click events on dropdown items
      setTimeout(() => {
        const activeElement = document.activeElement;
        if (!activeElement || !this.dropdown.contains(activeElement)) {
@@ -205,7 +276,7 @@ export class AutocompleteManager {

  defaultRender(item: any): string {
    const text = typeof item === 'string' ? item : item.name || item.label || item.toString();
    return `<div class="autocomplete-item">${this.escapeHtml(text)}</div>`;
    return `<div class="autocomplete-item">${escapeHtml(text)}</div>`;
  }

  renderDropdown(): void {
@@ -226,7 +297,6 @@ export class AutocompleteManager {
      })
      .join('');

    // Bind click events
    this.dropdown.querySelectorAll('.autocomplete-option').forEach((option, index) => {
      option.addEventListener('click', () => {
        this.selectItem(this.filteredData[index]);
@@ -260,7 +330,6 @@ export class AutocompleteManager {
      this.hideDropdown();
    }

    // Trigger change event
    this.input.dispatchEvent(new CustomEvent('autocomplete:select', {
      detail: { item, text, selectedItems: Array.from(this.selectedItems) }
    }));
@@ -289,8 +358,8 @@ export class AutocompleteManager {
        align-items: center;
        gap: 0.25rem;
      ">
        ${this.escapeHtml(item)}
        <button type="button" class="autocomplete-remove" data-item="${this.escapeHtml(item)}" style="
        ${escapeHtml(item)}
        <button type="button" class="autocomplete-remove" data-item="${escapeHtml(item)}" style="
          background: none;
          border: none;
          color: white;
@@ -307,7 +376,6 @@ export class AutocompleteManager {
      `)
      .join('');

    // Bind remove events
    this.selectedContainer.querySelectorAll('.autocomplete-remove').forEach(btn => {
      btn.addEventListener('click', (e) => {
        e.preventDefault();
@@ -333,12 +401,6 @@ export class AutocompleteManager {
    this.selectedIndex = -1;
  }

  escapeHtml(text: string): string {
    const div = document.createElement('div');
    div.textContent = text;
    return div.innerHTML;
  }

  setDataSource(newDataSource: any[]): void {
    this.dataSource = newDataSource;
  }
225 src/utils/confidenceScoring.ts Normal file
@@ -0,0 +1,225 @@
// src/utils/confidenceScoring.ts
import { isToolHosted } from './clientUtils.js';
import 'dotenv/config';

export interface ConfidenceMetrics {
  overall: number;
  semanticRelevance: number;
  taskSuitability: number;
  uncertaintyFactors: string[];
  strengthIndicators: string[];
}

export interface ConfidenceConfig {
  semanticWeight: number;
  suitabilityWeight: number;
  minimumThreshold: number;
  mediumThreshold: number;
  highThreshold: number;
}

export interface AnalysisContext {
  userQuery: string;
  mode: string;
  embeddingsSimilarities: Map<string, number>;
  selectedTools?: Array<{
    tool: any;
    phase: string;
    priority: string;
    justification?: string;
    taskRelevance?: number;
    limitations?: string[];
  }>;
}

class ConfidenceScoring {
  private config: ConfidenceConfig;

  constructor() {
    this.config = {
      semanticWeight: this.getEnvFloat('CONFIDENCE_SEMANTIC_WEIGHT', 0.3),
      suitabilityWeight: this.getEnvFloat('CONFIDENCE_SUITABILITY_WEIGHT', 0.7),
      minimumThreshold: this.getEnvInt('CONFIDENCE_MINIMUM_THRESHOLD', 40),
      mediumThreshold: this.getEnvInt('CONFIDENCE_MEDIUM_THRESHOLD', 60),
      highThreshold: this.getEnvInt('CONFIDENCE_HIGH_THRESHOLD', 80)
    };

    console.log('[CONFIDENCE-SCORING] Initialized with restored config:', this.config);
  }

  private getEnvFloat(key: string, defaultValue: number): number {
    const value = process.env[key];
    return value ? parseFloat(value) : defaultValue;
  }

  private getEnvInt(key: string, defaultValue: number): number {
    const value = process.env[key];
    return value ? parseInt(value, 10) : defaultValue;
  }

  calculateRecommendationConfidence(
    tool: any,
    context: AnalysisContext,
    taskRelevance: number = 70,
    limitations: string[] = []
  ): ConfidenceMetrics {
    console.log('[CONFIDENCE-SCORING] Calculating confidence for tool:', tool.name);

    const rawSemanticRelevance = context.embeddingsSimilarities.has(tool.name) ?
      context.embeddingsSimilarities.get(tool.name)! * 100 : 50;

    let enhancedTaskSuitability = taskRelevance;

    if (context.mode === 'workflow') {
      const toolSelection = context.selectedTools?.find((st: any) => st.tool && st.tool.name === tool.name);
      if (toolSelection && tool.phases && Array.isArray(tool.phases) && tool.phases.includes(toolSelection.phase)) {
        const phaseBonus = Math.min(15, 100 - taskRelevance);
        enhancedTaskSuitability = Math.min(100, taskRelevance + phaseBonus);
        console.log('[CONFIDENCE-SCORING] Phase alignment bonus applied:', phaseBonus);
      }
    }

    const overall = (
      rawSemanticRelevance * this.config.semanticWeight +
      enhancedTaskSuitability * this.config.suitabilityWeight
    );

    const uncertaintyFactors = this.identifyUncertaintyFactors(tool, context, limitations, overall);
    const strengthIndicators = this.identifyStrengthIndicators(tool, context, overall);

    const result = {
      overall: Math.round(overall),
      semanticRelevance: Math.round(rawSemanticRelevance),
      taskSuitability: Math.round(enhancedTaskSuitability),
      uncertaintyFactors,
      strengthIndicators
    };

    console.log('[CONFIDENCE-SCORING] Confidence calculated:', {
      tool: tool.name,
      overall: result.overall,
      semantic: result.semanticRelevance,
      task: result.taskSuitability,
      uncertaintyCount: uncertaintyFactors.length,
      strengthCount: strengthIndicators.length
    });

    return result;
  }

  private identifyUncertaintyFactors(
    tool: any,
    context: AnalysisContext,
    limitations: string[],
    confidence: number
  ): string[] {
    const factors: string[] = [];

    if (limitations?.length > 0) {
      factors.push(...limitations.slice(0, 2));
    }

    const similarity = context.embeddingsSimilarities.get(tool.name) || 0.5;
    if (similarity < 0.7) {
      factors.push('Geringe semantische Ähnlichkeit zur Anfrage');
    }

    if (tool.skillLevel === 'expert' && /schnell|rapid|triage|urgent|sofort/i.test(context.userQuery)) {
      factors.push('Experten-Tool für zeitkritisches Szenario');
    }

    if (tool.skillLevel === 'novice' && /komplex|erweitert|tiefgehend|advanced|forensisch/i.test(context.userQuery)) {
      factors.push('Einsteiger-Tool für komplexe Analyse');
    }

    if (tool.type === 'software' && !isToolHosted(tool) && tool.accessType === 'download') {
      factors.push('Installation und Setup erforderlich');
    }

    if (tool.license === 'Proprietary') {
      factors.push('Kommerzielle Software - Lizenzkosten zu beachten');
    }

    if (confidence < 60) {
      factors.push('Moderate Gesamtbewertung - alternative Ansätze empfohlen');
    }

    return factors.slice(0, 4);
  }

  private identifyStrengthIndicators(tool: any, context: AnalysisContext, confidence: number): string[] {
    const indicators: string[] = [];

    const similarity = context.embeddingsSimilarities.get(tool.name) || 0.5;
    if (similarity >= 0.7) {
      indicators.push('Sehr gute semantische Übereinstimmung mit Ihrer Anfrage');
    }

    if (tool.knowledgebase === true) {
      indicators.push('Umfassende Dokumentation und Wissensbasis verfügbar');
    }

    if (isToolHosted(tool)) {
      indicators.push('Sofort verfügbar über gehostete Lösung');
    }

    if (tool.skillLevel === 'intermediate' || tool.skillLevel === 'advanced') {
      indicators.push('Ausgewogenes Verhältnis zwischen Funktionalität und Benutzerfreundlichkeit');
    }

    if (tool.type === 'method' && /methodik|vorgehen|prozess|ansatz/i.test(context.userQuery)) {
      indicators.push('Methodischer Ansatz passt zu Ihrer prozeduralen Anfrage');
    }

    return indicators.slice(0, 4);
  }

  calculateSelectionConfidence(result: any, candidateCount: number): number {
    if (!result?.selectedTools) {
      console.log('[CONFIDENCE-SCORING] No selected tools for confidence calculation');
      return 30;
    }

    const selectionRatio = result.selectedTools.length / candidateCount;
    const hasReasoning = result.reasoning && result.reasoning.length > 50;

    let confidence = 60;

    if (selectionRatio > 0.05 && selectionRatio < 0.3) confidence += 20;
    else if (selectionRatio <= 0.05) confidence -= 10;
    else confidence -= 15;

    if (hasReasoning) confidence += 15;
    if (result.selectedConcepts?.length > 0) confidence += 5;

    const finalConfidence = Math.min(95, Math.max(25, confidence));

    console.log('[CONFIDENCE-SCORING] Selection confidence calculated:', {
      candidateCount,
      selectedCount: result.selectedTools.length,
      selectionRatio: selectionRatio.toFixed(3),
      hasReasoning,
      confidence: finalConfidence
    });

    return finalConfidence;
  }

  getConfidenceLevel(confidence: number): 'weak' | 'moderate' | 'strong' {
    if (confidence >= this.config.highThreshold) return 'strong';
    if (confidence >= this.config.mediumThreshold) return 'moderate';
    return 'weak';
  }

  getConfidenceColor(confidence: number): string {
    if (confidence >= this.config.highThreshold) return 'var(--color-accent)';
    if (confidence >= this.config.mediumThreshold) return 'var(--color-warning)';
    return 'var(--color-error)';
  }

  getConfig(): ConfidenceConfig {
    return { ...this.config };
  }
}

export const confidenceScoring = new ConfidenceScoring();
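For orientation, a hedged sketch of how this scorer might be called (the tool object, query and similarity value below are invented examples, not from the commit):

import { confidenceScoring } from './confidenceScoring.js';

// hypothetical inputs - real tools and similarities come from dataService / embeddings
const metrics = confidenceScoring.calculateRecommendationConfidence(
  { name: 'Autopsy', type: 'software', skillLevel: 'intermediate', knowledgebase: true },
  { userQuery: 'Schnelle Triage einer Festplatte', mode: 'workflow', embeddingsSimilarities: new Map([['Autopsy', 0.82]]) },
  75
);
console.log(metrics.overall, confidenceScoring.getConfidenceLevel(metrics.overall));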
@@ -1,4 +1,4 @@
// src/utils/dataService.ts - Enhanced for micro-task AI pipeline
// src/utils/dataService.ts
import { promises as fs } from 'fs';
import { load } from 'js-yaml';
import path from 'path';
@@ -85,7 +85,7 @@ let cachedData: ToolsData | null = null;
let cachedRandomizedData: ToolsData | null = null;
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
let lastRandomizationDate: string | null = null;
let dataVersion: string | null = null;
let cachedToolsHash: string | null = null;

function seededRandom(seed: number): () => number {
  let x = Math.sin(seed) * 10000;
@@ -110,17 +110,6 @@ function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
  return shuffled;
}

function generateDataVersion(data: any): string {
  const str = JSON.stringify(data, Object.keys(data).sort());
  let hash = 0;
  for (let i = 0; i < str.length; i++) {
    const char = str.charCodeAt(i);
    hash = ((hash << 5) - hash) + char;
    hash = hash & hash;
  }
  return Math.abs(hash).toString(36);
}

async function loadRawData(): Promise<ToolsData> {
  if (!cachedData) {
    const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
@@ -142,8 +131,9 @@ async function loadRawData(): Promise<ToolsData> {
        };
      }

      dataVersion = generateDataVersion(cachedData);
      console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);
      const { getToolsFileHash } = await import('./hashUtils.js');
      cachedToolsHash = await getToolsFileHash();
      console.log(`[DATA SERVICE] Loaded data with hash: ${cachedToolsHash.slice(0, 12)}...`);

    } catch (error) {
      if (error instanceof z.ZodError) {
@@ -234,7 +224,7 @@ export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedT
}

export function getDataVersion(): string | null {
  return dataVersion;
  return cachedToolsHash;
}

export function clearCache(): void {
@@ -242,7 +232,7 @@ export function clearCache(): void {
  cachedRandomizedData = null;
  cachedCompressedData = null;
  lastRandomizationDate = null;
  dataVersion = null;
  cachedToolsHash = null;

  console.log('[DATA SERVICE] Enhanced cache cleared');
}
@@ -1,11 +1,11 @@
|
||||
// src/utils/embeddings.ts
|
||||
// src/utils/embeddings.ts - Refactored
|
||||
import { promises as fs } from 'fs';
|
||||
import path from 'path';
|
||||
import { getCompressedToolsDataForAI } from './dataService.js';
|
||||
import 'dotenv/config';
|
||||
import crypto from 'crypto';
|
||||
|
||||
interface EmbeddingData {
|
||||
export interface EmbeddingData {
|
||||
id: string;
|
||||
type: 'tool' | 'concept';
|
||||
name: string;
|
||||
@@ -20,14 +20,22 @@ interface EmbeddingData {
|
||||
};
|
||||
}
|
||||
|
||||
export interface SimilarityResult extends EmbeddingData {
|
||||
similarity: number;
|
||||
}
|
||||
|
||||
interface EmbeddingsDatabase {
|
||||
version: string;
|
||||
lastUpdated: number;
|
||||
embeddings: EmbeddingData[];
|
||||
}
|
||||
|
||||
interface SimilarityResult extends EmbeddingData {
|
||||
similarity: number;
|
||||
interface EmbeddingsConfig {
|
||||
endpoint?: string;
|
||||
apiKey?: string;
|
||||
model?: string;
|
||||
batchSize: number;
|
||||
batchDelay: number;
|
||||
}
|
||||
|
||||
class EmbeddingsService {
|
||||
@@ -35,48 +43,30 @@ class EmbeddingsService {
|
||||
private isInitialized = false;
|
||||
private initializationPromise: Promise<void> | null = null;
|
||||
private readonly embeddingsPath = path.join(process.cwd(), 'data', 'embeddings.json');
|
||||
private readonly batchSize: number;
|
||||
private readonly batchDelay: number;
|
||||
private enabled: boolean = false;
|
||||
private config: EmbeddingsConfig;
|
||||
|
||||
constructor() {
|
||||
this.batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20', 10);
|
||||
this.batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
|
||||
|
||||
this.enabled = true;
|
||||
this.config = this.loadConfig();
|
||||
console.log('[EMBEDDINGS-SERVICE] Initialized:', {
|
||||
hasEndpoint: !!this.config.endpoint,
|
||||
hasModel: !!this.config.model
|
||||
});
|
||||
}
|
||||
|
||||
private async checkEnabledStatus(): Promise<void> {
|
||||
try {
|
||||
const envEnabled = process.env.AI_EMBEDDINGS_ENABLED;
|
||||
|
||||
if (envEnabled === 'true') {
|
||||
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
|
||||
const model = process.env.AI_EMBEDDINGS_MODEL;
|
||||
|
||||
if (!endpoint || !model) {
|
||||
console.warn('[EMBEDDINGS] Embeddings enabled but API configuration missing - disabling');
|
||||
this.enabled = false;
|
||||
return;
|
||||
}
|
||||
|
||||
console.log('[EMBEDDINGS] All requirements met - enabling embeddings');
|
||||
this.enabled = true;
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
await fs.stat(this.embeddingsPath);
|
||||
console.log('[EMBEDDINGS] Existing embeddings file found - enabling');
|
||||
this.enabled = true;
|
||||
} catch {
|
||||
console.log('[EMBEDDINGS] Embeddings not explicitly enabled - disabling');
|
||||
this.enabled = false;
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('[EMBEDDINGS] Error checking enabled status:', error);
|
||||
this.enabled = false;
|
||||
}
|
||||
private loadConfig(): EmbeddingsConfig {
|
||||
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
|
||||
const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
|
||||
const model = process.env.AI_EMBEDDINGS_MODEL;
|
||||
const batchSize = parseInt(process.env.AI_EMBEDDINGS_BATCH_SIZE || '20', 10);
|
||||
const batchDelay = parseInt(process.env.AI_EMBEDDINGS_BATCH_DELAY_MS || '1000', 10);
|
||||
|
||||
return {
|
||||
endpoint,
|
||||
apiKey,
|
||||
model,
|
||||
batchSize,
|
||||
batchDelay
|
||||
};
|
||||
}
|
||||
|
||||
async initialize(): Promise<void> {
|
||||
@@ -93,63 +83,55 @@ class EmbeddingsService {
|
||||
}
|
||||
|
||||
private async performInitialization(): Promise<void> {
|
||||
await this.checkEnabledStatus();
|
||||
if (!this.enabled) {
|
||||
console.log('[EMBEDDINGS] Embeddings disabled, skipping initialization');
|
||||
return;
|
||||
}
|
||||
|
||||
const initStart = Date.now();
|
||||
|
||||
try {
|
||||
console.log('[EMBEDDINGS] Initializing embeddings system…');
|
||||
console.log('[EMBEDDINGS-SERVICE] Starting initialization');
|
||||
|
||||
/*if (!this.config.enabled) {
|
||||
console.log('[EMBEDDINGS-SERVICE] Service disabled via configuration');
|
||||
return;
|
||||
}*/
|
||||
|
||||
await fs.mkdir(path.dirname(this.embeddingsPath), { recursive: true });
|
||||
|
||||
const toolsData = await getCompressedToolsDataForAI();
|
||||
const currentDataHash = await this.hashToolsFile();
|
||||
const toolsData = await getCompressedToolsDataForAI();
|
||||
const { getToolsFileHash } = await import('./hashUtils.js');
|
||||
const currentDataHash = await getToolsFileHash();
|
||||
|
||||
const existing = await this.loadEmbeddings();
|
||||
console.log('[EMBEDDINGS] Current hash:', currentDataHash);
|
||||
console.log('[EMBEDDINGS] Existing file version:', existing?.version);
|
||||
console.log('[EMBEDDINGS] Existing embeddings length:', existing?.embeddings?.length);
|
||||
|
||||
const cacheIsUsable =
|
||||
existing &&
|
||||
const existing = await this.loadEmbeddings();
|
||||
|
||||
const cacheIsUsable = existing &&
|
||||
existing.version === currentDataHash &&
|
||||
Array.isArray(existing.embeddings) &&
|
||||
existing.embeddings.length > 0;
|
||||
|
||||
if (cacheIsUsable) {
|
||||
console.log('[EMBEDDINGS] Using cached embeddings');
|
||||
this.embeddings = existing.embeddings;
|
||||
console.log('[EMBEDDINGS-SERVICE] Using cached embeddings');
|
||||
this.embeddings = existing.embeddings;
|
||||
} else {
|
||||
console.log('[EMBEDDINGS] Generating new embeddings…');
|
||||
console.log('[EMBEDDINGS-SERVICE] Generating new embeddings');
|
||||
await this.generateEmbeddings(toolsData, currentDataHash);
|
||||
}
|
||||
|
||||
this.isInitialized = true;
|
||||
console.log(`[EMBEDDINGS] Initialized with ${this.embeddings.length} embeddings in ${Date.now() - initStart} ms`);
|
||||
} catch (err) {
|
||||
console.error('[EMBEDDINGS] Failed to initialize:', err);
|
||||
console.log(`[EMBEDDINGS-SERVICE] Initialized successfully with ${this.embeddings.length} embeddings in ${Date.now() - initStart}ms`);
|
||||
|
||||
} catch (error) {
|
||||
console.error('[EMBEDDINGS-SERVICE] Initialization failed:', error);
|
||||
this.isInitialized = false;
|
||||
throw err;
|
||||
throw error;
|
||||
} finally {
|
||||
this.initializationPromise = null;
|
||||
}
|
||||
}
|
||||
|
||||
private async hashToolsFile(): Promise<string> {
|
||||
const file = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
|
||||
const raw = await fs.readFile(file, 'utf8');
|
||||
return crypto.createHash('sha256').update(raw).digest('hex');
|
||||
}
|
||||
|
||||
private async loadEmbeddings(): Promise<EmbeddingsDatabase | null> {
|
||||
try {
|
||||
const data = await fs.readFile(this.embeddingsPath, 'utf8');
|
||||
return JSON.parse(data);
|
||||
} catch (error) {
|
||||
console.log('[EMBEDDINGS] No existing embeddings found');
|
||||
console.log('[EMBEDDINGS-SERVICE] No existing embeddings file found');
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@@ -162,7 +144,7 @@ class EmbeddingsService {
|
||||
};
|
||||
|
||||
await fs.writeFile(this.embeddingsPath, JSON.stringify(database, null, 2));
|
||||
console.log(`[EMBEDDINGS] Saved ${this.embeddings.length} embeddings to disk`);
|
||||
console.log(`[EMBEDDINGS-SERVICE] Saved ${this.embeddings.length} embeddings to disk`);
|
||||
}
|
||||
|
||||
private createContentString(item: any): string {
|
||||
@@ -178,30 +160,23 @@ class EmbeddingsService {
|
||||
}
|
||||
|
||||
private async generateEmbeddingsBatch(contents: string[]): Promise<number[][]> {
|
||||
const endpoint = process.env.AI_EMBEDDINGS_ENDPOINT;
|
||||
const apiKey = process.env.AI_EMBEDDINGS_API_KEY;
|
||||
const model = process.env.AI_EMBEDDINGS_MODEL;
|
||||
|
||||
if (!endpoint || !model) {
|
||||
const missing: string[] = [];
|
||||
if (!endpoint) missing.push('AI_EMBEDDINGS_ENDPOINT');
|
||||
if (!model) missing.push('AI_EMBEDDINGS_MODEL');
|
||||
throw new Error(`Missing embeddings API configuration: ${missing.join(', ')}`);
|
||||
if (!this.config.endpoint || !this.config.model) {
|
||||
throw new Error('Missing embeddings API configuration');
|
||||
}
|
||||
|
||||
const headers: Record<string, string> = {
|
||||
'Content-Type': 'application/json'
|
||||
};
|
||||
|
||||
if (apiKey) {
|
||||
headers['Authorization'] = `Bearer ${apiKey}`;
|
||||
if (this.config.apiKey) {
|
||||
headers['Authorization'] = `Bearer ${this.config.apiKey}`;
|
||||
}
|
||||
|
||||
const response = await fetch(endpoint, {
|
||||
const response = await fetch(this.config.endpoint, {
|
||||
method: 'POST',
|
||||
headers,
|
||||
body: JSON.stringify({
|
||||
model,
|
||||
model: this.config.model,
|
||||
input: contents
|
||||
})
|
||||
});
|
||||
@@ -233,11 +208,16 @@ class EmbeddingsService {
|
||||
const contents = allItems.map(item => this.createContentString(item));
|
||||
this.embeddings = [];
|
||||
|
||||
for (let i = 0; i < contents.length; i += this.batchSize) {
|
||||
const batch = contents.slice(i, i + this.batchSize);
|
||||
const batchItems = allItems.slice(i, i + this.batchSize);
|
||||
console.log(`[EMBEDDINGS-SERVICE] Generating embeddings for ${contents.length} items`);
|
||||
|
||||
for (let i = 0; i < contents.length; i += this.config.batchSize) {
|
||||
const batch = contents.slice(i, i + this.config.batchSize);
|
||||
const batchItems = allItems.slice(i, i + this.config.batchSize);
|
||||
|
||||
console.log(`[EMBEDDINGS] Processing batch ${Math.ceil((i + 1) / this.batchSize)} of ${Math.ceil(contents.length / this.batchSize)}`);
|
||||
const batchNumber = Math.ceil((i + 1) / this.config.batchSize);
|
||||
const totalBatches = Math.ceil(contents.length / this.config.batchSize);
|
||||
|
||||
console.log(`[EMBEDDINGS-SERVICE] Processing batch ${batchNumber}/${totalBatches}`);
|
||||
|
||||
try {
|
||||
const embeddings = await this.generateEmbeddingsBatch(batch);
|
||||
@@ -260,12 +240,12 @@ class EmbeddingsService {
|
||||
});
|
||||
});
|
||||
|
||||
if (i + this.batchSize < contents.length) {
|
||||
await new Promise(resolve => setTimeout(resolve, this.batchDelay));
|
||||
if (i + this.config.batchSize < contents.length) {
|
||||
await new Promise(resolve => setTimeout(resolve, this.config.batchDelay));
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
console.error(`[EMBEDDINGS] Failed to process batch ${Math.ceil((i + 1) / this.batchSize)}:`, error);
|
||||
console.error(`[EMBEDDINGS-SERVICE] Batch ${batchNumber} failed:`, error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
@@ -273,18 +253,17 @@ class EmbeddingsService {
|
||||
await this.saveEmbeddings(version);
|
||||
}
|
||||
|
||||
public async embedText(text: string): Promise<number[]> {
|
||||
if (!this.enabled || !this.isInitialized) {
|
||||
async embedText(text: string): Promise<number[]> {
|
||||
if (!this.isInitialized) {
|
||||
throw new Error('Embeddings service not available');
|
||||
}
|
||||
|
||||
const [embedding] = await this.generateEmbeddingsBatch([text.toLowerCase()]);
|
||||
return embedding;
|
||||
}
|
||||
|
||||
async waitForInitialization(): Promise<void> {
|
||||
await this.checkEnabledStatus();
|
||||
|
||||
if (!this.enabled || this.isInitialized) {
|
||||
if (this.isInitialized) {
|
||||
return Promise.resolve();
|
||||
}
|
||||
|
||||
@@ -296,13 +275,6 @@ class EmbeddingsService {
|
||||
return this.initialize();
|
||||
}
|
||||
|
||||
async forceRecheckEnvironment(): Promise<void> {
|
||||
this.enabled = false;
|
||||
this.isInitialized = false;
|
||||
await this.checkEnabledStatus();
|
||||
console.log('[EMBEDDINGS] Environment status re-checked, enabled:', this.enabled);
|
||||
}
|
||||
|
||||
private cosineSimilarity(a: number[], b: number[]): number {
|
||||
let dotProduct = 0;
|
||||
let normA = 0;
|
||||
@@ -318,145 +290,62 @@ class EmbeddingsService {
|
||||
}
|
||||
|
||||
async findSimilar(query: string, maxResults: number = 30, threshold: number = 0.3): Promise<SimilarityResult[]> {
|
||||
if (!this.enabled) {
|
||||
console.log('[EMBEDDINGS] Service disabled for similarity search');
|
||||
/*if (!this.config.enabled) {
|
||||
console.log('[EMBEDDINGS-SERVICE] Service disabled, returning empty results');
|
||||
return [];
|
||||
}*/
|
||||
|
||||
if (!this.isInitialized || this.embeddings.length === 0) {
|
||||
console.log('[EMBEDDINGS-SERVICE] Not initialized or no embeddings available');
|
||||
return [];
|
||||
}
|
||||
|
||||
try {
|
||||
if (this.isInitialized && this.embeddings.length > 0) {
|
||||
console.log(`[EMBEDDINGS] Using embeddings data for similarity search: ${query}`);
|
||||
|
||||
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
|
||||
const queryEmbedding = queryEmbeddings[0];
|
||||
console.log(`[EMBEDDINGS-SERVICE] Finding similar items for query: "${query}"`);
|
||||
|
||||
const queryEmbeddings = await this.generateEmbeddingsBatch([query.toLowerCase()]);
|
||||
const queryEmbedding = queryEmbeddings[0];
|
||||
|
||||
console.log(`[EMBEDDINGS] Computing similarities for ${this.embeddings.length} items`);
|
||||
const similarities: SimilarityResult[] = this.embeddings.map(item => ({
|
||||
...item,
|
||||
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
|
||||
}));
|
||||
|
||||
const similarities: SimilarityResult[] = this.embeddings.map(item => ({
|
||||
...item,
|
||||
similarity: this.cosineSimilarity(queryEmbedding, item.embedding)
|
||||
}));
|
||||
const topScore = Math.max(...similarities.map(s => s.similarity));
|
||||
const dynamicThreshold = Math.max(threshold, topScore * 0.85);
|
||||
|
||||
const topScore = Math.max(...similarities.map(s => s.similarity));
|
||||
const dynamicCutOff = Math.max(threshold, topScore * 0.85);
|
||||
const results = similarities
|
||||
.filter(item => item.similarity >= dynamicThreshold)
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, maxResults);
|
||||
|
||||
const results = similarities
|
||||
.filter(item => item.similarity >= dynamicCutOff)
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, maxResults);
|
||||
|
||||
|
||||
const orderingValid = results.every((item, index) => {
|
||||
if (index === 0) return true;
|
||||
return item.similarity <= results[index - 1].similarity;
|
||||
console.log(`[EMBEDDINGS-SERVICE] Found ${results.length} similar items (threshold: ${dynamicThreshold.toFixed(3)})`);
|
||||
|
||||
if (results.length > 0) {
|
||||
console.log('[EMBEDDINGS-SERVICE] Top 5 matches:');
|
||||
results.slice(0, 5).forEach((item, idx) => {
|
||||
console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`);
|
||||
});
|
||||
|
||||
if (!orderingValid) {
|
||||
console.error('[EMBEDDINGS] CRITICAL: Similarity ordering is broken!');
|
||||
results.forEach((item, idx) => {
|
||||
console.error(` ${idx}: ${item.name} = ${item.similarity.toFixed(4)}`);
|
||||
});
|
||||
}
|
||||
|
||||
console.log(`[EMBEDDINGS] Found ${results.length} similar items (threshold: ${threshold})`);
|
||||
if (results.length > 0) {
|
||||
console.log('[EMBEDDINGS] Top 10 similarity matches:');
|
||||
results.slice(0, 10).forEach((item, idx) => {
|
||||
console.log(` ${idx + 1}. ${item.name} (${item.type}) = ${item.similarity.toFixed(4)}`);
|
||||
});
|
||||
|
||||
const topSimilarity = results[0].similarity;
|
||||
const hasHigherSimilarity = results.some(item => item.similarity > topSimilarity);
|
||||
if (hasHigherSimilarity) {
|
||||
console.error('[EMBEDDINGS] CRITICAL: Top result is not actually the highest similarity!');
|
||||
}
|
||||
}
|
||||
|
||||
return results;
|
||||
|
||||
} else {
|
||||
console.log(`[EMBEDDINGS] No embeddings data, using fallback text matching: ${query}`);
|
||||
|
||||
const { getToolsData } = await import('./dataService.js');
|
||||
const toolsData = await getToolsData();
|
||||
|
||||
const queryLower = query.toLowerCase();
|
||||
const queryWords = queryLower.split(/\s+/).filter(w => w.length > 2);
|
||||
|
||||
const similarities: SimilarityResult[] = toolsData.tools
|
||||
.map((tool: any) => {
|
||||
let similarity = 0;
|
||||
|
||||
if (tool.name.toLowerCase().includes(queryLower)) {
|
||||
similarity += 0.8;
|
||||
}
|
||||
|
||||
if (tool.description && tool.description.toLowerCase().includes(queryLower)) {
|
||||
similarity += 0.6;
|
||||
}
|
||||
|
||||
if (tool.tags && Array.isArray(tool.tags)) {
|
||||
const matchingTags = tool.tags.filter((tag: string) =>
|
||||
tag.toLowerCase().includes(queryLower) || queryLower.includes(tag.toLowerCase())
|
||||
);
|
||||
if (tool.tags.length > 0) {
|
||||
similarity += (matchingTags.length / tool.tags.length) * 0.4;
|
||||
}
|
||||
}
|
||||
|
||||
const toolText = `${tool.name} ${tool.description || ''} ${(tool.tags || []).join(' ')}`.toLowerCase();
|
||||
const matchingWords = queryWords.filter(word => toolText.includes(word));
|
||||
if (queryWords.length > 0) {
|
||||
similarity += (matchingWords.length / queryWords.length) * 0.3;
|
||||
}
|
||||
|
||||
return {
|
||||
id: `tool_${tool.name.replace(/[^a-zA-Z0-9]/g, '_').toLowerCase()}`,
|
||||
type: 'tool' as const,
|
||||
name: tool.name,
|
||||
content: toolText,
|
||||
embedding: [],
|
||||
metadata: {
|
||||
domains: tool.domains || [],
|
||||
phases: tool.phases || [],
|
||||
tags: tool.tags || [],
|
||||
skillLevel: tool.skillLevel,
|
||||
type: tool.type
|
||||
},
|
||||
similarity: Math.min(similarity, 1.0)
|
||||
};
|
||||
})
|
||||
.filter(item => item.similarity >= threshold)
|
||||
.sort((a, b) => b.similarity - a.similarity)
|
||||
.slice(0, maxResults);
|
||||
|
||||
console.log(`[EMBEDDINGS] Fallback found ${similarities.length} similar items`);
|
||||
return similarities;
|
||||
}
|
||||
|
||||
return results;
|
||||
|
||||
} catch (error) {
|
||||
console.error('[EMBEDDINGS] Failed to find similar items:', error);
|
||||
console.error('[EMBEDDINGS-SERVICE] Similarity search failed:', error);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
isEnabled(): boolean {
|
||||
if (!this.enabled && !this.isInitialized) {
|
||||
this.checkEnabledStatus().catch(console.error);
|
||||
}
|
||||
|
||||
return this.enabled;
|
||||
}
|
||||
|
||||
getStats(): { enabled: boolean; initialized: boolean; count: number } {
|
||||
getStats(): {initialized: boolean; count: number } {
|
||||
return {
|
||||
enabled: this.enabled,
|
||||
initialized: this.isInitialized,
|
||||
count: this.embeddings.length
|
||||
};
|
||||
}
|
||||
|
||||
getConfig(): EmbeddingsConfig {
|
||||
return { ...this.config };
|
||||
}
|
||||
}
|
||||
|
||||
const embeddingsService = new EmbeddingsService();
|
||||
|
||||
export { embeddingsService, type EmbeddingData, type SimilarityResult };
|
||||
export const embeddingsService = new EmbeddingsService();
|
||||
20 src/utils/hashUtils.ts Normal file
@@ -0,0 +1,20 @@
// src/utils/hashUtils.ts
import { promises as fs } from 'fs';
import path from 'path';
import crypto from 'crypto';

export async function getToolsFileHash(): Promise<string> {
  const file = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
  const raw = await fs.readFile(file, 'utf8');
  return crypto.createHash('sha256').update(raw).digest('hex');
}

export function getToolsFileHashSync(): string | null {
  try {
    const file = path.join(process.cwd(), 'src', 'data', 'tools.yaml');
    const raw = require('fs').readFileSync(file, 'utf8');
    return crypto.createHash('sha256').update(raw).digest('hex');
  } catch {
    return null;
  }
}
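Intended use, mirroring the dataService and embeddings changes above (sketch only):

import { getToolsFileHash } from './hashUtils.js';

// the SHA-256 of tools.yaml doubles as the cache/version key for data and embeddings
const version = await getToolsFileHash();
console.log('[HASH-UTILS] tools.yaml version:', version.slice(0, 12));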
356 src/utils/jsonUtils.ts Normal file
@@ -0,0 +1,356 @@
|
||||
// src/utils/jsonUtils.ts
|
||||
export class JSONParser {
|
||||
static safeParseJSON(jsonString: string, fallback: any = null): any {
|
||||
try {
|
||||
let cleaned = jsonString.trim();
|
||||
|
||||
const jsonBlockPatterns = [
|
||||
/```json\s*([\s\S]*?)\s*```/i,
|
||||
/```\s*([\s\S]*?)\s*```/i,
|
||||
/\{[\s\S]*\}/,
|
||||
];
|
||||
|
||||
for (const pattern of jsonBlockPatterns) {
|
||||
const match = cleaned.match(pattern);
|
||||
if (match) {
|
||||
cleaned = match[1] || match[0];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!cleaned.endsWith('}') && !cleaned.endsWith(']')) {
|
||||
console.warn('[JSON-PARSER] JSON appears truncated, attempting recovery');
|
||||
cleaned = this.repairTruncatedJSON(cleaned);
|
||||
}
|
||||
|
||||
const parsed = JSON.parse(cleaned);
|
||||
|
||||
if (parsed && typeof parsed === 'object') {
|
||||
if (!parsed.selectedTools) parsed.selectedTools = [];
|
||||
if (!parsed.selectedConcepts) parsed.selectedConcepts = [];
|
||||
if (!Array.isArray(parsed.selectedTools)) parsed.selectedTools = [];
|
||||
if (!Array.isArray(parsed.selectedConcepts)) parsed.selectedConcepts = [];
|
||||
}
|
||||
|
||||
return parsed;
|
||||
|
||||
} catch (error) {
|
||||
console.warn('[JSON-PARSER] JSON parsing failed:', error.message);
|
||||
return fallback;
|
||||
}
|
||||
}
|
||||
|
||||
private static repairTruncatedJSON(cleaned: string): string {
|
||||
let braceCount = 0;
|
||||
let bracketCount = 0;
|
||||
let inString = false;
|
||||
let escaped = false;
|
||||
let lastCompleteStructure = '';
|
||||
|
||||
for (let i = 0; i < cleaned.length; i++) {
|
||||
const char = cleaned[i];
|
||||
|
||||
if (escaped) {
|
||||
escaped = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (char === '\\') {
|
||||
escaped = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (char === '"' && !escaped) {
|
||||
inString = !inString;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!inString) {
|
||||
if (char === '{') braceCount++;
|
||||
if (char === '}') braceCount--;
|
||||
if (char === '[') bracketCount++;
|
||||
if (char === ']') bracketCount--;
|
||||
|
||||
if (braceCount === 0 && bracketCount === 0 && (char === '}' || char === ']')) {
|
||||
lastCompleteStructure = cleaned.substring(0, i + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (lastCompleteStructure) {
|
||||
return lastCompleteStructure;
|
||||
} else {
|
||||
if (braceCount > 0) cleaned += '}';
|
||||
if (bracketCount > 0) cleaned += ']';
|
||||
return cleaned;
|
||||
}
|
||||
}
|
||||
|
||||
static extractToolsFromMalformedJSON(jsonString: string): { selectedTools: string[]; selectedConcepts: string[] } {
|
||||
const selectedTools: string[] = [];
|
||||
const selectedConcepts: string[] = [];
|
||||
|
||||
const toolsMatch = jsonString.match(/"selectedTools"\s*:\s*\[([\s\S]*?)\]/i);
|
||||
if (toolsMatch) {
|
||||
const toolMatches = toolsMatch[1].match(/"([^"]+)"/g);
|
||||
if (toolMatches) {
|
||||
selectedTools.push(...toolMatches.map(match => match.replace(/"/g, '')));
|
||||
}
|
||||
}
|
||||
|
||||
const conceptsMatch = jsonString.match(/"selectedConcepts"\s*:\s*\[([\s\S]*?)\]/i);
|
||||
if (conceptsMatch) {
|
||||
const conceptMatches = conceptsMatch[1].match(/"([^"]+)"/g);
|
||||
if (conceptMatches) {
|
||||
selectedConcepts.push(...conceptMatches.map(match => match.replace(/"/g, '')));
|
||||
}
|
||||
}
|
||||
|
||||
if (selectedTools.length === 0 && selectedConcepts.length === 0) {
|
||||
const allMatches = jsonString.match(/"([^"]+)"/g);
|
||||
if (allMatches) {
|
||||
const possibleNames = allMatches
|
||||
.map(match => match.replace(/"/g, ''))
|
||||
.filter(name =>
|
||||
name.length > 2 &&
|
||||
!['selectedTools', 'selectedConcepts', 'reasoning'].includes(name) &&
|
||||
!name.includes(':') &&
|
||||
!name.match(/^\d+$/)
|
||||
)
|
||||
.slice(0, 15);
|
||||
|
||||
selectedTools.push(...possibleNames);
|
||||
}
|
||||
}
|
||||
|
||||
return { selectedTools, selectedConcepts };
|
||||
}
|
||||
|
||||
  static secureParseJSON(jsonString: string, maxSize: number = 10 * 1024 * 1024): any {
    if (typeof jsonString !== 'string') {
      throw new Error('Input must be a string');
    }

    if (jsonString.length > maxSize) {
      throw new Error(`JSON string too large (${jsonString.length} bytes, max ${maxSize})`);
    }

    const suspiciousPatterns = [
      /<script/i,
      /javascript:/i,
      /eval\(/i,
      /function\s*\(/i,
      /__proto__/i,
      /constructor/i
    ];

    for (const pattern of suspiciousPatterns) {
      if (pattern.test(jsonString)) {
        throw new Error('Potentially malicious content detected in JSON');
      }
    }

    try {
      const parsed = JSON.parse(jsonString);

      if (typeof parsed !== 'object' || parsed === null) {
        throw new Error('JSON must be an object');
      }

      return parsed;
    } catch (error) {
      if (error instanceof SyntaxError) {
        throw new Error(`Invalid JSON syntax: ${error.message}`);
      }
      throw error;
    }
  }

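  // Recursively trims objects for audit export: caps depth, string length, array
  // size and key count, strips <script> blocks and prototype-polluting keys.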
  static sanitizeForAudit(obj: any, maxDepth: number = 5, currentDepth: number = 0): any {
    if (currentDepth >= maxDepth) {
      return '[Max depth reached]';
    }

    if (obj === null || obj === undefined) {
      return obj;
    }

    if (typeof obj === 'string') {
      if (obj.length > 500) {
        return obj.slice(0, 500) + '...[truncated]';
      }
      return obj.replace(/<script[\s\S]*?<\/script>/gi, '[script removed]');
    }

    if (typeof obj === 'number' || typeof obj === 'boolean') {
      return obj;
    }

    if (Array.isArray(obj)) {
      if (obj.length > 20) {
        return [
          ...obj.slice(0, 20).map(item => this.sanitizeForAudit(item, maxDepth, currentDepth + 1)),
          `...[${obj.length - 20} more items]`
        ];
      }
      return obj.map(item => this.sanitizeForAudit(item, maxDepth, currentDepth + 1));
    }

    if (typeof obj === 'object') {
      const keys = Object.keys(obj);
      if (keys.length > 50) {
        const sanitized: any = {};
        keys.slice(0, 50).forEach(key => {
          sanitized[key] = this.sanitizeForAudit(obj[key], maxDepth, currentDepth + 1);
        });
        sanitized['[truncated]'] = `${keys.length - 50} more properties`;
        return sanitized;
      }

      const sanitized: any = {};
      keys.forEach(key => {
        if (['__proto__', 'constructor', 'prototype'].includes(key)) {
          return;
        }
        sanitized[key] = this.sanitizeForAudit(obj[key], maxDepth, currentDepth + 1);
      });
      return sanitized;
    }

    return String(obj);
  }

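  // Structural check for exported audit bundles: top-level properties, metadata
  // fields, and the shape of every auditTrail entry.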
  static validateAuditExportStructure(data: any): { isValid: boolean; errors: string[] } {
    const errors: string[] = [];

    if (!data || typeof data !== 'object') {
      errors.push('Export data must be an object');
      return { isValid: false, errors };
    }

    const requiredProps = ['metadata', 'recommendation', 'auditTrail'];
    for (const prop of requiredProps) {
      if (!(prop in data)) {
        errors.push(`Missing required property: ${prop}`);
      }
    }

    if (data.metadata && typeof data.metadata === 'object') {
      const requiredMetadataProps = ['timestamp', 'version', 'userQuery', 'mode'];
      for (const prop of requiredMetadataProps) {
        if (!(prop in data.metadata)) {
          errors.push(`Missing required metadata property: ${prop}`);
        }
      }
    } else {
      errors.push('Invalid metadata structure');
    }

    if (!Array.isArray(data.auditTrail)) {
      errors.push('auditTrail must be an array');
    } else {
      data.auditTrail.forEach((entry: any, index: number) => {
        if (!entry || typeof entry !== 'object') {
          errors.push(`Audit entry ${index} is not a valid object`);
          return;
        }

        const requiredEntryProps = ['timestamp', 'phase', 'action', 'confidence', 'processingTimeMs'];
        for (const prop of requiredEntryProps) {
          if (!(prop in entry)) {
            errors.push(`Audit entry ${index} missing required property: ${prop}`);
          }
        }
      });
    }

    return {
      isValid: errors.length === 0,
      errors
    };
  }

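  // Assembles the downloadable audit bundle: metadata, sanitized recommendation,
  // sanitized audit trail, and the raw selection context.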
  static prepareAuditExport(
    recommendation: any,
    userQuery: string,
    mode: string,
    auditTrail: any[] = [],
    additionalMetadata: any = {}
  ): any {
    return {
      metadata: {
        timestamp: new Date().toISOString(),
        version: "1.0",
        userQuery: userQuery.slice(0, 1000),
        mode,
        exportedBy: 'ForensicPathways',
        toolsDataHash: additionalMetadata.toolsDataHash || 'unknown',
        aiModel: additionalMetadata.aiModel || 'unknown',
        aiParameters: additionalMetadata.aiParameters || {},
        processingStats: additionalMetadata.processingStats || {}
      },
      recommendation: this.sanitizeForAudit(recommendation, 6),
      auditTrail: auditTrail.map(entry => this.sanitizeForAudit(entry, 4)),
      rawContext: {
        selectedTools: additionalMetadata.selectedTools || [],
        backgroundKnowledge: additionalMetadata.backgroundKnowledge || [],
        contextHistory: additionalMetadata.contextHistory || [],
        embeddingsSimilarities: additionalMetadata.embeddingsSimilarities || {}
      }
    };
  }

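  // Validates a re-uploaded analysis: hard structural issues block the import,
  // while age, unknown mode, or missing confidence scores only produce warnings.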
  static validateUploadedAnalysis(data: any): { isValid: boolean; issues: string[]; warnings: string[] } {
    const issues: string[] = [];
    const warnings: string[] = [];

    const structureValidation = this.validateAuditExportStructure(data);
    if (!structureValidation.isValid) {
      issues.push(...structureValidation.errors);
      return { isValid: false, issues, warnings };
    }

    if (data.metadata) {
      const timestamp = new Date(data.metadata.timestamp);
      if (isNaN(timestamp.getTime())) {
        warnings.push('Invalid timestamp in metadata');
      } else {
        const age = Date.now() - timestamp.getTime();
        const maxAge = 30 * 24 * 60 * 60 * 1000; // 30 days
        if (age > maxAge) {
          warnings.push(`Analysis is ${Math.floor(age / (24 * 60 * 60 * 1000))} days old`);
        }
      }

      if (!['workflow', 'tool'].includes(data.metadata.mode)) {
        warnings.push(`Unknown analysis mode: ${data.metadata.mode}`);
      }
    }

    if (Array.isArray(data.auditTrail)) {
      const aiDecisions = data.auditTrail.filter(e => e.action === 'ai-decision').length;
      const toolSelections = data.auditTrail.filter(e => e.action === 'selection-decision').length;

      if (aiDecisions === 0) {
        warnings.push('No AI decisions found in audit trail');
      }

      if (toolSelections === 0) {
        warnings.push('No tool selections found in audit trail');
      }

      const entriesWithConfidence = data.auditTrail.filter(e => typeof e.confidence === 'number').length;
      const confidenceRatio = entriesWithConfidence / data.auditTrail.length;

      if (confidenceRatio < 0.8) {
        warnings.push(`Only ${Math.round(confidenceRatio * 100)}% of audit entries have confidence scores`);
      }
    }

    return {
      isValid: issues.length === 0,
      issues,
      warnings
    };
  }
}
@@ -1,5 +1,4 @@
// src/utils/nextcloud.ts
import { promises as fs } from 'fs';
import path from 'path';
import crypto from 'crypto';
@@ -1,5 +1,4 @@
// src/utils/rateLimitedQueue.ts

import dotenv from "dotenv";

dotenv.config();
83 src/utils/remarkVideoPlugin.ts Normal file
@@ -0,0 +1,83 @@
// src/utils/remarkVideoPlugin.ts
import { visit } from 'unist-util-visit';
import type { Plugin } from 'unified';
import type { Root } from 'hast';

function escapeHtml(unsafe: string): string {
  if (typeof unsafe !== 'string') return '';

  return unsafe
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#039;");
}

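// Remark plugin: wraps raw <video> and <iframe> HTML nodes in a .video-container
// with optional title metadata, preserving the original playback attributes.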
export const remarkVideoPlugin: Plugin<[], Root> = () => {
  return (tree: Root) => {
    visit(tree, 'html', (node: any, index: number | undefined, parent: any) => {
      if (node.value && node.value.includes('<video') && typeof index === 'number') {
        const srcMatch = node.value.match(/src=["']([^"']+)["']/);
        const titleMatch = node.value.match(/title=["']([^"']+)["']/);

        if (srcMatch) {
          const originalSrc = srcMatch[1];
          const title = titleMatch?.[1] || 'Video';

          const hasControls = node.value.includes('controls');
          const hasAutoplay = node.value.includes('autoplay');
          const hasMuted = node.value.includes('muted');
          const hasLoop = node.value.includes('loop');
          const preloadMatch = node.value.match(/preload=["']([^"']+)["']/);

          const enhancedHTML = `
            <div class="video-container">
              <video
                src="${escapeHtml(originalSrc)}"
                ${hasControls ? 'controls' : ''}
                ${hasAutoplay ? 'autoplay' : ''}
                ${hasMuted ? 'muted' : ''}
                ${hasLoop ? 'loop' : ''}
                ${preloadMatch ? `preload="${preloadMatch[1]}"` : 'preload="metadata"'}
                data-video-title="${escapeHtml(title)}"
              >
                <p>Your browser does not support the video element.</p>
              </video>
              ${title !== 'Video' ? `
                <div class="video-metadata">
                  <div class="video-title">${escapeHtml(title)}</div>
                </div>
              ` : ''}
            </div>
          `.trim();

          parent.children[index] = { type: 'html', value: enhancedHTML };
          console.log(`[VIDEO] Enhanced: ${title} (${originalSrc})`);
        }
      }

      if (node.value && node.value.includes('<iframe') && typeof index === 'number' && parent) {

        if (node.value.includes('video-container')) {
          return;
        }

        const titleMatch = node.value.match(/title=["']([^"']+)["']/);
        const title = titleMatch?.[1] || 'Embedded Video';

        const enhancedHTML = `
          <div class="video-container">
            ${node.value}
          </div>
          <div class="video-metadata">
            <div class="video-title">${escapeHtml(title)}</div>
          </div>
        `.trim();

        parent.children[index] = { type: 'html', value: enhancedHTML };
        console.log(`[VIDEO] Enhanced iframe: ${title}`);
      }
    });
  };
};
@@ -1,43 +0,0 @@
export interface Tool {
  name: string;
  type?: 'software' | 'method' | 'concept';
  projectUrl?: string | null;
  license?: string;
  knowledgebase?: boolean;
  domains?: string[];
  phases?: string[];
  platforms?: string[];
  skillLevel?: string;
  description?: string;
  tags?: string[];
  related_concepts?: string[];
}

export function createToolSlug(toolName: string): string {
  if (!toolName || typeof toolName !== 'string') {
    console.warn('[toolHelpers] Invalid toolName provided to createToolSlug:', toolName);
    return '';
  }

  return toolName.toLowerCase()
    .replace(/[^a-z0-9\s-]/g, '') // Remove special characters
    .replace(/\s+/g, '-')         // Replace spaces with hyphens
    .replace(/-+/g, '-')          // Remove duplicate hyphens
    .replace(/^-|-$/g, '');       // Remove leading/trailing hyphens
}

export function findToolByIdentifier(tools: Tool[], identifier: string): Tool | undefined {
  if (!identifier || !Array.isArray(tools)) return undefined;

  return tools.find(tool =>
    tool.name === identifier ||
    createToolSlug(tool.name) === identifier.toLowerCase()
  );
}

export function isToolHosted(tool: Tool): boolean {
  return tool.projectUrl !== undefined &&
         tool.projectUrl !== null &&
         tool.projectUrl !== "" &&
         tool.projectUrl.trim() !== "";
}
372 src/utils/toolSelector.ts Normal file
@@ -0,0 +1,372 @@
// src/utils/toolSelector.ts
import { aiService } from './aiService.js';
import { embeddingsService, type SimilarityResult } from './embeddings.js';
import { confidenceScoring } from './confidenceScoring.js';
import { JSONParser } from './jsonUtils.js';
import { getPrompt } from '../config/prompts.js';
import 'dotenv/config';

export interface ToolSelectionConfig {
  maxSelectedItems: number;
  embeddingCandidates: number;
  similarityThreshold: number;
  embeddingSelectionLimit: number;
  embeddingConceptsLimit: number;
  embeddingsMinTools: number;
  embeddingsMaxReductionRatio: number;
  methodSelectionRatio: number;
  softwareSelectionRatio: number;
}

export interface SelectionContext {
  userQuery: string;
  mode: string;
  embeddingsSimilarities: Map<string, number>;
  seenToolNames: Set<string>;
  selectedTools?: Array<{
    tool: any;
    phase: string;
    priority: string;
    justification?: string;
    taskRelevance?: number;
    limitations?: string[];
  }>;
}

export interface ToolSelectionResult {
  selectedTools: any[];
  selectedConcepts: any[];
  confidence: number;
}

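// Two-stage tool selection: embeddings narrow the candidate set, then an AI call
// picks the final tools/concepts within the configured method/software ratios.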
class ToolSelector {
  private config: ToolSelectionConfig;

  constructor() {
    this.config = {
      maxSelectedItems: this.getEnvInt('AI_MAX_SELECTED_ITEMS', 25),
      embeddingCandidates: this.getEnvInt('AI_EMBEDDING_CANDIDATES', 50),
      similarityThreshold: this.getEnvFloat('AI_SIMILARITY_THRESHOLD', 0.3),
      embeddingSelectionLimit: this.getEnvInt('AI_EMBEDDING_SELECTION_LIMIT', 30),
      embeddingConceptsLimit: this.getEnvInt('AI_EMBEDDING_CONCEPTS_LIMIT', 15),
      embeddingsMinTools: this.getEnvInt('AI_EMBEDDINGS_MIN_TOOLS', 8),
      embeddingsMaxReductionRatio: this.getEnvFloat('AI_EMBEDDINGS_MAX_REDUCTION_RATIO', 0.75),
      methodSelectionRatio: this.getEnvFloat('AI_METHOD_SELECTION_RATIO', 0.4),
      softwareSelectionRatio: this.getEnvFloat('AI_SOFTWARE_SELECTION_RATIO', 0.5),
    };
    console.log('[TOOL-SELECTOR] Initialized with config:', this.config);
  }

  private getEnvInt(key: string, defaultValue: number): number {
    const value = process.env[key];
    return value ? parseInt(value, 10) : defaultValue;
  }

  private getEnvFloat(key: string, defaultValue: number): number {
    const value = process.env[key];
    return value ? parseFloat(value) : defaultValue;
  }

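  // Stage 1: semantic pre-filtering via embeddings. Falls back to the full
  // dataset when the result is too small or barely reduces the candidate pool.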
  async getIntelligentCandidates(
    userQuery: string,
    toolsData: any,
    mode: string,
    context: SelectionContext
  ): Promise<{
    tools: any[];
    concepts: any[];
    domains: any[];
    phases: any[];
    'domain-agnostic-software': any[];
  }> {
    console.log('[TOOL-SELECTOR] Getting intelligent candidates for query');

    let candidateTools: any[] = [];
    let candidateConcepts: any[] = [];

    context.embeddingsSimilarities.clear();

    try {
      await embeddingsService.waitForInitialization();
    } catch (error) {
      console.error('[TOOL-SELECTOR] Embeddings initialization failed:', error);
    }

    console.log('[TOOL-SELECTOR] Using embeddings for candidate selection');

    const embeddingsSearchStart = Date.now();

    const similarItems = await embeddingsService.findSimilar(
      userQuery,
      this.config.embeddingCandidates,
      this.config.similarityThreshold
    ) as SimilarityResult[];

    console.log('[TOOL-SELECTOR] Embeddings found', similarItems.length, 'similar items');

    const { auditService } = await import('./auditService.js');
    const { getDataVersion } = await import('./dataService.js');

    const toolsDataHash = getDataVersion() || 'unknown';

    auditService.addEmbeddingsSearch(
      userQuery,
      similarItems,
      this.config.similarityThreshold,
      embeddingsSearchStart,
      {
        toolsDataHash: toolsDataHash,
        selectionPhase: 'initial-candidate-selection',
        candidateLimit: this.config.embeddingCandidates,
        mode: mode,
        reasoning: `Initiale semantische Suche für ${mode}-Modus - Reduzierung der ${toolsData.tools.length} verfügbaren Tools auf ${similarItems.length} relevante Kandidaten`
      }
    );

    similarItems.forEach(item => {
      context.embeddingsSimilarities.set(item.name, item.similarity);
    });

    const toolsMap = new Map(toolsData.tools.map((tool: any) => [tool.name, tool]));
    const conceptsMap = new Map(toolsData.concepts.map((concept: any) => [concept.name, concept]));

    const similarTools = similarItems
      .filter((item: any) => item.type === 'tool')
      .map((item: any) => toolsMap.get(item.name))
      .filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null);

    const similarConcepts = similarItems
      .filter((item: any) => item.type === 'concept')
      .map((item: any) => conceptsMap.get(item.name))
      .filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null);

    const totalAvailableTools = toolsData.tools.length;
    const reductionRatio = similarTools.length / totalAvailableTools;

    if (similarTools.length >= this.config.embeddingsMinTools && reductionRatio <= this.config.embeddingsMaxReductionRatio) {
      candidateTools = similarTools;
      candidateConcepts = similarConcepts;

      console.log('[TOOL-SELECTOR] Using embeddings filtering:', totalAvailableTools, '→', similarTools.length, 'tools');
    } else {
      console.log('[TOOL-SELECTOR] Embeddings filtering insufficient, using full dataset');
      candidateTools = toolsData.tools;
      candidateConcepts = toolsData.concepts;
    }

    const selection = await this.performAISelection(
      userQuery,
      candidateTools,
      candidateConcepts,
      mode,
      context
    );

    return {
      tools: selection.selectedTools,
      concepts: selection.selectedConcepts,
      domains: toolsData.domains,
      phases: toolsData.phases,
      'domain-agnostic-software': toolsData['domain-agnostic-software']
    };
  }

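  // Stage 2: sends a balanced mix of methods and software (plus concepts) to the
  // AI and maps the returned names back to the full candidate objects.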
  private async performAISelection(
    userQuery: string,
    candidateTools: any[],
    candidateConcepts: any[],
    mode: string,
    context: SelectionContext
  ): Promise<ToolSelectionResult> {
    console.log('[TOOL-SELECTOR] Performing AI selection');

    const candidateMethods = candidateTools.filter((t: any) => t && t.type === 'method');
    const candidateSoftware = candidateTools.filter((t: any) => t && t.type === 'software');

    console.log('[TOOL-SELECTOR] Candidates:',
      candidateMethods.length, 'methods,',
      candidateSoftware.length, 'software,',
      candidateConcepts.length, 'concepts'
    );

    const methodsWithFullData = candidateMethods.map(this.createToolData);
    const softwareWithFullData = candidateSoftware.map(this.createToolData);
    const conceptsWithFullData = candidateConcepts.map(this.createConceptData);

    const maxTools = Math.min(this.config.embeddingSelectionLimit, candidateTools.length);
    const maxConcepts = Math.min(this.config.embeddingConceptsLimit, candidateConcepts.length);

    const methodRatio = Math.max(0, Math.min(1, this.config.methodSelectionRatio));
    const softwareRatio = Math.max(0, Math.min(1, this.config.softwareSelectionRatio));

    let methodLimit = Math.round(maxTools * methodRatio);
    let softwareLimit = Math.round(maxTools * softwareRatio);

    if (methodLimit + softwareLimit > maxTools) {
      const scale = maxTools / (methodLimit + softwareLimit);
      methodLimit = Math.floor(methodLimit * scale);
      softwareLimit = Math.floor(softwareLimit * scale);
    }

    const methodsPrimary = methodsWithFullData.slice(0, methodLimit);
    const softwarePrimary = softwareWithFullData.slice(0, softwareLimit);

    const toolsToSend: any[] = [...methodsPrimary, ...softwarePrimary];

    let mIdx = methodsPrimary.length;
    let sIdx = softwarePrimary.length;

    while (toolsToSend.length < maxTools && (mIdx < methodsWithFullData.length || sIdx < softwareWithFullData.length)) {
      const remM = methodsWithFullData.length - mIdx;
      const remS = softwareWithFullData.length - sIdx;

      if (remS >= remM && sIdx < softwareWithFullData.length) {
        toolsToSend.push(softwareWithFullData[sIdx++]);
      } else if (mIdx < methodsWithFullData.length) {
        toolsToSend.push(methodsWithFullData[mIdx++]);
      } else if (sIdx < softwareWithFullData.length) {
        toolsToSend.push(softwareWithFullData[sIdx++]);
      } else {
        break;
      }
    }

    const conceptsToSend = conceptsWithFullData.slice(0, maxConcepts);

    console.log('[TOOL-SELECTOR-DEBUG] maxTools:', maxTools, 'maxConcepts:', maxConcepts);
    console.log('[TOOL-SELECTOR] Sending to AI:',
      toolsToSend.filter((t: any) => t.type === 'method').length, 'methods,',
      toolsToSend.filter((t: any) => t.type === 'software').length, 'software,',
      conceptsToSend.length, 'concepts'
    );

    const basePrompt = getPrompt('toolSelection', mode, userQuery, this.config.maxSelectedItems);
    const prompt = getPrompt('toolSelectionWithData', basePrompt, toolsToSend, conceptsToSend);

    try {
      const response = await aiService.callAI(prompt);
      const result = JSONParser.safeParseJSON(response.content, null);

      if (!result || !Array.isArray(result.selectedTools) || !Array.isArray(result.selectedConcepts)) {
        console.error('[TOOL-SELECTOR] AI selection returned invalid structure');
        throw new Error('AI selection failed to return valid tool and concept selection');
      }

      const totalSelected = result.selectedTools.length + result.selectedConcepts.length;
      if (totalSelected === 0) {
        throw new Error('AI selection returned empty selection');
      }

      const toolsMap = new Map(candidateTools.map((tool: any) => [tool.name, tool]));
      const conceptsMap = new Map(candidateConcepts.map((concept: any) => [concept.name, concept]));

      const selectedTools = result.selectedTools
        .map((name: string) => toolsMap.get(name))
        .filter((tool: any): tool is NonNullable<any> => tool !== undefined && tool !== null);

      const selectedConcepts = result.selectedConcepts
        .map((name: string) => conceptsMap.get(name))
        .filter((concept: any): concept is NonNullable<any> => concept !== undefined && concept !== null);

      const selectedMethods = selectedTools.filter((t: any) => t && t.type === 'method');
      const selectedSoftware = selectedTools.filter((t: any) => t && t.type === 'software');

      console.log('[TOOL-SELECTOR] AI selected:',
        selectedMethods.length, 'methods,',
        selectedSoftware.length, 'software,',
        selectedConcepts.length, 'concepts'
      );

      const confidence = confidenceScoring.calculateSelectionConfidence(
        result,
        candidateTools.length + candidateConcepts.length
      );

      return { selectedTools, selectedConcepts, confidence };
    } catch (error) {
      console.error('[TOOL-SELECTOR] AI selection failed:', error);
      throw error;
    }
  }

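  // Micro-task selection for a single workflow phase; tool names returned by the
  // AI that do not exist in the available tool list are filtered out.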
  async selectToolsForPhase(
    userQuery: string,
    phase: any,
    availableTools: any[],
    context: SelectionContext
  ): Promise<Array<{
    toolName: string;
    taskRelevance: number;
    justification: string;
    limitations: string[];
  }>> {
    console.log('[TOOL-SELECTOR] Selecting tools for phase:', phase.id);

    if (availableTools.length === 0) {
      console.log('[TOOL-SELECTOR] No tools available for phase:', phase.id);
      return [];
    }

    const prompt = getPrompt('phaseToolSelection', userQuery, phase, availableTools);

    try {
      const response = await aiService.callMicroTaskAI(prompt);
      const selections = JSONParser.safeParseJSON(response.content, []);

      if (Array.isArray(selections)) {
        const validSelections = selections.filter((sel: any) => {
          const matchingTool = availableTools.find((tool: any) => tool && tool.name === sel.toolName);
          if (!matchingTool) {
            console.warn('[TOOL-SELECTOR] Invalid tool selection for phase:', phase.id, sel.toolName);
          }
          return !!matchingTool;
        });

        console.log('[TOOL-SELECTOR] Valid selections for phase:', phase.id, validSelections.length);
        return validSelections;
      }

      return [];

    } catch (error) {
      console.error('[TOOL-SELECTOR] Phase tool selection failed:', error);
      return [];
    }
  }

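  // Compact projections of tool and concept records as sent to the AI prompt.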
  private createToolData = (tool: any) => ({
    name: tool.name,
    type: tool.type,
    description: tool.description,
    domains: tool.domains,
    phases: tool.phases,
    platforms: tool.platforms || [],
    tags: tool.tags || [],
    skillLevel: tool.skillLevel,
    license: tool.license,
    accessType: tool.accessType,
    projectUrl: tool.projectUrl,
    knowledgebase: tool.knowledgebase,
    related_concepts: tool.related_concepts || [],
    related_software: tool.related_software || []
  });

  private createConceptData = (concept: any) => ({
    name: concept.name,
    type: 'concept',
    description: concept.description,
    domains: concept.domains,
    phases: concept.phases,
    tags: concept.tags || [],
    skillLevel: concept.skillLevel,
    related_concepts: concept.related_concepts || [],
    related_software: concept.related_software || []
  });

  getConfig(): ToolSelectionConfig {
    return { ...this.config };
  }
}

export const toolSelector = new ToolSelector();