forensic-pathways/src/utils/dataService.ts
2025-08-04 20:35:21 +02:00

239 lines
7.7 KiB
TypeScript

// src/utils/dataService.ts - Enhanced for micro-task AI pipeline
import { promises as fs } from 'fs';
import { load } from 'js-yaml';
import path from 'path';
import { z } from 'zod';
/**
 * Zod schema for a single entry of the `tools` list.
 *
 * Required fields are `name`, `type`, `description`, `skillLevel` and `url`.
 * `platforms` and `tags` fall back to an empty array when the key is omitted;
 * the remaining fields are optional and may also be `null`.
 */
const ToolSchema = z.object({
name: z.string(),
icon: z.string().optional().nullable(),
// An entry is a piece of software, a method, or a concept.
type: z.enum(['software', 'method', 'concept']),
description: z.string(),
domains: z.array(z.string()).optional().nullable().default([]),
phases: z.array(z.string()).optional().nullable().default([]),
platforms: z.array(z.string()).default([]),
skillLevel: z.string(),
url: z.string(),
license: z.string().optional().nullable(),
tags: z.array(z.string()).default([]),
projectUrl: z.string().optional().nullable(),
knowledgebase: z.boolean().optional().nullable(),
statusUrl: z.string().optional().nullable(),
accessType: z.string().optional().nullable(),
'domain-agnostic-software': z.array(z.string()).optional().nullable(),
related_concepts: z.array(z.string()).optional().nullable().default([]),
related_software: z.array(z.string()).optional().nullable().default([]),
});
/**
 * Zod schema for the whole parsed data document.
 *
 * `tools`, `domains` and `phases` are required. `domain-agnostic-software`,
 * `scenarios` and `skill_levels` are optional and default to an empty
 * array / empty object when the key is omitted.
 */
const ToolsDataSchema = z.object({
tools: z.array(ToolSchema),
domains: z.array(z.object({
id: z.string(),
name: z.string(),
description: z.string().optional()
})),
phases: z.array(z.object({
id: z.string(),
name: z.string(),
description: z.string().optional(),
typical_tools: z.array(z.string()).optional().default([]),
key_activities: z.array(z.string()).optional().default([])
})),
'domain-agnostic-software': z.array(z.object({
id: z.string(),
name: z.string(),
description: z.string().optional(),
use_cases: z.array(z.string()).optional().default([])
})).optional().default([]),
scenarios: z.array(z.object({
id: z.string(),
icon: z.string(),
friendly_name: z.string(),
description: z.string().optional(),
typical_phases: z.array(z.string()).optional().default([]),
complexity: z.enum(['low', 'medium', 'high']).optional()
})).optional().default([]),
// One optional free-text description per skill level name.
skill_levels: z.object({
novice: z.string().optional(),
beginner: z.string().optional(),
intermediate: z.string().optional(),
advanced: z.string().optional(),
expert: z.string().optional()
}).optional().default({})
});
/**
 * Shape of the validated data set held in the module caches and returned by
 * `getToolsData`. `tools` contains every entry type, including entries whose
 * `type` is `'concept'`.
 */
interface ToolsData {
tools: any[];
domains: any[];
phases: any[];
'domain-agnostic-software': any[];
scenarios: any[];
skill_levels?: any;
}
/**
 * Shape returned by `getCompressedToolsDataForAI`. Unlike `ToolsData`, entries
 * whose `type` is `'concept'` are separated from `tools` into `concepts`.
 */
interface EnhancedCompressedToolsData {
tools: any[];
concepts: any[];
domains: any[];
phases: any[];
'domain-agnostic-software': any[];
scenarios?: any[];
skill_levels: any;
}
// Module-level caches. All of them are reset to null by clearCache().

// Validated data as parsed by loadRawData(); null until the first successful load.
let cachedData: ToolsData | null = null;
// Copy of the data with `tools` shuffled, built by getToolsData().
let cachedRandomizedData: ToolsData | null = null;
// Result of getCompressedToolsDataForAI(); also reset whenever getToolsData() reshuffles.
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
// `Date#toDateString()` value recorded at the time of the last shuffle.
let lastRandomizationDate: string | null = null;
// Version string produced by generateDataVersion() during the last load.
let dataVersion: string | null = null;
/**
 * Builds a deterministic pseudo-random number generator from a numeric seed.
 *
 * The generator keeps a single number of internal state. Each call replaces
 * the state with `Math.sin(state) * 10000` and yields the fractional part of
 * the new state, so equal seeds always produce equal sequences.
 *
 * @param seed - Value the internal state is initialised from.
 * @returns A zero-argument function yielding the next number of the sequence.
 */
function seededRandom(seed: number): () => number {
  let state = Math.sin(seed) * 10000;

  const next = (): number => {
    state = Math.sin(state) * 10000;
    return state - Math.floor(state);
  };

  return next;
}
/**
 * Computes the seed used for shuffling the tool list.
 *
 * The seed is the sum of the character codes of today's
 * `Date#toDateString()` value plus the whole number of seconds returned by
 * `process.uptime()`. Because of the uptime component the result is not the
 * same for every call on a given day.
 *
 * @returns An integer seed.
 */
function getDailySeed(): number {
  const dateLabel = new Date().toDateString();

  let charCodeSum = 0;
  for (const ch of dateLabel.split('')) {
    charCodeSum += ch.charCodeAt(0);
  }

  const uptimeSeconds = Math.floor(process.uptime());
  return charCodeSum + uptimeSeconds;
}
/**
 * Returns a shuffled copy of `array`; the input is left untouched.
 *
 * Walks from the last position down to the second one and swaps each position
 * with an index chosen via `randomFn` among the positions not yet fixed.
 * `randomFn` is called once per position visited (length - 1 times).
 *
 * @param array - Items to shuffle.
 * @param randomFn - Source of numbers used to pick the swap index.
 * @returns A new array holding the same items in shuffled order.
 */
function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
  const result = Array.from(array);

  let remaining = result.length;
  while (remaining > 1) {
    const last = remaining - 1;
    const pick = Math.floor(randomFn() * remaining);

    const held = result[last];
    result[last] = result[pick];
    result[pick] = held;

    remaining = last;
  }

  return result;
}
/**
 * Derives a short version tag from the full content of `data`.
 *
 * The value is serialised to JSON with object keys sorted at every nesting
 * level, so the tag does not depend on key insertion order, and the resulting
 * text is folded into a 32-bit hash rendered in base 36.
 *
 * @param data - Any JSON-serialisable value. A value that serialises to
 *   nothing (e.g. `undefined`) is hashed as the empty string.
 * @returns The absolute value of the 32-bit hash as a base-36 string.
 */
function generateDataVersion(data: any): string {
  // A function replacer is used on purpose. An array replacer acts as a
  // property allow-list at every nesting level, so passing only the top-level
  // key names would drop nearly all nested fields and make the tag blind to
  // content changes.
  const canonical =
    JSON.stringify(data, (_key, value) => {
      if (value === null || typeof value !== 'object' || Array.isArray(value)) {
        return value;
      }
      // Rebuild plain objects with their keys in sorted order.
      return Object.fromEntries(
        Object.keys(value)
          .sort()
          .map((key): [string, unknown] => [key, value[key]])
      );
    }) ?? '';

  let hash = 0;
  for (let i = 0; i < canonical.length; i++) {
    // hash * 31 + charCode, truncated to a signed 32-bit integer.
    hash = ((hash << 5) - hash + canonical.charCodeAt(i)) | 0;
  }
  return Math.abs(hash).toString(36);
}
/**
 * Loads, validates and caches the contents of `src/data/tools.yaml`
 * (resolved against `process.cwd()`).
 *
 * On a cache hit the cached object is returned without touching the disk.
 * Otherwise the file is read and parsed, validated against `ToolsDataSchema`,
 * given default skill-level descriptions when none are present, and the data
 * version is recomputed and logged.
 *
 * @returns The validated data set.
 * @throws Error `'Invalid tools.yaml structure'` when validation or version
 *   generation fails (the underlying error is logged). Errors from reading
 *   or parsing the file propagate unchanged.
 */
async function loadRawData(): Promise<ToolsData> {
  if (cachedData) {
    return cachedData;
  }

  const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
  const yamlContent = await fs.readFile(yamlPath, 'utf8');
  const rawData = load(yamlContent);

  try {
    const parsed: ToolsData = ToolsDataSchema.parse(rawData);
    cachedData = parsed;

    const levels = parsed.skill_levels;
    const hasLevels = levels && Object.keys(levels).length > 0;
    if (!hasLevels) {
      // No descriptions supplied: fall back to the built-in wording.
      parsed.skill_levels = {
        novice: "Minimal technical background required, guided interfaces",
        beginner: "Basic IT knowledge, some command-line familiarity helpful",
        intermediate: "Solid technical foundation, comfortable with various tools",
        advanced: "Extensive experience, deep technical understanding required",
        expert: "Specialist knowledge, cutting-edge techniques and complex scenarios"
      };
    }

    dataVersion = generateDataVersion(parsed);
    console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);
    return parsed;
  } catch (error) {
    console.error('YAML validation failed:', error);
    throw new Error('Invalid tools.yaml structure');
  }
}
/**
 * Returns the tool data with the `tools` list in shuffled order.
 *
 * The shuffled copy is cached and reused until the current
 * `Date#toDateString()` value differs from the one recorded at the last
 * shuffle. A reshuffle also discards the cached compressed data so that it is
 * rebuilt from the new order.
 *
 * @returns The data set with shuffled `tools`.
 */
export async function getToolsData(): Promise<ToolsData> {
  const today = new Date().toDateString();

  if (cachedRandomizedData && lastRandomizationDate === today) {
    return cachedRandomizedData;
  }

  const rawData = await loadRawData();
  const randomFn = seededRandom(getDailySeed());

  const reshuffled: ToolsData = {
    ...rawData,
    tools: shuffleArray(rawData.tools, randomFn)
  };

  cachedRandomizedData = reshuffled;
  lastRandomizationDate = today;
  cachedCompressedData = null;

  return reshuffled;
}
/**
 * Returns a trimmed, annotated view of the tool data for the AI pipeline.
 *
 * Entries are split by `type`:
 * - everything except `'concept'` goes to `tools`, without `projectUrl` and
 *   `statusUrl`, and with the derived fields `is_hosted`, `is_open_source`,
 *   `complexity_score` and `phase_suitability`;
 * - `'concept'` entries go to `concepts`, without `projectUrl`, `statusUrl`,
 *   `platforms`, `accessType` and `license`, and with a derived
 *   `learning_complexity`.
 *
 * The result is cached until `getToolsData` reshuffles or `clearCache` runs.
 *
 * @returns The compressed data set.
 */
export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedToolsData> {
  if (cachedCompressedData) {
    return cachedCompressedData;
  }

  const data = await getToolsData();

  // Skill level -> numeric score; any other level scores 1.
  const complexityBySkill = new Map<string, number>([
    ['expert', 5],
    ['advanced', 4],
    ['intermediate', 3],
    ['beginner', 2]
  ]);
  // Skill level -> learning complexity label; any other level is 'low'.
  const learningBySkill = new Map<string, string>([
    ['expert', 'very_high'],
    ['advanced', 'high'],
    ['intermediate', 'medium']
  ]);

  const compressedTools = data.tools
    .filter(tool => tool.type !== 'concept')
    .map(tool => {
      // projectUrl / statusUrl are pulled out only so they are omitted below.
      const { projectUrl, statusUrl, ...compressedTool } = tool;
      return {
        ...compressedTool,
        is_hosted: typeof projectUrl === 'string' && projectUrl.trim() !== '',
        // Coerced so the flag is always a real boolean, never the raw
        // null / undefined / '' license value.
        is_open_source: Boolean(tool.license) && tool.license !== 'Proprietary',
        complexity_score: complexityBySkill.get(tool.skillLevel) ?? 1,
        // A phase counts as 'primary' when any tag contains the phase id.
        phase_suitability: (tool.phases ?? []).map((phase: string) => ({
          phase,
          primary_use: tool.tags?.some((tag: string) => tag.includes(phase)) ? 'primary' : 'secondary'
        }))
      };
    });

  const concepts = data.tools
    .filter(tool => tool.type === 'concept')
    .map(concept => {
      const { projectUrl, statusUrl, platforms, accessType, license, ...compressedConcept } = concept;
      return {
        ...compressedConcept,
        learning_complexity: learningBySkill.get(concept.skillLevel) ?? 'low'
      };
    });

  const compressed: EnhancedCompressedToolsData = {
    tools: compressedTools,
    concepts,
    domains: data.domains,
    phases: data.phases,
    'domain-agnostic-software': data['domain-agnostic-software'],
    scenarios: data.scenarios,
    skill_levels: data.skill_levels ?? {}
  };

  cachedCompressedData = compressed;
  return compressed;
}
/**
 * Returns the version string computed during the last data load.
 *
 * @returns The current version string, or `null` when no data has been loaded
 *   yet or the cache has been cleared.
 */
export function getDataVersion(): string | null {
return dataVersion;
}
/**
 * Drops every module-level cache (raw, shuffled and compressed data), the
 * recorded shuffle date and the data version, then logs that it did so.
 * The next data access reloads everything from disk.
 */
export function clearCache(): void {
  cachedData = cachedRandomizedData = cachedCompressedData = null;
  lastRandomizationDate = dataVersion = null;
  console.log('[DATA SERVICE] Enhanced cache cleared');
}