forensic-pathways/src/utils/dataService.ts
2025-07-20 22:59:08 +02:00

171 lines
5.3 KiB
TypeScript

import { promises as fs } from 'fs';
import { load } from 'js-yaml';
import path from 'path';
import { z } from 'zod';
/** Free-text field that may be a string, `null`, or absent altogether. */
const optionalText = () => z.string().optional().nullable();

/** Plain list of strings; optionality and defaults are chained on by each field. */
const textList = () => z.array(z.string());

/**
 * Validation schema for a single entry of the `tools` list.
 *
 * `type` distinguishes the three kinds of entries ('software', 'method',
 * 'concept'); the remaining fields are descriptive metadata.
 *
 * NOTE(review): for the `.optional().nullable().default([])` fields a missing
 * key is replaced by `[]`, but an explicit `null` presumably stays `null` --
 * confirm against zod's `.default()` semantics before relying on an array.
 */
const ToolSchema = z.object({
  // Core identity and presentation.
  name: z.string(),
  icon: optionalText(),
  type: z.enum(['software', 'method', 'concept']),
  description: z.string(),

  // Classification.
  domains: textList().optional().nullable().default([]),
  phases: textList().optional().nullable().default([]),
  platforms: textList().default([]),
  skillLevel: z.string(),
  url: z.string(),
  license: optionalText(),
  tags: textList().default([]),

  // Extras that may be null, undefined, or left out entirely.
  projectUrl: optionalText(),
  knowledgebase: z.boolean().optional().nullable(),
  statusUrl: optionalText(),
  accessType: optionalText(),
  'domain-agnostic-software': textList().optional().nullable(),
  related_concepts: textList().optional().nullable().default([]),
});
/** `{ id, name }` entry, used for the `domains` list. */
const NamedEntrySchema = z.object({
  id: z.string(),
  name: z.string(),
});

/** `{ id, name, description? }` entry, used for `phases` and `domain-agnostic-software`. */
const DescribedEntrySchema = z.object({
  id: z.string(),
  name: z.string(),
  description: z.string().optional(),
});

/**
 * Validation schema for the whole parsed YAML document: the tool entries plus
 * the lookup lists they refer to. `domain-agnostic-software` may be omitted
 * from the document, in which case it defaults to an empty list.
 */
const ToolsDataSchema = z.object({
  tools: z.array(ToolSchema),
  domains: z.array(NamedEntrySchema),
  phases: z.array(DescribedEntrySchema),
  'domain-agnostic-software': z.array(DescribedEntrySchema).optional().default([]),
});
/**
 * Loosely typed shape of the validated YAML document as it is cached and
 * handed to callers by `loadRawData()` and `getToolsData()`.
 *
 * The element types are deliberately `any[]` here; the structural guarantees
 * come from validating the document with `ToolsDataSchema` at load time.
 */
interface ToolsData {
// Tool entries (software, methods and concepts alike).
tools: any[];
// Lookup list of domains ({ id, name } per the schema).
domains: any[];
// Lookup list of phases ({ id, name, description? } per the schema).
phases: any[];
// Lookup list of domain-agnostic software categories; empty when absent from the YAML.
'domain-agnostic-software': any[];
}
/**
 * Reduced data set returned by `getCompressedToolsDataForAI()`.
 *
 * Inherits every list of `ToolsData` except `tools`, which is redeclared here,
 * and adds a separate `concepts` list.
 */
interface CompressedToolsData extends Omit<ToolsData, 'tools'> {
// Entries whose `type` is not 'concept', with some URL fields stripped.
tools: any[];
concepts: any[]; // Entries whose `type` is 'concept', kept apart as background knowledge for the AI
}
// Module-level caches. All four are reset together by clearCache().

// Validated YAML content in file order; filled on first use by loadRawData().
let cachedData: ToolsData | null = null;
// Copy of the data with a shuffled `tools` array; rebuilt by getToolsData().
let cachedRandomizedData: ToolsData | null = null;
// AI-oriented reduction of the shuffled data; built by getCompressedToolsDataForAI()
// and reset by getToolsData() whenever it re-shuffles.
let cachedCompressedData: CompressedToolsData | null = null;
// `Date#toDateString()` value of the day on which the last shuffle happened.
let lastRandomizationDate: string | null = null;
/**
 * Builds a deterministic pseudo-random generator from a numeric seed.
 *
 * The generator keeps one number of internal state. Every call replaces the
 * state with `sin(state) * 10000` and returns its fractional part (nominally
 * a value in [0, 1)). Two generators created from the same seed yield the
 * same sequence. This is a lightweight shuffling aid, not a cryptographic RNG.
 *
 * @param seed Any number; equal seeds give equal sequences.
 * @returns A zero-argument function producing the next value of the sequence.
 */
function seededRandom(seed: number): () => number {
  const scramble = (value: number): number => Math.sin(value) * 10000;

  let state = scramble(seed);
  return (): number => {
    state = scramble(state);
    return state - Math.floor(state);
  };
}
/**
 * Epoch second at which this process started, captured once at module load.
 *
 * `process.uptime()` is the number of seconds the process has been running,
 * so subtracting it from the current time yields the start time. Freezing the
 * value here keeps it identical for every later call.
 */
const processStartSeconds = Math.floor(Date.now() / 1000 - process.uptime());

/**
 * Computes the seed used for the daily shuffle.
 *
 * The seed combines a simple hash of today's local date string (the sum of its
 * character codes) with the process start time, so it is stable for the whole
 * day within one running process and changes on a new day or after a restart.
 *
 * The process component is the fixed start time rather than the live uptime:
 * uptime grows every second, which would make two calls on the same day
 * return different seeds.
 *
 * @returns An integer seed suitable for `seededRandom`.
 */
function getDailySeed(): number {
  const today = new Date().toDateString();

  let dateHash = 0;
  for (let i = 0; i < today.length; i++) {
    dateHash += today.charCodeAt(i);
  }

  return dateHash + processStartSeconds;
}
/**
 * Returns a shuffled copy of `array` using the Fisher-Yates algorithm.
 *
 * The input is never modified. Walking from the last index down to 1, each
 * position is swapped with an index chosen from `0..position` via `randomFn`,
 * which is therefore called exactly `length - 1` times for non-empty input.
 *
 * @param array Items to shuffle.
 * @param randomFn Source of randomness; expected to return values in [0, 1).
 * @returns A new array holding the same items in shuffled order.
 */
function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
  const result = Array.from(array);

  let position = result.length - 1;
  while (position > 0) {
    const pick = Math.floor(randomFn() * (position + 1));
    const held = result[position];
    result[position] = result[pick];
    result[pick] = held;
    position -= 1;
  }

  return result;
}
/**
 * Reads `src/data/tools.yaml` (relative to the current working directory),
 * parses it and validates it against `ToolsDataSchema`.
 *
 * The validated result is cached in `cachedData`, so the file is only read
 * again after the cache has been cleared.
 *
 * @returns The validated tools data in file order.
 * @throws Error with message 'Invalid tools.yaml structure' when validation
 *   fails (the underlying validation error is logged to the console). File
 *   read and YAML parse errors propagate unchanged.
 */
async function loadRawData(): Promise<ToolsData> {
  // Cache hit: nothing to read or validate.
  if (cachedData) {
    return cachedData;
  }

  const sourceFile = path.join(process.cwd(), 'src/data/tools.yaml');
  const fileText = await fs.readFile(sourceFile, 'utf8');
  const parsedYaml = load(fileText);

  try {
    cachedData = ToolsDataSchema.parse(parsedYaml);
  } catch (error) {
    console.error('YAML validation failed:', error);
    throw new Error('Invalid tools.yaml structure');
  }

  return cachedData;
}
/**
 * Returns the tools data with the `tools` array in shuffled order.
 *
 * The shuffle is cached together with the local date on which it was made and
 * reused until that date changes or the cache is cleared. Every list other
 * than `tools` is passed through from the raw data untouched.
 *
 * Side effect: whenever a new shuffle is produced, the compressed AI cache is
 * reset so that it gets rebuilt from the new ordering.
 *
 * @returns The cached or freshly shuffled tools data.
 */
export async function getToolsData(): Promise<ToolsData> {
  const today = new Date().toDateString();

  // Still valid: a shuffle exists and it was made today.
  if (cachedRandomizedData && lastRandomizationDate === today) {
    return cachedRandomizedData;
  }

  const rawData = await loadRawData();
  const nextRandom = seededRandom(getDailySeed());

  // Only the tools are reordered; the lookup lists are copied across as they are.
  const reshuffled: ToolsData = {
    ...rawData,
    tools: shuffleArray(rawData.tools, nextRandom)
  };

  cachedRandomizedData = reshuffled;
  lastRandomizationDate = today;
  // The compressed view derives from the shuffle, so it is now out of date.
  cachedCompressedData = null;

  return reshuffled;
}
/**
 * Returns a reduced version of the tools data intended as input for the AI.
 *
 * - `tools` holds every entry whose `type` is not 'concept', without the
 *   `projectUrl` and `statusUrl` fields.
 * - `concepts` holds the entries of type 'concept' as background knowledge,
 *   additionally stripped of `platforms`, `accessType` and `license`.
 * - `domains`, `phases` and `domain-agnostic-software` are passed through.
 *
 * The result is cached until the underlying shuffle is renewed or the cache
 * is cleared.
 *
 * @returns The cached or freshly built compressed data set.
 */
export async function getCompressedToolsDataForAI(): Promise<CompressedToolsData> {
  // Always consult getToolsData() before looking at the compressed cache: it is
  // cheap on a cache hit, and when the day has changed it re-shuffles and resets
  // cachedCompressedData. Checking the cache first would keep serving a snapshot
  // built from a previous day's ordering.
  const data = await getToolsData();

  if (cachedCompressedData) {
    return cachedCompressedData;
  }

  // Recommendable tools: everything except concepts, minus URL fields the AI does not need.
  const compressedTools = data.tools
    .filter(tool => tool.type !== 'concept')
    .map(tool => {
      const { projectUrl, statusUrl, ...compressedTool } = tool;
      return compressedTool;
    });

  // Concepts serve as background knowledge only, so just the essential fields are kept.
  const concepts = data.tools
    .filter(tool => tool.type === 'concept')
    .map(concept => {
      const { projectUrl, statusUrl, platforms, accessType, license, ...compressedConcept } = concept;
      return compressedConcept;
    });

  const compressed: CompressedToolsData = {
    tools: compressedTools,
    concepts: concepts,
    domains: data.domains,
    phases: data.phases,
    'domain-agnostic-software': data['domain-agnostic-software']
  };
  cachedCompressedData = compressed;

  return compressed;
}
/**
 * Drops every module-level cache so that the next request reads the YAML file
 * again, re-shuffles the tools and rebuilds the compressed view. Mainly useful
 * during development.
 */
export function clearCache(): void {
  // Derived views first ...
  cachedCompressedData = null;
  cachedRandomizedData = null;
  lastRandomizationDate = null;
  // ... then the validated source data itself.
  cachedData = null;
}