// src/utils/dataService.ts - Enhanced for micro-task AI pipeline
import { promises as fs } from 'fs';
import { load } from 'js-yaml';
import path from 'path';
import { z } from 'zod';

/** Schema for a single entry of the `tools` list in `tools.yaml`. */
const ToolSchema = z.object({
  name: z.string(),
  icon: z.string().optional().nullable(),
  type: z.enum(['software', 'method', 'concept']),
  description: z.string(),
  domains: z.array(z.string()).optional().nullable().default([]),
  phases: z.array(z.string()).optional().nullable().default([]),
  platforms: z.array(z.string()).default([]),
  skillLevel: z.string(),
  url: z.string(),
  license: z.string().optional().nullable(),
  tags: z.array(z.string()).default([]),
  projectUrl: z.string().optional().nullable(),
  knowledgebase: z.boolean().optional().nullable(),
  statusUrl: z.string().optional().nullable(),
  accessType: z.string().optional().nullable(),
  'domain-agnostic-software': z.array(z.string()).optional().nullable(),
  related_concepts: z.array(z.string()).optional().nullable().default([]),
  related_software: z.array(z.string()).optional().nullable().default([]),
});

/** Schema for the whole `tools.yaml` document. */
const ToolsDataSchema = z.object({
  tools: z.array(ToolSchema),
  domains: z.array(
    z.object({
      id: z.string(),
      name: z.string(),
      description: z.string().optional(),
    })
  ),
  phases: z.array(
    z.object({
      id: z.string(),
      name: z.string(),
      description: z.string().optional(),
      typical_tools: z.array(z.string()).optional().default([]),
      key_activities: z.array(z.string()).optional().default([]),
    })
  ),
  'domain-agnostic-software': z
    .array(
      z.object({
        id: z.string(),
        name: z.string(),
        description: z.string().optional(),
        use_cases: z.array(z.string()).optional().default([]),
      })
    )
    .optional()
    .default([]),
  scenarios: z
    .array(
      z.object({
        id: z.string(),
        icon: z.string(),
        friendly_name: z.string(),
        description: z.string().optional(),
        typical_phases: z.array(z.string()).optional().default([]),
        complexity: z.enum(['low', 'medium', 'high']).optional(),
      })
    )
    .optional()
    .default([]),
  skill_levels: z
    .object({
      novice: z.string().optional(),
      beginner: z.string().optional(),
      intermediate: z.string().optional(),
      advanced: z.string().optional(),
      expert: z.string().optional(),
    })
    .optional()
    .default({}),
});

/** A validated tool entry, as produced by `ToolSchema`. */
type Tool = z.infer<typeof ToolSchema>;

interface ToolsData {
  tools: any[];
  domains: any[];
  phases: any[];
  'domain-agnostic-software': any[];
  scenarios: any[];
  skill_levels?: any;
}

interface EnhancedCompressedToolsData {
  tools: any[];
  concepts: any[];
  domains: any[];
  phases: any[];
  'domain-agnostic-software': any[];
  scenarios?: any[];
  skill_levels: any;
}

// Module-level caches; all of them are reset by clearCache().
let cachedData: ToolsData | null = null;
let cachedRandomizedData: ToolsData | null = null;
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
let lastRandomizationDate: string | null = null;
let dataVersion: string | null = null;

/** Numeric complexity per known skill level; unknown levels score 1. */
const COMPLEXITY_SCORE_BY_SKILL = new Map<string, number>([
  ['expert', 5],
  ['advanced', 4],
  ['intermediate', 3],
  ['beginner', 2],
]);

/** Learning complexity label per known skill level; unknown levels are 'low'. */
const LEARNING_COMPLEXITY_BY_SKILL = new Map<string, string>([
  ['expert', 'very_high'],
  ['advanced', 'high'],
  ['intermediate', 'medium'],
]);

/**
 * Creates a deterministic pseudo-random generator based on `Math.sin`.
 *
 * @param seed - Starting value; the same seed yields the same sequence.
 * @returns A function producing numbers in the range [0, 1).
 */
function seededRandom(seed: number): () => number {
  let x = Math.sin(seed) * 10000;
  return function () {
    x = Math.sin(x) * 10000;
    return x - Math.floor(x);
  };
}

/**
 * Derives the shuffle seed from today's date string plus the process uptime.
 *
 * NOTE: because whole seconds of `process.uptime()` are added, the value
 * depends on when in the process lifetime this is called, not only on the date.
 */
function getDailySeed(): number {
  const today = new Date().toDateString();
  const processStart = process.uptime();
  const dateSum = today
    .split('')
    .reduce((acc, char) => acc + char.charCodeAt(0), 0);
  return dateSum + Math.floor(processStart);
}

/**
 * Returns a shuffled copy of `array` (Fisher-Yates); the input is not mutated.
 *
 * @param array - Items to shuffle.
 * @param randomFn - Source of numbers in [0, 1) used to pick swap positions.
 */
function shuffleArray<T>(array: readonly T[], randomFn: () => number): T[] {
  const shuffled = [...array];
  for (let i = shuffled.length - 1; i > 0; i--) {
    const j = Math.floor(randomFn() * (i + 1));
    [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
  }
  return shuffled;
}

/**
 * `JSON.stringify` replacer that re-emits every plain object with its keys in
 * sorted order, so serialization does not depend on key insertion order.
 */
function sortObjectKeys(_key: string, value: unknown): unknown {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return value;
  }
  const source = value as Record<string, unknown>;
  const sorted: Record<string, unknown> = {};
  for (const key of Object.keys(source).sort()) {
    sorted[key] = source[key];
  }
  return sorted;
}

/**
 * Computes a short base-36 content hash of `data`.
 *
 * The previous implementation passed the sorted top-level keys as an array
 * replacer to `JSON.stringify`. An array replacer is an allowlist applied at
 * every nesting level, so nested properties (for example a tool's `name`) were
 * dropped and did not influence the version. A replacer function is used
 * instead so the full content is hashed with a stable key order.
 *
 * @param data - Any JSON-serializable value.
 * @returns The absolute value of a 32-bit string hash, encoded in base 36.
 */
function generateDataVersion(data: any): string {
  // JSON.stringify yields undefined for non-serializable roots at runtime.
  const str = JSON.stringify(data, sortObjectKeys) ?? '';
  let hash = 0;
  for (let i = 0; i < str.length; i++) {
    // hash * 31 + charCode, truncated to a signed 32-bit integer.
    hash = ((hash << 5) - hash + str.charCodeAt(i)) | 0;
  }
  return Math.abs(hash).toString(36);
}

/**
 * Reads and validates `src/data/tools.yaml` (relative to the current working
 * directory), caching the result for subsequent calls.
 *
 * When the file defines no skill levels, built-in descriptions are filled in.
 * Also updates the module-level data version.
 *
 * @throws Error with message 'Invalid tools.yaml structure' when schema
 *   validation fails. File-read and YAML syntax errors propagate unchanged.
 */
async function loadRawData(): Promise<ToolsData> {
  if (cachedData) {
    return cachedData;
  }

  const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
  const yamlContent = await fs.readFile(yamlPath, 'utf8');
  const rawData = load(yamlContent);

  let parsed: ToolsData;
  try {
    parsed = ToolsDataSchema.parse(rawData);
  } catch (error) {
    console.error('YAML validation failed:', error);
    throw new Error('Invalid tools.yaml structure');
  }

  if (!parsed.skill_levels || Object.keys(parsed.skill_levels).length === 0) {
    parsed.skill_levels = {
      novice: "Minimal technical background required, guided interfaces",
      beginner: "Basic IT knowledge, some command-line familiarity helpful",
      intermediate: "Solid technical foundation, comfortable with various tools",
      advanced: "Extensive experience, deep technical understanding required",
      expert: "Specialist knowledge, cutting-edge techniques and complex scenarios"
    };
  }

  // Only publish to the cache once the data is known to be valid.
  cachedData = parsed;
  dataVersion = generateDataVersion(parsed);
  console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);
  return parsed;
}

/**
 * Returns the tools data with the `tools` list in shuffled order.
 *
 * The shuffled result is cached and recomputed when the calendar date string
 * changes; recomputing also invalidates the compressed AI cache.
 */
export async function getToolsData(): Promise<ToolsData> {
  const today = new Date().toDateString();

  if (!cachedRandomizedData || lastRandomizationDate !== today) {
    const rawData = await loadRawData();
    const randomFn = seededRandom(getDailySeed());

    cachedRandomizedData = {
      ...rawData,
      tools: shuffleArray(rawData.tools, randomFn),
    };
    lastRandomizationDate = today;
    cachedCompressedData = null;
  }

  return cachedRandomizedData;
}

/**
 * Builds the AI-facing view of a software/method entry: drops `projectUrl`
 * and `statusUrl` and adds derived flags and scores.
 */
function compressTool(tool: Tool) {
  const { projectUrl, statusUrl, ...compressedTool } = tool;
  return {
    ...compressedTool,
    // Hosted means a non-blank project URL is present.
    is_hosted: typeof projectUrl === 'string' && projectUrl.trim() !== '',
    // Always a boolean: a missing or empty license counts as not open source.
    is_open_source: Boolean(tool.license) && tool.license !== 'Proprietary',
    complexity_score: COMPLEXITY_SCORE_BY_SKILL.get(tool.skillLevel) ?? 1,
    // A phase is 'primary' when any tag contains the phase id as a substring.
    phase_suitability:
      tool.phases?.map(phase => ({
        phase,
        primary_use: tool.tags?.some(tag => tag.includes(phase))
          ? 'primary'
          : 'secondary',
      })) ?? [],
  };
}

/**
 * Builds the AI-facing view of a concept entry: drops the software-specific
 * fields and adds a learning complexity label.
 */
function compressConcept(concept: Tool) {
  const {
    projectUrl,
    statusUrl,
    platforms,
    accessType,
    license,
    ...compressedConcept
  } = concept;
  return {
    ...compressedConcept,
    learning_complexity:
      LEARNING_COMPLEXITY_BY_SKILL.get(concept.skillLevel) ?? 'low',
  };
}

/**
 * Returns a cached, trimmed-down copy of the tools data for the AI pipeline.
 *
 * Entries of type 'concept' are returned under `concepts`; all other entries
 * are returned under `tools`.
 */
export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedToolsData> {
  if (!cachedCompressedData) {
    const data = await getToolsData();

    const compressedTools = data.tools
      .filter(tool => tool.type !== 'concept')
      .map(compressTool);

    const concepts = data.tools
      .filter(tool => tool.type === 'concept')
      .map(compressConcept);

    cachedCompressedData = {
      tools: compressedTools,
      concepts: concepts,
      domains: data.domains,
      phases: data.phases,
      'domain-agnostic-software': data['domain-agnostic-software'],
      scenarios: data.scenarios,
      skill_levels: data.skill_levels || {},
    };
  }

  return cachedCompressedData;
}

/** Returns the version hash of the loaded data, or null if nothing is loaded. */
export function getDataVersion(): string | null {
  return dataVersion;
}

/** Drops every cached value so the next access reloads `tools.yaml`. */
export function clearCache(): void {
  cachedData = null;
  cachedRandomizedData = null;
  cachedCompressedData = null;
  lastRandomizationDate = null;
  dataVersion = null;
  console.log('[DATA SERVICE] Enhanced cache cleared');
}