// src/utils/dataService.ts - Enhanced for micro-task AI pipeline
import { promises as fs } from 'fs';
import { load } from 'js-yaml';
import path from 'path';
import { z } from 'zod';

/**
 * Schema for a single entry of the `tools` list in tools.yaml.
 *
 * NOTE(review): several array fields are `.optional().nullable().default([])`.
 * The default presumably only replaces `undefined`, so an explicit `null` in
 * the YAML is kept as `null` - consumers below therefore still guard against
 * missing arrays. Confirm against the Zod version in use.
 */
const ToolSchema = z.object({
  name: z.string(),
  icon: z.string().optional().nullable(),
  type: z.enum(['software', 'method', 'concept']),
  description: z.string(),
  domains: z.array(z.string()).optional().nullable().default([]),
  phases: z.array(z.string()).optional().nullable().default([]),
  platforms: z.array(z.string()).default([]),
  skillLevel: z.string(),
  url: z.string(),
  license: z.string().optional().nullable(),
  tags: z.array(z.string()).default([]),
  projectUrl: z.string().optional().nullable(),
  knowledgebase: z.boolean().optional().nullable(),
  statusUrl: z.string().optional().nullable(),
  accessType: z.string().optional().nullable(),
  'domain-agnostic-software': z.array(z.string()).optional().nullable(),
  related_concepts: z.array(z.string()).optional().nullable().default([]),
  related_software: z.array(z.string()).optional().nullable().default([]),
});

/** Schema for the whole tools.yaml document. */
const ToolsDataSchema = z.object({
  tools: z.array(ToolSchema),
  domains: z.array(z.object({
    id: z.string(),
    name: z.string(),
    description: z.string().optional()
  })),
  phases: z.array(z.object({
    id: z.string(),
    name: z.string(),
    description: z.string().optional(),
    typical_tools: z.array(z.string()).optional().default([]),
    key_activities: z.array(z.string()).optional().default([])
  })),
  'domain-agnostic-software': z.array(z.object({
    id: z.string(),
    name: z.string(),
    description: z.string().optional(),
    use_cases: z.array(z.string()).optional().default([])
  })).optional().default([]),
  scenarios: z.array(z.object({
    id: z.string(),
    icon: z.string(),
    friendly_name: z.string(),
    description: z.string().optional(),
    typical_phases: z.array(z.string()).optional().default([]),
    complexity: z.enum(['low', 'medium', 'high']).optional()
  })).optional().default([]),
  skill_levels: z.object({
    novice: z.string().optional(),
    beginner: z.string().optional(),
    intermediate: z.string().optional(),
    advanced: z.string().optional(),
    expert: z.string().optional()
  }).optional().default({})
});

/** Validated content of tools.yaml as handed out by this module. */
interface ToolsData {
  tools: any[];
  domains: any[];
  phases: any[];
  'domain-agnostic-software': any[];
  scenarios: any[];
  skill_levels?: any;
}

/** Tools data split into tools/concepts and enriched with derived context for the AI pipeline. */
interface EnhancedCompressedToolsData {
  tools: any[];
  concepts: any[];
  domains: any[];
  phases: any[];
  'domain-agnostic-software': any[];
  scenarios?: any[]; // Optional for AI processing
  skill_levels: any;
  // Enhanced context for micro-tasks
  domain_relationships: DomainRelationship[];
  phase_dependencies: PhaseDependency[];
  tool_compatibility_matrix: CompatibilityMatrix[];
}

/** Per-domain summary: tool count, most frequent tags and tool count per skill level. */
interface DomainRelationship {
  domain_id: string;
  tool_count: number;
  common_tags: string[];
  skill_distribution: Record<string, number>;
}

/** Position of a phase within the ordered `phases` list and its direct neighbours. */
interface PhaseDependency {
  phase_id: string;
  order: number;
  depends_on: string | null;
  enables: string | null;
  is_parallel_capable: boolean;
  typical_duration: string;
}

/** Tool names grouped under one characteristic (`type` names the characteristic). */
interface CompatibilityMatrix {
  type: string;
  groups: Record<string, string[]>;
}

// Module-level caches; all of them are reset by clearCache().
let cachedData: ToolsData | null = null;
let cachedRandomizedData: ToolsData | null = null;
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
let lastRandomizationDate: string | null = null;
let dataVersion: string | null = null;

/** Phase ids flagged as parallel-capable in the generated phase dependencies. */
const PARALLEL_CAPABLE_PHASES = new Set<string>(['examination', 'analysis']);

/** Typical duration per phase id; phases not listed fall back to DEFAULT_PHASE_DURATION. */
const PHASE_DURATIONS = new Map<string, string>([
  ['data-collection', 'hours-days'],
  ['examination', 'hours-weeks'],
  ['analysis', 'days-weeks'],
]);
const DEFAULT_PHASE_DURATION = 'hours-days';

/** Numeric complexity per skill level; unknown levels score 1. */
const COMPLEXITY_SCORES = new Map<string, number>([
  ['expert', 5],
  ['advanced', 4],
  ['intermediate', 3],
  ['beginner', 2],
]);

/** Learning complexity label per skill level; unknown levels map to 'low'. */
const LEARNING_COMPLEXITY = new Map<string, string>([
  ['expert', 'very_high'],
  ['advanced', 'high'],
  ['intermediate', 'medium'],
]);

/**
 * Creates a deterministic pseudo-random generator from a numeric seed.
 *
 * @param seed - Starting value; the same seed yields the same sequence.
 * @returns A function producing the fractional part of a sine-based sequence.
 */
function seededRandom(seed: number): () => number {
  let x = Math.sin(seed) * 10000;
  return function () {
    x = Math.sin(x) * 10000;
    return x - Math.floor(x);
  };
}

/**
 * Derives the shuffle seed from today's date string plus the process uptime.
 *
 * Because whole seconds of `process.uptime()` are added, the seed is not a
 * pure function of the date: it also depends on how long the process has been
 * running when the seed is computed.
 */
function getDailySeed(): number {
  const today = new Date().toDateString();
  const uptimeSeconds = process.uptime();
  const dateSum = today.split('').reduce((acc, char) => acc + char.charCodeAt(0), 0);
  return dateSum + Math.floor(uptimeSeconds);
}

/**
 * Returns a shuffled copy of `array` (Fisher-Yates); the input is not modified.
 *
 * @param array - Items to shuffle.
 * @param randomFn - Source of random numbers, expected to return values in [0, 1).
 */
function shuffleArray<T>(array: readonly T[], randomFn: () => number): T[] {
  const shuffled = [...array];
  for (let i = shuffled.length - 1; i > 0; i--) {
    const j = Math.floor(randomFn() * (i + 1));
    [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
  }
  return shuffled;
}

/**
 * JSON.stringify replacer that re-emits plain objects with their keys sorted,
 * so serialisation does not depend on property insertion order.
 */
function sortObjectKeys(_key: string, value: unknown): unknown {
  if (value === null || typeof value !== 'object' || Array.isArray(value)) {
    return value;
  }
  const source = value as Record<string, unknown>;
  const sorted: Record<string, unknown> = {};
  for (const key of Object.keys(source).sort()) {
    sorted[key] = source[key];
  }
  return sorted;
}

/**
 * Computes a short base-36 hash over the complete content of `data`.
 *
 * An array passed as the JSON.stringify replacer acts as a property allow-list
 * at every nesting level, which would drop every nested field whose name is
 * not also a top-level key. A replacer function is used instead so that all
 * fields contribute to the version.
 *
 * @param data - Any JSON-serialisable value.
 * @returns Base-36 representation of the absolute 32-bit hash.
 */
function generateDataVersion(data: unknown): string {
  const str = JSON.stringify(data, sortObjectKeys);
  let hash = 0;
  for (let i = 0; i < str.length; i++) {
    // hash * 31 + char, truncated to a signed 32-bit integer.
    hash = (((hash << 5) - hash) + str.charCodeAt(i)) | 0;
  }
  return Math.abs(hash).toString(36);
}

/**
 * Enhanced: Generate domain relationships for better AI understanding.
 *
 * For every domain this collects the tools listing that domain id, their five
 * most frequent tags and the number of tools per skill level.
 */
function generateDomainRelationships(domains: any[], tools: any[]): DomainRelationship[] {
  return domains.map((domain): DomainRelationship => {
    const domainTools = tools.filter(
      tool => tool.domains && tool.domains.includes(domain.id)
    );

    const tagCounts = new Map<string, number>();
    const skillDistribution: Record<string, number> = {};
    for (const tool of domainTools) {
      for (const tag of (tool.tags || []) as string[]) {
        tagCounts.set(tag, (tagCounts.get(tag) ?? 0) + 1);
      }
      skillDistribution[tool.skillLevel] = (skillDistribution[tool.skillLevel] || 0) + 1;
    }

    // Most frequent first; ties keep first-seen order because sort is stable.
    const topTags = Array.from(tagCounts.entries())
      .sort(([, a], [, b]) => b - a)
      .slice(0, 5)
      .map(([tag]) => tag);

    return {
      domain_id: domain.id,
      tool_count: domainTools.length,
      common_tags: topTags,
      skill_distribution: skillDistribution
    };
  });
}

/**
 * Enhanced: Generate phase dependencies.
 *
 * Treats `phases` as an ordered list: each phase depends on its predecessor
 * and enables its successor (`null` at either end).
 */
function generatePhaseDependencies(phases: any[]): PhaseDependency[] {
  return phases.map((phase, index): PhaseDependency => ({
    phase_id: phase.id,
    order: index + 1,
    depends_on: phases[index - 1]?.id || null,
    enables: phases[index + 1]?.id || null,
    // Some phases can run in parallel
    is_parallel_capable: PARALLEL_CAPABLE_PHASES.has(phase.id),
    typical_duration: PHASE_DURATIONS.get(phase.id) ?? DEFAULT_PHASE_DURATION
  }));
}

/** Groups tool names by each value of the given list-valued tool property. */
function groupToolNamesBy(tools: any[], property: 'platforms' | 'phases'): Record<string, string[]> {
  const groups: Record<string, string[]> = {};
  for (const tool of tools) {
    const values: string[] | null | undefined = tool[property];
    if (!values) continue;
    for (const value of values) {
      if (!groups[value]) groups[value] = [];
      groups[value].push(tool.name);
    }
  }
  return groups;
}

/**
 * Enhanced: Generate tool compatibility matrix.
 *
 * Produces two groupings of tool names: by platform and by phase.
 */
function generateToolCompatibilityMatrix(tools: any[]): CompatibilityMatrix[] {
  return [
    { type: 'platform_compatibility', groups: groupToolNamesBy(tools, 'platforms') },
    { type: 'phase_synergy', groups: groupToolNamesBy(tools, 'phases') }
  ];
}

/**
 * Loads and validates `src/data/tools.yaml` (relative to the working
 * directory), caching the result for later calls.
 *
 * @throws Error('Invalid tools.yaml structure') when schema validation fails.
 *   File-system and YAML parse errors propagate unchanged.
 */
async function loadRawData(): Promise<ToolsData> {
  if (cachedData) {
    return cachedData;
  }

  const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
  const yamlContent = await fs.readFile(yamlPath, 'utf8');
  const rawData = load(yamlContent);

  try {
    const parsed: ToolsData = ToolsDataSchema.parse(rawData);

    // Enhanced: Add default skill level descriptions if not provided
    if (!parsed.skill_levels || Object.keys(parsed.skill_levels).length === 0) {
      parsed.skill_levels = {
        novice: "Minimal technical background required, guided interfaces",
        beginner: "Basic IT knowledge, some command-line familiarity helpful",
        intermediate: "Solid technical foundation, comfortable with various tools",
        advanced: "Extensive experience, deep technical understanding required",
        expert: "Specialist knowledge, cutting-edge techniques and complex scenarios"
      };
    }

    cachedData = parsed;
    dataVersion = generateDataVersion(parsed);
    console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);
    return parsed;
  } catch (error) {
    console.error('YAML validation failed:', error);
    throw new Error('Invalid tools.yaml structure');
  }
}

/**
 * Returns the tools data with the `tools` list shuffled.
 *
 * The shuffled copy is cached and rebuilt when the calendar date string
 * changes; rebuilding also invalidates the compressed AI data.
 */
export async function getToolsData(): Promise<ToolsData> {
  const today = new Date().toDateString();

  if (!cachedRandomizedData || lastRandomizationDate !== today) {
    const rawData = await loadRawData();
    const randomFn = seededRandom(getDailySeed());

    cachedRandomizedData = {
      ...rawData,
      tools: shuffleArray(rawData.tools, randomFn)
    };
    lastRandomizationDate = today;
    cachedCompressedData = null;
  }

  return cachedRandomizedData;
}

/**
 * Returns the cached AI-oriented view of the tools data, building it on first use.
 *
 * Entries of type 'concept' are separated from the other tools, URL-style
 * fields are removed, computed hint fields are added and the derived context
 * (domain relationships, phase dependencies, compatibility matrix) is attached.
 */
export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedToolsData> {
  if (cachedCompressedData) {
    return cachedCompressedData;
  }

  const data = await getToolsData();

  // Enhanced: More detailed tool information for micro-tasks
  const compressedTools = data.tools
    .filter(tool => tool.type !== 'concept')
    .map(tool => {
      const { projectUrl, statusUrl, ...compressedTool } = tool;
      const tags: string[] = tool.tags ?? [];
      const phases: string[] = tool.phases ?? [];
      return {
        ...compressedTool,
        // Enhanced: Add computed fields for AI
        is_hosted: typeof projectUrl === 'string' && projectUrl.trim() !== '',
        // Always a boolean, also when no license is recorded.
        is_open_source: Boolean(tool.license) && tool.license !== 'Proprietary',
        complexity_score: COMPLEXITY_SCORES.get(tool.skillLevel) ?? 1,
        // Enhanced: Phase-specific suitability hints
        phase_suitability: phases.map((phase: string) => ({
          phase,
          primary_use: tags.some((tag: string) => tag.includes(phase)) ? 'primary' : 'secondary'
        }))
      };
    });

  const concepts = data.tools
    .filter(tool => tool.type === 'concept')
    .map(concept => {
      const { projectUrl, statusUrl, platforms, accessType, license, ...compressedConcept } = concept;
      return {
        ...compressedConcept,
        // Enhanced: Learning difficulty indicator
        learning_complexity: LEARNING_COMPLEXITY.get(concept.skillLevel) ?? 'low'
      };
    });

  // Enhanced: Add rich context data
  const domainRelationships = generateDomainRelationships(data.domains, compressedTools);
  const phaseDependencies = generatePhaseDependencies(data.phases);
  const toolCompatibilityMatrix = generateToolCompatibilityMatrix(compressedTools);

  const compressed: EnhancedCompressedToolsData = {
    tools: compressedTools,
    concepts: concepts,
    domains: data.domains,
    phases: data.phases,
    'domain-agnostic-software': data['domain-agnostic-software'],
    scenarios: data.scenarios, // Include scenarios for context
    skill_levels: data.skill_levels || {},
    // Enhanced context for micro-tasks
    domain_relationships: domainRelationships,
    phase_dependencies: phaseDependencies,
    tool_compatibility_matrix: toolCompatibilityMatrix
  };
  cachedCompressedData = compressed;

  console.log(`[DATA SERVICE] Generated enhanced compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
  console.log(`[DATA SERVICE] Added context: ${domainRelationships.length} domain relationships, ${phaseDependencies.length} phase dependencies`);

  return compressed;
}

/** Returns the version hash of the loaded data, or `null` when nothing is loaded. */
export function getDataVersion(): string | null {
  return dataVersion;
}

/** Drops every cached value so the next access reloads tools.yaml from disk. */
export function clearCache(): void {
  cachedData = null;
  cachedRandomizedData = null;
  cachedCompressedData = null;
  lastRandomizationDate = null;
  dataVersion = null;
  console.log('[DATA SERVICE] Enhanced cache cleared');
}