378 lines
12 KiB
TypeScript
378 lines
12 KiB
TypeScript
// src/utils/dataService.ts - Enhanced for micro-task AI pipeline
|
|
import { promises as fs } from 'fs';
|
|
import { load } from 'js-yaml';
|
|
import path from 'path';
|
|
import { z } from 'zod';
|
|
|
|
/**
 * Validation schema for a single entry of the `tools` list in tools.yaml.
 *
 * An entry is one of three kinds (`software`, `method`, `concept`). Most
 * descriptive fields may be omitted or explicitly set to `null`; list fields
 * declared with `.default([])` fall back to an empty array when the key is
 * absent.
 */
const ToolSchema = (() => {
  // String that may be missing or null.
  const optionalText = () => z.string().optional().nullable();
  // List of strings that may be missing or null.
  const optionalTextList = () => z.array(z.string()).optional().nullable();

  return z.object({
    name: z.string(),
    icon: optionalText(),
    type: z.enum(['software', 'method', 'concept']),
    description: z.string(),
    domains: optionalTextList().default([]),
    phases: optionalTextList().default([]),
    platforms: z.array(z.string()).default([]),
    skillLevel: z.string(),
    url: z.string(),
    license: optionalText(),
    tags: z.array(z.string()).default([]),
    projectUrl: optionalText(),
    knowledgebase: z.boolean().optional().nullable(),
    statusUrl: optionalText(),
    accessType: optionalText(),
    'domain-agnostic-software': optionalTextList(),
    related_concepts: optionalTextList().default([]),
    related_software: optionalTextList().default([]),
  });
})();
|
|
|
|
/**
 * Validation schema for the whole tools.yaml document: the tool list plus the
 * taxonomies (domains, phases, domain-agnostic software, scenarios) and the
 * optional skill-level descriptions.
 */
const ToolsDataSchema = (() => {
  // Optional list of strings that falls back to an empty array.
  const textList = () => z.array(z.string()).optional().default([]);

  const domainEntry = z.object({
    id: z.string(),
    name: z.string(),
    description: z.string().optional(),
  });

  const phaseEntry = z.object({
    id: z.string(),
    name: z.string(),
    description: z.string().optional(),
    typical_tools: textList(),
    key_activities: textList(),
  });

  const domainAgnosticEntry = z.object({
    id: z.string(),
    name: z.string(),
    description: z.string().optional(),
    use_cases: textList(),
  });

  const scenarioEntry = z.object({
    id: z.string(),
    icon: z.string(),
    friendly_name: z.string(),
    description: z.string().optional(),
    typical_phases: textList(),
    complexity: z.enum(['low', 'medium', 'high']).optional(),
  });

  const skillLevelDescriptions = z.object({
    novice: z.string().optional(),
    beginner: z.string().optional(),
    intermediate: z.string().optional(),
    advanced: z.string().optional(),
    expert: z.string().optional(),
  });

  return z.object({
    tools: z.array(ToolSchema),
    domains: z.array(domainEntry),
    phases: z.array(phaseEntry),
    'domain-agnostic-software': z.array(domainAgnosticEntry).optional().default([]),
    scenarios: z.array(scenarioEntry).optional().default([]),
    skill_levels: skillLevelDescriptions.optional().default({}),
  });
})();
|
|
|
|
/**
 * In-memory shape of the validated tools.yaml content as handed to callers.
 * Element types are intentionally loose (`any`) in this module.
 */
interface ToolsData {
  /** Tool, method and concept entries. */
  tools: Array<any>;
  /** Domain taxonomy entries. */
  domains: Array<any>;
  /** Phase taxonomy entries, in file order. */
  phases: Array<any>;
  /** Domain-agnostic software categories. */
  'domain-agnostic-software': Array<any>;
  /** Scenario entries. */
  scenarios: Array<any>;
  /** Skill-level descriptions; may be absent. */
  skill_levels?: any;
}
|
|
|
|
/**
 * Payload produced for the AI pipeline: the tool data split into software/method
 * entries and concepts, plus derived context structures.
 */
interface EnhancedCompressedToolsData {
  /** Non-concept entries with computed helper fields. */
  tools: Array<any>;
  /** Entries whose `type` is `concept`. */
  concepts: Array<any>;
  domains: Array<any>;
  phases: Array<any>;
  'domain-agnostic-software': Array<any>;
  /** Optional for AI processing. */
  scenarios?: Array<any>;
  skill_levels: any;

  // Enhanced context for micro-tasks
  /** Per-domain statistics. */
  domain_relationships: DomainRelationship[];
  /** Ordering information between phases. */
  phase_dependencies: PhaseDependency[];
  /** Tool names grouped by shared characteristics. */
  tool_compatibility_matrix: CompatibilityMatrix[];
}
|
|
|
|
/** Aggregate statistics about the tools assigned to one domain. */
interface DomainRelationship {
  /** Id of the domain the statistics belong to. */
  domain_id: string;
  /** Number of tools that list this domain. */
  tool_count: number;
  /** Most frequent tags among those tools, most frequent first. */
  common_tags: string[];
  /** Number of tools per skill level. */
  skill_distribution: { [skillLevel: string]: number };
}
|
|
|
|
/** Position of one phase within the phase sequence and its neighbours. */
interface PhaseDependency {
  /** Id of the phase described. */
  phase_id: string;
  /** 1-based position in the phase list. */
  order: number;
  /** Id of the preceding phase, or null for the first phase. */
  depends_on: null | string;
  /** Id of the following phase, or null for the last phase. */
  enables: null | string;
  /** Whether the phase is flagged as able to run in parallel. */
  is_parallel_capable: boolean;
  /** Coarse duration label such as 'hours-days'. */
  typical_duration: string;
}
|
|
|
|
/** One grouping of tool names by a shared characteristic. */
interface CompatibilityMatrix {
  /** Name of the grouping, e.g. 'platform_compatibility'. */
  type: string;
  /** Group key mapped to the names of the tools in that group. */
  groups: { [groupKey: string]: string[] };
}
|
|
|
|
// Module-level caches. All of them are reset by clearCache().

/** Validated tools.yaml content in file order. */
let cachedData: ToolsData | null = null;
/** Copy of the data with the tool list shuffled for the current day. */
let cachedRandomizedData: ToolsData | null = null;
/** Derived payload for the AI pipeline, built from the randomized data. */
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
/** `Date#toDateString()` value of the day the tool list was last shuffled. */
let lastRandomizationDate: string | null = null;
/** Hash-based identifier of the currently loaded data. */
let dataVersion: string | null = null;
|
|
|
|
/**
 * Creates a deterministic pseudo-random generator from a numeric seed.
 *
 * The generator keeps one number of internal state, advances it with
 * `sin(state) * 10000` on every call and returns the fractional part.
 * It is not suitable for anything security related.
 *
 * @param seed - Starting value for the sequence.
 * @returns A function yielding the next value of the sequence on each call.
 */
function seededRandom(seed: number): () => number {
  let state = Math.sin(seed) * 10000;

  const next = (): number => {
    state = Math.sin(state) * 10000;
    const wholePart = Math.floor(state);
    return state - wholePart;
  };

  return next;
}
|
|
|
|
/**
 * Derives the seed used for shuffling the tool list.
 *
 * The seed is the sum of the character codes of today's
 * `Date#toDateString()` plus the whole number of seconds the process has been
 * running (`process.uptime()`).
 *
 * NOTE(review): because of the uptime term the value depends on when this is
 * called, not only on the date — confirm that is intended.
 *
 * @returns An integer seed.
 */
function getDailySeed(): number {
  const dateLabel = new Date().toDateString();
  const uptimeSeconds = process.uptime();

  let charCodeSum = 0;
  for (let index = 0; index < dateLabel.length; index += 1) {
    charCodeSum += dateLabel.charCodeAt(index);
  }

  return charCodeSum + Math.floor(uptimeSeconds);
}
|
|
|
|
/**
 * Returns a shuffled copy of `array` using the Fisher-Yates algorithm.
 * The input array is left untouched.
 *
 * @param array - Items to shuffle.
 * @param randomFn - Source of random numbers; expected to return values in [0, 1).
 * @returns A new array containing the same items in shuffled order.
 */
function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
  const result = Array.from(array);

  let last = result.length - 1;
  while (last > 0) {
    // Pick a partner index in [0, last] and swap it into the tail position.
    const pick = Math.floor(randomFn() * (last + 1));
    const held = result[last] as T;
    result[last] = result[pick] as T;
    result[pick] = held;
    last -= 1;
  }

  return result;
}
|
|
|
|
/**
 * Computes a short, content-based version identifier for `data`.
 *
 * The value is serialized to JSON with object keys sorted at every nesting
 * level (so key order does not influence the result), hashed with a 32-bit
 * rolling hash and returned in base 36.
 *
 * The previous implementation passed `Object.keys(data).sort()` as the
 * `JSON.stringify` replacer. An array replacer is an allow-list applied to
 * every nested object, so properties such as a tool's `name` or `description`
 * were silently dropped and the version did not change when they changed.
 *
 * @param data - Any JSON-serializable value.
 * @returns Base-36 string of the absolute 32-bit hash.
 */
function generateDataVersion(data: any): string {
  // Rebuild every plain object with sorted keys; arrays and primitives pass through.
  const canonical =
    JSON.stringify(data, (_key: string, value: unknown) => {
      if (value === null || typeof value !== 'object' || Array.isArray(value)) {
        return value;
      }
      const source = value as Record<string, unknown>;
      return Object.fromEntries(
        Object.keys(source)
          .sort()
          .map((key): [string, unknown] => [key, source[key]])
      );
    }) ?? ''; // JSON.stringify yields undefined for non-serializable roots

  let hash = 0;
  for (let i = 0; i < canonical.length; i++) {
    // hash * 31 + charCode, truncated to a signed 32-bit integer.
    hash = ((hash << 5) - hash + canonical.charCodeAt(i)) | 0;
  }

  return Math.abs(hash).toString(36);
}
|
|
|
|
/**
 * Builds per-domain statistics for better AI understanding.
 *
 * For every domain it collects the tools whose `domains` list contains the
 * domain id and reports how many there are, their five most frequent tags
 * (most frequent first, ties in first-seen order) and how many tools fall
 * into each skill level.
 *
 * Counting uses `Map` rather than a plain object so that tags or skill levels
 * named like `Object.prototype` members (e.g. `constructor`, `toString`) are
 * counted correctly instead of colliding with inherited properties.
 *
 * @param domains - Domain entries; only `id` is read.
 * @param tools - Tool entries; `domains`, `tags` and `skillLevel` are read.
 * @returns One relationship record per domain, in input order.
 */
function generateDomainRelationships(domains: any[], tools: any[]): DomainRelationship[] {
  const relationships: DomainRelationship[] = [];

  for (const domain of domains) {
    const domainTools = tools.filter(
      (tool: any) => tool.domains && tool.domains.includes(domain.id)
    );

    const tagCounts = new Map<string, number>();
    const skillCounts = new Map<string, number>();
    for (const tool of domainTools) {
      for (const tag of (tool.tags || []) as string[]) {
        tagCounts.set(tag, (tagCounts.get(tag) ?? 0) + 1);
      }
      skillCounts.set(tool.skillLevel, (skillCounts.get(tool.skillLevel) ?? 0) + 1);
    }

    // Array#sort is stable, so equally frequent tags keep first-seen order.
    const topTags = [...tagCounts.entries()]
      .sort(([, countA], [, countB]) => countB - countA)
      .slice(0, 5)
      .map(([tag]) => tag);

    relationships.push({
      domain_id: domain.id,
      tool_count: domainTools.length,
      common_tags: topTags,
      skill_distribution: Object.fromEntries(skillCounts),
    });
  }

  return relationships;
}
|
|
|
|
/**
 * Describes how the phases follow one another.
 *
 * Phases are treated as a linear sequence in input order: each phase depends
 * on the one before it and enables the one after it. The parallel flag and the
 * duration label are looked up from fixed phase ids.
 *
 * @param phases - Phase entries; only `id` is read.
 * @returns One dependency record per phase, in input order.
 */
function generatePhaseDependencies(phases: any[]): PhaseDependency[] {
  // Some phases can run in parallel
  const parallelPhaseIds = ['examination', 'analysis'];

  const durationFor = (phaseId: any): string => {
    switch (phaseId) {
      case 'data-collection':
        return 'hours-days';
      case 'examination':
        return 'hours-weeks';
      case 'analysis':
        return 'days-weeks';
      default:
        return 'hours-days';
    }
  };

  const result: PhaseDependency[] = [];
  for (const [index, phase] of phases.entries()) {
    const previous = phases[index - 1];
    const following = phases[index + 1];

    result.push({
      phase_id: phase.id,
      order: index + 1,
      depends_on: previous?.id || null,
      enables: following?.id || null,
      is_parallel_capable: parallelPhaseIds.includes(phase.id),
      typical_duration: durationFor(phase.id),
    });
  }
  return result;
}
|
|
|
|
/**
 * Groups tool names by the values found in one of their list fields.
 *
 * A `Map` is used while collecting so that keys named like `Object.prototype`
 * members (e.g. `constructor`) start from an empty group instead of hitting an
 * inherited property, which previously made `push` throw.
 *
 * @param tools - Tool entries; `name` and the given field are read.
 * @param field - Name of the list field to group by.
 * @returns Field value mapped to the names of the tools listing it.
 */
function groupToolNamesByField(
  tools: any[],
  field: 'platforms' | 'phases'
): Record<string, string[]> {
  const groups = new Map<string, string[]>();

  for (const tool of tools) {
    const values: unknown = tool[field];
    if (!Array.isArray(values)) continue; // missing or null list

    for (const key of values as string[]) {
      const names = groups.get(key);
      if (names) {
        names.push(tool.name);
      } else {
        groups.set(key, [tool.name]);
      }
    }
  }

  return Object.fromEntries(groups);
}

/**
 * Builds the tool compatibility matrix: tool names grouped by platform
 * (`platform_compatibility`) and by phase (`phase_synergy`).
 *
 * @param tools - Tool entries; `name`, `platforms` and `phases` are read.
 * @returns The two groupings, platform first.
 */
function generateToolCompatibilityMatrix(tools: any[]): CompatibilityMatrix[] {
  return [
    {
      type: 'platform_compatibility',
      groups: groupToolNamesByField(tools, 'platforms'),
    },
    {
      type: 'phase_synergy',
      groups: groupToolNamesByField(tools, 'phases'),
    },
  ];
}
|
|
|
|
/**
 * Loads, validates and caches the contents of `src/data/tools.yaml`
 * (resolved against the current working directory).
 *
 * On the first call the file is read, parsed as YAML and validated against
 * `ToolsDataSchema`. Missing or empty `skill_levels` are replaced with default
 * descriptions, and `dataVersion` is recomputed. Later calls return the cached
 * object.
 *
 * @returns The validated data in file order.
 * @throws Error with message 'Invalid tools.yaml structure' when validation
 *   (or anything else inside the guarded section) fails. Errors from reading
 *   the file or parsing the YAML propagate unchanged.
 */
async function loadRawData(): Promise<ToolsData> {
  if (cachedData) {
    return cachedData;
  }

  const yamlFile = path.join(process.cwd(), 'src/data/tools.yaml');
  const yamlText = await fs.readFile(yamlFile, 'utf8');
  const parsedYaml = load(yamlText);

  try {
    const validated: ToolsData = ToolsDataSchema.parse(parsedYaml);
    cachedData = validated;

    // Enhanced: Add default skill level descriptions if not provided
    const levels = validated.skill_levels;
    const hasLevels = Boolean(levels) && Object.keys(levels).length !== 0;
    if (!hasLevels) {
      validated.skill_levels = {
        novice: "Minimal technical background required, guided interfaces",
        beginner: "Basic IT knowledge, some command-line familiarity helpful",
        intermediate: "Solid technical foundation, comfortable with various tools",
        advanced: "Extensive experience, deep technical understanding required",
        expert: "Specialist knowledge, cutting-edge techniques and complex scenarios"
      };
    }

    dataVersion = generateDataVersion(validated);
    console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);

    return validated;
  } catch (error) {
    console.error('YAML validation failed:', error);
    throw new Error('Invalid tools.yaml structure');
  }
}
|
|
|
|
/**
 * Returns the tools data with the tool list in shuffled order.
 *
 * The shuffled copy is cached together with the `Date#toDateString()` value of
 * the day it was built; it is rebuilt when no copy exists or that value no
 * longer matches today. Rebuilding also discards the compressed AI payload
 * derived from it.
 *
 * @returns The data with `tools` shuffled; all other fields are shared with
 *   the raw data object.
 */
export async function getToolsData(): Promise<ToolsData> {
  const currentDay = new Date().toDateString();

  if (cachedRandomizedData && lastRandomizationDate === currentDay) {
    return cachedRandomizedData;
  }

  const source = await loadRawData();
  const nextRandom = seededRandom(getDailySeed());
  const shuffledTools = shuffleArray(source.tools, nextRandom);

  const reshuffled: ToolsData = { ...source, tools: shuffledTools };
  cachedRandomizedData = reshuffled;
  lastRandomizationDate = currentDay;
  // The compressed payload was derived from the previous ordering.
  cachedCompressedData = null;

  return reshuffled;
}
|
|
|
|
/**
 * Converts a software/method entry into its compressed form for the AI
 * pipeline: `projectUrl` and `statusUrl` are removed and computed hint fields
 * are added.
 */
function toCompressedTool(tool: any) {
  const { projectUrl, statusUrl, ...rest } = tool;
  const tags: string[] = tool.tags ?? [];
  const phases: string[] = tool.phases ?? [];

  return {
    ...rest,
    // Enhanced: Add computed fields for AI
    is_hosted: typeof projectUrl === 'string' && projectUrl.trim() !== '',
    // Always a boolean; a missing licence previously leaked null/undefined/''.
    is_open_source: Boolean(tool.license) && tool.license !== 'Proprietary',
    complexity_score:
      tool.skillLevel === 'expert' ? 5 :
      tool.skillLevel === 'advanced' ? 4 :
      tool.skillLevel === 'intermediate' ? 3 :
      tool.skillLevel === 'beginner' ? 2 : 1,
    // Enhanced: Phase-specific suitability hints
    phase_suitability: phases.map((phase: string) => ({
      phase,
      // 'primary' when at least one tag mentions the phase id.
      primary_use: tags.some((tag: string) => tag.includes(phase)) ? 'primary' : 'secondary',
    })),
  };
}

/**
 * Converts a concept entry into its compressed form: software-specific fields
 * are removed and a learning-complexity label is added.
 */
function toCompressedConcept(concept: any) {
  const { projectUrl, statusUrl, platforms, accessType, license, ...rest } = concept;

  return {
    ...rest,
    // Enhanced: Learning difficulty indicator
    learning_complexity:
      concept.skillLevel === 'expert' ? 'very_high' :
      concept.skillLevel === 'advanced' ? 'high' :
      concept.skillLevel === 'intermediate' ? 'medium' :
      'low',
  };
}

/**
 * Returns the enriched, compressed data set for the AI pipeline.
 *
 * The payload is built once from `getToolsData()` and cached until the cache
 * is cleared or `getToolsData()` reshuffles the tools. Entries of type
 * `concept` are separated from the other tools, both get computed helper
 * fields, and derived context (domain relationships, phase dependencies,
 * compatibility matrix) is added. The context is computed from the
 * non-concept tools only.
 *
 * @returns The cached or freshly built payload.
 */
export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedToolsData> {
  if (cachedCompressedData) {
    return cachedCompressedData;
  }

  const data = await getToolsData();

  // Enhanced: More detailed tool information for micro-tasks
  const compressedTools = data.tools
    .filter((tool: any) => tool.type !== 'concept')
    .map(toCompressedTool);

  const concepts = data.tools
    .filter((tool: any) => tool.type === 'concept')
    .map(toCompressedConcept);

  // Enhanced: Add rich context data
  const domainRelationships = generateDomainRelationships(data.domains, compressedTools);
  const phaseDependencies = generatePhaseDependencies(data.phases);
  const toolCompatibilityMatrix = generateToolCompatibilityMatrix(compressedTools);

  const compressed: EnhancedCompressedToolsData = {
    tools: compressedTools,
    concepts: concepts,
    domains: data.domains,
    phases: data.phases,
    'domain-agnostic-software': data['domain-agnostic-software'],
    scenarios: data.scenarios, // Include scenarios for context
    skill_levels: data.skill_levels || {},
    // Enhanced context for micro-tasks
    domain_relationships: domainRelationships,
    phase_dependencies: phaseDependencies,
    tool_compatibility_matrix: toolCompatibilityMatrix
  };
  cachedCompressedData = compressed;

  console.log(`[DATA SERVICE] Generated enhanced compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
  console.log(`[DATA SERVICE] Added context: ${domainRelationships.length} domain relationships, ${phaseDependencies.length} phase dependencies`);

  return compressed;
}
|
|
|
|
/**
 * Returns the version identifier of the currently loaded data.
 *
 * @returns The identifier, or `null` when no data has been loaded since
 *   start-up or since the cache was last cleared.
 */
export function getDataVersion(): string | null {
  return dataVersion;
}
|
|
|
|
/**
 * Drops every cached value held by this module (raw data, shuffled data,
 * compressed AI payload, shuffle date and data version) and logs the reset.
 * The next data request reloads everything from disk.
 */
export function clearCache(): void {
  dataVersion = null;
  lastRandomizationDate = null;
  cachedCompressedData = null;
  cachedRandomizedData = null;
  cachedData = null;

  console.log('[DATA SERVICE] Enhanced cache cleared');
}