progress
This commit is contained in:
		
							parent
							
								
									c96aa70413
								
							
						
					
					
						commit
						680a2c311d
					
				
							
								
								
									
										37
									
								
								.env.example
									
									
									
									
									
								
							
							
						
						
									
										37
									
								
								.env.example
									
									
									
									
									
								
							@ -40,9 +40,40 @@ AI_EMBEDDINGS_BATCH_DELAY_MS=1000
 | 
			
		||||
AI_EMBEDDING_CANDIDATES=30
 | 
			
		||||
AI_SIMILARITY_THRESHOLD=0.3
 | 
			
		||||
 | 
			
		||||
# === AI Processing Configuration ===
 | 
			
		||||
AI_MAX_SELECTED_ITEMS=15
 | 
			
		||||
AI_RATE_LIMIT_DELAY_MS=2000
 | 
			
		||||
# Delay between micro-tasks to respect rate limits (milliseconds)
 | 
			
		||||
AI_MICRO_TASK_DELAY_MS=500
 | 
			
		||||
 | 
			
		||||
# Micro-task specific rate limiting (requests per minute per user)
 | 
			
		||||
AI_MICRO_TASK_RATE_LIMIT=30
 | 
			
		||||
 | 
			
		||||
# Maximum parallel micro-tasks (for future parallel processing)
 | 
			
		||||
AI_MAX_PARALLEL_TASKS=3
 | 
			
		||||
 | 
			
		||||
# Micro-task timeout settings (milliseconds)
 | 
			
		||||
AI_MICRO_TASK_TIMEOUT_MS=15000
 | 
			
		||||
 | 
			
		||||
# === Rate Limiting Configuration ===
 | 
			
		||||
# Main query rate limiting (reduced due to micro-tasks)
 | 
			
		||||
AI_RATE_LIMIT_DELAY_MS=3000
 | 
			
		||||
AI_RATE_LIMIT_MAX_REQUESTS=8
 | 
			
		||||
 | 
			
		||||
# Smart prompting rate limiting 
 | 
			
		||||
AI_SMART_PROMPTING_RATE_LIMIT=5
 | 
			
		||||
AI_SMART_PROMPTING_WINDOW_MS=60000
 | 
			
		||||
 | 
			
		||||
# Queue management settings
 | 
			
		||||
AI_QUEUE_MAX_SIZE=50
 | 
			
		||||
AI_QUEUE_CLEANUP_INTERVAL_MS=300000
 | 
			
		||||
 | 
			
		||||
# === Performance & Monitoring ===
 | 
			
		||||
# Enable detailed micro-task logging
 | 
			
		||||
AI_MICRO_TASK_DEBUG=false
 | 
			
		||||
 | 
			
		||||
# Enable performance metrics collection
 | 
			
		||||
AI_PERFORMANCE_METRICS=true
 | 
			
		||||
 | 
			
		||||
# Cache settings for AI responses
 | 
			
		||||
AI_RESPONSE_CACHE_TTL_MS=3600000
 | 
			
		||||
 | 
			
		||||
# === Application Configuration ===
 | 
			
		||||
PUBLIC_BASE_URL=http://localhost:4321
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										203524
									
								
								data/embeddings.json
									
									
									
									
									
								
							
							
						
						
									
										203524
									
								
								data/embeddings.json
									
									
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@ -1,4 +1,4 @@
 | 
			
		||||
// src/pages/api/ai/query.ts
 | 
			
		||||
// src/pages/api/ai/query.ts - Enhanced for micro-task pipeline
 | 
			
		||||
import type { APIRoute } from 'astro';
 | 
			
		||||
import { withAPIAuth } from '../../../utils/auth.js';
 | 
			
		||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
 | 
			
		||||
@ -8,8 +8,14 @@ import { aiPipeline } from '../../../utils/aiPipeline.js';
 | 
			
		||||
export const prerender = false;
 | 
			
		||||
 | 
			
		||||
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
 | 
			
		||||
const RATE_LIMIT_WINDOW = 60 * 1000; 
 | 
			
		||||
const RATE_LIMIT_MAX = 10; 
 | 
			
		||||
 | 
			
		||||
// Enhanced rate limiting for micro-task architecture
 | 
			
		||||
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
 | 
			
		||||
const RATE_LIMIT_MAX = parseInt(process.env.AI_RATE_LIMIT_MAX_REQUESTS || '8', 10); // Reduced due to micro-tasks
 | 
			
		||||
 | 
			
		||||
// Micro-task specific rate limiting
 | 
			
		||||
const MICRO_TASK_RATE_LIMIT = parseInt(process.env.AI_MICRO_TASK_RATE_LIMIT || '30', 10);
 | 
			
		||||
const microTaskRateLimitStore = new Map<string, { count: number; resetTime: number }>();
 | 
			
		||||
 | 
			
		||||
function sanitizeInput(input: string): string {
 | 
			
		||||
  let sanitized = input
 | 
			
		||||
@ -41,13 +47,40 @@ function checkRateLimit(userId: string): boolean {
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Enhanced: Check micro-task rate limiting
 | 
			
		||||
/**
 * Counts one micro-task pipeline request for `userId` and reports whether it
 * fits into that user's budget for the current window.
 *
 * A request that arrives with no stored entry, or after the stored window has
 * expired, opens a new window lasting RATE_LIMIT_WINDOW milliseconds and is
 * always allowed. Later requests in the same window are allowed until the
 * stored count reaches MICRO_TASK_RATE_LIMIT.
 *
 * @param userId - Key under which the per-user counter is stored.
 * @returns Whether the request is allowed and how many requests remain in the
 *          current window.
 */
function checkMicroTaskRateLimit(userId: string): { allowed: boolean; remaining: number } {
  const timestamp = Date.now();
  const entry = microTaskRateLimitStore.get(userId);

  // An unexpired window exists: enforce the budget against its counter.
  if (entry && timestamp <= entry.resetTime) {
    if (entry.count >= MICRO_TASK_RATE_LIMIT) {
      return { allowed: false, remaining: 0 };
    }

    entry.count += 1;
    return { allowed: true, remaining: MICRO_TASK_RATE_LIMIT - entry.count };
  }

  // No entry yet, or the previous window ran out: start counting from one.
  microTaskRateLimitStore.set(userId, {
    count: 1,
    resetTime: timestamp + RATE_LIMIT_WINDOW
  });
  return { allowed: true, remaining: MICRO_TASK_RATE_LIMIT - 1 };
}
 | 
			
		||||
 | 
			
		||||
/**
 * Drops every rate-limit entry whose window has already ended, first from the
 * main store and then from the micro-task store, so neither map keeps entries
 * for users whose window is over.
 */
function cleanupExpiredRateLimits() {
  const cutoff = Date.now();

  // Both stores share the same entry shape, so one sweep handles each in turn.
  for (const store of [rateLimitStore, microTaskRateLimitStore]) {
    store.forEach((entry, key) => {
      if (entry.resetTime < cutoff) {
        store.delete(key);
      }
    });
  }
}
 | 
			
		||||
 | 
			
		||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
 | 
			
		||||
@ -61,73 +94,104 @@ export const POST: APIRoute = async ({ request }) => {
 | 
			
		||||
    
 | 
			
		||||
    const userId = authResult.userId;
 | 
			
		||||
 | 
			
		||||
    // Check main rate limit
 | 
			
		||||
    if (!checkRateLimit(userId)) {
 | 
			
		||||
      return apiError.rateLimit('Rate limit exceeded');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Enhanced: Check micro-task rate limit
 | 
			
		||||
    const microTaskLimit = checkMicroTaskRateLimit(userId);
 | 
			
		||||
    if (!microTaskLimit.allowed) {
 | 
			
		||||
      return apiError.rateLimit(
 | 
			
		||||
        `Micro-task rate limit exceeded. The new AI pipeline uses multiple smaller requests. Please wait before trying again.`
 | 
			
		||||
      );
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const body = await request.json();
 | 
			
		||||
    const { query, mode = 'workflow', taskId: clientTaskId } = body;
 | 
			
		||||
 | 
			
		||||
    console.log(`[AI API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
 | 
			
		||||
    console.log(`[MICRO-TASK API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
 | 
			
		||||
    console.log(`[MICRO-TASK API] Micro-task rate limit remaining: ${microTaskLimit.remaining}`);
 | 
			
		||||
 | 
			
		||||
    if (!query || typeof query !== 'string') {
 | 
			
		||||
      console.log(`[AI API] Invalid query for task ${clientTaskId}`);
 | 
			
		||||
      console.log(`[MICRO-TASK API] Invalid query for task ${clientTaskId}`);
 | 
			
		||||
      return apiError.badRequest('Query required');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (!['workflow', 'tool'].includes(mode)) {
 | 
			
		||||
      console.log(`[AI API] Invalid mode for task ${clientTaskId}: ${mode}`);
 | 
			
		||||
      console.log(`[MICRO-TASK API] Invalid mode for task ${clientTaskId}: ${mode}`);
 | 
			
		||||
      return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const sanitizedQuery = sanitizeInput(query);
 | 
			
		||||
    if (sanitizedQuery.includes('[FILTERED]')) {
 | 
			
		||||
      console.log(`[AI API] Filtered input detected for task ${clientTaskId}`);
 | 
			
		||||
      console.log(`[MICRO-TASK API] Filtered input detected for task ${clientTaskId}`);
 | 
			
		||||
      return apiError.badRequest('Invalid input detected');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
 | 
			
		||||
    
 | 
			
		||||
    console.log(`[AI API] About to enqueue task ${taskId}`);
 | 
			
		||||
    console.log(`[MICRO-TASK API] About to enqueue micro-task pipeline ${taskId}`);
 | 
			
		||||
    
 | 
			
		||||
    // Use the new AI pipeline instead of direct API calls
 | 
			
		||||
    // Use the enhanced micro-task AI pipeline
 | 
			
		||||
    const result = await enqueueApiCall(() => 
 | 
			
		||||
      aiPipeline.processQuery(sanitizedQuery, mode)
 | 
			
		||||
    , taskId);
 | 
			
		||||
 | 
			
		||||
    if (!result || !result.recommendation) {
 | 
			
		||||
      return apiServerError.unavailable('No response from AI pipeline');
 | 
			
		||||
      return apiServerError.unavailable('No response from micro-task AI pipeline');
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Add processing statistics to the response for debugging/monitoring
 | 
			
		||||
    console.log(`[AI Query] Mode: ${mode}, User: ${userId}, Query length: ${sanitizedQuery.length}`);
 | 
			
		||||
    console.log(`[AI Query] Processing stats:`, result.processingStats);
 | 
			
		||||
    console.log(`[AI Query] Tools: ${result.recommendation.recommended_tools?.length || 0}, Concepts: ${result.recommendation.background_knowledge?.length || 0}`);
 | 
			
		||||
    // Enhanced: Log micro-task statistics
 | 
			
		||||
    const stats = result.processingStats;
 | 
			
		||||
    console.log(`[MICRO-TASK API] Pipeline completed for ${taskId}:`);
 | 
			
		||||
    console.log(`  - Mode: ${mode}`);
 | 
			
		||||
    console.log(`  - User: ${userId}`);
 | 
			
		||||
    console.log(`  - Query length: ${sanitizedQuery.length}`);
 | 
			
		||||
    console.log(`  - Processing time: ${stats.processingTimeMs}ms`);
 | 
			
		||||
    console.log(`  - Micro-tasks completed: ${stats.microTasksCompleted}`);
 | 
			
		||||
    console.log(`  - Micro-tasks failed: ${stats.microTasksFailed}`);
 | 
			
		||||
    console.log(`  - Embeddings used: ${stats.embeddingsUsed}`);
 | 
			
		||||
    console.log(`  - Final items: ${stats.finalSelectedItems}`);
 | 
			
		||||
 | 
			
		||||
    // Enhanced: Include pipeline information in response
 | 
			
		||||
    return new Response(JSON.stringify({
 | 
			
		||||
      success: true,
 | 
			
		||||
      mode,
 | 
			
		||||
      taskId,
 | 
			
		||||
      recommendation: result.recommendation,
 | 
			
		||||
      query: sanitizedQuery,
 | 
			
		||||
      processingStats: result.processingStats // Include stats for monitoring
 | 
			
		||||
      processingStats: {
 | 
			
		||||
        ...result.processingStats,
 | 
			
		||||
        // Add micro-task specific info
 | 
			
		||||
        pipelineType: 'micro-task',
 | 
			
		||||
        microTasksSuccessRate: stats.microTasksCompleted / (stats.microTasksCompleted + stats.microTasksFailed),
 | 
			
		||||
        averageTaskTime: stats.processingTimeMs / (stats.microTasksCompleted + stats.microTasksFailed)
 | 
			
		||||
      },
 | 
			
		||||
      // Enhanced: Rate limiting info for client
 | 
			
		||||
      rateLimitInfo: {
 | 
			
		||||
        remaining: microTaskLimit.remaining,
 | 
			
		||||
        resetTime: Date.now() + RATE_LIMIT_WINDOW
 | 
			
		||||
      }
 | 
			
		||||
    }), {
 | 
			
		||||
      status: 200,
 | 
			
		||||
      headers: { 'Content-Type': 'application/json' }
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
  } catch (error) {
 | 
			
		||||
    console.error('AI query error:', error);
 | 
			
		||||
    console.error('[MICRO-TASK API] Pipeline error:', error);
 | 
			
		||||
    
 | 
			
		||||
    // Provide more specific error messages based on error type
 | 
			
		||||
    // Enhanced: More specific error messages for micro-task pipeline
 | 
			
		||||
    if (error.message.includes('embeddings')) {
 | 
			
		||||
      return apiServerError.unavailable('Embeddings service error - falling back to basic processing');
 | 
			
		||||
      return apiServerError.unavailable('Embeddings service error - falling back to selector AI');
 | 
			
		||||
    } else if (error.message.includes('micro-task')) {
 | 
			
		||||
      return apiServerError.unavailable('Micro-task pipeline error - some analysis steps may have failed');
 | 
			
		||||
    } else if (error.message.includes('selector')) {
 | 
			
		||||
      return apiServerError.unavailable('AI selector service error');
 | 
			
		||||
    } else if (error.message.includes('analyzer')) {
 | 
			
		||||
      return apiServerError.unavailable('AI analyzer service error');
 | 
			
		||||
    } else if (error.message.includes('rate limit')) {
 | 
			
		||||
      return apiError.rateLimit('AI service rate limits exceeded due to micro-task processing');
 | 
			
		||||
    } else {
 | 
			
		||||
      return apiServerError.internal('Internal server error');
 | 
			
		||||
      return apiServerError.internal('Micro-task AI pipeline error');
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
@ -1818,6 +1818,130 @@ input[type="checkbox"] {
 | 
			
		||||
  border-left-color: var(--color-warning);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Micro-Task Progress Styles */
 | 
			
		||||
 | 
			
		||||
/* Micro-task progress indicator */
 | 
			
		||||
.micro-task-progress {
 | 
			
		||||
  background-color: var(--color-bg-secondary);
 | 
			
		||||
  border: 1px solid var(--color-border);
 | 
			
		||||
  border-radius: 0.5rem;
 | 
			
		||||
  padding: 1rem;
 | 
			
		||||
  margin: 1rem 0;
 | 
			
		||||
  transition: var(--transition-fast);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-task-header {
 | 
			
		||||
  display: flex;
 | 
			
		||||
  justify-content: space-between;
 | 
			
		||||
  align-items: center;
 | 
			
		||||
  margin-bottom: 0.75rem;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-task-label {
 | 
			
		||||
  font-weight: 600;
 | 
			
		||||
  color: var(--color-primary);
 | 
			
		||||
  font-size: 0.875rem;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-task-counter {
 | 
			
		||||
  background-color: var(--color-primary);
 | 
			
		||||
  color: white;
 | 
			
		||||
  padding: 0.25rem 0.5rem;
 | 
			
		||||
  border-radius: 1rem;
 | 
			
		||||
  font-size: 0.75rem;
 | 
			
		||||
  font-weight: 600;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-task-steps {
 | 
			
		||||
  display: grid;
 | 
			
		||||
  grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
 | 
			
		||||
  gap: 0.5rem;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-step {
 | 
			
		||||
  background-color: var(--color-bg);
 | 
			
		||||
  border: 1px solid var(--color-border);
 | 
			
		||||
  border-radius: 0.375rem;
 | 
			
		||||
  padding: 0.5rem;
 | 
			
		||||
  font-size: 0.75rem;
 | 
			
		||||
  text-align: center;
 | 
			
		||||
  transition: var(--transition-fast);
 | 
			
		||||
  opacity: 0.6;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-step.active {
 | 
			
		||||
  background-color: var(--color-primary);
 | 
			
		||||
  color: white;
 | 
			
		||||
  border-color: var(--color-primary);
 | 
			
		||||
  opacity: 1;
 | 
			
		||||
  transform: scale(1.05);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-step.completed {
 | 
			
		||||
  background-color: var(--color-accent);
 | 
			
		||||
  color: white;
 | 
			
		||||
  border-color: var(--color-accent);
 | 
			
		||||
  opacity: 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-step.failed {
 | 
			
		||||
  background-color: var(--color-error);
 | 
			
		||||
  color: white;
 | 
			
		||||
  border-color: var(--color-error);
 | 
			
		||||
  opacity: 1;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Enhanced queue status for micro-tasks */
 | 
			
		||||
.queue-status-card.micro-task-mode {
 | 
			
		||||
  border-left: 4px solid var(--color-primary);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.queue-status-card.micro-task-mode .queue-header {
 | 
			
		||||
  background: linear-gradient(135deg, var(--color-primary) 0%, var(--color-accent) 100%);
 | 
			
		||||
  color: white;
 | 
			
		||||
  margin: -1rem -1rem 1rem -1rem;
 | 
			
		||||
  padding: 1rem;
 | 
			
		||||
  border-radius: 0.5rem 0.5rem 0 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Mobile responsive adjustments */
 | 
			
		||||
@media (max-width: 768px) {
 | 
			
		||||
  .micro-task-steps {
 | 
			
		||||
    grid-template-columns: repeat(2, 1fr);
 | 
			
		||||
    gap: 0.375rem;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  .micro-step {
 | 
			
		||||
    font-size: 0.6875rem;
 | 
			
		||||
    padding: 0.375rem;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  .micro-task-header {
 | 
			
		||||
    flex-direction: column;
 | 
			
		||||
    gap: 0.5rem;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* Animation for micro-task progress */
 | 
			
		||||
@keyframes micro-task-pulse {
 | 
			
		||||
  0%, 100% { opacity: 1; }
 | 
			
		||||
  50% { opacity: 0.7; }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-step.active {
 | 
			
		||||
  animation: micro-task-pulse 2s ease-in-out infinite;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@keyframes micro-task-complete {
 | 
			
		||||
  0% { transform: scale(1); }
 | 
			
		||||
  50% { transform: scale(1.1); }
 | 
			
		||||
  100% { transform: scale(1); }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.micro-step.completed {
 | 
			
		||||
  animation: micro-task-complete 0.6s ease-out;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* ===================================================================
 | 
			
		||||
   17. WORKFLOW SYSTEM (CONSOLIDATED)
 | 
			
		||||
   ================================================================= */
 | 
			
		||||
 | 
			
		||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@ -1,3 +1,4 @@
 | 
			
		||||
// src/utils/dataService.ts - Enhanced for micro-task AI pipeline
 | 
			
		||||
import { promises as fs } from 'fs';
 | 
			
		||||
import { load } from 'js-yaml';
 | 
			
		||||
import path from 'path';
 | 
			
		||||
@ -28,23 +29,38 @@ const ToolsDataSchema = z.object({
 | 
			
		||||
  tools: z.array(ToolSchema),
 | 
			
		||||
  domains: z.array(z.object({
 | 
			
		||||
    id: z.string(),
 | 
			
		||||
    name: z.string()
 | 
			
		||||
    name: z.string(),
 | 
			
		||||
    description: z.string().optional() // Enhanced: allow descriptions
 | 
			
		||||
  })),
 | 
			
		||||
  phases: z.array(z.object({
 | 
			
		||||
    id: z.string(), 
 | 
			
		||||
    name: z.string(),
 | 
			
		||||
    description: z.string().optional()
 | 
			
		||||
    description: z.string().optional(),
 | 
			
		||||
    typical_tools: z.array(z.string()).optional().default([]), // Enhanced: example tools
 | 
			
		||||
    key_activities: z.array(z.string()).optional().default([]) // Enhanced: key activities
 | 
			
		||||
  })),
 | 
			
		||||
  'domain-agnostic-software': z.array(z.object({
 | 
			
		||||
    id: z.string(),
 | 
			
		||||
    name: z.string(),
 | 
			
		||||
    description: z.string().optional()
 | 
			
		||||
    description: z.string().optional(),
 | 
			
		||||
    use_cases: z.array(z.string()).optional().default([]) // Enhanced: use cases
 | 
			
		||||
  })).optional().default([]),
 | 
			
		||||
  scenarios: z.array(z.object({
 | 
			
		||||
    id: z.string(),
 | 
			
		||||
    icon: z.string(),
 | 
			
		||||
    friendly_name: z.string()
 | 
			
		||||
    friendly_name: z.string(),
 | 
			
		||||
    description: z.string().optional(), // Enhanced: scenario descriptions
 | 
			
		||||
    typical_phases: z.array(z.string()).optional().default([]), // Enhanced: typical phases
 | 
			
		||||
    complexity: z.enum(['low', 'medium', 'high']).optional() // Enhanced: complexity indicator
 | 
			
		||||
  })).optional().default([]),
 | 
			
		||||
  // Enhanced: Skill level definitions for better AI understanding
 | 
			
		||||
  skill_levels: z.object({
 | 
			
		||||
    novice: z.string().optional(),
 | 
			
		||||
    beginner: z.string().optional(), 
 | 
			
		||||
    intermediate: z.string().optional(),
 | 
			
		||||
    advanced: z.string().optional(),
 | 
			
		||||
    expert: z.string().optional()
 | 
			
		||||
  }).optional().default({})
 | 
			
		||||
});
 | 
			
		||||
 | 
			
		||||
interface ToolsData {
 | 
			
		||||
@ -53,21 +69,49 @@ interface ToolsData {
 | 
			
		||||
  phases: any[];
 | 
			
		||||
  'domain-agnostic-software': any[];
 | 
			
		||||
  scenarios: any[];
 | 
			
		||||
  skill_levels?: any;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
interface CompressedToolsData {
 | 
			
		||||
interface EnhancedCompressedToolsData {
 | 
			
		||||
  tools: any[];
 | 
			
		||||
  concepts: any[];
 | 
			
		||||
  domains: any[];
 | 
			
		||||
  phases: any[];
 | 
			
		||||
  'domain-agnostic-software': any[];
 | 
			
		||||
  scenarios?: any[]; // Optional for AI processing
 | 
			
		||||
  skill_levels: any;
 | 
			
		||||
  // Enhanced context for micro-tasks
 | 
			
		||||
  domain_relationships: DomainRelationship[];
 | 
			
		||||
  phase_dependencies: PhaseDependency[];
 | 
			
		||||
  tool_compatibility_matrix: CompatibilityMatrix[];
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
interface DomainRelationship {
 | 
			
		||||
  domain_id: string;
 | 
			
		||||
  tool_count: number;
 | 
			
		||||
  common_tags: string[];
 | 
			
		||||
  skill_distribution: Record<string, number>;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
interface PhaseDependency {
 | 
			
		||||
  phase_id: string;
 | 
			
		||||
  order: number;
 | 
			
		||||
  depends_on: string | null;
 | 
			
		||||
  enables: string | null;
 | 
			
		||||
  is_parallel_capable: boolean;
 | 
			
		||||
  typical_duration: string;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
interface CompatibilityMatrix {
 | 
			
		||||
  type: string;
 | 
			
		||||
  groups: Record<string, string[]>;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
let cachedData: ToolsData | null = null;
 | 
			
		||||
let cachedRandomizedData: ToolsData | null = null;
 | 
			
		||||
let cachedCompressedData: CompressedToolsData | null = null;
 | 
			
		||||
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
 | 
			
		||||
let lastRandomizationDate: string | null = null;
 | 
			
		||||
let dataVersion: string | null = null; // Add version tracking for embeddings
 | 
			
		||||
let dataVersion: string | null = null;
 | 
			
		||||
 | 
			
		||||
function seededRandom(seed: number): () => number {
 | 
			
		||||
  let x = Math.sin(seed) * 10000;
 | 
			
		||||
@ -92,18 +136,115 @@ function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
 | 
			
		||||
  return shuffled;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Generate a simple hash of the data for version tracking
 | 
			
		||||
/**
 * Derives a short version string from `data` for change tracking.
 *
 * The value is serialized to canonical JSON (object keys sorted at every
 * nesting level, so key order never affects the result) and folded into a
 * 32-bit rolling hash, returned as a base-36 string.
 *
 * The previous implementation passed `Object.keys(data).sort()` as the
 * `JSON.stringify` replacer. An array replacer is a property allowlist that
 * applies to nested objects too, so every nested field whose name was not
 * also a top-level key was silently dropped and edits to such fields never
 * changed the version. Sorting keys through a replacer function keeps all
 * fields in the hash. Versions produced by this function therefore differ
 * from those produced before the fix.
 *
 * @param data - Any JSON-serializable value.
 * @returns Base-36 representation of the absolute 32-bit hash; `'0'` when
 *          `data` serializes to nothing (e.g. `undefined`).
 */
function generateDataVersion(data: any): string {
  const canonical =
    JSON.stringify(data, (_key: string, value: any) => {
      // Rebuild plain objects with sorted keys; arrays keep their order.
      if (value !== null && typeof value === 'object' && !Array.isArray(value)) {
        const sorted: Record<string, unknown> = {};
        for (const key of Object.keys(value).sort()) {
          sorted[key] = value[key];
        }
        return sorted;
      }
      return value;
    }) ?? '';

  let hash = 0;
  for (let i = 0; i < canonical.length; i++) {
    // hash * 31 + charCode, truncated to a signed 32-bit integer.
    hash = ((hash << 5) - hash + canonical.charCodeAt(i)) | 0;
  }
  return Math.abs(hash).toString(36);
}
 | 
			
		||||
 | 
			
		||||
// Enhanced: Generate domain relationships for better AI understanding
 | 
			
		||||
/**
 * Builds one summary record per domain describing the tools assigned to it.
 *
 * For each domain the record holds the number of tools whose `domains` list
 * contains the domain id, the (up to) five most frequent tags among those
 * tools, and a count of tools per `skillLevel`.
 *
 * Counting uses `Map` rather than a plain object accumulator. With a plain
 * object, a tag or skill level named like an `Object.prototype` member
 * (`constructor`, `toString`, ...) started from the inherited value instead
 * of zero, which produced a string "count" and a NaN sort comparison.
 *
 * @param domains - Domain entries; only `id` is read.
 * @param tools - Tool entries; `domains`, `tags` and `skillLevel` are read.
 * @returns One relationship record per domain, in the order of `domains`.
 */
function generateDomainRelationships(domains: any[], tools: any[]): DomainRelationship[] {
  return domains.map((domain: any): DomainRelationship => {
    const domainTools = tools.filter((tool: any) => tool.domains?.includes(domain.id));

    // Tally tag occurrences across all tools of this domain.
    const tagCounts = new Map<string, number>();
    for (const tag of domainTools.flatMap((tool: any) => tool.tags || [])) {
      const key = String(tag);
      tagCounts.set(key, (tagCounts.get(key) ?? 0) + 1);
    }

    // Most frequent first; the sort is stable, so ties keep first-seen order.
    const topTags = [...tagCounts.entries()]
      .sort(([, a], [, b]) => b - a)
      .slice(0, 5)
      .map(([tag]) => tag);

    // Tally tools per skill level (a missing level is counted under "undefined").
    const skillCounts = new Map<string, number>();
    for (const tool of domainTools) {
      const level = String(tool.skillLevel);
      skillCounts.set(level, (skillCounts.get(level) ?? 0) + 1);
    }

    return {
      domain_id: domain.id,
      tool_count: domainTools.length,
      common_tags: topTags,
      skill_distribution: Object.fromEntries(skillCounts)
    };
  });
}
 | 
			
		||||
 | 
			
		||||
// Enhanced: Generate phase dependencies
 | 
			
		||||
/**
 * Describes each phase's position in the sequence given by `phases`.
 *
 * Every phase depends on the entry before it and enables the entry after it
 * (`null` at either end). `order` is the 1-based position. Only the
 * `examination` and `analysis` phases are flagged as parallel capable, and
 * the typical duration is a fixed label looked up by phase id.
 *
 * @param phases - Ordered phase entries; only `id` is read.
 * @returns One dependency record per phase, in input order.
 */
function generatePhaseDependencies(phases: any[]): PhaseDependency[] {
  // Phases not listed here fall back to 'hours-days'.
  const durationByPhase = new Map<string, string>([
    ['data-collection', 'hours-days'],
    ['examination', 'hours-weeks'],
    ['analysis', 'days-weeks']
  ]);

  const result: PhaseDependency[] = [];

  for (const [index, current] of phases.entries()) {
    const predecessor = phases[index - 1];
    const successor = phases[index + 1];
    const runsInParallel = current.id === 'examination' || current.id === 'analysis';

    result.push({
      phase_id: current.id,
      order: index + 1,
      depends_on: predecessor?.id || null,
      enables: successor?.id || null,
      is_parallel_capable: runsInParallel,
      typical_duration: durationByPhase.get(current.id) ?? 'hours-days'
    });
  }

  return result;
}
 | 
			
		||||
 | 
			
		||||
// Enhanced: Generate tool compatibility matrix
 | 
			
		||||
/**
 * Groups tool names by shared characteristics.
 *
 * Returns two entries: `platform_compatibility`, mapping each platform to the
 * names of the tools listing it, and `phase_synergy`, mapping each phase to
 * the names of the tools listing it.
 *
 * @param tools - Tool entries; `name`, `platforms` and `phases` are read.
 * @returns The two grouping records, platforms first.
 */
function generateToolCompatibilityMatrix(tools: any[]): CompatibilityMatrix[] {
  return [
    { type: 'platform_compatibility', groups: groupToolNamesBy(tools, 'platforms') },
    { type: 'phase_synergy', groups: groupToolNamesBy(tools, 'phases') }
  ];
}

/**
 * Collects tool names under every value found in the tool's `field` list.
 *
 * Grouping goes through a `Map` so that a value named like an
 * `Object.prototype` member (e.g. `constructor`) gets its own bucket instead
 * of hitting the inherited property and failing on `.push`. Tools whose
 * `field` is missing or not an array are skipped rather than throwing.
 */
function groupToolNamesBy(tools: any[], field: 'platforms' | 'phases'): Record<string, string[]> {
  const groups = new Map<string, string[]>();

  for (const tool of tools) {
    const values: unknown = tool[field];
    if (!Array.isArray(values)) continue;

    for (const value of values) {
      const key = String(value);
      const names = groups.get(key);
      if (names) {
        names.push(tool.name);
      } else {
        groups.set(key, [tool.name]);
      }
    }
  }

  return Object.fromEntries(groups);
}
 | 
			
		||||
 | 
			
		||||
async function loadRawData(): Promise<ToolsData> {
 | 
			
		||||
  if (!cachedData) {
 | 
			
		||||
    const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
 | 
			
		||||
@ -113,9 +254,19 @@ async function loadRawData(): Promise<ToolsData> {
 | 
			
		||||
    try {
 | 
			
		||||
      cachedData = ToolsDataSchema.parse(rawData);
 | 
			
		||||
      
 | 
			
		||||
      // Generate data version for embeddings tracking
 | 
			
		||||
      // Enhanced: Add default skill level descriptions if not provided
 | 
			
		||||
      if (!cachedData.skill_levels || Object.keys(cachedData.skill_levels).length === 0) {
 | 
			
		||||
        cachedData.skill_levels = {
 | 
			
		||||
          novice: "Minimal technical background required, guided interfaces",
 | 
			
		||||
          beginner: "Basic IT knowledge, some command-line familiarity helpful",
 | 
			
		||||
          intermediate: "Solid technical foundation, comfortable with various tools",
 | 
			
		||||
          advanced: "Extensive experience, deep technical understanding required",
 | 
			
		||||
          expert: "Specialist knowledge, cutting-edge techniques and complex scenarios"
 | 
			
		||||
        };
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      dataVersion = generateDataVersion(cachedData);
 | 
			
		||||
      console.log(`[DATA SERVICE] Loaded data version: ${dataVersion}`);
 | 
			
		||||
      console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);
 | 
			
		||||
      
 | 
			
		||||
    } catch (error) {
 | 
			
		||||
      console.error('YAML validation failed:', error);
 | 
			
		||||
@ -141,42 +292,73 @@ export async function getToolsData(): Promise<ToolsData> {
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    lastRandomizationDate = today;
 | 
			
		||||
    
 | 
			
		||||
    // Clear compressed cache when randomized data changes
 | 
			
		||||
    cachedCompressedData = null;
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  return cachedRandomizedData;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
export async function getCompressedToolsDataForAI(): Promise<CompressedToolsData> {
 | 
			
		||||
export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedToolsData> {
 | 
			
		||||
  if (!cachedCompressedData) {
 | 
			
		||||
    const data = await getToolsData();
 | 
			
		||||
    
 | 
			
		||||
    // Enhanced: More detailed tool information for micro-tasks
 | 
			
		||||
    const compressedTools = data.tools
 | 
			
		||||
      .filter(tool => tool.type !== 'concept') 
 | 
			
		||||
      .map(tool => {
 | 
			
		||||
        const { projectUrl, statusUrl, ...compressedTool } = tool;
 | 
			
		||||
        return compressedTool;
 | 
			
		||||
        return {
 | 
			
		||||
          ...compressedTool,
 | 
			
		||||
          // Enhanced: Add computed fields for AI
 | 
			
		||||
          is_hosted: projectUrl !== undefined && projectUrl !== null && projectUrl !== "" && projectUrl.trim() !== "",
 | 
			
		||||
          is_open_source: tool.license && tool.license !== 'Proprietary',
 | 
			
		||||
          complexity_score: tool.skillLevel === 'expert' ? 5 :
 | 
			
		||||
                           tool.skillLevel === 'advanced' ? 4 :
 | 
			
		||||
                           tool.skillLevel === 'intermediate' ? 3 :
 | 
			
		||||
                           tool.skillLevel === 'beginner' ? 2 : 1,
 | 
			
		||||
          // Enhanced: Phase-specific suitability hints
 | 
			
		||||
          phase_suitability: tool.phases?.map(phase => ({
 | 
			
		||||
            phase,
 | 
			
		||||
            primary_use: tool.tags?.find(tag => tag.includes(phase)) ? 'primary' : 'secondary'
 | 
			
		||||
          })) || []
 | 
			
		||||
        };
 | 
			
		||||
      });
 | 
			
		||||
    
 | 
			
		||||
    const concepts = data.tools
 | 
			
		||||
      .filter(tool => tool.type === 'concept')
 | 
			
		||||
      .map(concept => {
 | 
			
		||||
        const { projectUrl, statusUrl, platforms, accessType, license, ...compressedConcept } = concept;
 | 
			
		||||
        return compressedConcept;
 | 
			
		||||
        return {
 | 
			
		||||
          ...compressedConcept,
 | 
			
		||||
          // Enhanced: Learning difficulty indicator
 | 
			
		||||
          learning_complexity: concept.skillLevel === 'expert' ? 'very_high' :
 | 
			
		||||
                              concept.skillLevel === 'advanced' ? 'high' :
 | 
			
		||||
                              concept.skillLevel === 'intermediate' ? 'medium' :
 | 
			
		||||
                              'low'
 | 
			
		||||
        };
 | 
			
		||||
      });
 | 
			
		||||
    
 | 
			
		||||
    // Enhanced: Add rich context data
 | 
			
		||||
    const domainRelationships = generateDomainRelationships(data.domains, compressedTools);
 | 
			
		||||
    const phaseDependencies = generatePhaseDependencies(data.phases);
 | 
			
		||||
    const toolCompatibilityMatrix = generateToolCompatibilityMatrix(compressedTools);
 | 
			
		||||
    
 | 
			
		||||
    cachedCompressedData = {
 | 
			
		||||
      tools: compressedTools,
 | 
			
		||||
      concepts: concepts,
 | 
			
		||||
      domains: data.domains,
 | 
			
		||||
      phases: data.phases,
 | 
			
		||||
      'domain-agnostic-software': data['domain-agnostic-software']
 | 
			
		||||
      // scenarios intentionally excluded from AI data
 | 
			
		||||
      'domain-agnostic-software': data['domain-agnostic-software'],
 | 
			
		||||
      scenarios: data.scenarios, // Include scenarios for context
 | 
			
		||||
      skill_levels: data.skill_levels || {},
 | 
			
		||||
      // Enhanced context for micro-tasks
 | 
			
		||||
      domain_relationships: domainRelationships,
 | 
			
		||||
      phase_dependencies: phaseDependencies,
 | 
			
		||||
      tool_compatibility_matrix: toolCompatibilityMatrix
 | 
			
		||||
    };
 | 
			
		||||
    
 | 
			
		||||
    console.log(`[DATA SERVICE] Generated compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
 | 
			
		||||
    console.log(`[DATA SERVICE] Generated enhanced compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
 | 
			
		||||
    console.log(`[DATA SERVICE] Added context: ${domainRelationships.length} domain relationships, ${phaseDependencies.length} phase dependencies`);
 | 
			
		||||
  }
 | 
			
		||||
  
 | 
			
		||||
  return cachedCompressedData;
 | 
			
		||||
@ -193,5 +375,5 @@ export function clearCache(): void {
 | 
			
		||||
  lastRandomizationDate = null;
 | 
			
		||||
  dataVersion = null;
 | 
			
		||||
  
 | 
			
		||||
  console.log('[DATA SERVICE] Cache cleared');
 | 
			
		||||
  console.log('[DATA SERVICE] Enhanced cache cleared');
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user