embeddings-1 #2
37
.env.example
37
.env.example
@ -40,9 +40,40 @@ AI_EMBEDDINGS_BATCH_DELAY_MS=1000
|
|||||||
AI_EMBEDDING_CANDIDATES=30
|
AI_EMBEDDING_CANDIDATES=30
|
||||||
AI_SIMILARITY_THRESHOLD=0.3
|
AI_SIMILARITY_THRESHOLD=0.3
|
||||||
|
|
||||||
# === AI Processing Configuration ===
|
# Delay between micro-tasks to respect rate limits (milliseconds)
|
||||||
AI_MAX_SELECTED_ITEMS=15
|
AI_MICRO_TASK_DELAY_MS=500
|
||||||
AI_RATE_LIMIT_DELAY_MS=2000
|
|
||||||
|
# Micro-task specific rate limiting (requests per minute per user)
|
||||||
|
AI_MICRO_TASK_RATE_LIMIT=30
|
||||||
|
|
||||||
|
# Maximum parallel micro-tasks (for future parallel processing)
|
||||||
|
AI_MAX_PARALLEL_TASKS=3
|
||||||
|
|
||||||
|
# Micro-task timeout settings (milliseconds)
|
||||||
|
AI_MICRO_TASK_TIMEOUT_MS=15000
|
||||||
|
|
||||||
|
# === Rate Limiting Configuration ===
|
||||||
|
# Main query rate limiting (lowered because each query now fans out into several micro-task requests)
|
||||||
|
AI_RATE_LIMIT_DELAY_MS=3000
|
||||||
|
AI_RATE_LIMIT_MAX_REQUESTS=8
|
||||||
|
|
||||||
|
# Smart prompting rate limiting
|
||||||
|
AI_SMART_PROMPTING_RATE_LIMIT=5
|
||||||
|
AI_SMART_PROMPTING_WINDOW_MS=60000
|
||||||
|
|
||||||
|
# Queue management settings
|
||||||
|
AI_QUEUE_MAX_SIZE=50
|
||||||
|
AI_QUEUE_CLEANUP_INTERVAL_MS=300000
|
||||||
|
|
||||||
|
# === Performance & Monitoring ===
|
||||||
|
# Enable detailed micro-task logging
|
||||||
|
AI_MICRO_TASK_DEBUG=false
|
||||||
|
|
||||||
|
# Enable performance metrics collection
|
||||||
|
AI_PERFORMANCE_METRICS=true
|
||||||
|
|
||||||
|
# Cache settings for AI responses
|
||||||
|
AI_RESPONSE_CACHE_TTL_MS=3600000
|
||||||
|
|
||||||
# === Application Configuration ===
|
# === Application Configuration ===
|
||||||
PUBLIC_BASE_URL=http://localhost:4321
|
PUBLIC_BASE_URL=http://localhost:4321
|
||||||
|
203524
data/embeddings.json
203524
data/embeddings.json
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@
|
|||||||
// src/pages/api/ai/query.ts
|
// src/pages/api/ai/query.ts - Enhanced for micro-task pipeline
|
||||||
import type { APIRoute } from 'astro';
|
import type { APIRoute } from 'astro';
|
||||||
import { withAPIAuth } from '../../../utils/auth.js';
|
import { withAPIAuth } from '../../../utils/auth.js';
|
||||||
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
|
||||||
@ -8,8 +8,14 @@ import { aiPipeline } from '../../../utils/aiPipeline.js';
|
|||||||
export const prerender = false;
|
export const prerender = false;
|
||||||
|
|
||||||
/** Per-user request counters for the main query rate limit, keyed by user id. */
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();

/**
 * Reads a positive integer from an environment variable.
 *
 * `parseInt` yields `NaN` for non-numeric text, and every `count >= NaN`
 * comparison is false, so an unvalidated value would silently switch the
 * rate limit off. Zero and negative values are rejected as well because they
 * cannot express a usable limit.
 *
 * @param name - Environment variable to read.
 * @param fallback - Value used when the variable is unset, empty, or not a positive integer.
 * @returns The parsed limit, or `fallback`.
 */
function readPositiveIntEnv(name: string, fallback: number): number {
  const parsed = Number.parseInt(process.env[name] ?? '', 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// Enhanced rate limiting for micro-task architecture
/** Length of one rate-limit window in milliseconds (1 minute). */
const RATE_LIMIT_WINDOW = 60 * 1000;
/** Main-query requests allowed per user and window (AI_RATE_LIMIT_MAX_REQUESTS, default 8). */
const RATE_LIMIT_MAX = readPositiveIntEnv('AI_RATE_LIMIT_MAX_REQUESTS', 8);

// Micro-task specific rate limiting
/** Micro-task pipeline requests allowed per user and window (AI_MICRO_TASK_RATE_LIMIT, default 30). */
const MICRO_TASK_RATE_LIMIT = readPositiveIntEnv('AI_MICRO_TASK_RATE_LIMIT', 30);
/** Per-user request counters for the micro-task rate limit, keyed by user id. */
const microTaskRateLimitStore = new Map<string, { count: number; resetTime: number }>();
|
||||||
|
|
||||||
function sanitizeInput(input: string): string {
|
function sanitizeInput(input: string): string {
|
||||||
let sanitized = input
|
let sanitized = input
|
||||||
@ -41,13 +47,40 @@ function checkRateLimit(userId: string): boolean {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Enhanced: Check micro-task rate limiting
|
||||||
|
/**
 * Counts one request against the caller's micro-task quota for the current window.
 *
 * A user with no stored entry, or whose stored window has ended, starts a fresh
 * window of RATE_LIMIT_WINDOW milliseconds with this request counted as the first.
 * Otherwise the request is refused once the stored count has reached
 * MICRO_TASK_RATE_LIMIT; refused requests do not increase the count.
 *
 * @param userId - Key under which the caller's counter is stored.
 * @returns `allowed` tells whether the request may proceed; `remaining` is the
 *          number of requests left in the window after this one (0 when refused).
 */
function checkMicroTaskRateLimit(userId: string): { allowed: boolean; remaining: number } {
  const timestamp = Date.now();
  const entry = microTaskRateLimitStore.get(userId);

  // No entry yet, or the previous window is over: open a new window.
  if (entry === undefined || timestamp > entry.resetTime) {
    microTaskRateLimitStore.set(userId, { count: 1, resetTime: timestamp + RATE_LIMIT_WINDOW });
    return { allowed: true, remaining: MICRO_TASK_RATE_LIMIT - 1 };
  }

  const exhausted = entry.count >= MICRO_TASK_RATE_LIMIT;
  if (!exhausted) {
    entry.count += 1;
  }

  return {
    allowed: !exhausted,
    remaining: exhausted ? 0 : MICRO_TASK_RATE_LIMIT - entry.count
  };
}
|
||||||
|
|
||||||
/**
 * Removes every rate-limit entry whose window has already ended, first from the
 * main store and then from the micro-task store, so that the maps do not keep
 * entries for users whose windows are over.
 */
function cleanupExpiredRateLimits() {
  const cutoff = Date.now();

  // Main rate limits first, micro-task rate limits second.
  for (const store of [rateLimitStore, microTaskRateLimitStore]) {
    store.forEach((entry, userId) => {
      if (cutoff > entry.resetTime) {
        store.delete(userId);
      }
    });
  }
}
|
||||||
|
|
||||||
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
|
||||||
@ -61,73 +94,104 @@ export const POST: APIRoute = async ({ request }) => {
|
|||||||
|
|
||||||
const userId = authResult.userId;
|
const userId = authResult.userId;
|
||||||
|
|
||||||
|
// Check main rate limit
|
||||||
if (!checkRateLimit(userId)) {
|
if (!checkRateLimit(userId)) {
|
||||||
return apiError.rateLimit('Rate limit exceeded');
|
return apiError.rateLimit('Rate limit exceeded');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Enhanced: Check micro-task rate limit
|
||||||
|
const microTaskLimit = checkMicroTaskRateLimit(userId);
|
||||||
|
if (!microTaskLimit.allowed) {
|
||||||
|
return apiError.rateLimit(
|
||||||
|
`Micro-task rate limit exceeded. The new AI pipeline uses multiple smaller requests. Please wait before trying again.`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
const body = await request.json();
|
const body = await request.json();
|
||||||
const { query, mode = 'workflow', taskId: clientTaskId } = body;
|
const { query, mode = 'workflow', taskId: clientTaskId } = body;
|
||||||
|
|
||||||
console.log(`[AI API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
|
console.log(`[MICRO-TASK API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
|
||||||
|
console.log(`[MICRO-TASK API] Micro-task rate limit remaining: ${microTaskLimit.remaining}`);
|
||||||
|
|
||||||
if (!query || typeof query !== 'string') {
|
if (!query || typeof query !== 'string') {
|
||||||
console.log(`[AI API] Invalid query for task ${clientTaskId}`);
|
console.log(`[MICRO-TASK API] Invalid query for task ${clientTaskId}`);
|
||||||
return apiError.badRequest('Query required');
|
return apiError.badRequest('Query required');
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!['workflow', 'tool'].includes(mode)) {
|
if (!['workflow', 'tool'].includes(mode)) {
|
||||||
console.log(`[AI API] Invalid mode for task ${clientTaskId}: ${mode}`);
|
console.log(`[MICRO-TASK API] Invalid mode for task ${clientTaskId}: ${mode}`);
|
||||||
return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
|
return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
|
||||||
}
|
}
|
||||||
|
|
||||||
const sanitizedQuery = sanitizeInput(query);
|
const sanitizedQuery = sanitizeInput(query);
|
||||||
if (sanitizedQuery.includes('[FILTERED]')) {
|
if (sanitizedQuery.includes('[FILTERED]')) {
|
||||||
console.log(`[AI API] Filtered input detected for task ${clientTaskId}`);
|
console.log(`[MICRO-TASK API] Filtered input detected for task ${clientTaskId}`);
|
||||||
return apiError.badRequest('Invalid input detected');
|
return apiError.badRequest('Invalid input detected');
|
||||||
}
|
}
|
||||||
|
|
||||||
const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
|
const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
|
||||||
|
|
||||||
console.log(`[AI API] About to enqueue task ${taskId}`);
|
console.log(`[MICRO-TASK API] About to enqueue micro-task pipeline ${taskId}`);
|
||||||
|
|
||||||
// Use the new AI pipeline instead of direct API calls
|
// Use the enhanced micro-task AI pipeline
|
||||||
const result = await enqueueApiCall(() =>
|
const result = await enqueueApiCall(() =>
|
||||||
aiPipeline.processQuery(sanitizedQuery, mode)
|
aiPipeline.processQuery(sanitizedQuery, mode)
|
||||||
, taskId);
|
, taskId);
|
||||||
|
|
||||||
if (!result || !result.recommendation) {
|
if (!result || !result.recommendation) {
|
||||||
return apiServerError.unavailable('No response from AI pipeline');
|
return apiServerError.unavailable('No response from micro-task AI pipeline');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add processing statistics to the response for debugging/monitoring
|
// Enhanced: Log micro-task statistics
|
||||||
console.log(`[AI Query] Mode: ${mode}, User: ${userId}, Query length: ${sanitizedQuery.length}`);
|
const stats = result.processingStats;
|
||||||
console.log(`[AI Query] Processing stats:`, result.processingStats);
|
console.log(`[MICRO-TASK API] Pipeline completed for ${taskId}:`);
|
||||||
console.log(`[AI Query] Tools: ${result.recommendation.recommended_tools?.length || 0}, Concepts: ${result.recommendation.background_knowledge?.length || 0}`);
|
console.log(` - Mode: ${mode}`);
|
||||||
|
console.log(` - User: ${userId}`);
|
||||||
|
console.log(` - Query length: ${sanitizedQuery.length}`);
|
||||||
|
console.log(` - Processing time: ${stats.processingTimeMs}ms`);
|
||||||
|
console.log(` - Micro-tasks completed: ${stats.microTasksCompleted}`);
|
||||||
|
console.log(` - Micro-tasks failed: ${stats.microTasksFailed}`);
|
||||||
|
console.log(` - Embeddings used: ${stats.embeddingsUsed}`);
|
||||||
|
console.log(` - Final items: ${stats.finalSelectedItems}`);
|
||||||
|
|
||||||
|
// Enhanced: Include pipeline information in response
|
||||||
return new Response(JSON.stringify({
|
return new Response(JSON.stringify({
|
||||||
success: true,
|
success: true,
|
||||||
mode,
|
mode,
|
||||||
taskId,
|
taskId,
|
||||||
recommendation: result.recommendation,
|
recommendation: result.recommendation,
|
||||||
query: sanitizedQuery,
|
query: sanitizedQuery,
|
||||||
processingStats: result.processingStats // Include stats for monitoring
|
processingStats: {
|
||||||
|
...result.processingStats,
|
||||||
|
// Add micro-task specific info
|
||||||
|
pipelineType: 'micro-task',
|
||||||
|
microTasksSuccessRate: stats.microTasksCompleted / (stats.microTasksCompleted + stats.microTasksFailed),
|
||||||
|
averageTaskTime: stats.processingTimeMs / (stats.microTasksCompleted + stats.microTasksFailed)
|
||||||
|
},
|
||||||
|
// Enhanced: Rate limiting info for client
|
||||||
|
rateLimitInfo: {
|
||||||
|
remaining: microTaskLimit.remaining,
|
||||||
|
resetTime: Date.now() + RATE_LIMIT_WINDOW
|
||||||
|
}
|
||||||
}), {
|
}), {
|
||||||
status: 200,
|
status: 200,
|
||||||
headers: { 'Content-Type': 'application/json' }
|
headers: { 'Content-Type': 'application/json' }
|
||||||
});
|
});
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('AI query error:', error);
|
console.error('[MICRO-TASK API] Pipeline error:', error);
|
||||||
|
|
||||||
// Provide more specific error messages based on error type
|
// Enhanced: More specific error messages for micro-task pipeline
|
||||||
if (error.message.includes('embeddings')) {
|
if (error.message.includes('embeddings')) {
|
||||||
return apiServerError.unavailable('Embeddings service error - falling back to basic processing');
|
return apiServerError.unavailable('Embeddings service error - falling back to selector AI');
|
||||||
|
} else if (error.message.includes('micro-task')) {
|
||||||
|
return apiServerError.unavailable('Micro-task pipeline error - some analysis steps may have failed');
|
||||||
} else if (error.message.includes('selector')) {
|
} else if (error.message.includes('selector')) {
|
||||||
return apiServerError.unavailable('AI selector service error');
|
return apiServerError.unavailable('AI selector service error');
|
||||||
} else if (error.message.includes('analyzer')) {
|
} else if (error.message.includes('rate limit')) {
|
||||||
return apiServerError.unavailable('AI analyzer service error');
|
return apiError.rateLimit('AI service rate limits exceeded due to micro-task processing');
|
||||||
} else {
|
} else {
|
||||||
return apiServerError.internal('Internal server error');
|
return apiServerError.internal('Micro-task AI pipeline error');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
@ -1818,6 +1818,130 @@ input[type="checkbox"] {
|
|||||||
border-left-color: var(--color-warning);
|
border-left-color: var(--color-warning);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Micro-task progress styles (progress indicator, queue status, animations) */
|
||||||
|
|
||||||
|
/* Micro-task progress indicator */
|
||||||
|
.micro-task-progress {
|
||||||
|
background-color: var(--color-bg-secondary);
|
||||||
|
border: 1px solid var(--color-border);
|
||||||
|
border-radius: 0.5rem;
|
||||||
|
padding: 1rem;
|
||||||
|
margin: 1rem 0;
|
||||||
|
transition: var(--transition-fast);
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-task-header {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
margin-bottom: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-task-label {
|
||||||
|
font-weight: 600;
|
||||||
|
color: var(--color-primary);
|
||||||
|
font-size: 0.875rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-task-counter {
|
||||||
|
background-color: var(--color-primary);
|
||||||
|
color: white;
|
||||||
|
padding: 0.25rem 0.5rem;
|
||||||
|
border-radius: 1rem;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-task-steps {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-step {
|
||||||
|
background-color: var(--color-bg);
|
||||||
|
border: 1px solid var(--color-border);
|
||||||
|
border-radius: 0.375rem;
|
||||||
|
padding: 0.5rem;
|
||||||
|
font-size: 0.75rem;
|
||||||
|
text-align: center;
|
||||||
|
transition: var(--transition-fast);
|
||||||
|
opacity: 0.6;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-step.active {
|
||||||
|
background-color: var(--color-primary);
|
||||||
|
color: white;
|
||||||
|
border-color: var(--color-primary);
|
||||||
|
opacity: 1;
|
||||||
|
transform: scale(1.05);
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-step.completed {
|
||||||
|
background-color: var(--color-accent);
|
||||||
|
color: white;
|
||||||
|
border-color: var(--color-accent);
|
||||||
|
opacity: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-step.failed {
|
||||||
|
background-color: var(--color-error);
|
||||||
|
color: white;
|
||||||
|
border-color: var(--color-error);
|
||||||
|
opacity: 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Enhanced queue status for micro-tasks */
|
||||||
|
.queue-status-card.micro-task-mode {
|
||||||
|
border-left: 4px solid var(--color-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.queue-status-card.micro-task-mode .queue-header {
|
||||||
|
background: linear-gradient(135deg, var(--color-primary) 0%, var(--color-accent) 100%);
|
||||||
|
color: white;
|
||||||
|
margin: -1rem -1rem 1rem -1rem;
|
||||||
|
padding: 1rem;
|
||||||
|
border-radius: 0.5rem 0.5rem 0 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Mobile responsive adjustments */
|
||||||
|
@media (max-width: 768px) {
|
||||||
|
.micro-task-steps {
|
||||||
|
grid-template-columns: repeat(2, 1fr);
|
||||||
|
gap: 0.375rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-step {
|
||||||
|
font-size: 0.6875rem;
|
||||||
|
padding: 0.375rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-task-header {
|
||||||
|
flex-direction: column;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Animation for micro-task progress */
|
||||||
|
@keyframes micro-task-pulse {
|
||||||
|
0%, 100% { opacity: 1; }
|
||||||
|
50% { opacity: 0.7; }
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-step.active {
|
||||||
|
animation: micro-task-pulse 2s ease-in-out infinite;
|
||||||
|
}
|
||||||
|
|
||||||
|
@keyframes micro-task-complete {
|
||||||
|
0% { transform: scale(1); }
|
||||||
|
50% { transform: scale(1.1); }
|
||||||
|
100% { transform: scale(1); }
|
||||||
|
}
|
||||||
|
|
||||||
|
.micro-step.completed {
|
||||||
|
animation: micro-task-complete 0.6s ease-out;
|
||||||
|
}
|
||||||
|
|
||||||
/* ===================================================================
|
/* ===================================================================
|
||||||
17. WORKFLOW SYSTEM (CONSOLIDATED)
|
17. WORKFLOW SYSTEM (CONSOLIDATED)
|
||||||
================================================================= */
|
================================================================= */
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,4 @@
|
|||||||
|
// src/utils/dataService.ts - Enhanced for micro-task AI pipeline
|
||||||
import { promises as fs } from 'fs';
|
import { promises as fs } from 'fs';
|
||||||
import { load } from 'js-yaml';
|
import { load } from 'js-yaml';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
@ -28,23 +29,38 @@ const ToolsDataSchema = z.object({
|
|||||||
tools: z.array(ToolSchema),
|
tools: z.array(ToolSchema),
|
||||||
domains: z.array(z.object({
|
domains: z.array(z.object({
|
||||||
id: z.string(),
|
id: z.string(),
|
||||||
name: z.string()
|
name: z.string(),
|
||||||
|
description: z.string().optional() // Enhanced: allow descriptions
|
||||||
})),
|
})),
|
||||||
phases: z.array(z.object({
|
phases: z.array(z.object({
|
||||||
id: z.string(),
|
id: z.string(),
|
||||||
name: z.string(),
|
name: z.string(),
|
||||||
description: z.string().optional()
|
description: z.string().optional(),
|
||||||
|
typical_tools: z.array(z.string()).optional().default([]), // Enhanced: example tools
|
||||||
|
key_activities: z.array(z.string()).optional().default([]) // Enhanced: key activities
|
||||||
})),
|
})),
|
||||||
'domain-agnostic-software': z.array(z.object({
|
'domain-agnostic-software': z.array(z.object({
|
||||||
id: z.string(),
|
id: z.string(),
|
||||||
name: z.string(),
|
name: z.string(),
|
||||||
description: z.string().optional()
|
description: z.string().optional(),
|
||||||
|
use_cases: z.array(z.string()).optional().default([]) // Enhanced: use cases
|
||||||
})).optional().default([]),
|
})).optional().default([]),
|
||||||
scenarios: z.array(z.object({
|
scenarios: z.array(z.object({
|
||||||
id: z.string(),
|
id: z.string(),
|
||||||
icon: z.string(),
|
icon: z.string(),
|
||||||
friendly_name: z.string()
|
friendly_name: z.string(),
|
||||||
|
description: z.string().optional(), // Enhanced: scenario descriptions
|
||||||
|
typical_phases: z.array(z.string()).optional().default([]), // Enhanced: typical phases
|
||||||
|
complexity: z.enum(['low', 'medium', 'high']).optional() // Enhanced: complexity indicator
|
||||||
})).optional().default([]),
|
})).optional().default([]),
|
||||||
|
// Enhanced: Skill level definitions for better AI understanding
|
||||||
|
skill_levels: z.object({
|
||||||
|
novice: z.string().optional(),
|
||||||
|
beginner: z.string().optional(),
|
||||||
|
intermediate: z.string().optional(),
|
||||||
|
advanced: z.string().optional(),
|
||||||
|
expert: z.string().optional()
|
||||||
|
}).optional().default({})
|
||||||
});
|
});
|
||||||
|
|
||||||
interface ToolsData {
|
interface ToolsData {
|
||||||
@ -53,21 +69,49 @@ interface ToolsData {
|
|||||||
phases: any[];
|
phases: any[];
|
||||||
'domain-agnostic-software': any[];
|
'domain-agnostic-software': any[];
|
||||||
scenarios: any[];
|
scenarios: any[];
|
||||||
|
skill_levels?: any;
|
||||||
}
|
}
|
||||||
|
|
||||||
interface CompressedToolsData {
|
interface EnhancedCompressedToolsData {
|
||||||
tools: any[];
|
tools: any[];
|
||||||
concepts: any[];
|
concepts: any[];
|
||||||
domains: any[];
|
domains: any[];
|
||||||
phases: any[];
|
phases: any[];
|
||||||
'domain-agnostic-software': any[];
|
'domain-agnostic-software': any[];
|
||||||
|
scenarios?: any[]; // Optional for AI processing
|
||||||
|
skill_levels: any;
|
||||||
|
// Enhanced context for micro-tasks
|
||||||
|
domain_relationships: DomainRelationship[];
|
||||||
|
phase_dependencies: PhaseDependency[];
|
||||||
|
tool_compatibility_matrix: CompatibilityMatrix[];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Per-domain summary derived from the tool list (populated by
 * generateDomainRelationships in this file).
 *
 * - domain_id: id of the domain being summarised.
 * - tool_count: number of tools whose `domains` list contains that id.
 * - common_tags: at most five tags, most frequent first, among those tools.
 * - skill_distribution: number of those tools per `skillLevel` value.
 */
interface DomainRelationship {
|
||||||
|
domain_id: string;
|
||||||
|
tool_count: number;
|
||||||
|
common_tags: string[];
|
||||||
|
skill_distribution: Record<string, number>;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Position of one phase within the ordered phase list (populated by
 * generatePhaseDependencies in this file).
 *
 * - phase_id: id of the phase.
 * - order: 1-based position of the phase in the list.
 * - depends_on: id of the preceding phase, or null for the first phase.
 * - enables: id of the following phase, or null for the last phase.
 * - is_parallel_capable: true only for the 'examination' and 'analysis' phases.
 * - typical_duration: coarse label such as 'hours-days', 'hours-weeks' or 'days-weeks'.
 */
interface PhaseDependency {
|
||||||
|
phase_id: string;
|
||||||
|
order: number;
|
||||||
|
depends_on: string | null;
|
||||||
|
enables: string | null;
|
||||||
|
is_parallel_capable: boolean;
|
||||||
|
typical_duration: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
interface CompatibilityMatrix {
|
||||||
|
type: string;
|
||||||
|
groups: Record<string, string[]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let cachedData: ToolsData | null = null;
|
let cachedData: ToolsData | null = null;
|
||||||
let cachedRandomizedData: ToolsData | null = null;
|
let cachedRandomizedData: ToolsData | null = null;
|
||||||
let cachedCompressedData: CompressedToolsData | null = null;
|
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
|
||||||
let lastRandomizationDate: string | null = null;
|
let lastRandomizationDate: string | null = null;
|
||||||
let dataVersion: string | null = null; // Add version tracking for embeddings
|
let dataVersion: string | null = null;
|
||||||
|
|
||||||
function seededRandom(seed: number): () => number {
|
function seededRandom(seed: number): () => number {
|
||||||
let x = Math.sin(seed) * 10000;
|
let x = Math.sin(seed) * 10000;
|
||||||
@ -92,18 +136,115 @@ function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
|
|||||||
return shuffled;
|
return shuffled;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate a simple hash of the data for version tracking
|
|
||||||
/**
 * Derives a short version string from the full content of `data`.
 *
 * The value is serialised with object keys sorted at every nesting level, so
 * two values that differ only in key order produce the same version, and the
 * text is folded into a 32-bit rolling hash (hash * 31 + char code).
 *
 * Note: an array passed as the second argument of JSON.stringify is a property
 * allow-list applied to every nested object, not a sort order. Passing the
 * sorted top-level keys therefore dropped nested fields from the serialised
 * text. A replacer function is used instead so that every field contributes.
 * Versions computed by this function differ from those of the allow-list form.
 *
 * @param data - JSON-serialisable value to fingerprint.
 * @returns The absolute hash value in base 36.
 */
function generateDataVersion(data: any): string {
  const canonical = JSON.stringify(data, (_key: string, value: unknown) => {
    if (value === null || typeof value !== 'object' || Array.isArray(value)) {
      return value;
    }
    // Plain object: re-emit its entries in sorted key order.
    const record = value as Record<string, unknown>;
    return Object.fromEntries(
      Object.keys(record)
        .sort()
        .map((key) => [key, record[key]])
    );
  });

  let hash = 0;
  for (let i = 0; i < canonical.length; i++) {
    // `| 0` keeps the running value within the signed 32-bit integer range.
    hash = ((hash << 5) - hash + canonical.charCodeAt(i)) | 0;
  }
  return Math.abs(hash).toString(36);
}
|
||||||
|
|
||||||
|
// Enhanced: Generate domain relationships for better AI understanding
|
||||||
|
/**
 * Builds one summary per domain describing the tools assigned to it.
 *
 * For each domain the tools whose `domains` list contains the domain id are
 * collected; the summary reports how many there are, their (up to) five most
 * frequent tags, and how many tools fall on each `skillLevel`.
 *
 * Counting is done in Maps rather than plain objects: with `{}` a tag or skill
 * level named like an inherited member (for example "constructor" or
 * "toString") reads the inherited function instead of a missing count, which
 * corrupts the tally and the sort comparison.
 *
 * @param domains - Domain records; only `id` is read.
 * @param tools - Tool records; `domains`, `tags` and `skillLevel` are read.
 * @returns One DomainRelationship per input domain, in input order.
 */
function generateDomainRelationships(domains: any[], tools: any[]): DomainRelationship[] {
  return domains.map((domain) => {
    const domainTools = tools.filter((tool) => tool.domains && tool.domains.includes(domain.id));

    const tagCounts = new Map<string, number>();
    for (const tag of domainTools.flatMap((tool) => tool.tags || [])) {
      tagCounts.set(tag, (tagCounts.get(tag) ?? 0) + 1);
    }

    // Most frequent first; ties keep first-seen order because the sort is stable.
    const topTags = [...tagCounts.entries()]
      .sort(([, countA], [, countB]) => countB - countA)
      .slice(0, 5)
      .map(([tag]) => tag);

    const skillCounts = new Map<string, number>();
    for (const tool of domainTools) {
      // A missing skillLevel is tallied under the key "undefined", as property access would do.
      const level = String(tool.skillLevel);
      skillCounts.set(level, (skillCounts.get(level) ?? 0) + 1);
    }

    return {
      domain_id: domain.id,
      tool_count: domainTools.length,
      common_tags: topTags,
      skill_distribution: Object.fromEntries(skillCounts)
    };
  });
}
|
||||||
|
|
||||||
|
// Enhanced: Generate phase dependencies
|
||||||
|
/**
 * Describes each phase's place in the ordered phase list.
 *
 * The list order is taken as the workflow order: every phase depends on the
 * phase before it and enables the phase after it (null at either end).
 * Parallel capability and typical duration come from fixed per-id tables;
 * ids without a table entry get the default duration.
 *
 * @param phases - Phase records in workflow order; only `id` is read.
 * @returns One PhaseDependency per phase, in input order, with 1-based `order`.
 */
function generatePhaseDependencies(phases: any[]): PhaseDependency[] {
  // Lookup tables are built once per call instead of once per phase.
  const parallelCapablePhases = new Set<string>(['examination', 'analysis']); // Some phases can run in parallel
  const durationByPhase = new Map<string, string>([
    ['examination', 'hours-weeks'],
    ['analysis', 'days-weeks']
  ]);
  const defaultDuration = 'hours-days'; // also applies to 'data-collection'

  const dependencies: PhaseDependency[] = [];
  for (let i = 0; i < phases.length; i++) {
    const phase = phases[i];
    dependencies.push({
      phase_id: phase.id,
      order: i + 1,
      depends_on: phases[i - 1]?.id || null,
      enables: phases[i + 1]?.id || null,
      is_parallel_capable: parallelCapablePhases.has(phase.id),
      typical_duration: durationByPhase.get(phase.id) ?? defaultDuration
    });
  }
  return dependencies;
}
|
||||||
|
|
||||||
|
// Enhanced: Generate tool compatibility matrix
|
||||||
|
/**
 * Groups tool names by the platforms they run on and by the phases they serve.
 *
 * Grouping is done in a Map and converted to a plain object at the end. With a
 * plain `{}` accumulator, a key named like an inherited member (for example
 * "constructor") looks already present and the subsequent `push` throws.
 *
 * @param tools - Tool records; `name`, `platforms` and `phases` are read.
 *                Tools without a list for a field are left out of that grouping.
 * @returns Two entries, in this order: 'platform_compatibility' (platform -> tool
 *          names) and 'phase_synergy' (phase -> tool names).
 */
function generateToolCompatibilityMatrix(tools: any[]): CompatibilityMatrix[] {
  // Collects tool names under every value listed in the given array field.
  const groupToolNamesBy = (field: 'platforms' | 'phases'): Record<string, string[]> => {
    const groups = new Map<string, string[]>();
    for (const tool of tools) {
      const keys: string[] = Array.isArray(tool[field]) ? tool[field] : [];
      for (const key of keys) {
        const names = groups.get(key);
        if (names) {
          names.push(tool.name);
        } else {
          groups.set(key, [tool.name]);
        }
      }
    }
    return Object.fromEntries(groups);
  };

  return [
    { type: 'platform_compatibility', groups: groupToolNamesBy('platforms') },
    { type: 'phase_synergy', groups: groupToolNamesBy('phases') }
  ];
}
|
||||||
|
|
||||||
async function loadRawData(): Promise<ToolsData> {
|
async function loadRawData(): Promise<ToolsData> {
|
||||||
if (!cachedData) {
|
if (!cachedData) {
|
||||||
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
|
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
|
||||||
@ -113,9 +254,19 @@ async function loadRawData(): Promise<ToolsData> {
|
|||||||
try {
|
try {
|
||||||
cachedData = ToolsDataSchema.parse(rawData);
|
cachedData = ToolsDataSchema.parse(rawData);
|
||||||
|
|
||||||
// Generate data version for embeddings tracking
|
// Enhanced: Add default skill level descriptions if not provided
|
||||||
|
if (!cachedData.skill_levels || Object.keys(cachedData.skill_levels).length === 0) {
|
||||||
|
cachedData.skill_levels = {
|
||||||
|
novice: "Minimal technical background required, guided interfaces",
|
||||||
|
beginner: "Basic IT knowledge, some command-line familiarity helpful",
|
||||||
|
intermediate: "Solid technical foundation, comfortable with various tools",
|
||||||
|
advanced: "Extensive experience, deep technical understanding required",
|
||||||
|
expert: "Specialist knowledge, cutting-edge techniques and complex scenarios"
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
dataVersion = generateDataVersion(cachedData);
|
dataVersion = generateDataVersion(cachedData);
|
||||||
console.log(`[DATA SERVICE] Loaded data version: ${dataVersion}`);
|
console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
console.error('YAML validation failed:', error);
|
console.error('YAML validation failed:', error);
|
||||||
@ -141,42 +292,73 @@ export async function getToolsData(): Promise<ToolsData> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
lastRandomizationDate = today;
|
lastRandomizationDate = today;
|
||||||
|
|
||||||
// Clear compressed cache when randomized data changes
|
|
||||||
cachedCompressedData = null;
|
cachedCompressedData = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return cachedRandomizedData;
|
return cachedRandomizedData;
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function getCompressedToolsDataForAI(): Promise<CompressedToolsData> {
|
export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedToolsData> {
|
||||||
if (!cachedCompressedData) {
|
if (!cachedCompressedData) {
|
||||||
const data = await getToolsData();
|
const data = await getToolsData();
|
||||||
|
|
||||||
|
// Enhanced: More detailed tool information for micro-tasks
|
||||||
const compressedTools = data.tools
|
const compressedTools = data.tools
|
||||||
.filter(tool => tool.type !== 'concept')
|
.filter(tool => tool.type !== 'concept')
|
||||||
.map(tool => {
|
.map(tool => {
|
||||||
const { projectUrl, statusUrl, ...compressedTool } = tool;
|
const { projectUrl, statusUrl, ...compressedTool } = tool;
|
||||||
return compressedTool;
|
return {
|
||||||
|
...compressedTool,
|
||||||
|
// Enhanced: Add computed fields for AI
|
||||||
|
is_hosted: projectUrl !== undefined && projectUrl !== null && projectUrl !== "" && projectUrl.trim() !== "",
|
||||||
|
is_open_source: tool.license && tool.license !== 'Proprietary',
|
||||||
|
complexity_score: tool.skillLevel === 'expert' ? 5 :
|
||||||
|
tool.skillLevel === 'advanced' ? 4 :
|
||||||
|
tool.skillLevel === 'intermediate' ? 3 :
|
||||||
|
tool.skillLevel === 'beginner' ? 2 : 1,
|
||||||
|
// Enhanced: Phase-specific suitability hints
|
||||||
|
phase_suitability: tool.phases?.map(phase => ({
|
||||||
|
phase,
|
||||||
|
primary_use: tool.tags?.find(tag => tag.includes(phase)) ? 'primary' : 'secondary'
|
||||||
|
})) || []
|
||||||
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
const concepts = data.tools
|
const concepts = data.tools
|
||||||
.filter(tool => tool.type === 'concept')
|
.filter(tool => tool.type === 'concept')
|
||||||
.map(concept => {
|
.map(concept => {
|
||||||
const { projectUrl, statusUrl, platforms, accessType, license, ...compressedConcept } = concept;
|
const { projectUrl, statusUrl, platforms, accessType, license, ...compressedConcept } = concept;
|
||||||
return compressedConcept;
|
return {
|
||||||
|
...compressedConcept,
|
||||||
|
// Enhanced: Learning difficulty indicator
|
||||||
|
learning_complexity: concept.skillLevel === 'expert' ? 'very_high' :
|
||||||
|
concept.skillLevel === 'advanced' ? 'high' :
|
||||||
|
concept.skillLevel === 'intermediate' ? 'medium' :
|
||||||
|
'low'
|
||||||
|
};
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Enhanced: Add rich context data
|
||||||
|
const domainRelationships = generateDomainRelationships(data.domains, compressedTools);
|
||||||
|
const phaseDependencies = generatePhaseDependencies(data.phases);
|
||||||
|
const toolCompatibilityMatrix = generateToolCompatibilityMatrix(compressedTools);
|
||||||
|
|
||||||
cachedCompressedData = {
|
cachedCompressedData = {
|
||||||
tools: compressedTools,
|
tools: compressedTools,
|
||||||
concepts: concepts,
|
concepts: concepts,
|
||||||
domains: data.domains,
|
domains: data.domains,
|
||||||
phases: data.phases,
|
phases: data.phases,
|
||||||
'domain-agnostic-software': data['domain-agnostic-software']
|
'domain-agnostic-software': data['domain-agnostic-software'],
|
||||||
// scenarios intentionally excluded from AI data
|
scenarios: data.scenarios, // Include scenarios for context
|
||||||
|
skill_levels: data.skill_levels || {},
|
||||||
|
// Enhanced context for micro-tasks
|
||||||
|
domain_relationships: domainRelationships,
|
||||||
|
phase_dependencies: phaseDependencies,
|
||||||
|
tool_compatibility_matrix: toolCompatibilityMatrix
|
||||||
};
|
};
|
||||||
|
|
||||||
console.log(`[DATA SERVICE] Generated compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
|
console.log(`[DATA SERVICE] Generated enhanced compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
|
||||||
|
console.log(`[DATA SERVICE] Added context: ${domainRelationships.length} domain relationships, ${phaseDependencies.length} phase dependencies`);
|
||||||
}
|
}
|
||||||
|
|
||||||
return cachedCompressedData;
|
return cachedCompressedData;
|
||||||
@ -193,5 +375,5 @@ export function clearCache(): void {
|
|||||||
lastRandomizationDate = null;
|
lastRandomizationDate = null;
|
||||||
dataVersion = null;
|
dataVersion = null;
|
||||||
|
|
||||||
console.log('[DATA SERVICE] Cache cleared');
|
console.log('[DATA SERVICE] Enhanced cache cleared');
|
||||||
}
|
}
|
Loading…
x
Reference in New Issue
Block a user