This commit is contained in:
overcuriousity 2025-07-31 21:43:05 +02:00
parent c96aa70413
commit 680a2c311d
7 changed files with 103853 additions and 103211 deletions

View File

@ -40,9 +40,40 @@ AI_EMBEDDINGS_BATCH_DELAY_MS=1000
AI_EMBEDDING_CANDIDATES=30
AI_SIMILARITY_THRESHOLD=0.3
# === AI Processing Configuration ===
AI_MAX_SELECTED_ITEMS=15
AI_RATE_LIMIT_DELAY_MS=2000
# Delay between micro-tasks to respect rate limits (milliseconds)
AI_MICRO_TASK_DELAY_MS=500
# Micro-task specific rate limiting (requests per minute per user)
AI_MICRO_TASK_RATE_LIMIT=30
# Maximum parallel micro-tasks (for future parallel processing)
AI_MAX_PARALLEL_TASKS=3
# Micro-task timeout settings (milliseconds)
AI_MICRO_TASK_TIMEOUT_MS=15000
# === Rate Limiting Configuration ===
# Main query rate limiting (lowered because each query now issues several micro-task requests)
AI_RATE_LIMIT_DELAY_MS=3000
AI_RATE_LIMIT_MAX_REQUESTS=8
# Smart prompting rate limiting
AI_SMART_PROMPTING_RATE_LIMIT=5
AI_SMART_PROMPTING_WINDOW_MS=60000
# Queue management settings
AI_QUEUE_MAX_SIZE=50
AI_QUEUE_CLEANUP_INTERVAL_MS=300000
# === Performance & Monitoring ===
# Enable detailed micro-task logging
AI_MICRO_TASK_DEBUG=false
# Enable performance metrics collection
AI_PERFORMANCE_METRICS=true
# Cache settings for AI responses
AI_RESPONSE_CACHE_TTL_MS=3600000
# === Application Configuration ===
PUBLIC_BASE_URL=http://localhost:4321

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
// src/pages/api/ai/query.ts
// src/pages/api/ai/query.ts - Enhanced for micro-task pipeline
import type { APIRoute } from 'astro';
import { withAPIAuth } from '../../../utils/auth.js';
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
@ -8,8 +8,14 @@ import { aiPipeline } from '../../../utils/aiPipeline.js';
export const prerender = false;
const rateLimitStore = new Map<string, { count: number; resetTime: number }>();
const RATE_LIMIT_WINDOW = 60 * 1000;
const RATE_LIMIT_MAX = 10;
/**
 * Reads a positive integer from the environment variable `name`.
 *
 * Returns `fallback` when the variable is unset, empty, not numeric, or not
 * greater than zero. Without this guard a value such as "abc" parses to NaN,
 * and every `count >= limit` comparison is then false, which silently turns
 * the rate limit off.
 *
 * @param name - Environment variable to read.
 * @param fallback - Value used when the variable holds no usable integer.
 * @returns The parsed positive integer, or `fallback`.
 */
function readPositiveIntEnv(name: string, fallback: number): number {
  const parsed = Number.parseInt(process.env[name] ?? '', 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// Rate limiting for the micro-task architecture.
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
// Main query limit per window; the default is lower than before because each
// query now fans out into several micro-task requests.
const RATE_LIMIT_MAX = readPositiveIntEnv('AI_RATE_LIMIT_MAX_REQUESTS', 8);
// Micro-task specific rate limiting (requests per window per user).
const MICRO_TASK_RATE_LIMIT = readPositiveIntEnv('AI_MICRO_TASK_RATE_LIMIT', 30);
// Per-user counters for the micro-task limit, keyed by user id.
const microTaskRateLimitStore = new Map<string, { count: number; resetTime: number }>();
function sanitizeInput(input: string): string {
let sanitized = input
@ -41,13 +47,40 @@ function checkRateLimit(userId: string): boolean {
return true;
}
/**
 * Counts one micro-task request for `userId` against the per-user limit.
 *
 * The first request of a user (or the first one after the stored window has
 * passed) starts a fresh window of RATE_LIMIT_WINDOW milliseconds with a
 * count of one. Later requests in the same window increment the counter
 * until MICRO_TASK_RATE_LIMIT is reached.
 *
 * @param userId - Key under which the counter is stored.
 * @returns Whether the request may proceed and how many requests remain in
 *          the current window.
 */
function checkMicroTaskRateLimit(userId: string): { allowed: boolean; remaining: number } {
  const timestamp = Date.now();
  const entry = microTaskRateLimitStore.get(userId);

  // No entry yet, or the previous window is over: open a new window.
  if (entry === undefined || timestamp > entry.resetTime) {
    const freshEntry = { count: 1, resetTime: timestamp + RATE_LIMIT_WINDOW };
    microTaskRateLimitStore.set(userId, freshEntry);
    return { allowed: true, remaining: MICRO_TASK_RATE_LIMIT - 1 };
  }

  // Budget for this window is used up; the counter is left untouched.
  const exhausted = entry.count >= MICRO_TASK_RATE_LIMIT;
  if (exhausted) {
    return { allowed: false, remaining: 0 };
  }

  entry.count += 1;
  const remaining = MICRO_TASK_RATE_LIMIT - entry.count;
  return { allowed: true, remaining };
}
/**
 * Removes every rate-limit entry whose window has already ended, from both
 * the main store and the micro-task store.
 */
function cleanupExpiredRateLimits() {
  const cutoff = Date.now();

  // Same sweep for both stores: main rate limits first, then micro-task limits.
  for (const store of [rateLimitStore, microTaskRateLimitStore]) {
    store.forEach((entry, key) => {
      if (cutoff > entry.resetTime) {
        store.delete(key);
      }
    });
  }
}
setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);
@ -61,73 +94,104 @@ export const POST: APIRoute = async ({ request }) => {
const userId = authResult.userId;
// Check main rate limit
if (!checkRateLimit(userId)) {
return apiError.rateLimit('Rate limit exceeded');
}
// Enhanced: Check micro-task rate limit
const microTaskLimit = checkMicroTaskRateLimit(userId);
if (!microTaskLimit.allowed) {
return apiError.rateLimit(
`Micro-task rate limit exceeded. The new AI pipeline uses multiple smaller requests. Please wait before trying again.`
);
}
const body = await request.json();
const { query, mode = 'workflow', taskId: clientTaskId } = body;
console.log(`[AI API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
console.log(`[MICRO-TASK API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${query?.length || 0}`);
console.log(`[MICRO-TASK API] Micro-task rate limit remaining: ${microTaskLimit.remaining}`);
if (!query || typeof query !== 'string') {
console.log(`[AI API] Invalid query for task ${clientTaskId}`);
console.log(`[MICRO-TASK API] Invalid query for task ${clientTaskId}`);
return apiError.badRequest('Query required');
}
if (!['workflow', 'tool'].includes(mode)) {
console.log(`[AI API] Invalid mode for task ${clientTaskId}: ${mode}`);
console.log(`[MICRO-TASK API] Invalid mode for task ${clientTaskId}: ${mode}`);
return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
}
const sanitizedQuery = sanitizeInput(query);
if (sanitizedQuery.includes('[FILTERED]')) {
console.log(`[AI API] Filtered input detected for task ${clientTaskId}`);
console.log(`[MICRO-TASK API] Filtered input detected for task ${clientTaskId}`);
return apiError.badRequest('Invalid input detected');
}
const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).substr(2, 6)}`;
console.log(`[AI API] About to enqueue task ${taskId}`);
console.log(`[MICRO-TASK API] About to enqueue micro-task pipeline ${taskId}`);
// Use the new AI pipeline instead of direct API calls
// Use the enhanced micro-task AI pipeline
const result = await enqueueApiCall(() =>
aiPipeline.processQuery(sanitizedQuery, mode)
, taskId);
if (!result || !result.recommendation) {
return apiServerError.unavailable('No response from AI pipeline');
return apiServerError.unavailable('No response from micro-task AI pipeline');
}
// Add processing statistics to the response for debugging/monitoring
console.log(`[AI Query] Mode: ${mode}, User: ${userId}, Query length: ${sanitizedQuery.length}`);
console.log(`[AI Query] Processing stats:`, result.processingStats);
console.log(`[AI Query] Tools: ${result.recommendation.recommended_tools?.length || 0}, Concepts: ${result.recommendation.background_knowledge?.length || 0}`);
// Enhanced: Log micro-task statistics
const stats = result.processingStats;
console.log(`[MICRO-TASK API] Pipeline completed for ${taskId}:`);
console.log(` - Mode: ${mode}`);
console.log(` - User: ${userId}`);
console.log(` - Query length: ${sanitizedQuery.length}`);
console.log(` - Processing time: ${stats.processingTimeMs}ms`);
console.log(` - Micro-tasks completed: ${stats.microTasksCompleted}`);
console.log(` - Micro-tasks failed: ${stats.microTasksFailed}`);
console.log(` - Embeddings used: ${stats.embeddingsUsed}`);
console.log(` - Final items: ${stats.finalSelectedItems}`);
// Enhanced: Include pipeline information in response
return new Response(JSON.stringify({
success: true,
mode,
taskId,
recommendation: result.recommendation,
query: sanitizedQuery,
processingStats: result.processingStats // Include stats for monitoring
processingStats: {
...result.processingStats,
// Add micro-task specific info
pipelineType: 'micro-task',
microTasksSuccessRate: stats.microTasksCompleted / (stats.microTasksCompleted + stats.microTasksFailed),
averageTaskTime: stats.processingTimeMs / (stats.microTasksCompleted + stats.microTasksFailed)
},
// Enhanced: Rate limiting info for client
rateLimitInfo: {
remaining: microTaskLimit.remaining,
resetTime: Date.now() + RATE_LIMIT_WINDOW
}
}), {
status: 200,
headers: { 'Content-Type': 'application/json' }
});
} catch (error) {
console.error('AI query error:', error);
console.error('[MICRO-TASK API] Pipeline error:', error);
// Provide more specific error messages based on error type
// Enhanced: More specific error messages for micro-task pipeline
if (error.message.includes('embeddings')) {
return apiServerError.unavailable('Embeddings service error - falling back to basic processing');
return apiServerError.unavailable('Embeddings service error - falling back to selector AI');
} else if (error.message.includes('micro-task')) {
return apiServerError.unavailable('Micro-task pipeline error - some analysis steps may have failed');
} else if (error.message.includes('selector')) {
return apiServerError.unavailable('AI selector service error');
} else if (error.message.includes('analyzer')) {
return apiServerError.unavailable('AI analyzer service error');
} else if (error.message.includes('rate limit')) {
return apiError.rateLimit('AI service rate limits exceeded due to micro-task processing');
} else {
return apiServerError.internal('Internal server error');
return apiServerError.internal('Micro-task AI pipeline error');
}
}
};

View File

@ -1818,6 +1818,130 @@ input[type="checkbox"] {
border-left-color: var(--color-warning);
}
/* Micro-task progress styles */

/* Container for the micro-task progress indicator */
.micro-task-progress {
  background-color: var(--color-bg-secondary);
  border: 1px solid var(--color-border);
  border-radius: 0.5rem;
  padding: 1rem;
  margin: 1rem 0;
  transition: var(--transition-fast);
}

.micro-task-header {
  display: flex;
  justify-content: space-between;
  align-items: center;
  margin-bottom: 0.75rem;
}

.micro-task-label {
  font-weight: 600;
  color: var(--color-primary);
  font-size: 0.875rem;
}

.micro-task-counter {
  background-color: var(--color-primary);
  color: white;
  padding: 0.25rem 0.5rem;
  border-radius: 1rem;
  font-size: 0.75rem;
  font-weight: 600;
}

.micro-task-steps {
  display: grid;
  grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
  gap: 0.5rem;
}

/* A single step; dimmed until it becomes active, completed or failed */
.micro-step {
  background-color: var(--color-bg);
  border: 1px solid var(--color-border);
  border-radius: 0.375rem;
  padding: 0.5rem;
  font-size: 0.75rem;
  text-align: center;
  transition: var(--transition-fast);
  opacity: 0.6;
}

/* Step currently running: highlighted, slightly enlarged, pulsing */
.micro-step.active {
  background-color: var(--color-primary);
  color: white;
  border-color: var(--color-primary);
  opacity: 1;
  transform: scale(1.05);
  animation: micro-task-pulse 2s ease-in-out infinite;
}

/* Step finished successfully: plays a one-off "pop" animation */
.micro-step.completed {
  background-color: var(--color-accent);
  color: white;
  border-color: var(--color-accent);
  opacity: 1;
  animation: micro-task-complete 0.6s ease-out;
}

.micro-step.failed {
  background-color: var(--color-error);
  color: white;
  border-color: var(--color-error);
  opacity: 1;
}

/* Queue status card variant used while the micro-task pipeline runs */
.queue-status-card.micro-task-mode {
  border-left: 4px solid var(--color-primary);
}

.queue-status-card.micro-task-mode .queue-header {
  background: linear-gradient(135deg, var(--color-primary) 0%, var(--color-accent) 100%);
  color: white;
  /* Negative margins cancel the card padding so the header spans edge to edge */
  margin: -1rem -1rem 1rem -1rem;
  padding: 1rem;
  border-radius: 0.5rem 0.5rem 0 0;
}

/* Mobile responsive adjustments */
@media (max-width: 768px) {
  .micro-task-steps {
    grid-template-columns: repeat(2, 1fr);
    gap: 0.375rem;
  }

  .micro-step {
    font-size: 0.6875rem;
    padding: 0.375rem;
  }

  .micro-task-header {
    flex-direction: column;
    gap: 0.5rem;
  }
}

/* Animations for micro-task progress */
@keyframes micro-task-pulse {
  0%, 100% { opacity: 1; }
  50% { opacity: 0.7; }
}

@keyframes micro-task-complete {
  0% { transform: scale(1); }
  50% { transform: scale(1.1); }
  100% { transform: scale(1); }
}

/* Respect the user's reduced-motion preference: keep the state colours, drop the motion */
@media (prefers-reduced-motion: reduce) {
  .micro-step.active,
  .micro-step.completed {
    animation: none;
  }
}
/* ===================================================================
17. WORKFLOW SYSTEM (CONSOLIDATED)
================================================================= */

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,4 @@
// src/utils/dataService.ts - Enhanced for micro-task AI pipeline
import { promises as fs } from 'fs';
import { load } from 'js-yaml';
import path from 'path';
@ -28,23 +29,38 @@ const ToolsDataSchema = z.object({
tools: z.array(ToolSchema),
domains: z.array(z.object({
id: z.string(),
name: z.string()
name: z.string(),
description: z.string().optional() // Enhanced: allow descriptions
})),
phases: z.array(z.object({
id: z.string(),
name: z.string(),
description: z.string().optional()
description: z.string().optional(),
typical_tools: z.array(z.string()).optional().default([]), // Enhanced: example tools
key_activities: z.array(z.string()).optional().default([]) // Enhanced: key activities
})),
'domain-agnostic-software': z.array(z.object({
id: z.string(),
name: z.string(),
description: z.string().optional()
description: z.string().optional(),
use_cases: z.array(z.string()).optional().default([]) // Enhanced: use cases
})).optional().default([]),
scenarios: z.array(z.object({
id: z.string(),
icon: z.string(),
friendly_name: z.string()
friendly_name: z.string(),
description: z.string().optional(), // Enhanced: scenario descriptions
typical_phases: z.array(z.string()).optional().default([]), // Enhanced: typical phases
complexity: z.enum(['low', 'medium', 'high']).optional() // Enhanced: complexity indicator
})).optional().default([]),
// Enhanced: Skill level definitions for better AI understanding
skill_levels: z.object({
novice: z.string().optional(),
beginner: z.string().optional(),
intermediate: z.string().optional(),
advanced: z.string().optional(),
expert: z.string().optional()
}).optional().default({})
});
interface ToolsData {
@ -53,21 +69,49 @@ interface ToolsData {
phases: any[];
'domain-agnostic-software': any[];
scenarios: any[];
skill_levels?: any;
}
interface CompressedToolsData {
/**
 * Shape of the data set returned by getCompressedToolsDataForAI().
 */
interface EnhancedCompressedToolsData {
/** Entries whose `type` is not 'concept', with computed fields added. */
tools: any[];
/** Entries whose `type` is 'concept'. */
concepts: any[];
/** Domain definitions, passed through from the loaded data. */
domains: any[];
/** Phase definitions, passed through from the loaded data. */
phases: any[];
/** Domain-agnostic software entries, passed through from the loaded data. */
'domain-agnostic-software': any[];
scenarios?: any[]; // Optional for AI processing
/** Skill-level descriptions from the loaded data. */
skill_levels: any;
// Enhanced context for micro-tasks
/** Per-domain summaries produced by generateDomainRelationships(). */
domain_relationships: DomainRelationship[];
/** Per-phase ordering records produced by generatePhaseDependencies(). */
phase_dependencies: PhaseDependency[];
/** Tool-name groupings produced by generateToolCompatibilityMatrix(). */
tool_compatibility_matrix: CompatibilityMatrix[];
}
/**
 * Per-domain summary built by generateDomainRelationships().
 */
interface DomainRelationship {
/** `id` of the domain this summary describes. */
domain_id: string;
/** Number of tools whose `domains` list includes this domain. */
tool_count: number;
/** Up to five tags of those tools, most frequent first. */
common_tags: string[];
/** Number of those tools per `skillLevel` value. */
skill_distribution: Record<string, number>;
}
/**
 * Ordering record for one phase, built by generatePhaseDependencies() from
 * the position of the phase in the phases array.
 */
interface PhaseDependency {
/** `id` of the phase this record describes. */
phase_id: string;
/** 1-based position of the phase in the phases array. */
order: number;
/** `id` of the preceding phase, or null for the first phase. */
depends_on: string | null;
/** `id` of the following phase, or null for the last phase. */
enables: string | null;
/** True for the 'examination' and 'analysis' phases. */
is_parallel_capable: boolean;
/** Coarse duration label such as 'hours-days' or 'days-weeks'. */
typical_duration: string;
}
/**
 * One grouping of tool names, built by generateToolCompatibilityMatrix().
 */
interface CompatibilityMatrix {
/** Grouping kind; the generator emits 'platform_compatibility' and 'phase_synergy'. */
type: string;
/** Tool names keyed by the platform or phase they are listed under. */
groups: Record<string, string[]>;
}
let cachedData: ToolsData | null = null;
let cachedRandomizedData: ToolsData | null = null;
let cachedCompressedData: CompressedToolsData | null = null;
let cachedCompressedData: EnhancedCompressedToolsData | null = null;
let lastRandomizationDate: string | null = null;
let dataVersion: string | null = null; // Add version tracking for embeddings
let dataVersion: string | null = null;
function seededRandom(seed: number): () => number {
let x = Math.sin(seed) * 10000;
@ -92,18 +136,115 @@ function shuffleArray<T>(array: T[], randomFn: () => number): T[] {
return shuffled;
}
/**
 * Derives a short version string from the full content of `data`.
 *
 * The data is serialized in a canonical form (object keys sorted at every
 * nesting level) and folded into a 32-bit hash, so the result is independent
 * of key order but changes whenever any nested value changes.
 *
 * Note: the previous implementation passed the top-level key list as the
 * JSON.stringify replacer. An array replacer is an allow-list applied to
 * every nested object, so nested fields (for example tool names) were
 * dropped and content edits did not change the version. Versions computed
 * by this implementation therefore differ from previously stored ones.
 *
 * @param data - Any JSON-serializable value.
 * @returns The absolute 32-bit hash rendered in base 36.
 */
function generateDataVersion(data: any): string {
  // Rebuilds plain objects with sorted keys; arrays keep their order.
  const canonicalize = (value: unknown): unknown => {
    if (Array.isArray(value)) {
      return value.map(item => canonicalize(item));
    }
    if (value !== null && typeof value === 'object') {
      // Objects with their own JSON form (e.g. Date) are left to JSON.stringify.
      if (typeof (value as { toJSON?: unknown }).toJSON === 'function') {
        return value;
      }
      const source = value as Record<string, unknown>;
      const sorted: Record<string, unknown> = {};
      for (const key of Object.keys(source).sort()) {
        sorted[key] = canonicalize(source[key]);
      }
      return sorted;
    }
    return value;
  };

  // JSON.stringify returns undefined for values such as `undefined`; hash '' then.
  const str = JSON.stringify(canonicalize(data)) ?? '';

  let hash = 0;
  for (let i = 0; i < str.length; i++) {
    // hash * 31 + char, truncated to a signed 32-bit integer on every step.
    hash = ((hash << 5) - hash + str.charCodeAt(i)) | 0;
  }
  return Math.abs(hash).toString(36);
}
/**
 * Builds one summary per domain: how many tools belong to it, which tags
 * are most common among them, and how those tools spread across skill levels.
 *
 * Occurrences are tallied in a Map rather than a plain object, so a tag or
 * skill level whose name matches an Object.prototype member (for example
 * "constructor") is counted like any other value.
 *
 * @param domains - Domain definitions; each must expose an `id`.
 * @param tools - Tools to summarize; `domains`, `tags` and `skillLevel` are read.
 * @returns One DomainRelationship per entry of `domains`, in the same order.
 */
function generateDomainRelationships(domains: any[], tools: any[]): DomainRelationship[] {
  // Counts how often each value occurs, keeping first-seen order.
  const tally = (values: string[]): Map<string, number> => {
    const counts = new Map<string, number>();
    for (const value of values) {
      counts.set(value, (counts.get(value) ?? 0) + 1);
    }
    return counts;
  };

  return domains.map((domain): DomainRelationship => {
    const domainTools = tools.filter(tool => tool.domains?.includes(domain.id));

    const tagCounts = tally(domainTools.flatMap(tool => tool.tags || []));
    // Most frequent first; the sort is stable, so ties keep first-seen order.
    const topTags = [...tagCounts.entries()]
      .sort(([, a], [, b]) => b - a)
      .slice(0, 5)
      .map(([tag]) => tag);

    const skillCounts = tally(domainTools.map(tool => tool.skillLevel));

    return {
      domain_id: domain.id,
      tool_count: domainTools.length,
      common_tags: topTags,
      skill_distribution: Object.fromEntries(skillCounts)
    };
  });
}
/**
 * Turns the ordered list of phases into a chain of dependency records:
 * each phase depends on its predecessor in the array and enables its
 * successor.
 *
 * @param phases - Phases in workflow order; each must expose an `id`.
 * @returns One PhaseDependency per phase, in the same order.
 */
function generatePhaseDependencies(phases: any[]): PhaseDependency[] {
  // Phases flagged as able to run in parallel.
  const parallelPhaseIds = ['examination', 'analysis'];

  // Coarse duration label per phase id.
  const durationFor = (phaseId: unknown): string => {
    switch (phaseId) {
      case 'data-collection':
        return 'hours-days';
      case 'examination':
        return 'hours-weeks';
      case 'analysis':
        return 'days-weeks';
      default:
        return 'hours-days';
    }
  };

  const result: PhaseDependency[] = [];
  for (const [index, current] of phases.entries()) {
    const previous = phases[index - 1];
    const following = phases[index + 1];
    const record: PhaseDependency = {
      phase_id: current.id,
      order: index + 1,
      depends_on: previous?.id || null,
      enables: following?.id || null,
      is_parallel_capable: parallelPhaseIds.includes(current.id),
      typical_duration: durationFor(current.id)
    };
    result.push(record);
  }
  return result;
}
/**
 * Groups tool names by the platforms they list and by the phases they list.
 *
 * Groups are collected in a Map, so a platform or phase whose name matches
 * an Object.prototype member (for example "constructor") gets its own group
 * instead of making the `push` call fail. Tools whose `platforms` or
 * `phases` field is missing or not an array are skipped for that grouping.
 *
 * @param tools - Tools to group; `name`, `platforms` and `phases` are read.
 * @returns Two entries: 'platform_compatibility' followed by 'phase_synergy'.
 */
function generateToolCompatibilityMatrix(tools: any[]): CompatibilityMatrix[] {
  // Collects tool names under every value listed in the given array field.
  const groupToolNamesBy = (field: 'platforms' | 'phases'): Record<string, string[]> => {
    const groups = new Map<string, string[]>();
    for (const tool of tools) {
      const keys = tool[field];
      if (!Array.isArray(keys)) {
        continue;
      }
      for (const key of keys) {
        const names = groups.get(key);
        if (names) {
          names.push(tool.name);
        } else {
          groups.set(key, [tool.name]);
        }
      }
    }
    return Object.fromEntries(groups);
  };

  return [
    { type: 'platform_compatibility', groups: groupToolNamesBy('platforms') },
    { type: 'phase_synergy', groups: groupToolNamesBy('phases') }
  ];
}
async function loadRawData(): Promise<ToolsData> {
if (!cachedData) {
const yamlPath = path.join(process.cwd(), 'src/data/tools.yaml');
@ -113,9 +254,19 @@ async function loadRawData(): Promise<ToolsData> {
try {
cachedData = ToolsDataSchema.parse(rawData);
// Generate data version for embeddings tracking
// Enhanced: Add default skill level descriptions if not provided
if (!cachedData.skill_levels || Object.keys(cachedData.skill_levels).length === 0) {
cachedData.skill_levels = {
novice: "Minimal technical background required, guided interfaces",
beginner: "Basic IT knowledge, some command-line familiarity helpful",
intermediate: "Solid technical foundation, comfortable with various tools",
advanced: "Extensive experience, deep technical understanding required",
expert: "Specialist knowledge, cutting-edge techniques and complex scenarios"
};
}
dataVersion = generateDataVersion(cachedData);
console.log(`[DATA SERVICE] Loaded data version: ${dataVersion}`);
console.log(`[DATA SERVICE] Loaded enhanced data version: ${dataVersion}`);
} catch (error) {
console.error('YAML validation failed:', error);
@ -141,42 +292,73 @@ export async function getToolsData(): Promise<ToolsData> {
};
lastRandomizationDate = today;
// Clear compressed cache when randomized data changes
cachedCompressedData = null;
}
return cachedRandomizedData;
}
export async function getCompressedToolsDataForAI(): Promise<CompressedToolsData> {
export async function getCompressedToolsDataForAI(): Promise<EnhancedCompressedToolsData> {
if (!cachedCompressedData) {
const data = await getToolsData();
// Enhanced: More detailed tool information for micro-tasks
const compressedTools = data.tools
.filter(tool => tool.type !== 'concept')
.map(tool => {
const { projectUrl, statusUrl, ...compressedTool } = tool;
return compressedTool;
return {
...compressedTool,
// Enhanced: Add computed fields for AI
is_hosted: projectUrl !== undefined && projectUrl !== null && projectUrl !== "" && projectUrl.trim() !== "",
is_open_source: tool.license && tool.license !== 'Proprietary',
complexity_score: tool.skillLevel === 'expert' ? 5 :
tool.skillLevel === 'advanced' ? 4 :
tool.skillLevel === 'intermediate' ? 3 :
tool.skillLevel === 'beginner' ? 2 : 1,
// Enhanced: Phase-specific suitability hints
phase_suitability: tool.phases?.map(phase => ({
phase,
primary_use: tool.tags?.find(tag => tag.includes(phase)) ? 'primary' : 'secondary'
})) || []
};
});
const concepts = data.tools
.filter(tool => tool.type === 'concept')
.map(concept => {
const { projectUrl, statusUrl, platforms, accessType, license, ...compressedConcept } = concept;
return compressedConcept;
return {
...compressedConcept,
// Enhanced: Learning difficulty indicator
learning_complexity: concept.skillLevel === 'expert' ? 'very_high' :
concept.skillLevel === 'advanced' ? 'high' :
concept.skillLevel === 'intermediate' ? 'medium' :
'low'
};
});
// Enhanced: Add rich context data
const domainRelationships = generateDomainRelationships(data.domains, compressedTools);
const phaseDependencies = generatePhaseDependencies(data.phases);
const toolCompatibilityMatrix = generateToolCompatibilityMatrix(compressedTools);
cachedCompressedData = {
tools: compressedTools,
concepts: concepts,
domains: data.domains,
phases: data.phases,
'domain-agnostic-software': data['domain-agnostic-software']
// scenarios intentionally excluded from AI data
'domain-agnostic-software': data['domain-agnostic-software'],
scenarios: data.scenarios, // Include scenarios for context
skill_levels: data.skill_levels || {},
// Enhanced context for micro-tasks
domain_relationships: domainRelationships,
phase_dependencies: phaseDependencies,
tool_compatibility_matrix: toolCompatibilityMatrix
};
console.log(`[DATA SERVICE] Generated compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
console.log(`[DATA SERVICE] Generated enhanced compressed data: ${compressedTools.length} tools, ${concepts.length} concepts`);
console.log(`[DATA SERVICE] Added context: ${domainRelationships.length} domain relationships, ${phaseDependencies.length} phase dependencies`);
}
return cachedCompressedData;
@ -193,5 +375,5 @@ export function clearCache(): void {
lastRandomizationDate = null;
dataVersion = null;
console.log('[DATA SERVICE] Cache cleared');
console.log('[DATA SERVICE] Enhanced cache cleared');
}