// src/pages/api/ai/query.ts - FIXED: Rate limiting for micro-task pipeline

import type { APIRoute } from 'astro';
import { withAPIAuth } from '../../../utils/auth.js';
import { apiError, apiServerError, createAuthErrorResponse } from '../../../utils/api.js';
import { enqueueApiCall } from '../../../utils/rateLimitedQueue.js';
import { aiPipeline } from '../../../utils/aiPipeline.js';

export const prerender = false;

/** Per-user counters for the current rate-limit window. */
interface RateLimitData {
  /** Number of main API requests accepted in the current window. */
  count: number;
  /** Epoch milliseconds at which the current window ends. */
  resetTime: number;
  /** Number of AI calls attributed to the user in the current window. */
  microTaskCount: number;
}

/** Query modes accepted by this endpoint. */
type QueryMode = 'workflow' | 'tool';

/** Fields read from the JSON request body; all of them are client-supplied. */
interface QueryRequestBody {
  query?: unknown;
  mode?: unknown;
  // Passed through as supplied by the client; only its truthiness is checked.
  taskId?: string;
}

/**
 * Parses an integer setting, falling back to `fallback` when the value is
 * missing, empty, or not a number. Without the NaN guard a malformed value
 * would make every `>=` limit comparison false and silently disable limiting.
 */
function readIntEnv(raw: string | undefined, fallback: number): number {
  const parsed = Number.parseInt(raw || String(fallback), 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}

const rateLimitStore = new Map<string, RateLimitData>();
const RATE_LIMIT_WINDOW = 60 * 1000; // 1 minute
const MAIN_RATE_LIMIT_MAX = readIntEnv(process.env.AI_RATE_LIMIT_MAX_REQUESTS, 4);
const MICRO_TASK_TOTAL_LIMIT = readIntEnv(process.env.AI_MICRO_TASK_TOTAL_LIMIT, 50);
const VALID_MODES: readonly string[] = ['workflow', 'tool'];

/** Type guard: true when `value` is one of the supported query modes. */
function isQueryMode(value: unknown): value is QueryMode {
  return typeof value === 'string' && VALID_MODES.includes(value);
}

/**
 * Strips fenced code blocks, HTML-like tags, chat-role prefixes and
 * "ignore previous instructions"-style phrases from user input, then
 * truncates to 2000 characters and collapses whitespace runs to single spaces.
 *
 * @param input - Raw query text from the client.
 * @returns The cleaned text, with removed spans replaced by bracketed markers.
 */
function sanitizeInput(input: string): string {
  let sanitized = input
    .replace(/```[\s\S]*?```/g, '[CODE_BLOCK_REMOVED]')
    .replace(/<\/?[^>]+(>|$)/g, '')
    .replace(/\b(system|assistant|user)\s*[:]/gi, '[ROLE_REMOVED]')
    .replace(
      /\b(ignore|forget|disregard)\s+(previous|all|your)\s+(instructions?|context|rules?)/gi,
      '[INSTRUCTION_REMOVED]'
    )
    .trim();

  // Truncation happens before whitespace collapsing, so the result may be
  // shorter than 2000 characters even for very long inputs.
  sanitized = sanitized.slice(0, 2000).replace(/\s+/g, ' ');

  return sanitized;
}

/**
 * Checks and updates the per-user main request counter.
 *
 * Starts a fresh window when the user has no entry or the previous window has
 * expired. Otherwise rejects when either the main request limit or the
 * micro-task limit for the window has been reached, and counts the request
 * when it is allowed.
 *
 * @param userId - Key under which the user's counters are stored.
 * @returns Whether the request may proceed, a reason when it may not, and the
 *          micro-task budget left in the window when it may.
 */
function checkRateLimit(userId: string): { allowed: boolean; reason?: string; microTasksRemaining?: number } {
  const now = Date.now();
  const userLimit = rateLimitStore.get(userId);

  if (!userLimit || now > userLimit.resetTime) {
    rateLimitStore.set(userId, {
      count: 1,
      resetTime: now + RATE_LIMIT_WINDOW,
      microTaskCount: 0
    });
    return { allowed: true, microTasksRemaining: MICRO_TASK_TOTAL_LIMIT };
  }

  if (userLimit.count >= MAIN_RATE_LIMIT_MAX) {
    return {
      allowed: false,
      reason: `Main rate limit exceeded. Max ${MAIN_RATE_LIMIT_MAX} requests per minute.`
    };
  }

  if (userLimit.microTaskCount >= MICRO_TASK_TOTAL_LIMIT) {
    return {
      allowed: false,
      reason: `Micro-task limit exceeded. Max ${MICRO_TASK_TOTAL_LIMIT} AI calls per minute.`
    };
  }

  userLimit.count++;

  return {
    allowed: true,
    microTasksRemaining: MICRO_TASK_TOTAL_LIMIT - userLimit.microTaskCount
  };
}

/**
 * Adds the AI calls made by a finished pipeline run to the user's window.
 * Does nothing when the user has no entry in the store.
 *
 * @param userId - Key under which the user's counters are stored.
 * @param aiCallsMade - Number of AI calls to attribute to the user.
 */
function incrementMicroTaskCount(userId: string, aiCallsMade: number): void {
  // A NaN here would poison the counter and make the limit check always pass.
  if (!Number.isFinite(aiCallsMade)) {
    return;
  }

  const userLimit = rateLimitStore.get(userId);
  if (userLimit) {
    userLimit.microTaskCount += aiCallsMade;
    console.log(`[RATE LIMIT] User ${userId} now at ${userLimit.microTaskCount}/${MICRO_TASK_TOTAL_LIMIT} micro-task calls`);
  }
}

/**
 * Drops expired entries from the rate-limit store and, if the store still
 * holds more than 1000 entries, evicts those whose windows end soonest.
 */
function cleanupExpiredRateLimits(): void {
  const now = Date.now();
  const maxStoreSize = 1000;

  for (const [userId, limit] of rateLimitStore.entries()) {
    if (now > limit.resetTime) {
      rateLimitStore.delete(userId);
    }
  }

  if (rateLimitStore.size > maxStoreSize) {
    const entries = Array.from(rateLimitStore.entries());
    entries.sort((a, b) => a[1].resetTime - b[1].resetTime);

    const toRemove = entries.slice(0, entries.length - maxStoreSize);
    toRemove.forEach(([userId]) => rateLimitStore.delete(userId));

    console.log(`[RATE LIMIT] Cleanup: removed ${toRemove.length} old entries`);
  }
}

setInterval(cleanupExpiredRateLimits, 5 * 60 * 1000);

/**
 * POST handler for AI queries.
 *
 * Authenticates the caller, applies the per-user rate limits, validates and
 * sanitizes the query, runs it through the AI pipeline via the rate-limited
 * queue, and returns the recommendation together with processing statistics
 * and the caller's remaining rate-limit budget.
 */
export const POST: APIRoute = async ({ request }) => {
  try {
    const authResult = await withAPIAuth(request, 'ai');
    if (!authResult.authenticated) {
      return createAuthErrorResponse();
    }

    const userId = authResult.userId;

    const rateLimitResult = checkRateLimit(userId);
    if (!rateLimitResult.allowed) {
      return apiError.rateLimit(rateLimitResult.reason || 'Rate limit exceeded');
    }

    // A malformed body is a client error, not a pipeline failure.
    let parsedBody: unknown;
    try {
      parsedBody = await request.json();
    } catch {
      return apiError.badRequest('Invalid JSON body');
    }

    // `null` and primitives are valid JSON but carry no fields; treat them as
    // an empty object so they fall through to the "Query required" response.
    const body = (typeof parsedBody === 'object' && parsedBody !== null ? parsedBody : {}) as QueryRequestBody;
    const { query, mode = 'workflow', taskId: clientTaskId } = body;

    const queryLength = typeof query === 'string' ? query.length : 0;
    console.log(`[MICRO-TASK API] Received request - TaskId: ${clientTaskId}, Mode: ${mode}, Query length: ${queryLength}`);
    console.log(`[MICRO-TASK API] Micro-task calls remaining: ${rateLimitResult.microTasksRemaining}`);

    if (!query || typeof query !== 'string') {
      console.log(`[MICRO-TASK API] Invalid query for task ${clientTaskId}`);
      return apiError.badRequest('Query required');
    }

    if (!isQueryMode(mode)) {
      console.log(`[MICRO-TASK API] Invalid mode for task ${clientTaskId}: ${mode}`);
      return apiError.badRequest('Invalid mode. Must be "workflow" or "tool"');
    }

    const sanitizedQuery = sanitizeInput(query);
    // NOTE(review): sanitizeInput never emits '[FILTERED]' (its markers are
    // '[CODE_BLOCK_REMOVED]', '[ROLE_REMOVED]' and '[INSTRUCTION_REMOVED]'),
    // so this only triggers when the client typed the literal text. Confirm
    // which markers, if any, should cause a rejection.
    if (sanitizedQuery.includes('[FILTERED]')) {
      console.log(`[MICRO-TASK API] Filtered input detected for task ${clientTaskId}`);
      return apiError.badRequest('Invalid input detected');
    }

    const taskId = clientTaskId || `ai_${userId}_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;

    console.log(`[MICRO-TASK API] About to enqueue micro-task pipeline ${taskId}`);

    const result = await enqueueApiCall(() =>
      aiPipeline.processQuery(sanitizedQuery, mode)
    , taskId);

    if (!result || !result.recommendation) {
      return apiServerError.unavailable('No response from micro-task AI pipeline');
    }

    const stats = result.processingStats;
    const estimatedAICallsMade = stats.microTasksCompleted + stats.microTasksFailed;
    incrementMicroTaskCount(userId, estimatedAICallsMade);

    console.log(`[MICRO-TASK API] Pipeline completed for ${taskId}:`);
    console.log(` - Mode: ${mode}`);
    console.log(` - User: ${userId}`);
    console.log(` - Query length: ${sanitizedQuery.length}`);
    console.log(` - Processing time: ${stats.processingTimeMs}ms`);
    console.log(` - Micro-tasks completed: ${stats.microTasksCompleted}`);
    console.log(` - Micro-tasks failed: ${stats.microTasksFailed}`);
    console.log(` - Estimated AI calls: ${estimatedAICallsMade}`);
    console.log(` - Embeddings used: ${stats.embeddingsUsed}`);
    console.log(` - Final items: ${stats.finalSelectedItems}`);

    const currentLimit = rateLimitStore.get(userId);
    const remainingMicroTasks = currentLimit
      ? Math.max(0, MICRO_TASK_TOTAL_LIMIT - currentLimit.microTaskCount)
      : MICRO_TASK_TOTAL_LIMIT;

    // Guard the ratios: with zero micro-tasks the divisions would produce
    // NaN/Infinity, which JSON.stringify serializes as null.
    const hasTasks = estimatedAICallsMade > 0;
    const microTasksSuccessRate = hasTasks ? stats.microTasksCompleted / estimatedAICallsMade : 0;
    const averageTaskTime = hasTasks ? stats.processingTimeMs / estimatedAICallsMade : 0;

    return new Response(JSON.stringify({
      success: true,
      mode,
      taskId,
      recommendation: result.recommendation,
      query: sanitizedQuery,
      processingStats: {
        ...result.processingStats,
        pipelineType: 'micro-task',
        microTasksSuccessRate,
        averageTaskTime,
        estimatedAICallsMade
      },
      rateLimitInfo: {
        mainRequestsRemaining: Math.max(0, MAIN_RATE_LIMIT_MAX - (currentLimit?.count ?? 0)),
        microTaskCallsRemaining: remainingMicroTasks,
        // Report when the user's window actually ends, not a fresh window
        // measured from the moment the response is built.
        resetTime: currentLimit?.resetTime ?? Date.now() + RATE_LIMIT_WINDOW
      }
    }), {
      status: 200,
      headers: { 'Content-Type': 'application/json' }
    });

  } catch (error: unknown) {
    console.error('[MICRO-TASK API] Pipeline error:', error);

    // Anything can be thrown; reading `.message` off a non-Error would throw
    // again from inside this handler.
    const message = error instanceof Error ? error.message : String(error);

    if (message.includes('embeddings')) {
      return apiServerError.unavailable('Embeddings service error - using AI fallback');
    } else if (message.includes('micro-task')) {
      return apiServerError.unavailable('Micro-task pipeline error - some analysis steps failed');
    } else if (message.includes('selector')) {
      return apiServerError.unavailable('AI selector service error');
    } else if (message.includes('rate limit')) {
      return apiError.rateLimit('AI service rate limits exceeded during micro-task processing');
    } else {
      return apiServerError.internal('Micro-task AI pipeline error');
    }
  }
};